Vineedhar committed
Commit 6788682
1 Parent(s): 46702b5

Update app.py

Files changed (1):
  app.py +36 -8
app.py CHANGED
@@ -1,17 +1,45 @@
 
import streamlit as st
- from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

# Load your model and tokenizer from Hugging Face
model_name = "orYx-models/finetuned-tiny-llama-medical-papers"
- token = "Tinyllama_secret" # Replace <your_token> with your actual Hugging Face API token
- model = AutoModelForSequenceClassification.from_pretrained(model_name, token=token)
- tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)

# Define the pipeline with your model
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

- text = st.text_area("Enter some text:")

- if text:
-     out = pipe(text)
-     st.json(out)

+ from time import perf_counter
import streamlit as st
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, GenerationConfig
+
+ def generate_response(user_input):
+     prompt = formatted_prompt(user_input)  # assumes a formatted_prompt() helper defined elsewhere in app.py (see note below)
+
+     generation_config = GenerationConfig(
+         penalty_alpha=0.6,  # ignored while do_sample=True; contrastive search needs do_sample=False
+         do_sample=True,
+         top_k=5,
+         temperature=0.5,
+         repetition_penalty=1.2,
+         max_new_tokens=500,
+         pad_token_id=tokenizer.eos_token_id
+     )
+     start_time = perf_counter()
+
+     # Tokenize once and keep the tensors on the same device as the model
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+     outputs = model.generate(**inputs, generation_config=generation_config)
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     output_time = perf_counter() - start_time
+     st.write(response)
+     st.write(f"Time taken for inference: {round(output_time, 2)} seconds")
+
+ @st.cache(allow_output_mutation=True)
+ def load_model_and_tokenizer(model_name, token):
+     model = AutoModelForCausalLM.from_pretrained(model_name, token=token)
+     tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
+     return model, tokenizer

# Load your model and tokenizer from Hugging Face
model_name = "orYx-models/finetuned-tiny-llama-medical-papers"
+ token = "Tinyllama_secret" # Replace with your actual Hugging Face Spaces secret (see note below)
+ model, tokenizer = load_model_and_tokenizer(model_name, token)

# Define the pipeline with your model
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

+ user_input = st.text_area("Enter some text:")

+ if user_input:
+     generate_response(user_input)
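
Note: the new generate_response() calls formatted_prompt(), which is not part of this diff and must already be defined elsewhere in app.py. A minimal sketch of such a helper, assuming a TinyLlama/Zephyr-style chat template (the tag names below are an assumption, not taken from this commit):

def formatted_prompt(question: str) -> str:
    # Hypothetical helper: wrap the raw question in a chat-style instruction
    # template; adjust the tags to whatever format the model was fine-tuned on.
    return f"<|user|>\n{question}</s>\n<|assistant|>\n"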
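
Note: passing the literal string "Tinyllama_secret" as token will not authenticate against the Hub. On Hugging Face Spaces, a repository secret is exposed to the running app as an environment variable of the same name, so the real token can be read at startup; a minimal sketch, assuming the secret is named Tinyllama_secret:

import os

# Read the access token from the Space's secret store rather than hardcoding it.
token = os.environ.get("Tinyllama_secret")
model, tokenizer = load_model_and_tokenizer(model_name, token)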