import gradio as gr
from transformers import pipeline


def generate(
    model_name,
    system_input,
    user_initial_message,
    assistant_initial_message,
    user_input,
):
    # Load the selected model as a CPU text-generation pipeline.
    pipe = pipeline("text-generation", model=model_name, device="cpu")

    # Build a short chat history: the system prompt, a fixed opening
    # user/assistant exchange, and the actual user message.
    message_template = [
        {"role": "system", "content": system_input},
        {"role": "user", "content": user_initial_message},
        {"role": "assistant", "content": assistant_initial_message},
        {"role": "user", "content": user_input},
    ]

    # Render the history with the model's own chat template and append the
    # generation prompt so the model continues as the assistant.
    prompt = pipe.tokenizer.apply_chat_template(
        message_template, tokenize=False, add_generation_prompt=True
    )

    # These models use contrastive search (penalty_alpha + top_k), with
    # top_k and the repetition penalty tuned individually per model.
    contrastive_settings = {
        "Felladrin/Pythia-31M-Chat-v1": {"top_k": 2, "repetition_penalty": 1.0016},
        "Felladrin/Llama-68M-Chat-v1": {"top_k": 4, "repetition_penalty": 1.043},
        "Felladrin/Smol-Llama-101M-Chat-v1": {"top_k": 4, "repetition_penalty": 1.105},
        "Felladrin/Llama-160M-Chat-v1": {"top_k": 4, "repetition_penalty": 1.01},
        "Felladrin/TinyMistral-248M-SFT-v4": {"top_k": 5, "repetition_penalty": 1.001},
    }

    if model_name in contrastive_settings:
        outputs = pipe(
            prompt,
            max_new_tokens=250,
            use_cache=True,
            penalty_alpha=0.5,
            **contrastive_settings[model_name],
        )
    else:
        # Remaining models fall back to plain sampling.
        outputs = pipe(
            prompt,
            max_new_tokens=250,
            do_sample=True,
            temperature=0.65,
            top_k=35,
            top_p=0.55,
            repetition_penalty=1.176,
        )

    return outputs[0]["generated_text"]


model_choices = [
    "Felladrin/Llama-160M-Chat-v1",
    "Felladrin/Llama-68M-Chat-v1",
    "Felladrin/Minueza-32Mx2-Chat",
    "Felladrin/Minueza-32M-UltraChat",
    "Felladrin/Minueza-32M-Deita",
    "Felladrin/Smol-Llama-101M-Chat-v1",
    "Felladrin/Pythia-31M-Chat-v1",
    "Felladrin/Minueza-32M-Chat",
    "Felladrin/TinyMistral-248M-SFT-v4",
]

g = gr.Interface(
    fn=generate,
    inputs=[
        gr.components.Dropdown(choices=model_choices, label="Model", value=model_choices[0], interactive=True),
        gr.components.Textbox(lines=2, label="System Message", value="You are a highly knowledgeable and friendly assistant. Your goal is to understand and respond to user inquiries with clarity. Your interactions are always respectful, helpful, and focused on delivering the most accurate information to the user."),
        gr.components.Textbox(lines=2, label="User Initial Message", value="Hey! Got a question for you!"),
        gr.components.Textbox(lines=2, label="Assistant Initial Message", value="Sure! What's it?"),
        gr.components.Textbox(lines=2, label="User Message", value="Can you list some potential applications for quantum computing?"),
    ],
    outputs=[gr.Textbox(lines=24, label="Output")],
    title="A place to try out text-generation models fine-tuned by Felladrin",
    concurrency_limit=1,
)

g.launch(max_threads=2)
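For a quick smoke test without launching the UI, the generate function can also be called directly. A minimal sketch, assuming the first call can download the model weights from the Hub (network access and some disk space required) and that a CPU is sufficient for a ~31M-parameter model:

# Direct call for local testing; run this instead of g.launch().
reply = generate(
    model_name="Felladrin/Pythia-31M-Chat-v1",
    system_input="You are a helpful assistant.",
    user_initial_message="Hey! Got a question for you!",
    assistant_initial_message="Sure! What's it?",
    user_input="Can you list some potential applications for quantum computing?",
)
print(reply)

Note that the printed text includes the rendered chat prompt followed by the completion, since the pipeline's return_full_text option is left at its default.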