import gradio as gr
from transformers import pipeline

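# Generate a reply for a fixed-shape conversation: system prompt, one initial
# user/assistant exchange, and a new user message.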
def generate(
    model_name,
    system_input,
    user_initial_message,
    assistant_initial_message,
    user_input,
):
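    # Load a text-generation pipeline for the selected model on CPU (reloaded on every call).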
    pipe = pipeline("text-generation", model=model_name, device="cpu")
    
    message_template = [
        {"role": "system", "content": system_input},
        {"role": "user", "content": user_initial_message},
        {"role": "assistant", "content": assistant_initial_message},
        {"role": "user", "content": user_input},
    ]
    
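    # Render the messages with the model's chat template and append the generation prompt.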
    prompt = pipe.tokenizer.apply_chat_template(message_template, tokenize=False, add_generation_prompt=True)
    
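    # Model-specific decoding settings: contrastive search (penalty_alpha + top_k) with
    # tuned repetition penalties; unlisted models fall back to plain sampling.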
    if model_name == "Felladrin/Pythia-31M-Chat-v1":
        outputs = pipe(prompt, max_new_tokens=250, use_cache=True, penalty_alpha=0.5, top_k=2, repetition_penalty=1.0016)
    elif model_name == "Felladrin/Llama-68M-Chat-v1":
        outputs = pipe(prompt, max_new_tokens=250, use_cache=True, penalty_alpha=0.5, top_k=4, repetition_penalty=1.043)
    elif model_name == "Felladrin/Smol-Llama-101M-Chat-v1":
        outputs = pipe(prompt, max_new_tokens=250, use_cache=True, penalty_alpha=0.5, top_k=4, repetition_penalty=1.105)
    elif model_name == "Felladrin/Llama-160M-Chat-v1":
        outputs = pipe(prompt, max_new_tokens=250, use_cache=True, penalty_alpha=0.5, top_k=4, repetition_penalty=1.01)
    elif model_name == "Felladrin/TinyMistral-248M-Chat-v2":
        outputs = pipe(prompt, max_new_tokens=250, use_cache=True, penalty_alpha=0.5, top_k=5, repetition_penalty=1.0)
    else:
        outputs = pipe(prompt, max_new_tokens=250, do_sample=True, temperature=0.65, top_k=35, top_p=0.55, repetition_penalty=1.176)
        
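    # generated_text contains the rendered prompt followed by the model's completion.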
    return outputs[0]["generated_text"]

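# Chat models available in the dropdown.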
model_choices = [
    "Felladrin/Llama-160M-Chat-v1",
    "Felladrin/Minueza-32Mx2-Chat",
    "Felladrin/TinyMistral-248M-Chat-v2",
    "Felladrin/Llama-68M-Chat-v1",
    "Felladrin/Minueza-32M-UltraChat",
    "Felladrin/Minueza-32M-Deita",
    "Felladrin/Smol-Llama-101M-Chat-v1",
    "Felladrin/Pythia-31M-Chat-v1",
    "Felladrin/Minueza-32M-Chat",
]

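# Gradio UI: model picker plus editable system/user/assistant messages; the generated
# text is shown in a single output box.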
g = gr.Interface(
    fn=generate,
    inputs=[
        gr.components.Dropdown(choices=model_choices, label="Model", value=model_choices[0], interactive=True),
        gr.components.Textbox(lines=2, label="System Message", value="You are a highly knowledgeable and friendly assistant. Your goal is to understand and respond to user inquiries with clarity. Your interactions are always respectful, helpful, and focused on delivering the most accurate information to the user."),
        gr.components.Textbox(lines=2, label="User Initial Message", value="Hey! Got a question for you!"),
        gr.components.Textbox(lines=2, label="Assistant Initial Message", value="Sure! What's it?"),
        gr.components.Textbox(lines=2, label="User Message", value="Can you list some potential applications for quantum computing?"),
    ],
    outputs=[gr.Textbox(lines=24, label="Output")],
    title="A place to try out text-generation models fine-tuned by Felladrin",
    concurrency_limit=1
)

g.launch(max_threads=2)