File size: 1,737 Bytes
1e2d981
6da1c26
1e2d981
2603190
c2aa89c
 
2603190
1cd873c
fca7347
c2aa89c
 
 
8926d1f
c2aa89c
1e2d981
 
 
 
 
 
 
 
 
 
68eded2
1e2d981
 
 
 
 
 
 
 
 
1cd873c
1d9d6ab
1cd873c
1e2d981
1cd873c
1e2d981
92cb988
1d9d6ab
 
 
52ae9af
d47337b
027f768
d47337b
1e2d981
8a8d916
1e2d981
 
 
02743b6
6da1c26
02743b6
 
1cd873c
1e2d981
 
 
 
 
 
 
 
 
6da1c26
1e2d981
 
 
 
8a8d916
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import gradio as gr
from llama_cpp import Llama

# HF Hub repo id of the quantized model; also shown in the UI description below.
model = "bartowski/Llama-3.2-1B-Instruct-GGUF"
# Download (or reuse cached) GGUF weights and load them at import time.
# NOTE(review): this runs on module import and may download ~1 GB on first start.
llm = Llama.from_pretrained(
    repo_id=model,
    filename="Llama-3.2-1B-Instruct-Q8_0.gguf",  # Q8_0 quantization of the 1B model
    verbose=True,
    use_mmap=True,   # map the weights file instead of reading it fully into RAM
    use_mlock=True,  # pin mapped pages so the OS can't swap them out
    n_threads=2,          # generation threads
    n_threads_batch=2,    # prompt-processing threads
    n_ctx=2000,  # context window (tokens); prompts beyond this are truncated by llama.cpp
)


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message* given the conversation so far.

    Args:
        message: The latest user message (plain text).
        history: Prior turns as ``(user_text, assistant_text)`` pairs; empty
            strings on either side are skipped.
        system_message: System prompt placed first in the message list.
        max_tokens: Cap on the number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.

    Yields:
        The accumulated assistant response after each streamed delta, so the
        Gradio UI can render progressive output.
    """
    messages = [{"role": "system", "content": system_message}]

    # Replay the prior conversation in order, skipping empty sides of a turn.
    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    completion = llm.create_chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    )

    # Accumulate streamed deltas; "chunk" avoids shadowing the `message`
    # parameter (the original loop variable clobbered it).
    response = ""
    for chunk in completion:
        content = chunk["choices"][0]["delta"].get("content")
        if content is not None:
            response += content
            yield response


# Chat UI wired to `respond`; the extra widgets below are passed to `respond`
# as its `system_message`, `max_tokens`, `temperature`, and `top_p` arguments,
# in this order, after (message, history).
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a helpful assistant.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    description=model,  # show the model repo id under the chat title
)


# Launch the Gradio server only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()