import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
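
# Gradio chat app for the 01-ai/Yi-Coder-9B-Chat model. Replies can come either
# from the Hugging Face Inference API or from a locally loaded copy of the
# model, toggled with a checkbox in the UI.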

# Initialize the Inference API client
client = InferenceClient("01-ai/Yi-Coder-9B-Chat")

# Initialize the tokenizer and the local model
model_path = "01-ai/Yi-Coder-9B-Chat"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto").eval()

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    use_local_model: bool,
):
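    """Generate a streaming chat reply.

    Depending on `use_local_model`, the reply comes either from the locally
    loaded transformers model or from the Hugging Face Inference API.
    """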
    # Build the conversation context from the system message and chat history
    messages = [{"role": "system", "content": system_message}]
    for user, assistant in history:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    if use_local_model:
        # Use the locally loaded model; the conversation is flattened into a
        # plain "role: content" prompt.
        input_text = "\n".join([f"{m['role']}: {m['content']}" for m in messages])
        input_ids = tokenizer.encode(input_text, return_tensors="pt")
        input_ids = input_ids.to(model.device)
        
        with torch.no_grad():
            output = model.generate(
                input_ids,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens, skipping the echoed prompt.
        generated = output[0][input_ids.shape[-1]:]
        yield tokenizer.decode(generated, skip_special_tokens=True).strip()
    else:
        # Use the Hugging Face Inference API with streaming
        response = ""
        for chunk in client.text_generation(
            "\n".join([f"{m['role']}: {m['content']}" for m in messages]),
            max_new_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            response += chunk
            yield response.split("assistant:")[-1].strip()

# Build the Gradio chat interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You answer in Polish. You are a coder/developer/programmer and you write complete code.",
            label="System message"
        ),
        gr.Slider(minimum=1, maximum=2048, value=2048, step=1, label="Maximum new tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (próbkowanie nucleus)",
        ),
        gr.Checkbox(label="Użyj lokalnego modelu", value=False),
    ],
    title="Zaawansowany interfejs czatu AI",
    description="Czatuj z modelem AI, korzystając z Hugging Face Inference API lub lokalnego modelu.",
)

if __name__ == "__main__":
    demo.launch()