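# app.py: Gradio chat demo for the AugustLight/LLight-3.2-3B-Instruct GGUF model,
# served locally with llama-cpp-python and intended to run as a Hugging Face Space.
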
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
model = None
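
# Download the GGUF weights from the Hugging Face Hub and initialise llama.cpp.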
def load_model():
    global model
    try:
        print("Starting model download from the Hub...")
        model_path = hf_hub_download(
            repo_id="AugustLight/LLight-3.2-3B-Instruct",
            filename="Llight.Q8_0.gguf",
            repo_type="model"
        )
        print(f"Model downloaded to: {model_path}")
        model = Llama(
            model_path=model_path,
            n_ctx=2048,      # context window size in tokens
            n_threads=4,
            n_batch=512
        )
        print("Model initialised successfully!")
        return model
    except Exception as e:
        print(f"Detailed error while loading the model: {str(e)}")
        raise e

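# Stream a completion for the current turn, prefixing the prompt with the
# system message and the prior chat history.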
def respond(message, history, system_message, max_new_tokens, temperature, top_p):
    try:
        global model
        if model is None:
            model = load_model()
        # Build a plain-text prompt: system message, then alternating turns
        context = f"{system_message}\n\n"
        for user_msg, assistant_msg in history:
            context += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
        context += f"User: {message}\nAssistant: "
        print(f"Generating a response for a context of {len(context)} characters")
        response_text = ""
        # Use streaming generation
        for response in model(
            prompt=context,
            max_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            stop=["User:", "\n\n", "<|endoftext|>"],
            echo=False,
            stream=True
        ):
            chunk = response['choices'][0]['text']
            response_text += chunk
            print(f"Partial response: {chunk}")
            yield response_text  # yield the accumulated text so far
        print("Response fully generated.")
    except Exception as e:
        error_msg = f"An error occurred: {str(e)}"
        print(error_msg)
        yield error_msg

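# Gradio UI: chat window, message box, generation parameters, and a clear button.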
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Message")
    with gr.Accordion("Parameters", open=False):
        system = gr.Textbox(
            value="You are a friendly and helpful assistant. Answer thoughtfully and to the point.",
            label="System message"
        )
        max_new_tokens = gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max new tokens"
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=0.3,
            step=0.1,
            label="Temperature"
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)"
        )
    clear = gr.Button("Clear")

    def user(user_message, history):
        # Append the new user turn with an empty assistant slot and clear the textbox
        return "", history + [[user_message, None]]

    def bot(history, system_message, max_new_tokens, temperature, top_p):
        # Stream the assistant reply into the last history entry
        message = history[-1][0]
        for response_text in respond(message, history[:-1], system_message, max_new_tokens, temperature, top_p):
            history[-1][1] = response_text
            yield history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, system, max_new_tokens, temperature, top_p], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

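# Pre-load the model at startup so the first chat request does not pay the
# download and initialisation cost.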
if __name__ == "__main__":
    try:
        print("Initialising the application...")
        model = load_model()
        print("Model loaded successfully at startup")
    except Exception as e:
        print(f"Error during initialisation: {str(e)}")
    demo.launch()
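
# Note: deploying this as a Space assumes a requirements.txt next to app.py
# listing (at least) gradio, huggingface_hub, and llama-cpp-python; exact
# version pins are not specified here.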