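# app.py: Gradio chat demo for the AugustLight/LLight-3.2-3B-Instruct GGUF model,
# served locally with llama-cpp-python and intended to run as a Hugging Face Space.
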
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
model = None
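
# Download the GGUF weights from the Hugging Face Hub and initialise llama.cpp.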
def load_model():
    global model
    try:
        print("Starting model download from the Hub...")
        model_path = hf_hub_download(
            repo_id="AugustLight/LLight-3.2-3B-Instruct",
            filename="Llight.Q8_0.gguf",
            repo_type="model"
        )
        print(f"Model downloaded to: {model_path}")
        model = Llama(
            model_path=model_path,
            n_ctx=2048,      # context window size in tokens
            n_threads=4,
            n_batch=512
        )
        print("Model initialised successfully!")
        return model
    except Exception as e:
        print(f"Detailed error while loading the model: {str(e)}")
        raise e

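# Stream a completion for the current turn, prefixing the prompt with the
# system message and the prior chat history.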
def respond(message, history, system_message, max_new_tokens, temperature, top_p):
    try:
        global model
        if model is None:
            model = load_model()
        # Build a plain-text prompt: system message, then alternating turns
        context = f"{system_message}\n\n"
        for user_msg, assistant_msg in history:
            context += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
        context += f"User: {message}\nAssistant: "
        print(f"Generating a response for a context of {len(context)} characters")
        response_text = ""
        # Use streaming generation
        for response in model(
            prompt=context,
            max_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            stop=["User:", "\n\n", "<|endoftext|>"],
            echo=False,
            stream=True
        ):
            chunk = response['choices'][0]['text']
            response_text += chunk
            print(f"Partial response: {chunk}")
            yield response_text  # yield the accumulated text so far
        print("Response fully generated.")
    except Exception as e:
        error_msg = f"An error occurred: {str(e)}"
        print(error_msg)
        yield error_msg

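# Gradio UI: chat window, message box, generation parameters, and a clear button.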
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Message")
    with gr.Accordion("Parameters", open=False):
        system = gr.Textbox(
            value="You are a friendly and helpful assistant. Answer thoughtfully and to the point.",
            label="System message"
        )
        max_new_tokens = gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max new tokens"
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=0.3,
            step=0.1,
            label="Temperature"
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)"
        )
    clear = gr.Button("Clear")

    def user(user_message, history):
        # Append the new user turn with an empty assistant slot and clear the textbox
        return "", history + [[user_message, None]]

    def bot(history, system_message, max_new_tokens, temperature, top_p):
        # Stream the assistant reply into the last history entry
        message = history[-1][0]
        for response_text in respond(message, history[:-1], system_message, max_new_tokens, temperature, top_p):
            history[-1][1] = response_text
            yield history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, system, max_new_tokens, temperature, top_p], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

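# Pre-load the model at startup so the first chat request does not pay the
# download and initialisation cost.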
if __name__ == "__main__":
    try:
        print("Initialising the application...")
        model = load_model()
        print("Model loaded successfully at startup")
    except Exception as e:
        print(f"Error during initialisation: {str(e)}")
    demo.launch()
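
# Note: deploying this as a Space assumes a requirements.txt next to app.py
# listing (at least) gradio, huggingface_hub, and llama-cpp-python; exact
# version pins are not specified here.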