File size: 1,610 Bytes
8be9a5e
 
f1d297d
 
 
8be9a5e
3ebac32
 
 
8be9a5e
0a0f8d3
1304134
0a0f8d3
 
 
1304134
0a0f8d3
 
 
 
6f91c68
0a0f8d3
 
6f91c68
0a0f8d3
 
 
 
 
 
 
 
6f91c68
dc30f13
 
0a0f8d3
6f91c68
dc30f13
 
0a0f8d3
1304134
0a0f8d3
6f91c68
f1d297d
3ebac32
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import os
import threading  # NOTE(review): imported but never used in this file — confirm before removing
import gradio as gr
from llama_index.llms.ollama import Ollama
from llama_index.core.llms import ChatMessage

# Configure the Ollama endpoint before the client is created.
os.environ['OLLAMA_HOST'] = '127.0.0.1:11434'
# CORS wildcard for the Ollama server — NOTE(review): allows any origin; confirm intended.
os.environ['OLLAMA_ORIGINS'] = '*'

# Module-level LLM client shared by all handlers; generous timeout for slow local inference.
llm = Ollama(model="llama3", request_timeout=120.0)

def get_completion(prompt):
    """Run a one-shot (non-chat) completion against the local llama3 model.

    Args:
        prompt: Plain-text prompt forwarded to ``llm.complete``.

    Returns:
        str: The generated completion text.
    """
    response = llm.complete(prompt)
    # Fix: ``complete`` returns a CompletionResponse object, not a string;
    # extract ``.text`` so the gradio Textbox shows the answer, not a repr.
    return response.text

def chat_with_llm(messages):
    """Forward a conversation to the chat endpoint.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts.

    Returns:
        The raw chat response object produced by ``llm.chat``.
    """
    converted = []
    for entry in messages:
        converted.append(ChatMessage(role=entry["role"], content=entry["content"]))
    return llm.chat(converted)

def generate_response(prompt):
    """Gradio callback for the single-completion tab; delegates to the helper."""
    result = get_completion(prompt)
    return result

def generate_chat_response(history):
    """Gradio callback for the chat tab.

    Args:
        history: Chat history as a list of ``(user_message, assistant_message)``
            pairs (the second element may be falsy for the turn being answered)
            — presumably the gr.Chatbot tuple format; verify against caller.

    Returns:
        str: The assistant's reply text.
    """
    # Seed the conversation with the persona, then replay the visible history.
    messages = [{"role": "system", "content": "You are a pirate with a colorful personality"}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    response = chat_with_llm(messages)
    # Bug fix: a llama_index ChatResponse is not subscriptable — the original
    # ``response["content"]`` raised TypeError. The reply text lives on
    # ``response.message.content``.
    return response.message.content

# --- Gradio UI wiring -------------------------------------------------------

# Tab 1: one-shot prompt -> completion text.
single_input = gr.Textbox(lines=2, placeholder="Enter your prompt here...")
single_output = gr.Textbox()
single_interface = gr.Interface(fn=generate_response, inputs=single_input, outputs=single_output, title="LLM Single Completion")

# Tab 2: multi-turn chat. NOTE(review): gr.Chatbot is used here as an *input*
# component, so the handler receives the chat history; confirm this matches
# the installed gradio version's Interface/Chatbot contract.
chat_input = gr.Chatbot()
chat_output = gr.Textbox()
chat_interface = gr.Interface(fn=generate_chat_response, inputs=chat_input, outputs=chat_output, title="LLM Chat")

# Combine both interfaces into a single tabbed app.
app = gr.TabbedInterface([single_interface, chat_interface], ["Single Completion", "Chat"])

if __name__ == "__main__":
    # Bind on all interfaces on port 7860 and also request a public share link.
    app.launch(server_name="0.0.0.0", server_port=7860, share=True)