# Gradio chat app serving the Persian LLaMA 7B model (HuggingFace Space).
import os
import re

import gradio as gr
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp
# --- Runtime configuration -------------------------------------------------
# Defaults below can be overridden via the environment variables
# MODEL_PATH, TEMPRATURE, MAX_TOKENS.
# BUG FIX: TEMPRATURE and MAX_TOKENS were referenced by LlamaCpp(...) below
# but their definitions were commented out, causing a NameError at import.
MODEL_PATH = os.environ.get(
    "MODEL_PATH",
    "mostafaamiri/persian-llama-7b-GGUF-Q4/persian_llama_7b.Q8_K_M.gguf",
)
TEMPRATURE = float(os.environ.get("TEMPRATURE", "0.3"))  # sampling temperature
MAX_TOKENS = int(os.environ.get("MAX_TOKENS", "800"))    # generation length cap

print("model: " + MODEL_PATH)
print("temp: " + str(TEMPRATURE))
print("max_tokens: " + str(MAX_TOKENS))

n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
n_ctx = 2048  # context window size

# Stream generated tokens to stdout as they are produced.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Available quantizations of the same model:
#   persian_llama_7b.Q4_K_M.gguf
#   persian_llama_7b.Q8_K_M.gguf
#   persian_llama_7b.f32.gguf
# Make sure the model path is correct for your system!
path = MODEL_PATH

llm = LlamaCpp(
    model_path=path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    verbose=True,
    n_ctx=n_ctx,
    temperature=TEMPRATURE,
    max_tokens=MAX_TOKENS,
    top_p=1,
)
def generate_output(text):
    """Stream the model's completion for *text*.

    Yields the accumulated output after each streamed chunk, so a Gradio
    textbox bound to this generator updates progressively.
    """
    chunks = []
    for token in llm.stream(text):
        chunks.append(token)
        yield "".join(chunks)
def clear():
    """Return empty strings to reset the input and output textboxes."""
    empty = ""
    return empty, empty
# def like_log(input, output): | |
# with open("like_log.txt", "a") as f: | |
# f.write("{\"model\": \""+MODEL_PATH+"\",\n\"temprature\": "+TEMPRATURE+",\n\"input\": \""+input+"\",\n\"output\": \""+output+"\"},\n") | |
# def dislike_log(input, output): | |
# with open("dislike_log.txt", "a") as f: | |
# f.write("{\"model\": \""+MODEL_PATH+"\",\n\"temprature\": "+TEMPRATURE+",\n\"input\": \""+input+"\",\n\"output\": \""+output+"\"},\n") | |
# NOTE(review): `dal_image` (header logo markup) is never defined in this
# file; guard the lookup so the UI still builds (without a logo) when it is
# missing — the bare reference was a NameError.
_dal_image = globals().get("dal_image", "")

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # RTL header: model title and contact address.
    gr.Markdown(
        _dal_image +
        """
    <br>
    <div dir="rtl">
    <h1>
    مدل هوش مصنوعی دال
    </h1>
    <p dir="rtl">
    تماس با ما با
    <br/>
    info[@]aidal.ir
    </p>
    </div>
    """)
    with gr.Row():
        inputs = gr.Textbox(label="ورودی", placeholder="سوال خود را وارد کنید", rtl=True)
    with gr.Row():
        submit_btn = gr.Button("ارسال", variant="primary")
        clear_btn = gr.ClearButton(value="پاک کردن", variant="secondary")
    with gr.Row():
        outputs = gr.Textbox(label="خروجی", rtl=True)

    # generate_output is a generator, so the output box streams progressively.
    submit_btn.click(fn=generate_output,
                     inputs=[inputs],
                     outputs=[outputs])
    clear_btn.click(fn=clear, inputs=[], outputs=[inputs, outputs])

    # Like/dislike feedback UI (disabled).
    # BUG FIX: the original left a stray uncommented `)` after this
    # commented-out block, which was a SyntaxError; it has been removed.
    # with gr.Row():
    #     like_btn = gr.Button("👍🏾")
    #     dislike_btn = gr.Button("👎🏾")
    # like_btn.click(fn=like_log, inputs=[inputs, outputs], outputs=[])
    # dislike_btn.click(fn=dislike_log, inputs=[inputs, outputs], outputs=[])

# Earlier gr.Interface-based UI, superseded by the Blocks layout above:
# gr_interface = gr.Interface(fn=generate_output,
#                             inputs=gr.Textbox(label="ورودی", placeholder="سوال خود را وارد کنید", rtl=True),
#                             outputs=gr.Textbox(label="خروجی", rtl=True),
#                             live=False,
#                             flagging_options=["👍🏾", "👎🏾"],
#                             concurrency_limit=5)

# Bind to all interfaces so the Space/container is reachable externally.
demo.launch(server_name="0.0.0.0", share=True)