from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import gradio as gr
import re
import os


MODEL_PATH = "persian_llama_7b.Q8_K_M.gguf"
TEMPERATURE = 0.3
MAX_TOKENS = 800

# Optional overrides from the environment. os.environ values are strings,
# so the numeric settings are cast explicitly.
MODEL_PATH = os.environ.get("MODEL_PATH", MODEL_PATH)
TEMPERATURE = float(os.environ.get("TEMPERATURE", TEMPERATURE))
MAX_TOKENS = int(os.environ.get("MAX_TOKENS", MAX_TOKENS))

print("model: " + MODEL_PATH)
print("temp: " + str(TEMPERATURE))
print("max_tokens: " + str(MAX_TOKENS))
n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool; 0 keeps everything on the CPU.
n_batch = 512  # Should be between 1 and n_ctx; consider the amount of VRAM in your GPU.
n_ctx = 2048  # Context window size in tokens.

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
path = "mostafaamiri/persian-llama-7b-GGUF-Q4/persian_llama_7b.Q8_K_M.gguf"
# persian_llama_7b.Q4_K_M.gguf
# persian_llama_7b.Q8_K_M.gguf
# persian_llama_7b.f32.gguf
# Make sure the model path is correct for your system!
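# LlamaCpp needs a local file path. If the path above actually names the
# Hugging Face repo "mostafaamiri/persian-llama-7b-GGUF-Q4", one way to fetch
# the file first (a sketch, not part of the original script):
#
#   from huggingface_hub import hf_hub_download
#   path = hf_hub_download(repo_id="mostafaamiri/persian-llama-7b-GGUF-Q4",
#                          filename="persian_llama_7b.Q8_K_M.gguf")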
llm = LlamaCpp(
    model_path=path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    verbose=True,
    n_ctx=n_ctx,
    temperature=TEMPERATURE,
    max_tokens=MAX_TOKENS,
    top_p=1,
)


def generate_output(text):
    # Stream tokens from the model, yielding the accumulated text so the
    # Gradio output textbox updates incrementally.
    result = ""
    for s in llm.stream(text):
        result += s
        yield result
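# Minimal command-line check of the generator (a sketch; the streaming
# callback handler also echoes tokens to stdout as they arrive):
#
#   last = ""
#   for partial in generate_output("سلام"):  # "Hello"
#       last = partial
#   print(last)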


def clear():
    # Reset both the input and output textboxes.
    return "", ""

# def like_log(input, output):
#     # Append one JSON record per feedback event (requires `import json`).
#     with open("like_log.txt", "a") as f:
#         f.write(json.dumps({"model": MODEL_PATH, "temperature": TEMPERATURE,
#                             "input": input, "output": output},
#                            ensure_ascii=False) + ",\n")

# def dislike_log(input, output):
#     with open("dislike_log.txt", "a") as f:
#         f.write(json.dumps({"model": MODEL_PATH, "temperature": TEMPERATURE,
#                             "input": input, "output": output},
#                            ensure_ascii=False) + ",\n")


# NOTE: `dal_image` (the logo markup prepended to the header) is not defined
# in this file; an empty string keeps the script runnable standalone.
dal_image = ""

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Header markup (Persian): "Dal AI model" / "Contact us at info[@]aidal.ir"
    gr.Markdown(
    dal_image +
    """
    <br>
    <div dir="rtl">
    <h1>
    مدل هوش مصنوعی دال
    </h1>
    <p dir="rtl">
    تماس با ما با
    <br/>
    info[@]aidal.ir 
    </p>
    </div>
    """)
    with gr.Row():
        # Label: "Input"; placeholder: "Enter your question"
        inputs = gr.Textbox(label="ورودی", placeholder="سوال خود را وارد کنید", rtl=True)

    with gr.Row():
        submit_btn = gr.Button("ارسال", variant="primary")  # "Submit"
        clear_btn = gr.ClearButton(value="پاک کردن", variant="secondary")  # "Clear"
    with gr.Row():
        outputs = gr.Textbox(label="خروجی", rtl=True)  # Label: "Output"
    submit_btn.click(fn=generate_output,
                     inputs=[inputs],
                     outputs=[outputs])
    clear_btn.click(fn=clear, inputs=[], outputs=[inputs, outputs])
    # with gr.Row():
    #     like_btn = gr.Button("👍🏾")
    #     dislike_btn = gr.Button("👎🏾")
    # like_btn.click(fn=like_log,
    #                inputs=[inputs, outputs],
    #                outputs=[])
    # dislike_btn.click(fn=dislike_log,
    #                   inputs=[inputs, outputs],
    #                   outputs=[])
    # gr_interface = gr.Interface(fn=generate_output,
    #                             inputs=gr.Textbox(label="ورودی",placeholder="سوال خود را وارد کنید",rtl=True),
    #                             outputs=gr.Textbox(label="خروجی",rtl=True),
    #                             live=False,
    #                             flagging_options=["👍🏾","👎🏾"],
    #                             concurrency_limit=5)
    
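# Gradio 3.x only streams generator outputs when the request queue is enabled;
# if partial responses do not appear in the UI, this may help (an assumption
# about the installed Gradio version):
#
#   demo.queue()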
demo.launch(server_name="0.0.0.0", share=True)