"""Gradio web UI that streams completions from a Persian LLaMA GGUF model.

The model is loaded once at import time through LangChain's ``LlamaCpp``
wrapper; the UI exposes a single prompt box, a submit button and a clear
button.

Optional environment overrides:
    MODEL_PATH   path to the ``.gguf`` model file
    TEMPRATURE   sampling temperature (float)
    MAX_TOKENS   maximum number of generated tokens (int)
"""

from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import gradio as gr
import re
import os
from typing import Callable, Iterator, Tuple, TypeVar

_T = TypeVar("_T")


def _read_env(name: str, default: _T, cast: Callable[[str], _T]) -> _T:
    """Return environment variable *name* converted with *cast*.

    Falls back to *default* when the variable is unset or blank.

    Raises:
        ValueError: if the variable is set but *cast* rejects its value.
    """
    raw = os.environ.get(name)
    if raw is None or not raw.strip():
        return default
    try:
        return cast(raw.strip())
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name}={raw!r} is not a valid {cast.__name__}"
        ) from err


# Other quantisations that have been used with this app:
#   persian_llama_7b.Q4_K_M.gguf
#   persian_llama_7b.Q8_K_M.gguf
#   persian_llama_7b.f32.gguf
# Make sure the model path is correct for your system!
MODEL_PATH = os.environ.get("MODEL_PATH") or (
    "mostafaamiri/persian-llama-7b-GGUF-Q4/persian_llama_7b.Q8_K_M.gguf"
)
# These two names were previously only defined in commented-out code, so the
# LlamaCpp(...) call below failed with NameError. Environment values arrive as
# strings and are converted to the numeric types the sampler settings need.
TEMPRATURE = _read_env("TEMPRATURE", 0.3, float)
MAX_TOKENS = _read_env("MAX_TOKENS", 800, int)

n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
n_ctx = 2048

# Echo generated tokens to stdout as they are produced.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

path = MODEL_PATH

llm = LlamaCpp(
    model_path=path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    verbose=True,
    n_ctx=n_ctx,
    temperature=TEMPRATURE,
    max_tokens=MAX_TOKENS,
    top_p=1,
)


def generate_output(text: str) -> Iterator[str]:
    """Stream the model's answer to *text*.

    Yields the accumulated answer after every chunk received from
    ``llm.stream`` so the output textbox is refreshed with the full text so
    far rather than with isolated fragments.
    """
    result = ""
    for s in llm.stream(text):
        result += s
        yield result


def clear() -> Tuple[str, str]:
    """Return empty strings for the input and output textboxes."""
    return "", ""


# Feedback logging is currently disabled; kept for reference.
# NOTE: TEMPRATURE is a float, so the string concatenation below must be
# adapted (e.g. via json.dumps) before this is re-enabled.
# def like_log(input, output):
#     with open("like_log.txt", "a") as f:
#         f.write("{\"model\": \""+MODEL_PATH+"\",\n\"temprature\": "+TEMPRATURE+",\n\"input\": \""+input+"\",\n\"output\": \""+output+"\"},\n")
# def dislike_log(input, output):
#     with open("dislike_log.txt", "a") as f:
#         f.write("{\"model\": \""+MODEL_PATH+"\",\n\"temprature\": "+TEMPRATURE+",\n\"input\": \""+input+"\",\n\"output\": \""+output+"\"},\n")

# NOTE(review): `dal_image` was referenced in the page header but never defined
# or imported in this file, which raised NameError at start-up. It is assumed
# to hold the logo markup; it defaults to an empty string until the real
# snippet is supplied - TODO confirm.
dal_image = ""

# Text shown under the logo at the top of the page (runtime string, kept as-is).
HEADER_MARKDOWN = """

مدل هوش مصنوعی دال

تماس با ما با
info[@]aidal.ir

"""

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(dal_image + HEADER_MARKDOWN)
    with gr.Row():
        inputs = gr.Textbox(label="ورودی", placeholder="سوال خود را وارد کنید", rtl=True)
    with gr.Row():
        submit_btn = gr.Button("ارسال", variant="primary")
        clear_btn = gr.ClearButton(value="پاک کردن", variant="secondary")
    with gr.Row():
        outputs = gr.Textbox(label="خروجی", rtl=True)
    # generate_output is a generator, so the output box is updated on each yield.
    submit_btn.click(fn=generate_output, inputs=[inputs], outputs=[outputs])
    clear_btn.click(fn=clear, inputs=[], outputs=[inputs, outputs])
    # with gr.Row():
    #     like_btn= gr.Button("👍🏾")
    #     dislike_btn= gr.Button("👎🏾")
    # like_btn.click(fn=like_log,
    #                inputs= [inputs, outputs],
    #                outputs=[]
    #                )
    # dislike_btn.click(fn=dislike_log,
    #                inputs= [inputs, outputs],
    #                outputs=[]
    #                )

# gr_interface = gr.Interface(fn=generate_output,
#                             inputs=gr.Textbox(label="ورودی",placeholder="سوال خود را وارد کنید",rtl=True),
#                             outputs=gr.Textbox(label="خروجی",rtl=True),
#                             live=False,
#                             flagging_options=["👍🏾","👎🏾"],
#                             concurrency_limit=5)

demo.launch(server_name='0.0.0.0', share=True)