mostafaamiri committed
Commit a8a13f3
1 Parent(s): 95c389b

Update app.py

Files changed (1)
  1. app.py +2 -58
app.py CHANGED
@@ -6,30 +6,14 @@ import re
 import os
 
 
-# MODEL_PATH = "persian_llama_7b.Q8_K_M.gguf"
-# TEMPRATURE = 0.3
-# MAX_TOKENS = 800
-# for k,v in os.environ.items():
-#     if(k=="MODEL_PATH"):
-#         MODEL_PATH = v
-#     if(k== "TEMPRATURE"):
-#         TEMPRATURE = v
-#     if(k == "MAX_TOKENS"):
-#         MAX_TOKENS = v
 
-# print("model: "+MODEL_PATH)
-# print("temp: "+TEMPRATURE)
-# print("max_tokens: "+MAX_TOKENS)
 n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
 n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
 n_ctx=2048
 
 callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
 path = "mostafaamiri/persian-llama-7b-GGUF-Q4/persian_llama_7b.Q8_K_M.gguf"
-# persian_llama_7b.Q4_K_M.gguf
-# persian_llama_7b.Q8_K_M.gguf
-# persian_llama_7b.f32.gguf
-# Make sure the model path is correct for your system!
 llm = LlamaCpp(
     model_path= path,
     n_gpu_layers=n_gpu_layers, n_batch=n_batch,
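Note that LlamaCpp expects model_path to point at a local GGUF file, while the string assigned to path above looks like a Hub repo id plus a filename. A minimal sketch of one way to resolve it, assuming huggingface_hub is installed and the classic langchain import layout; the temperature and max_tokens values are assumptions mirroring the TEMPRATURE and MAX_TOKENS defaults removed above, and the rest of the constructor is filled in from the surrounding context:

from huggingface_hub import hf_hub_download
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp

# Fetch the GGUF file into the local Hub cache so llama.cpp gets a real path.
local_path = hf_hub_download(
    repo_id="mostafaamiri/persian-llama-7b-GGUF-Q4",
    filename="persian_llama_7b.Q8_K_M.gguf",
)

llm = LlamaCpp(
    model_path=local_path,
    n_gpu_layers=40,   # offload as many layers as your VRAM allows
    n_batch=512,       # should be between 1 and n_ctx
    n_ctx=2048,
    temperature=0.3,   # assumption: the removed TEMPRATURE default
    max_tokens=800,    # assumption: the removed MAX_TOKENS default
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
)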
@@ -52,31 +36,7 @@ def generate_output(text):
 def clear():
     return "", ""
 
-# def like_log(input, output):
-#     with open("like_log.txt", "a") as f:
-#         f.write("{\"model\": \""+MODEL_PATH+"\",\n\"temprature\": "+TEMPRATURE+",\n\"input\": \""+input+"\",\n\"output\": \""+output+"\"},\n")
-
-# def dislike_log(input, output):
-#     with open("dislike_log.txt", "a") as f:
-#         f.write("{\"model\": \""+MODEL_PATH+"\",\n\"temprature\": "+TEMPRATURE+",\n\"input\": \""+input+"\",\n\"output\": \""+output+"\"},\n")
-
-
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown(
-        dal_image+
-        """
-        <br>
-        <div dir="rtl">
-        <h1>
-        مدل هوش مصنوعی دال
-        </h1>
-        <p dir="rtl">
-        تماس با ما با
-        <br/>
-        info[@]aidal.ir
-        </p>
-        </div>
-        """)
     with gr.Row():
         inputs=gr.Textbox(label="ورودی",placeholder="سوال خود را وارد کنید",rtl=True)
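The deleted like_log/dislike_log helpers built their JSON records by hand, which breaks as soon as the input or output contains a double quote, and they misspell "temperature". If logging ever comes back, a hedged sketch of an equivalent helper using json.dumps (the helper name is hypothetical; path is the module-level variable defined above):

import json

def feedback_log(filename, input_text, output_text):
    # json.dumps handles the quoting/escaping the old string concatenation broke on.
    record = {
        "model": path,        # defined earlier in app.py
        "temperature": 0.3,   # assumption: the removed TEMPRATURE default
        "input": input_text,
        "output": output_text,
    }
    with open(filename, "a", encoding="utf-8") as f:
        f.write(json.dumps(record, ensure_ascii=False) + ",\n")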
 
@@ -89,22 +49,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         inputs= [inputs],
         outputs= [outputs])
     clear_btn.click(fn=clear, inputs=[], outputs=[inputs, outputs])
-    # with gr.Row():
-    #     like_btn= gr.Button("👍🏾")
-    #     dislike_btn= gr.Button("👎🏾")
-    # like_btn.click(fn=like_log,
-    #     inputs= [inputs, outputs],
-    #     outputs=[]
-    #     )
-    # dislike_btn.click(fn=dislike_log,
-    #     inputs= [inputs, outputs],
-    #     outputs=[]
-    )
-    # gr_interface = gr.Interface(fn=generate_output,
-    #     inputs=gr.Textbox(label="ورودی",placeholder="سوال خود را وارد کنید",rtl=True),
-    #     outputs=gr.Textbox(label="خروجی",rtl=True),
-    #     live=False,
-    #     flagging_options=["👍🏾","👎🏾"],
-    #     concurrency_limit=5)
 
 demo.launch(server_name='0.0.0.0',share=True)
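Worth noting: the deleted block above appears to end with an uncommented stray ")" (the bare "- )" line), which would have been a SyntaxError in the old file unless a leading "#" was lost in rendering. If the feedback buttons are ever restored, the wiring could look like this hypothetical sketch; it must sit inside the with gr.Blocks(...) block after inputs and outputs are created, and it assumes the feedback_log helper sketched earlier:

    with gr.Row():
        like_btn = gr.Button("👍🏾")
        dislike_btn = gr.Button("👎🏾")
    # Each handler appends one log record and returns nothing to the UI.
    like_btn.click(fn=lambda i, o: feedback_log("like_log.txt", i, o),
                   inputs=[inputs, outputs], outputs=[])
    dislike_btn.click(fn=lambda i, o: feedback_log("dislike_log.txt", i, o),
                      inputs=[inputs, outputs], outputs=[])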
 