Spaces:
Running
Running
File size: 2,033 Bytes
01e3b36 bce5fbb 3a0befc 01e3b36 dcc1a9d 01e3b36 9ccbda0 01e3b36 48acf2c 01e3b36 9ccbda0 48acf2c 9ccbda0 01e3b36 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
## Imports
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr
import copy
## Fetch the quantized GGUF weights and load them with llama.cpp
# The selected file is the 4-bit (Q4_K_M) quantization; per the original
# author's note, other quantization levels are available in the same repo.
model_name = "kazuma313/lora_model_dokter_consultasi_q4_k_m"
model_file = "lora_model_dokter_consultasi_q4_k_m-unsloth.Q4_K_M.gguf"
model_path = hf_hub_download(repo_id=model_name, filename=model_file)

# Only the context length and verbosity are set explicitly; n_threads,
# n_gpu_layers and chat_format are deliberately not passed.
llm = Llama(model_path=model_path, n_ctx=2048, verbose=False)
# Single-turn prompt: a fixed Indonesian instruction header, then the patient's
# question (substituted for the {ask} placeholder) and an open answer section
# that the model is expected to complete.
prompt_template = (
    "<|begin_of_text|>Dibawah ini adalah percakapan antara dokter dengan pasiennya "
    "yang ingin berkonsultasi terkait kesehatan. "
    "Tuliskan jawaban yang tepat dan lengkap sesuai sesuai pertanyaan dari pasien."
    "<|end_of_text|>\n"
    "### Pertanyaan:\n"
    "{ask}\n"
    "### Jawaban:\n"
)
def output_inference(tanya: str, history):
    """Stream the model's answer to a single patient question.

    The question is substituted into ``prompt_template`` and sent to the
    module-level ``llm`` as a streaming completion. After every received
    chunk the answer accumulated so far is yielded, so each yielded value is
    the whole answer up to that point, not just the newest piece.

    Args:
        tanya: The user's question; replaces ``{ask}`` in the prompt.
        history: Conversation history supplied by the chat UI. It is not
            used: every question is answered without earlier turns.

    Yields:
        The answer text generated so far, growing with each chunk.
    """
    prompt = prompt_template.format(ask=tanya)
    chunks = llm(
        prompt,
        stop=["<|end_of_text|>"],
        max_tokens=512,
        temperature=0.3,
        top_p=0.95,
        top_k=40,
        min_p=0.05,
        typical_p=1.0,
        stream=True,
    )

    answer = ""
    for chunk in chunks:
        # Chunks are only read here, so no defensive copy is required.
        answer += chunk["choices"][0]["text"]
        yield answer
# Chat front end: each submitted message is passed to output_inference, whose
# yielded partial answers are shown as the reply streams in.
# NOTE(review): retry_btn / undo_btn / clear_btn belong to the Gradio 4.x
# ChatInterface API - confirm the pinned Gradio version before upgrading.
demo = gr.ChatInterface(
    output_inference,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Tanya saya kesehatan anda", container=False, scale=7),
    title="Konsultasi dokter",
    description="Tanya saja semua keluhan mu",
    theme="soft",
    examples=[
        "apa saja tips hidup sehat?",
        "apa penyebab dari minum alkohol berlebihan?",
        "apa yang terjadi jika pola tidur tidak teratur?",
        "berapa hasil dari 10 + 5?",
    ],
    # Example answers are generated through output_inference and cached by Gradio.
    cache_examples=True,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)
demo.launch()