File size: 2,056 Bytes
01e3b36
 
 
bce5fbb
3a0befc
01e3b36
 
 
 
 
 
dcc1a9d
 
 
c6d5b34
dcc1a9d
 
 
c6d5b34
01e3b36
 
 
 
 
 
 
 
 
 
9ccbda0
01e3b36
48acf2c
01e3b36
5b85bb3
1759dcd
5b85bb3
8f1c766
5b85bb3
 
 
 
8f1c766
5b85bb3
 
9ccbda0
5b85bb3
 
 
48acf2c
9ccbda0
c6d5b34
 
01e3b36
 
 
 
 
 
 
17ad0d0
01e3b36
 
 
 
17ad0d0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
## Imports
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr
import copy

## Fetch the quantized GGUF weights and load them for CPU inference.
# Repo and file name of the 4-bit (Q4_K_M) quant; other quant levels
# exist in the same repo if a different size/quality trade-off is wanted.
model_name = "kazuma313/lora_model_dokter_consultasi_q4_k_m"
model_file = "lora_model_dokter_consultasi_q4_k_m-unsloth.Q4_K_M.gguf"

# Downloads once and caches locally; returns the cached path thereafter.
model_path = hf_hub_download(model_name, filename=model_file)

# llama.cpp runner; CPU-only as configured here.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,    # context window (tokens)
    n_threads=4,   # CPU threads used for generation
    # n_gpu_layers=0      # layers to offload to GPU (disabled)
    # chat_format="chatml",
    verbose=False,
)

# Prompt template for the doctor-consultation fine-tune.  {ask} receives the
# patient's question; the model generates its reply after "### Jawaban:"
# (Indonesian: "Answer:").  The special tokens match the model's training format.
prompt_template="""<|begin_of_text|>Dibawah ini adalah percakapan antara dokter dengan pasiennya yang ingin berkonsultasi terkait kesehatan. Tuliskan jawaban yang tepat dan lengkap sesuai sesuai pertanyaan dari pasien.<|end_of_text|>

### Pertanyaan:
{ask}

### Jawaban:
"""

def output_inference(tanya, history):
    """Stream the model's answer to a patient question.

    Generator used by gr.ChatInterface: yields the accumulated answer text
    after each streamed token so the UI can render it incrementally.

    Args:
        tanya: The patient's question (str).
        history: Chat history supplied by gr.ChatInterface; not used here.

    Yields:
        str: The answer generated so far, growing with each token.
    """
    prompt = prompt_template.format(ask=tanya)

    stream = llm(
        prompt,
        # Stop on the model's end token or any section marker it might echo.
        stop=["<|end_of_text|>", "Pertanyaan:", "Jawaban:", "###"],
        max_tokens=512,
        temperature=0.2,
        top_p=0.95,
        top_k=40,
        min_p=0.05,
        typical_p=1.0,
        repeat_penalty=1.1,
        stream=True,
    )

    answer = ""
    for chunk in stream:
        # Each chunk is read once and discarded, so no deepcopy is needed
        # (the original copied every chunk for no benefit).
        answer += chunk["choices"][0]["text"]
        yield answer
    # NOTE(review): the original ended with `history = ["init", prompt]`,
    # a dead rebinding of a local with no effect on callers — removed.
    
    
# Assemble the chat UI around the streaming inference function and serve it.
demo = gr.ChatInterface(
    output_inference,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Tanya saya kesehatan anda", container=False, scale=7),
    title="Konsultasi dokter",
    description="Tanya saja semua keluhan mu",
    theme="soft",
    examples=["apa saja tips agar badan sehat?", "apa efek samping dari minum alkohol berlebihan?", "berapa hasil dari 10 + 5?"],
    cache_examples=True,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)
demo.launch()