Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,15 @@ model_name = "kazuma313/lora_model_dokter_consultasi_q4_k_m"
|
|
8 |
model_file = "lora_model_dokter_consultasi_q4_k_m-unsloth.Q4_K_M.gguf" # this is the specific model file we'll use in this example. It's a 4-bit quant, but other levels of quantization are available in the model repo if preferred
|
9 |
model_path = hf_hub_download(model_name, filename=model_file)
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
prompt_template="""<|begin_of_text|>Dibawah ini adalah percakapan antara dokter dengan pasiennya yang ingin berkonsultasi terkait kesehatan. Tuliskan jawaban yang tepat dan lengkap sesuai sesuai pertanyaan dari pasien.<|end_of_text|>
|
13 |
|
|
|
8 |
model_file = "lora_model_dokter_consultasi_q4_k_m-unsloth.Q4_K_M.gguf" # this is the specific model file we'll use in this example. It's a 4-bit quant, but other levels of quantization are available in the model repo if preferred
|
9 |
model_path = hf_hub_download(model_name, filename=model_file)
|
10 |
|
11 |
+
llm = Llama(
|
12 |
+
model_path=model_path,
|
13 |
+
n_ctx=2048, # Context length to use
|
14 |
+
# n_threads=4, # Number of CPU threads to use
|
15 |
+
# n_gpu_layers=0 # Number of model layers to offload to GPU
|
16 |
+
# chat_format="chatml",
|
17 |
+
verbose=False
|
18 |
+
|
19 |
+
)
|
20 |
|
21 |
prompt_template="""<|begin_of_text|>Dibawah ini adalah percakapan antara dokter dengan pasiennya yang ingin berkonsultasi terkait kesehatan. Tuliskan jawaban yang tepat dan lengkap sesuai sesuai pertanyaan dari pasien.<|end_of_text|>
|
22 |
|