from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from peft import PeftModel
import gradio as gr
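
# Load the InternLM-7B base model, attach the fine-tuned LoRA adapter, and load its tokenizer.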
model = AutoModelForCausalLM.from_pretrained("internlm/internlm-7b", trust_remote_code=True)
model = PeftModel.from_pretrained(model, "fadliaulawi/internlm-7b-finetuned")
tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-7b", padding_side="left", use_fast=False, trust_remote_code=True)
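
# Prompts follow the Alpaca-LoRA format, translated to Indonesian so they match the
# data the adapter was fine-tuned on.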
def generate_prompt(instruction, input, label):
    # Earlier English Alpaca-LoRA template, kept for reference:
    # template = {
    #     "description": "Template used by Alpaca-LoRA.",
    #     "prompt_input": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n",
    #     "prompt_no_input": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:\n",
    #     "response_split": "### Response:"
    # }
    # Llama-2 chat format, also kept for reference:
    # <s>[INST] <<SYS>>
    # {{ system_prompt }}
    # <</SYS>>
    # {{ user_message }} [/INST]
    # return '''<s>[INST] <<SYS>>\n{0}\n<</SYS>>\n\n{1} {2} [/INST]'''.format(template['prompt_input'].format(instruction=instruction, input=input), template['response_split'], label)
    template = {
        "description": "Template used by Alpaca-LoRA.",
        "prompt_input": "Di bawah ini adalah instruksi yang menjelaskan tugas, dipasangkan dengan masukan yang memberikan konteks lebih lanjut. Tulis tanggapan yang melengkapi permintaan dengan tepat.\n\n### Instruksi:\n{instruction}\n\n### Masukan:\n{input}",
        # "prompt_no_input": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:\n",
        "response_split": "### Tanggapan:"
    }
    # The caller always passes an input, so the no-input branch stays commented out.
    if input:
        res = template["prompt_input"].format(instruction=instruction, input=input)
    # else:
    #     res = template["prompt_no_input"].format(instruction=instruction)
    res = f"{res} \n\n### Tanggapan:\n"
    if label:
        res = f"{res}{label}"
    return res

def user(message, history):
    # Append the user's message to the chat history; the bot's reply is filled in later.
    return "", history + [[message, None]]

def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(
        data_point["instruction"],
        data_point["input"],
        data_point["output"],
    )
    # print(full_prompt)
    # return
    cutoff_len = 256
    tokenizer.pad_token = tokenizer.eos_token
    result = tokenizer(
        full_prompt,
        truncation=True,
        max_length=cutoff_len,
        padding=True,
        return_tensors=None,
    )
    # Append the EOS token when it is missing and the prompt was not truncated.
    if result["input_ids"][-1] != tokenizer.eos_token_id and len(result["input_ids"]) < cutoff_len:
        result["input_ids"].append(tokenizer.eos_token_id)
        result["attention_mask"].append(1)
    # result["labels"] = result["input_ids"].copy()
    return result

def bot(history, temperature, max_new_tokens, top_p, top_k):
    user_message = history[-1][0]
    data = {
        'instruction': "Jika Anda seorang dokter, silakan menjawab pertanyaan medis berdasarkan deskripsi pasien.",
        'input': user_message,
        'output': ''
    }
    new_user_input_ids = generate_and_tokenize_prompt(data)
    # Wrap the tokenized prompt in a batch dimension for generation.
    bot_input_ids = torch.LongTensor([new_user_input_ids['input_ids']])
    # Generate a response.
    response = model.generate(
        input_ids=bot_input_ids,
        pad_token_id=tokenizer.eos_token_id,
        temperature=float(temperature),
        max_new_tokens=max_new_tokens,
        top_p=float(top_p),
        top_k=top_k,
        do_sample=True
    )
    # Decode, then keep only the text after the "### Tanggapan:" marker; the prompt is
    # echoed back, so the "###" sections are [preamble, Instruksi, Masukan, Tanggapan].
    response = tokenizer.batch_decode(response, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    sections = response.split("###")
    response = sections[3]
    response = response.split("Tanggapan:")[1].strip()
    history[-1][1] = response
    return history
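
# Build the Gradio chat interface.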
with gr.Blocks() as demo:
    gr.Markdown(
        """# ChatDoctor - InternLM 7b 🩺
A [ChatDoctor - InternLM 7b](https://huggingface.co/fadliaulawi/internlm-7b-finetuned) demo,
built from the [InternLM 7b](https://huggingface.co/internlm/internlm-7b) base model and fine-tuned on the Indonesian translation of the [ChatDoctor](https://github.com/Kent0n-Li/ChatDoctor) dataset.
"""
    )
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    submit = gr.Button("Submit")
    clear = gr.Button("Clear")
    examples = gr.Examples(
        examples=[
            "Dokter, aku mengalami kelelahan akhir-akhir ini.",
            "Dokter, aku merasa pusing, lemah dan sakit dada tajam akhir-akhir ini.",
            "Dokter, aku merasa sangat depresi akhir-akhir ini dan juga mengalami perubahan suhu tubuhku.",
            "Dokter, saya sudah beberapa minggu mengalami suara serak dan tidak kunjung membaik meski sudah minum obat. Apa masalahnya?"
        ],
        inputs=[msg]
    )
    gr.Markdown(
        """## Adjust the additional inputs:"""
    )
    temperature = gr.Slider(0, 5, value=0.8, step=0.1, label='Temperature', info="Controls randomness; higher values increase diversity.")
    max_length = gr.Slider(0, 1024, value=50, step=1, label='Max Length', info="The maximum number of tokens to generate.")
    top_p = gr.Slider(0, 1, value=0.8, step=0.1, label='Top P', info="The cumulative probability cutoff for token selection. Lower values mean sampling from a smaller, more top-weighted nucleus.")
    top_k = gr.Slider(0, 50, value=10, step=1, label='Top K', info="Sample from the k most likely next tokens at each step. Lower k focuses on higher-probability tokens.")

    # On submit, record the user turn first, then let the bot generate its reply.
    submit.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, temperature, max_length, top_p, top_k], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue(concurrency_count=100).launch()