from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from peft import PeftModel
import gradio as gr
# Load the base model and apply the finetuned LoRA adapter on top of it.
model = AutoModelForCausalLM.from_pretrained("internlm/internlm-7b", trust_remote_code=True)
model = PeftModel.from_pretrained(model, "fadliaulawi/internlm-7b-finetuned")
tokenizer = AutoTokenizer.from_pretrained(
    "internlm/internlm-7b", padding_side="left", use_fast=False, trust_remote_code=True
)
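
# A minimal inference-mode addition, assuming this Space never trains the
# model: eval() disables dropout. Moving the model to GPU depends on the
# Space hardware and is left commented out.
model.eval()
# if torch.cuda.is_available():
#     model = model.cuda()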
def generate_prompt(instruction, input, label):
    # Alpaca-LoRA style prompt template, translated to Indonesian. The English
    # original reads: "Below is an instruction that describes a task, paired
    # with an input that provides further context. Write a response that
    # appropriately completes the request."
    template = {
        "description": "Template used by Alpaca-LoRA.",
        "prompt_input": "Di bawah ini adalah instruksi yang menjelaskan tugas, dipasangkan dengan masukan yang memberikan konteks lebih lanjut. Tulis tanggapan yang melengkapi permintaan dengan tepat.\n\n### Instruksi:\n{instruction}\n\n### Masukan:\n{input}",
        "response_split": "### Tanggapan:",
    }
    if input:
        res = template["prompt_input"].format(instruction=instruction, input=input)
    else:
        # The no-input variant of the template was commented out in the original;
        # fall back to the same template with the "### Masukan:" block dropped,
        # so res is always defined.
        res = template["prompt_input"].split("\n\n### Masukan:")[0].format(instruction=instruction)
    res = f"{res} \n\n### Tanggapan:\n"
    if label:
        res = f"{res}{label}"
    return res
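
# For reference, a full prompt rendered by generate_prompt() has this shape
# (illustrative values, not from the original file):
#
#   Di bawah ini adalah instruksi ... dengan tepat.
#
#   ### Instruksi:
#   Jika Anda seorang dokter, ...
#
#   ### Masukan:
#   Dokter, aku mengalami kelelahan akhir-akhir ini.
#
#   ### Tanggapan:
#   <label; empty at inference time>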
def user(message, history):
    # Clear the textbox and append the user's message to the chat history,
    # leaving the bot's reply slot (None) to be filled in by bot().
    return "", history + [[message, None]]
def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(
        data_point["instruction"],
        data_point["input"],
        data_point["output"],
    )
    # Tokenize the prompt, truncating to cutoff_len and padding with EOS.
    cutoff_len = 256
    tokenizer.pad_token = tokenizer.eos_token
    result = tokenizer(
        full_prompt,
        truncation=True,
        max_length=cutoff_len,
        padding=True,
        return_tensors=None,
    )
    # Append an EOS token when the prompt was not truncated and does not
    # already end with one.
    if result["input_ids"][-1] != tokenizer.eos_token_id and len(result["input_ids"]) < cutoff_len:
        result["input_ids"].append(tokenizer.eos_token_id)
        result["attention_mask"].append(1)
    return result
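
# generate_and_tokenize_prompt() returns a plain dict (return_tensors=None),
# e.g. {"input_ids": [...], "attention_mask": [...]}; bot() below wraps
# input_ids in a LongTensor with a leading batch dimension before calling
# model.generate().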
def bot(history, temperature, max_new_tokens, top_p, top_k):
    user_message = history[-1][0]
    # Wrap the user's message in the ChatDoctor instruction (in Indonesian:
    # "If you are a doctor, please answer the medical question based on the
    # patient's description.").
    data = {
        'instruction': "Jika Anda seorang dokter, silakan menjawab pertanyaan medis berdasarkan deskripsi pasien.",
        'input': user_message,
        'output': ''
    }
    new_user_input_ids = generate_and_tokenize_prompt(data)
    bot_input_ids = torch.LongTensor([new_user_input_ids['input_ids']])
    # Generate a response with the sampling parameters from the UI sliders.
    response = model.generate(
        input_ids=bot_input_ids,
        pad_token_id=tokenizer.eos_token_id,
        temperature=float(temperature),
        max_new_tokens=max_new_tokens,
        top_p=float(top_p),
        top_k=top_k,
        do_sample=True,
    )
    # Decode, then keep only the text after the "### Tanggapan:" marker. The
    # prompt contains three "###" headers (Instruksi, Masukan, Tanggapan), so
    # the generated answer lives in the fourth "###"-delimited section.
    response = tokenizer.batch_decode(response, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    response = response.split("###")[3].split("Tanggapan:")[1].strip()
    history[-1][1] = response
    return history
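
# The "###"-based parsing in bot() assumes the decoded text echoes the prompt
# and contains exactly the expected headers. A more defensive sketch
# (hypothetical helper, not used by the original app):
def extract_response(decoded, marker="### Tanggapan:"):
    # Return the text after the last occurrence of the marker, or the whole
    # decoded string if the marker is missing.
    idx = decoded.rfind(marker)
    return decoded[idx + len(marker):].strip() if idx != -1 else decoded.strip()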
with gr.Blocks() as demo:
    gr.Markdown(
        """# ChatDoctor - InternLM 7b 🩺
A [ChatDoctor - InternLM 7b](https://huggingface.co/fadliaulawi/internlm-7b-finetuned) demo,
built from the [InternLM 7b](https://huggingface.co/internlm/internlm-7b) base model and finetuned on the Indonesian translation of the [ChatDoctor](https://github.com/Kent0n-Li/ChatDoctor) dataset.
"""
    )
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    submit = gr.Button("Submit")
    clear = gr.Button("Clear")
    # Example patient complaints, in Indonesian (e.g. "Doctor, I have been
    # experiencing fatigue lately.").
    examples = gr.Examples(
        examples=[
            "Dokter, aku mengalami kelelahan akhir-akhir ini.",
            "Dokter, aku merasa pusing, lemah dan sakit dada tajam akhir-akhir ini.",
            "Dokter, aku merasa sangat depresi akhir-akhir ini dan juga mengalami perubahan suhu tubuhku.",
            "Dokter, saya sudah beberapa minggu mengalami suara serak dan tidak kunjung membaik meski sudah minum obat. Apa masalahnya?",
        ],
        inputs=[msg],
    )
    gr.Markdown("""## Adjust the additional inputs:""")
    temperature = gr.Slider(0, 5, value=0.8, step=0.1, label='Temperature', info="Controls randomness; higher values increase diversity.")
    max_length = gr.Slider(0, 1024, value=50, step=1, label='Max Length', info="The maximum number of new tokens to generate.")
    top_p = gr.Slider(0, 1, value=0.8, step=0.1, label='Top P', info="The cumulative probability cutoff for token selection. Lower values mean sampling from a smaller, more top-weighted nucleus.")
    top_k = gr.Slider(0, 50, value=10, step=1, label='Top K', info="Sample from the k most likely next tokens at each step. Lower k focuses on higher-probability tokens.")
    submit.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, temperature, max_length, top_p, top_k], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue(concurrency_count=100).launch()
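
# Compatibility note: queue(concurrency_count=...) is the Gradio 3.x API; on
# Gradio 4+, the rough equivalent is demo.queue(default_concurrency_limit=...).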