# Spaces: Running
import os
import re

import gradio as gr
import torch
from accelerate import disk_offload
from huggingface_hub import login
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
# Base checkpoint and the PEFT adapter that is merged into it below.
model_name = "google/gemma-2b"
peft_model = "kazuma313/gemma-dokter-ft"
device_map = "auto"
# Directory that receives the disk-offloaded weights.
save_dir = "gemma-dokter-ft"

# The access token comes from the `hftoken` environment variable.
# `add_to_git_credential` is an option of `login()`, not of `os.getenv()`;
# passing it to `os.getenv()` raises TypeError.
hf_token = os.getenv('hftoken')
if hf_token:
    login(token=hf_token, add_to_git_credential=True)
# With no token set, the explicit login is skipped (login(token=None) would
# prompt interactively); the loads below then rely on any cached credentials.

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=hf_token,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)

# Attach the fine-tuned adapter, then fold its weights into the base model so
# that a plain transformers model is left for inference.
model = PeftModel.from_pretrained(base_model, peft_model)
model = model.merge_and_unload()

# Offloading has to happen after `model` exists; calling it earlier raised
# NameError because the name was not yet bound.
disk_offload(model=model, offload_dir=save_dir)

# The tokenizer is loaded from the base checkpoint; EOS doubles as the pad token.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    token=hf_token,
    trust_remote_code=True,
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
def echo(message, history, tokens):
    """Generate a reply to ``message`` with the module-level model.

    Args:
        message: Latest user message; it is tokenized as-is and used as the
            prompt.
        history: Earlier chat turns passed in by the chat UI. Not used.
        tokens: Number of tokens to generate (the slider value); cast to
            ``int`` because UI sliders may deliver a float.

    Returns:
        The decoded text that follows the last ``'Answer:'`` marker, with
        ``Step N/M`` markers and a leading ``N.`` list number removed.
    """
    # "Step N/M" is removed anywhere; "N. " only at the very start of the text.
    pattern = r'Step \d+/\d+|^\d+\.\s*'

    model_inputs = tokenizer(message, return_tensors="pt")
    # `max_new_tokens` budgets only the generated part. `max_length` also
    # counts the prompt, so a long message left no room for an answer.
    outputs = model.generate(**model_inputs, max_new_tokens=int(tokens))

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Strip first: the text after "Answer:" normally begins with whitespace,
    # which would otherwise keep the `^`-anchored alternative from matching.
    answer = decoded.split('Answer:')[-1].strip()
    return re.sub(pattern, '', answer)
# Clickable sample prompts shown with the chat box.
example_prompts = [
    ["what is the negative effect of alcohol?"],
    ["i have lack of sleep, what happend if continously do this?"],
]

# Extra control whose value is passed to `echo` as its `tokens` argument.
token_slider = gr.Slider(64, 256, value=80)

# Chat UI wired to `echo`; the retry button is disabled and the undo/clear
# buttons get custom captions.
demo = gr.ChatInterface(
    echo,
    examples=example_prompts,
    title="dokter Bot",
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
    additional_inputs=[token_slider],
)

demo.launch()