# kazuma313's picture
# Update app.py
# dd3757f verified
# raw
# history blame
# 1.77 kB
import gradio as gr
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import re
import os
# Checkpoint identifiers: the base model and the PEFT adapter applied on top.
model_name = "google/gemma-2b"
peft_model = "kazuma313/gemma-dokter-ft"
device_map = "auto"

# Hub access token, read from the `hftoken` environment variable (None if unset).
hf_token = os.environ.get('hftoken')

# Load the base model in half precision, using the settings above.
base_model_kwargs = {
    "token": hf_token,
    "low_cpu_mem_usage": True,
    "return_dict": True,
    "torch_dtype": torch.float16,
    "device_map": device_map,
}
base_model = AutoModelForCausalLM.from_pretrained(model_name, **base_model_kwargs)

# Attach the adapter to the base model, then merge it and drop the PEFT wrapper;
# `model` is what the chat callback generates with.
model = PeftModel.from_pretrained(base_model, peft_model).merge_and_unload()

# The tokenizer is loaded from the base checkpoint. The EOS token doubles as the
# padding token, and padding is applied on the right.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    token=hf_token,
    trust_remote_code=True,
)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token
# Text stripped from the decoded answer: "Step <n>/<m>" markers anywhere in the
# text, and a leading "<n>. " list number. The pattern is compiled without
# re.MULTILINE, so `^` only matches at the very start of the answer.
_ANSWER_NOISE = re.compile(r'Step \d+/\d+|^\d+\.\s*')


def echo(message, history, tokens=124):
    """Generate a reply to *message* with the module-level ``model``.

    Args:
        message: The user's latest chat message; tokenized as-is and used as
            the prompt.
        history: Earlier chat turns, as passed by the chat interface. Unused:
            every reply is generated from ``message`` alone.
        tokens: Upper bound on the number of newly generated tokens. Cast to
            ``int`` before use. Defaults to 124 (the slider's initial value)
            so the callback also works when called without the extra input.

    Returns:
        The decoded text following the last ``'Answer:'`` marker (or the whole
        decoded text when the marker is absent), with the noise described on
        ``_ANSWER_NOISE`` removed.
    """
    # The model is loaded with device_map="auto", so it may not live on the
    # CPU; the encoded prompt has to be moved to the model's device.
    encoded = tokenizer(message, return_tensors="pt").to(model.device)

    # Bound only the generated part. `max_length` would also count the prompt,
    # leaving no room for an answer once the message reaches that length.
    outputs = model.generate(**encoded, max_new_tokens=int(tokens))

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    answer = decoded.split('Answer:')[-1]
    return _ANSWER_NOISE.sub('', answer)
# Example prompts handed to the chat interface (one message per example).
example_prompts = [
    ["what is the negative effect of alcohol?"],
    ["i have lack of sleep, what happend if continously do this?"],
]

# Extra input shown alongside the chat: a slider from 64 to 256, starting at
# 124. `echo` takes a third parameter (`tokens`) after message and history.
token_slider = gr.Slider(64, 256, value=124)

# Chat UI wired to `echo`: the retry button is disabled, and the undo and clear
# buttons get custom captions.
demo = gr.ChatInterface(
    echo,
    examples=example_prompts,
    title="dokter Bot",
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
    additional_inputs=[token_slider],
)

# Start the Gradio app.
demo.launch()