"""Gradio chat demo: Gemma-2B merged with a PEFT adapter, served via ChatInterface."""

import os
import re

import gradio as gr
import torch
from accelerate import disk_offload  # retained import; offloading is configured via offload_folder below
from huggingface_hub import login
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "google/gemma-2b"
peft_model = "kazuma313/gemma-dokter-ft"
device_map = "auto"
save_dir = "gemma-dokter-ft"

# Hugging Face access token is read from the `hftoken` environment variable.
hf_token = os.getenv('hftoken')
if hf_token:
    # Only log in when a token is actually configured: login(token=None)
    # falls back to an interactive prompt, which cannot be answered in a
    # headless deployment.
    login(token=hf_token, add_to_git_credential=True)

# The original script called accelerate.disk_offload() with the adapter
# repo-id string, but that function expects an instantiated torch.nn.Module
# and would fail before the model was ever loaded. Passing `offload_folder`
# lets device_map="auto" spill weights to disk when they do not fit in
# GPU/CPU memory.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=hf_token,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
    offload_folder=save_dir,
)

# Attach the fine-tuned adapter, then fold its weights into the base model so
# inference runs on a plain transformers model.
model = PeftModel.from_pretrained(base_model, peft_model)
model = model.merge_and_unload()

# The tokenizer is loaded from the base model checkpoint; EOS doubles as the
# padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Text fragments removed from the generated answer before it is shown.
# NOTE(review): without re.MULTILINE the `^\d+\.` branch only matches at the
# very start of the answer — confirm whether per-line stripping was intended.
_ANSWER_NOISE = re.compile(r'Step \d+/\d+|^\d+\.\s*')


def echo(message, history, tokens):
    """Generate a reply to *message* with the merged model.

    Args:
        message: The user's latest chat message, used verbatim as the prompt.
        history: Previous chat turns supplied by ``gr.ChatInterface``; unused.
        tokens: Value of the additional slider input; used as the maximum
            number of newly generated tokens.

    Returns:
        The text following the last ``'Answer:'`` marker in the generated
        continuation (or the whole continuation when the marker is absent),
        with the noise pattern removed.
    """
    # Inputs must live on the same device as the model's first layer; with
    # device_map="auto" that is not necessarily the CPU.
    inputs = tokenizer(message, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[-1]

    # max_new_tokens budgets the answer alone; max_length would also count
    # the prompt, leaving long prompts with little or no room to answer.
    # The slider may deliver a float, so coerce to int.
    outputs = model.generate(**inputs, max_new_tokens=int(tokens))

    # generate() returns prompt + continuation for decoder-only models;
    # decode only the continuation so the prompt is not echoed back.
    generated = outputs[0][prompt_len:]
    answer = tokenizer.decode(generated, skip_special_tokens=True).split('Answer:')[-1]
    return _ANSWER_NOISE.sub('', answer)


demo = gr.ChatInterface(
    echo,
    examples=[
        ["what is the negative effect of alcohol?"],
        ["i have lack of sleep, what happend if continously do this?"],
    ],
    title="dokter Bot",
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
    additional_inputs=[
        gr.Slider(64, 256, value=80),
    ],
)

demo.launch()