# Spaces: Sleeping
# (Web-page status text captured along with the source; not part of the program.)
#import torch | |
#from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig | |
#from peft import PeftConfig, PeftModel | |
from peft import AutoPeftModelForCausalLM | |
from transformers import AutoTokenizer | |
class InferencePipeline:
    """Load a PEFT causal language model plus its tokenizer and run generation.

    The configuration mapping must provide, under ``conf["model"]``:

    * ``model_name`` -- identifier/path passed to both ``from_pretrained`` calls.
    * ``load_in_4bit`` -- forwarded as ``load_in_4bit`` when loading the model.
    * ``max_new_tokens`` -- forwarded to ``generate`` on every ``infer`` call.
    """

    def __init__(self, conf, api_key):
        """Store the configuration and eagerly load the model and tokenizer.

        Args:
            conf: Nested mapping with a ``"model"`` section (see class docstring).
            api_key: Kept on the instance as ``self.token``.
        """
        self.conf = conf
        # NOTE(review): the token is stored but no method in this class reads
        # it; confirm whether it should be forwarded to the loaders (e.g. for
        # gated or private checkpoints).
        self.token = api_key
        self.model, self.tokenizer = self.get_model()

    def get_model(self):
        """Load the model and tokenizer named by ``conf["model"]["model_name"]``.

        Returns:
            A ``(model, tokenizer)`` tuple.
        """
        model_cfg = self.conf["model"]
        model_name = model_cfg["model_name"]
        model = AutoPeftModelForCausalLM.from_pretrained(
            model_name,
            load_in_4bit=model_cfg["load_in_4bit"],
        )
        # The tokenizer is loaded from the same location as the model; the
        # previous code read ``self.path``, an attribute that is never set.
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return model, tokenizer

    def infer(self, prompt):
        """Generate a completion for a single prompt.

        Args:
            prompt: Text to tokenize and feed to the model.

        Returns:
            Whatever ``self.tokenizer.batch_decode`` returns for the generated
            sequences (one entry per sequence in the batch).
        """
        # Send the inputs to wherever the model lives; fall back to the
        # previously hard-coded "cuda" when the model exposes no ``device``.
        device = getattr(self.model, "device", "cuda")
        inputs = self.tokenizer([prompt], return_tensors="pt").to(device)
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=self.conf["model"]["max_new_tokens"],
            use_cache=True,
        )
        return self.tokenizer.batch_decode(outputs)