from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
class InferencePipeline:
    def __init__(self, conf, api_key):
        self.conf = conf
        self.token = api_key
        self.model, self.tokenizer = self.get_model()
    def get_model(self):
        # Load the PEFT adapter together with its base model; the config
        # flag controls whether 4-bit quantization is used.
        model = AutoPeftModelForCausalLM.from_pretrained(
            self.conf["model"]["model_name"],
            load_in_4bit=self.conf["model"]["load_in_4bit"],
        )
        tokenizer = AutoTokenizer.from_pretrained(self.conf["model"]["model_name"])
        return model, tokenizer
    def infer(self, prompt):
        # Tokenize the prompt, generate on the GPU, then decode back to text.
        inputs = self.tokenizer([prompt], return_tensors="pt").to("cuda")
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=self.conf["model"]["max_new_tokens"],
            use_cache=True,
        )
        outputs = self.tokenizer.batch_decode(outputs)
        return outputs
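

if __name__ == "__main__":
    # Minimal usage sketch: the config values and adapter path below are
    # placeholders, not part of the original pipeline, and must be adapted
    # to your own fine-tuned checkpoint.
    conf = {
        "model": {
            "model_name": "path/to/your-peft-adapter",  # hypothetical path
            "load_in_4bit": True,
            "max_new_tokens": 128,
        }
    }
    pipeline = InferencePipeline(conf, api_key=None)  # token is stored but unused here
    print(pipeline.infer("Hello, world!"))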