asadmasad committed
Commit: e83f7e4
Parent: 53a0f86

Create handler.py

Files changed (1)
  1. handler.py +47 -0
handler.py ADDED
@@ -0,0 +1,47 @@
+ from typing import Any, Dict, List
+
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
+
+
+ class EndpointHandler:
+     def __init__(self, path=""):
+         # Load the tokenizer and model from the repository path.
+         self.tokenizer = AutoTokenizer.from_pretrained(path)
+         # try:
+         #     config = AutoConfig.from_pretrained(path)
+         model = AutoModelForCausalLM.from_pretrained(
+             path,
+             # return_dict=True,
+             # load_in_8bit=True,
+             device_map="auto",
+             torch_dtype=torch.float16,
+             trust_remote_code=True,
+         )
+         # model.resize_token_embeddings(len(self.tokenizer))
+         # model = PeftModel.from_pretrained(model, path)
+         # except Exception:
+         #     model = AutoModelForCausalLM.from_pretrained(
+         #         path, device_map="auto", load_in_8bit=True, torch_dtype=torch.float16, trust_remote_code=True
+         #     )
+         self.model = model
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, str]]:
+         # Pull the prompt and optional generation parameters out of the payload.
+         inputs = data.pop("inputs", data)
+         parameters = data.pop("parameters", None)
+
+         # Tokenize the prompt and move the tensors to the model's device.
+         inputs = self.tokenizer(inputs, return_tensors="pt").to(self.device)
+
+         # Generate, forwarding any user-supplied generation kwargs.
+         if parameters is not None:
+             outputs = self.model.generate(**inputs, **parameters)
+         else:
+             outputs = self.model.generate(**inputs)
+
+         # Decode the generated token ids back into text.
+         prediction = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+         return [{"generated_text": prediction}]