zzz99 committed
Commit
5fa13ec
1 Parent(s): 8817cbf

Delete handler.py

Files changed (1)
  1. handler.py +0 -53
handler.py DELETED
@@ -1,53 +0,0 @@
- from typing import Any, Dict
-
- import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
-
-
- class EndpointHandler:
-     def __init__(self, path=""):
-         # load model and processor from path
-         self.tokenizer = AutoTokenizer.from_pretrained(path)
-         # try:
-         #     config = AutoConfig.from_pretrained(path)
-         model = AutoModelForCausalLM.from_pretrained(
-             path,
-             # return_dict=True,
-             # load_in_8bit=True,
-             device_map="auto",
-             torch_dtype=torch.float16,
-             trust_remote_code=True,
-         )
-         # model.resize_token_embeddings(len(self.tokenizer))
-         # model = PeftModel.from_pretrained(model, path)
-         # except Exception:
-         #     model = AutoModelForCausalLM.from_pretrained(
-         #         path, device_map="auto", load_in_8bit=True, torch_dtype=torch.float16, trust_remote_code=True
-         #     )
-         self.model = model
-         self.device = "cuda" if torch.cuda.is_available() else "cpu"
-
-     def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
-         # process input
-         inputs = data.pop("inputs", data)
-         parameters = data.pop("parameters", None)
-
-         messages = [{'role': 'user', 'content': inputs}]
-         inputs = self.tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(self.device)
-
-         # preprocess
-         # inputs = self.tokenizer(inputs, return_tensors="pt").to(self.device)
-
-         # pass inputs with all kwargs in data
-         if parameters is not None:
-             outputs = self.model.generate(inputs, max_new_tokens=880, num_return_sequences=1, eos_token_id=self.tokenizer.eos_token_id, **parameters)
-         else:
-             outputs = self.model.generate(inputs, max_new_tokens=880, num_return_sequences=1, eos_token_id=self.tokenizer.eos_token_id)
-
-         # postprocess the prediction
-         prediction = self.tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
-
-         return [{"generated_text": prediction}]
-
-         # outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, top_k=50, top_p=0.95, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
-         # print(tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True))
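
For reference, the removed class follows the custom-handler contract used by inference endpoints: __call__ receives a dict with an "inputs" string and an optional "parameters" dict, and returns a list containing a "generated_text" entry. A minimal local smoke test might look like the sketch below; the ./model path and the prompt are placeholders, not files from this repo.

# Hypothetical local check of the deleted handler (sketch only).
# Assumes handler.py (as it existed before this commit) plus a local model copy at ./model.
from handler import EndpointHandler

handler = EndpointHandler(path="./model")  # placeholder model directory

payload = {
    "inputs": "Write a haiku about autumn.",  # free-form user prompt
    "parameters": {"do_sample": False},       # forwarded as **kwargs to model.generate()
}

result = handler(payload)                     # returns [{"generated_text": "..."}]
print(result[0]["generated_text"])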