from typing import Any, Dict, List

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer


class EndpointHandler:
    """Inference handler that classifies an (answer, source) text pair.

    The sequence-classification model is loaded from ``path``; the tokenizer
    is always the ``roberta-large`` tokenizer with right-side padding.
    """

    def __init__(self, path=""):
        """Load the model and tokenizer once so requests only pay for inference.

        Args:
            path: Location handed to
                ``AutoModelForSequenceClassification.from_pretrained``.
        """
        self.model = AutoModelForSequenceClassification.from_pretrained(path)
        self.tokenizer = AutoTokenizer.from_pretrained(
            "roberta-large", padding_side="right"
        )
        # Inference only: put the model in eval mode once here rather than
        # repeating the call on every request.
        self.model.eval()

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Predict the class index for one answer/source pair.

        Args:
            data: Request payload. The pair is read from ``data["inputs"]``
                when that key exists (the key is popped from ``data``),
                otherwise from ``data`` itself. The resulting mapping must
                provide ``"answer"`` and ``"source"``.

        Returns:
            A one-element list ``[{"label": <int>}]`` where the label is the
            index of the largest logit.

        Raises:
            KeyError: If ``"answer"`` or ``"source"`` is missing.
        """
        payload = data.pop("inputs", data)

        # Encode the two texts together as a single sentence-pair input.
        encoded = self.tokenizer(
            payload["answer"],
            payload["source"],
            truncation=True,
            max_length=None,
            return_tensors="pt",
        )

        # No gradients are needed at inference time; skipping autograd
        # bookkeeping saves memory and time.
        with torch.no_grad():
            output = self.model(**encoded)

        prediction = output.logits.argmax(dim=-1)

        # ``.item()`` only works on a single-element tensor, so exactly one
        # pair per request is expected.
        return [{"label": prediction.item()}]