from transformers import BertModel, BertTokenizer
from torch.utils.data import Dataset, DataLoader
from config import params
from torch import nn
import numpy as np
import torch

# Run on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
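
# The `params` dict comes from config.py, which is not shown here. A minimal
# sketch of the keys this script reads (the values below are placeholders,
# not the original settings):
#
# params = {
#     'pretrained_model_name': 'bert-base-uncased',
#     'dropout': 0.3,
#     'max_length': 128,
#     'batch_size': 16,
#     'path_to_model_bin': 'model.bin',
# }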
class TextDataset(Dataset):
    """Wraps raw texts and tokenizes them on the fly for BERT."""
    def __init__(self, texts, max_len):
        self.texts = texts
        self.tokenizer = BertTokenizer.from_pretrained(params['pretrained_model_name'])
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        text = str(self.texts[item])
        # Tokenize, pad/truncate to max_len, and return PyTorch tensors.
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            return_attention_mask=True,
            truncation=True,
            return_tensors='pt',
        )
        return {
            'text': text,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
        }
class SentimentClassifier(nn.Module):
    """BERT encoder with a dropout + linear classification head."""
    def __init__(self, n_classes):
        super(SentimentClassifier, self).__init__()
        self.bert = BertModel.from_pretrained(params['pretrained_model_name'])
        self.drop = nn.Dropout(params['dropout'])
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        # return_dict=False yields a (last_hidden_state, pooled_output) tuple.
        bert_output = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False
        )
        last_hidden_state, pooled_output = bert_output
        # Classify from the pooled [CLS] representation.
        output = self.drop(pooled_output)
        return self.out(output)
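
# Shape sketch for one forward pass, assuming a bert-base checkpoint
# (hidden_size=768) and the hypothetical config values sketched above:
#   input_ids / attention_mask: (batch_size, max_length) -> (16, 128)
#   pooled_output:              (16, 768)
#   logits:                     (16, 6)   # n_classes=6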
class PredictionModel:
    """Loads the fine-tuned classifier and runs batched inference."""
    def __init__(self):
        # Move the model to the same device the input batches will use.
        self.model = SentimentClassifier(n_classes=6).to(device)

    def create_data_loader(self, X_test, max_len, batch_size):
        ds = TextDataset(
            texts=np.array(X_test),
            max_len=max_len
        )
        return DataLoader(
            ds,
            batch_size=batch_size
        )
    def predict(self, X_test: list):
        data_loader = self.create_data_loader(X_test, params['max_length'], params['batch_size'])
        # map_location keeps checkpoint loading portable between GPU and CPU hosts.
        self.model.load_state_dict(torch.load(params['path_to_model_bin'], map_location=device))
        self.model.eval()
        y_pred = []
        with torch.no_grad():
            for d in data_loader:
                input_ids = d["input_ids"].to(device)
                attention_mask = d["attention_mask"].to(device)
                outputs = self.model(
                    input_ids=input_ids,
                    attention_mask=attention_mask
                )
                # The predicted class is the index of the largest logit.
                _, preds = torch.max(outputs, dim=1)
                y_pred += preds.tolist()
        return y_pred
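
# Minimal usage sketch, assuming a fine-tuned checkpoint already exists at
# params['path_to_model_bin'] and that class indices 0..5 map to the six
# sentiment labels used during training (the label names are not defined in
# this file; the example texts below are hypothetical).
if __name__ == "__main__":
    model = PredictionModel()
    predictions = model.predict(["I really enjoyed this!", "This was awful."])
    print(predictions)  # e.g. [4, 0] -- raw class indices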