Spaces:

fnavales
/

hate-speech

Runtime error

App Files Community

fnavales committed on Nov 9, 2022

Commit

464d4fc

•

1 Parent(s): 8235435

Update app.py

Browse files

Files changed (1) hide show

app.py +0 -107

app.py CHANGED Viewed

@@ -1,112 +1,5 @@
 import gradio as gr
-<<<<<<< HEAD
 from detoxify import Detoxify
-=======
-import torch.nn as nn
-import torch
-from transformers import BertTokenizerFast as BertTokenizer, BertModel
-import pytorch_lightning as pl
-BERT_MODEL_NAME = 'bert-base-uncased'
-tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
-LABEL_COLUMNS = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
-MAX_TOKEN_COUNT = 300
-class ToxicCommentTagger(pl.LightningModule):
-    def __init__(self, n_classes: int, n_training_steps=None, n_warmup_steps=None):
-        super().__init__()
-        self.bert = BertModel.from_pretrained(BERT_MODEL_NAME, return_dict=True)
-        self.classifier = nn.Linear(self.bert.config.hidden_size, n_classes)
-        self.n_training_steps = n_training_steps
-        self.n_warmup_steps = n_warmup_steps
-        self.criterion = nn.BCELoss()
-    def forward(self, input_ids, attention_mask, labels=None):
-        output = self.bert(input_ids, attention_mask=attention_mask)
-        output = self.classifier(output.pooler_output)
-        output = torch.sigmoid(output)
-        loss = 0
-        if labels is not None:
-            loss = self.criterion(output, labels)
-        return loss, output
-def predict(model, tokenizer, sentence):
-    encoding = tokenizer.encode_plus(
-        sentence,
-        add_special_tokens=False,
-        max_length=MAX_TOKEN_COUNT,
-        return_token_type_ids=False,
-        padding="max_length",
-        return_attention_mask=True,
-        return_tensors='pt'
-    )
-    # define target chunksize
-    chunksize = MAX_TOKEN_COUNT
-    # split into chunks of 510 tokens, we also convert to list (default is tuple which is immutable)
-    input_id_chunks = list(encoding['input_ids'][0].split(chunksize - 2))
-    mask_chunks = list(encoding['attention_mask'][0].split(chunksize - 2))
-    # loop through each chunk
-    for i in range(len(input_id_chunks)):
-        # add CLS and SEP tokens to input IDs
-        input_id_chunks[i] = torch.cat([
-            torch.tensor([101]), input_id_chunks[i], torch.tensor([102])
-        ])
-        # add attention tokens to attention mask
-        mask_chunks[i] = torch.cat([
-            torch.tensor([1]), mask_chunks[i], torch.tensor([1])
-        ])
-        # get required padding length
-        pad_len = chunksize - input_id_chunks[i].shape[0]
-        # check if tensor length satisfies required chunk size
-        if pad_len > 0:
-            # if padding length is more than 0, we must add padding
-            input_id_chunks[i] = torch.cat([
-                input_id_chunks[i], torch.Tensor([0] * pad_len)
-            ])
-            mask_chunks[i] = torch.cat([
-                mask_chunks[i], torch.Tensor([0] * pad_len)
-            ])
-    input_ids = torch.stack(input_id_chunks)
-    attention_mask = torch.stack(mask_chunks)
-    input_dict = {
-        'input_ids': input_ids.long(),
-        'attention_mask': attention_mask.int()
-    }
-    _, test_prediction = model(**input_dict)
-    test_prediction = test_prediction.numpy()
-    output = {}
-    for chunk in test_prediction:
-        for label, prediction in zip(LABEL_COLUMNS, chunk):
-            if label in output:
-                output[label] = max(prediction, output[label])
-            else:
-                output[label] = prediction
-    return output
-model = ToxicCommentTagger.load_from_checkpoint(
-    './best-checkpoint.ckpt',
-    n_classes=len(LABEL_COLUMNS)
-)
-model.eval()
-model.freeze()
->>>>>>> 2a04af3d9d5ddbaa3eb1631c0e56d215462a7e36
 all_categories = {'all_categories': [
                                     'toxicity',

 import gradio as gr
 from detoxify import Detoxify
 all_categories = {'all_categories': [
                                     'toxicity',