Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -5,9 +5,10 @@ from transformers import BertTokenizerFast as BertTokenizer, BertModel
|
|
5 |
import pytorch_lightning as pl
|
6 |
|
7 |
|
8 |
-
BERT_MODEL_NAME = 'bert-base-
|
9 |
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
|
10 |
LABEL_COLUMNS = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
|
|
|
11 |
|
12 |
|
13 |
class ToxicCommentTagger(pl.LightningModule):
|
@@ -36,7 +37,7 @@ def predict(model, tokenizer, sentence):
|
|
36 |
encoding = tokenizer.encode_plus(
|
37 |
sentence,
|
38 |
add_special_tokens=False,
|
39 |
-
max_length=
|
40 |
return_token_type_ids=False,
|
41 |
padding="max_length",
|
42 |
return_attention_mask=True,
|
@@ -44,7 +45,7 @@ def predict(model, tokenizer, sentence):
|
|
44 |
)
|
45 |
|
46 |
# define target chunksize
|
47 |
-
chunksize =
|
48 |
|
49 |
# split into chunks of 510 tokens, we also convert to list (default is tuple which is immutable)
|
50 |
input_id_chunks = list(encoding['input_ids'][0].split(chunksize - 2))
|
|
|
5 |
import pytorch_lightning as pl
|
6 |
|
7 |
|
8 |
+
BERT_MODEL_NAME = 'bert-base-uncased'
|
9 |
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
|
10 |
LABEL_COLUMNS = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
|
11 |
+
MAX_TOKEN_COUNT = 300
|
12 |
|
13 |
|
14 |
class ToxicCommentTagger(pl.LightningModule):
|
|
|
37 |
encoding = tokenizer.encode_plus(
|
38 |
sentence,
|
39 |
add_special_tokens=False,
|
40 |
+
max_length=MAX_TOKEN_COUNT,
|
41 |
return_token_type_ids=False,
|
42 |
padding="max_length",
|
43 |
return_attention_mask=True,
|
|
|
45 |
)
|
46 |
|
47 |
# define target chunksize
|
48 |
+
chunksize = MAX_TOKEN_COUNT
|
49 |
|
50 |
# split into chunks of (chunksize - 2) tokens, i.e. 298 with MAX_TOKEN_COUNT = 300; we also convert to a list (the default is a tuple, which is immutable)
|
51 |
input_id_chunks = list(encoding['input_ids'][0].split(chunksize - 2))
|