Spaces:
Paused
Paused
Upload 5 files
Browse files- app.py +48 -0
- model/sst-2-english/config.json +31 -0
- model/sst-2-english/pytorch_model.bin +3 -0
- model/sst-2-english/tokenizer_config.json +1 -0
- model/sst-2-english/vocab.txt +0 -0
app.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import string
|
2 |
+
import gradio as gr
|
3 |
+
import requests
|
4 |
+
import torch
|
5 |
+
from transformers import (
|
6 |
+
AutoConfig,
|
7 |
+
AutoModelForSequenceClassification,
|
8 |
+
AutoTokenizer,
|
9 |
+
)
|
10 |
+
|
11 |
+
# Map model output indices to human-readable sentiment labels.
custom_labels = {0: "neg", 1: "pos"}

# Use a forward-slash path: the original raw Windows path r'model\sst-2-english'
# contains a literal backslash on Linux (where HF Spaces run) and fails to
# resolve; forward slashes are accepted on Windows as well.
model_dir = "model/sst-2-english"

# Load the locally stored fine-tuned checkpoint (DistilBERT SST-2 per the
# bundled config.json) for binary text classification.
config = AutoConfig.from_pretrained(
    model_dir, num_labels=2, finetuning_task="text-classification"
)
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSequenceClassification.from_pretrained(model_dir, config=config)

# Override the checkpoint's NEGATIVE/POSITIVE labels with the custom ones so
# downstream lookups via id2label return "neg"/"pos".
model.config.id2label = custom_labels
model.config.label2id = {v: k for k, v in custom_labels.items()}
20 |
+
def inference(input_text):
    """Classify the sentiment of *input_text*.

    Args:
        input_text: Raw input string typed into the Gradio textbox.

    Returns:
        The predicted label string ("neg" or "pos") from
        ``model.config.id2label``.
    """
    # Calling the tokenizer directly is the modern API; the legacy
    # batch_encode_plus call also passed the deprecated pad_to_max_length=True
    # flag, which duplicated padding="max_length" and triggers warnings.
    inputs = tokenizer(
        [input_text],
        max_length=512,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )

    # No gradients needed for pure inference.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Single example in the batch, so a global argmax picks its top class.
    predicted_class_id = logits.argmax().item()
    return model.config.id2label[predicted_class_id]
|
36 |
+
|
37 |
+
# Gradio UI: one text box in, the predicted sentiment label out.
demo = gr.Interface(
    fn=inference,
    inputs=gr.Textbox(label="Input Text", scale=2, container=False),
    outputs=gr.Textbox(label="Output Label"),
    # Each examples row must match the interface's inputs (exactly one text
    # field). The original rows also packed a gold label (1 / 0) as a second
    # value, which Gradio treats as an extra input and rejects.
    examples=[
        ["My last two weather pics from the storm on August 2nd. People packed up real fast after the temp dropped and winds picked up."],
        ["Lying Clinton sinking! Donald Trump singing: Let's Make America Great Again!"],
    ],
    title="Tutorial: BERT-based Text Classification",  # fixed typo "Classificatioin"
)

# debug=True surfaces tracebacks in the Space logs while developing.
demo.launch(debug=True)
|
model/sst-2-english/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation": "gelu",
|
3 |
+
"architectures": [
|
4 |
+
"DistilBertForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_dropout": 0.1,
|
7 |
+
"dim": 768,
|
8 |
+
"dropout": 0.1,
|
9 |
+
"finetuning_task": "sst-2",
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"id2label": {
|
12 |
+
"0": "NEGATIVE",
|
13 |
+
"1": "POSITIVE"
|
14 |
+
},
|
15 |
+
"initializer_range": 0.02,
|
16 |
+
"label2id": {
|
17 |
+
"NEGATIVE": 0,
|
18 |
+
"POSITIVE": 1
|
19 |
+
},
|
20 |
+
"max_position_embeddings": 512,
|
21 |
+
"model_type": "distilbert",
|
22 |
+
"n_heads": 12,
|
23 |
+
"n_layers": 6,
|
24 |
+
"output_past": true,
|
25 |
+
"pad_token_id": 0,
|
26 |
+
"qa_dropout": 0.1,
|
27 |
+
"seq_classif_dropout": 0.2,
|
28 |
+
"sinusoidal_pos_embds": false,
|
29 |
+
"tie_weights_": true,
|
30 |
+
"vocab_size": 30522
|
31 |
+
}
|
model/sst-2-english/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60554cbd7781b09d87f1ececbea8c064b94e49a7f03fd88e8775bfe6cc3d9f88
|
3 |
+
size 267844284
|
model/sst-2-english/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_max_length": 512, "do_lower_case": true}
|
model/sst-2-english/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|