fakeym committed on
Commit
e7dc454
1 Parent(s): e5b5d59

Upload 5 files

Browse files
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import string
import gradio as gr
import requests
import torch
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

# Map the model's two class indices to short human-readable labels.
custom_labels = {0: "neg", 1: "pos"}

# NOTE(review): Windows-style backslash path — replace with a forward-slash
# path (or pathlib.Path) if this app must also run on Linux, e.g. on Spaces.
model_dir = r'model\sst-2-english'

# Load the fine-tuned DistilBERT sentiment classifier from the local checkpoint.
config = AutoConfig.from_pretrained(model_dir, num_labels=2, finetuning_task="text-classification")
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSequenceClassification.from_pretrained(model_dir, config=config)

# Override the checkpoint's NEGATIVE/POSITIVE names with the custom labels
# so prediction output uses "neg"/"pos".
model.config.id2label = custom_labels
model.config.label2id = {v: k for k, v in custom_labels.items()}
def inference(input_text):
    """Classify the sentiment of *input_text*.

    Parameters
    ----------
    input_text : str
        Raw text to classify.

    Returns
    -------
    str
        The predicted label from ``model.config.id2label`` ("neg" or "pos").
    """
    # Encode a single-element batch, padded/truncated to the model max of 512.
    # The deprecated `pad_to_max_length=True` was dropped: it is redundant
    # with (and superseded by) padding="max_length".
    inputs = tokenizer.batch_encode_plus(
        [input_text],
        max_length=512,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )

    # Inference only — no gradient bookkeeping needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    # argmax over the two class logits -> class index -> label string.
    predicted_class_id = logits.argmax().item()
    output = model.config.id2label[predicted_class_id]
    return output
# Build the Gradio UI: one text input, one text output, wired to inference().
demo = gr.Interface(
    fn=inference,
    inputs=gr.Textbox(label="Input Text", scale=2, container=False),
    outputs=gr.Textbox(label="Output Label"),
    # With a single input component, each example row must be a one-element
    # list. The original rows carried a stray trailing label int, which does
    # not match the number of input components.
    examples=[
        ["My last two weather pics from the storm on August 2nd. People packed up real fast after the temp dropped and winds picked up."],
        ["Lying Clinton sinking! Donald Trump singing: Let's Make America Great Again!"],
    ],
    title="Tutorial: BERT-based Text Classification",  # fixed typo "Classificatioin"
)

# debug=True surfaces tracebacks in the UI while developing.
demo.launch(debug=True)
model/sst-2-english/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "finetuning_task": "sst-2",
  "hidden_dim": 3072,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "vocab_size": 30522
}
model/sst-2-english/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60554cbd7781b09d87f1ececbea8c064b94e49a7f03fd88e8775bfe6cc3d9f88
3
+ size 267844284
model/sst-2-english/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_max_length": 512, "do_lower_case": true}
model/sst-2-english/vocab.txt ADDED
The diff for this file is too large to render. See raw diff