File size: 7,152 Bytes
5efc817
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from transformers import pipeline

import gradio as gr


# Text-classification pipelines, looked up as pipelines_text[task][model family].
# Every checkpoint listed below is instantiated when this module is imported,
# in the order it appears in the table.
pipelines_text = {
    task: {
        family: pipeline("text-classification", model=checkpoint)
        for family, checkpoint in checkpoints.items()
    }
    for task, checkpoints in {
        'Spam': {
            'BERT': "mariagrandury/distilbert-base-uncased-finetuned-sms-spam-detection",
            'RoBERTa': "mariagrandury/roberta-base-finetuned-sms-spam-detection",
        },
        'Sentiment': {
            'BERT': "lxyuan/distilbert-base-multilingual-cased-sentiments-student",
            'RoBERTa': "cardiffnlp/twitter-roberta-base-sentiment-latest",
        },
        'Emotion': {
            'BERT': "bhadresh-savani/bert-base-go-emotion",
            'RoBERTa': "j-hartmann/emotion-english-distilroberta-base",
        },
    }.items()
}

def parseImage(file, radio):
    """Return the ``name`` attribute of the uploaded file object.

    Args:
        file: Object exposing a ``name`` attribute (the uploaded file).
        radio: Accepted for the callback signature; not used.

    Returns:
        Whatever ``file.name`` holds.
    """
    uploaded_name = file.name
    return uploaded_name

# Upper bound on the number of text boxes; the component lists in the UI and
# the callbacks in this file all size their inputs/outputs to this value.
max_textboxes = 100


def change_textboxes(n):
    """Build updates that show the first ``n`` text boxes and hide the rest.

    Args:
        n: Requested number of visible boxes (the slider value). It is
            converted with ``int`` and clamped to ``[0, max_textboxes]`` so a
            float or out-of-range value cannot break the list arithmetic.

    Returns:
        A list of exactly ``max_textboxes`` ``gr.Textbox.update(...)``
        results: ``visible=True`` for the first ``n`` entries and
        ``visible=False`` for the remainder (all with ``interactive=True``).
    """
    # Coerce once: list repetition rejects floats, and the result must always
    # contain one update per output component.
    visible_count = max(0, min(int(n), max_textboxes))
    shown = [gr.Textbox.update(visible=True, interactive=True)] * visible_count
    hidden = [gr.Textbox.update(visible=False, interactive=True)] * (max_textboxes - visible_count)
    return shown + hidden

def parseText(text_upload_file, delimeter_dropdown):
    """Split an uploaded text file into one string per text box.

    Args:
        text_upload_file: Uploaded file object; only its ``name`` attribute is
            read and passed to ``open``. ``None`` (nothing uploaded) yields
            all-empty boxes instead of raising.
        delimeter_dropdown: One of ``'New Line'``, ``'Tab'`` or ``'Comma'``.
            ``None`` (nothing selected) is treated as ``'New Line'``.

    Returns:
        A list of exactly ``max_textboxes`` strings: the file's segments in
        order, padded with ``''``. Segments beyond ``max_textboxes`` are
        dropped because there is no text box to hold them.

    Raises:
        KeyError: If ``delimeter_dropdown`` is a value other than the three
            supported names (or ``None``).
    """
    delimeter_mapping = {'New Line': '\n', 'Tab': '\t', 'Comma': ','}
    if delimeter_dropdown is None:
        delimeter_dropdown = 'New Line'
    delimeter = delimeter_mapping[delimeter_dropdown]

    text_boxes = [''] * max_textboxes
    if text_upload_file is None:
        return text_boxes

    with open(text_upload_file.name, 'r', encoding='utf-8') as f:
        text_upload = f.read()

    # Truncate before assigning: indexing past max_textboxes used to raise
    # IndexError for files with more segments than available boxes.
    segments = text_upload.split(delimeter)[:max_textboxes]
    text_boxes[:len(segments)] = segments
    return text_boxes

def annotateText(text_boxes_slider, annotation_radio, model_dropdown, *text_boxes_texbox):
    """Classify the leading texts and export the results to ``annotations.csv``.

    Args:
        text_boxes_slider: Number of leading text boxes to send to the model.
            Converted with ``int`` and clamped to the number of texts
            received and to ``max_textboxes``.
        annotation_radio: Task key into ``pipelines_text``. For ``'Spam'`` the
            raw label ``'LABEL_0'`` is shown as ``'Not Spam'`` and any other
            label as ``'Spam'``.
        model_dropdown: Model key within the selected task.
        *text_boxes_texbox: Contents of the text boxes, in display order.

    Returns:
        A flat list: the CSV path, then the input texts, then
        ``max_textboxes`` labels, then ``max_textboxes`` confidence strings
        (two decimals); unannotated positions hold ``''``.

    Raises:
        KeyError: If the task or model key is not present in
            ``pipelines_text``.
    """
    import csv  # local import keeps this function independent of the file's import header

    text_boxes_texbox = list(text_boxes_texbox)
    res_label = [''] * max_textboxes
    res_score = [''] * max_textboxes

    # The slider value may arrive as a float, and must never exceed what was
    # actually passed in or what the result lists can hold.
    n_texts = max(0, min(int(text_boxes_slider), len(text_boxes_texbox), max_textboxes))

    # predictions
    pipe = pipelines_text[annotation_radio][model_dropdown]
    predictions = pipe(text_boxes_texbox[:n_texts]) if n_texts else []
    for idx, pred in enumerate(predictions):
        # special case for spam (might change later)
        if annotation_radio == 'Spam':
            res_label[idx] = 'Not Spam' if pred['label'] == 'LABEL_0' else 'Spam'
        else:
            res_label[idx] = pred['label']
        res_score[idx] = '{:.2f}'.format(pred['score'])

    # csv.writer quotes fields containing commas, quotes or newlines, which the
    # previous string formatting wrote verbatim and thereby corrupted the rows.
    # lineterminator='\n' keeps the line endings the file had before.
    with open('annotations.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(['text', 'annotation', 'confidence'])
        for text, label, score in zip(text_boxes_texbox, res_label, res_score):
            if text:
                writer.writerow([text, label, score])

    return ['./annotations.csv'] + text_boxes_texbox + res_label + res_score

# UI definition: a "Text" tab (upload -> viewer -> annotated output) and a
# placeholder "Image" tab. Components are created in display order.
with gr.Blocks() as demo:
    gr.Markdown("# Data Annotation Tool")
    gr.Markdown('Upload a file or enter text in the Data Viewer section. Sample files are at the end of the page.')
    with gr.Tab("Text"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("## Data Upload")
                text_upload_file = gr.File(file_types=['text'])
                # Preselect a delimiter so "Parse File" works without first
                # touching the dropdown (an unset dropdown has no mapping entry).
                delimeter_dropdown = gr.Dropdown(choices=['New Line','Tab','Comma'], label='Delimeter', value='New Line')
                text_upload_button = gr.Button('Parse File')

        with gr.Row():
            with gr.Column():
                gr.Markdown("## Data Viewer")
                # slider component
                text_boxes_slider = gr.Slider(1, max_textboxes, value=3, step=1)
                # text box components (3 visible and max_textboxes-3 not visible)
                text_boxes_texbox = [gr.Textbox(show_label=False,interactive=True) for i in range(3)] + [gr.Textbox(show_label=False, visible=False) for i in range(max_textboxes-3)]
                # Defaults must be valid keys of pipelines_text: the radio
                # previously defaulted to 'RoBERTa' (a model name, not a task)
                # and the model dropdown had no default, so annotating without
                # changing the controls failed on the pipeline lookup.
                annotation_radio = gr.Radio(choices=['Spam', 'Sentiment', 'Emotion'], label='Annotation', value='Spam')
                model_dropdown = gr.Dropdown(choices=['BERT', 'RoBERTa'], label='Model', value='RoBERTa')
                text_submit_button = gr.Button('Annotate Data')
        with gr.Row():
            gr.Markdown("## Data Output")
        with gr.Row():
            # Three parallel columns of read-only boxes; row i of each column
            # belongs to the same input text.
            with gr.Column(scale=6):
                gr.Markdown("Text")
                text_output_boxes = [gr.Textbox(show_label=False,interactive=False) for i in range(3)] + [gr.Textbox(show_label=False, visible=False, interactive=False) for i in range(max_textboxes-3)]
            with gr.Column(scale=1):
                gr.Markdown("Annotations")
                text_output_annotations_boxes = [gr.Textbox(show_label=False,interactive=False) for i in range(3)] + [gr.Textbox(show_label=False, visible=False, interactive=False) for i in range(max_textboxes-3)]
            with gr.Column(scale=1):
                gr.Markdown("Confidence")
                text_output_confidence_boxes = [gr.Textbox(show_label=False,interactive=False) for i in range(3)] + [gr.Textbox(show_label=False, visible=False, interactive=False) for i in range(max_textboxes-3)]

        text_ouput_file = gr.File(label='File Output', file_types=['csv'])

        gr.Markdown("## Test Examples")
        with gr.Row():
            with gr.Column():
                gr.Examples(
                    examples=[['./examples/text/spam.txt', 'New Line'],['./examples/text/sentiment.txt', 'New Line'],['./examples/text/emotion.txt', 'New Line']],
                    fn=parseText,
                    inputs=[text_upload_file, delimeter_dropdown],
                    outputs=text_boxes_texbox,
                    cache_examples=True
                )

    # event listeners
    text_upload_button.click(fn=parseText, inputs=[text_upload_file, delimeter_dropdown], outputs=text_boxes_texbox)

    # The slider drives the visibility of all four box lists, one listener per
    # list because change_textboxes returns max_textboxes updates per call.
    text_boxes_slider.change(fn=change_textboxes, inputs=text_boxes_slider, outputs=text_boxes_texbox)
    text_boxes_slider.change(fn=change_textboxes, inputs=text_boxes_slider, outputs=text_output_boxes)
    text_boxes_slider.change(fn=change_textboxes, inputs=text_boxes_slider, outputs=text_output_annotations_boxes)
    text_boxes_slider.change(fn=change_textboxes, inputs=text_boxes_slider, outputs=text_output_confidence_boxes)

    # Output order must match annotateText's return value: file, texts, labels, scores.
    text_submit_button.click(fn=annotateText, inputs=[text_boxes_slider, annotation_radio, model_dropdown] + text_boxes_texbox, outputs=[text_ouput_file]+text_output_boxes + text_output_annotations_boxes+text_output_confidence_boxes)


    with gr.Tab("Image"):
        with gr.Row():
            gr.Markdown("## Coming Soon!")
#         with gr.Row():
#                 file_image = gr.File(file_count=['directory'],file_types=['image'], label='File Upload')
#                 image = gr.Image()
#         with gr.Row():
#                 radio_image = gr.Radio(choices=['Object Detection'], label='Annotation')
#                 models_image = gr.Dropdown(choices=['DETR'], label='Model')            
#         with gr.Row():
#             button_image = gr.Button('Submit')
#         with gr.Row():
#             output_image = gr.File(label='File Output', file_types=['image'])


    # image tab event listeners
#     button_image.click(fn=doImage, inputs=[file_image, radio_image], outputs=output_image)

# Launch the Blocks app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    demo.launch()