File size: 5,845 Bytes
5e1514b
 
0bf42ca
 
 
 
 
 
5e1514b
0bf42ca
5e1514b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0bf42ca
 
5e1514b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0bf42ca
5e1514b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
df513ba
5e1514b
 
df513ba
5e1514b
 
 
 
 
 
df513ba
 
 
5e1514b
 
 
 
 
df513ba
5e1514b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
df513ba
5e1514b
df513ba
 
 
 
 
5e1514b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
df513ba
5e1514b
df513ba
 
b58280d
5e1514b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import gradio as gr
import os
from transformers import AutoTokenizer
from .get_loss.get_loss_hf import run_get_loss

# os.system('git clone https://github.com/EleutherAI/lm-evaluation-harness')
# os.system('cd lm-evaluation-harness')
# os.system('pip install -e .')
# Feature 1: display the input text colored according to its corresponding loss values

def color_text(text_list=None, loss_list=None):
    """
    Pair each piece of text with its loss value for highlighted display.

    Args:
        text_list: Iterable of text pieces. Falls back to
            ``["hi", "FreshEval"]`` when omitted or ``None``.
        loss_list: Iterable of loss values, matched to ``text_list`` by
            position. Falls back to ``[0.1, 0.7]`` when omitted or ``None``.

    Returns:
        A new list of ``(text, loss)`` tuples. Pairing stops at the end of
        the shorter input.
    """
    # Build the defaults per call instead of sharing mutable default objects.
    if text_list is None:
        text_list = ["hi", "FreshEval"]
    if loss_list is None:
        loss_list = [0.1, 0.7]

    # The raw loss is used directly as the highlight value (no thresholding).
    highlighted_text = list(zip(text_list, loss_list))

    # Debug trace: emitted once for the final result rather than once per
    # loop iteration.
    print(highlighted_text)
    return highlighted_text

# Feature 2: convert a list of IDs to text with the tokenizer and display it
def get_text(ids_list=[0.1,0.7], tokenizer=None):
    """
    Convert a list of IDs into text pieces (placeholder implementation).

    Both arguments are currently ignored; a fixed two-item list is returned.
    """
    # Planned implementation (not enabled): load the tokenizer by name with
    # AutoTokenizer.from_pretrained(...) and decode the IDs with
    # skip_special_tokens=True. Colors or other styling could be added to the
    # returned text later if needed.
    placeholder_pieces = ['Hi', 'Adam']
    return placeholder_pieces


def get_ids_loss(text, tokenizer, model):
    """
    Return the IDs and loss values for a text (placeholder implementation).

    None of the arguments is used yet; the same fixed values are returned on
    every call.

    Returns:
        A tuple ``(ids, losses)``, currently always ``([1, 2], [0.1, 0.7])``.
    """
    # Planned implementation (not enabled): load the tokenizer and a causal
    # LM by name and compute real IDs and losses here.
    placeholder_ids = [1, 2]
    placeholder_losses = [0.1, 0.7]
    return placeholder_ids, placeholder_losses


def color_pipeline(text=["hi", "FreshEval"],  model=None):
    """
    Produce the highlighted (text, loss) pairs for a given text.

    Chains the three helpers: ``get_ids_loss`` -> ``get_text`` ->
    ``color_text``.
    """
    # No tokenizer is resolved yet, so None is passed through. The disabled
    # plan was to take it from the dict returned by run_get_loss(), which has
    # the keys 'logit', 'input_ids', 'tokenizer' and 'neg_log_prob_temp'.
    tokenizer = None
    token_ids, losses = get_ids_loss(text, tokenizer, model)
    pieces = get_text(token_ids, tokenizer)
    return color_text(pieces, losses)


# TODO can this be global ? maybe need session to store info of the user

# Build the Gradio interface
# Top-level UI definition: one Blocks app with a tab per feature. Components
# are rendered in the order they are created, so statement order matters.
with gr.Blocks() as demo:
    with gr.Tab("color your text"):
        with gr.Row():
            text_input = gr.Textbox(label="input text", placeholder="input your text here...")
            # TODO drag and drop the file

            # loss_input = gr.Number(label="loss")
            model_input = gr.Textbox(label="model name", placeholder="input your model name here... now I am trying phi-2...")
            # TODO select models that can be used online
            # TODO maybe add our own models


            # NOTE(review): this HTML component is never used as an event
            # output; the click below writes to a separate HighlightedText.
            color_text_output = gr.HTML(label="colored text")
            # gr.Markdown("## Text Examples")
            # gr.Examples(
            #     [["hi", "Adam"], [0.1,0.7]],
            #     [text_input, loss_input],
            #     cache_examples=True,
            #     fn=color_text,
            #     outputs=color_text_output
            # )
        # Create the button first, then attach the handler, so the name is
        # bound to the Button itself rather than to the return value of .click().
        color_text_button = gr.Button("color the text")
        color_text_button.click(color_pipeline, inputs=[text_input, model_input], outputs=gr.HighlightedText(label="colored text"))


        date_time_input = gr.Textbox(label="the date when the text is generated")#TODO add date time input
        description_input = gr.Textbox(label="description of the text")
        # No handler exists yet, so no event is registered. Calling .click()
        # without a function attaches nothing (and fails on Gradio releases
        # where `fn` is a required parameter).
        submit_button = gr.Button("submit a post or record")
        #TODO add model and its score

    with gr.Tab('test your qeustion'):
        '''
        use extract, or use ppl
        '''
        question=gr.Textbox(placeholder='input your question here...')
        answer=gr.Textbox(placeholder='input your answer here...')
        other_choices=gr.Textbox(placeholder='input your other choices here...')

        # Button is rendered but intentionally not wired: test_question below
        # depends on get_ppl, which is not defined in this file.
        test_button=gr.Button('test')
        #TODO add the model and its score

        def test_question(question, answer, other_choices):
            '''
            use extract, or use ppl
            '''
            # NOTE(review): get_ppl is not defined or imported in this file;
            # calling this function as-is raises NameError.
            answer_ppl, other_choices_ppl = get_ppl(question, answer, other_choices)
            return answer_ppl, other_choices_ppl



    with gr.Tab("model text ppl with time"):
        '''
        see the matplotlib example, to see ppl with time, select the models
        '''
        # load the json file with time,


    with gr.Tab("model quesion acc with time"):
        '''
        see the matplotlib example, to see ppl with time, select the models
        '''
        #


    with gr.Tab("hot questions"):
        '''
        see the questions and answers
        '''
        with gr.Tab("ppl"):
            '''
            see the questions
            '''


demo.launch(debug=True)





# import gradio as gr
# import os
# os.system('python -m spacy download en_core_web_sm')
# import spacy
# from spacy import displacy

# nlp = spacy.load("en_core_web_sm")

# def text_analysis(text):
#     doc = nlp(text)
#     html = displacy.render(doc, style="dep", page=True)
#     html = (
#         "<div style='max-width:100%; max-height:360px; overflow:auto'>"
#         + html
#         + "</div>"
#     )
#     pos_count = {
#         "char_count": len(text),
#         "token_count": 0,
#     }
#     pos_tokens = []

#     for token in doc:
#         pos_tokens.extend([(token.text, token.pos_), (" ", None)])

#     return pos_tokens, pos_count, html

# demo = gr.Interface(
#     text_analysis,
#     gr.Textbox(placeholder="Enter sentence here..."),
#     ["highlight", "json", "html"],
#     examples=[
#         ["What a beautiful morning for a walk!"],
#         ["It was the best of times, it was the worst of times."],
#     ],
# )

# demo.launch()



# # lm-eval 
# # lm-evaluation-harness