jijivski committed
Commit 8b7042b
1 Parent(s): bccb671

add one picture

Files changed (6):
  1. .gitignore +3 -1
  2. app.py +99 -15
  3. data/ob.csv +32 -0
  4. data/ob.py +15 -0
  5. data/tmp.csv +0 -0
  6. plot.py +79 -0
.gitignore CHANGED
@@ -1,2 +1,4 @@
 get_loss/__pycache__/
-*.pyc
+*.pyc
+gradio_cached_examples/
+get_loss/__pycache__/get_loss_hf.cpython-310.pyc
app.py CHANGED
@@ -4,6 +4,8 @@ from transformers import AutoTokenizer
 from get_loss.get_loss_hf import run_get_loss
 import pdb
 from types import SimpleNamespace
+import pandas as pd
+import plotly.express as px
 # os.system('git clone https://github.com/EleutherAI/lm-evaluation-harness')
 # os.system('cd lm-evaluation-harness')
 # os.system('pip install -e .')
@@ -56,6 +58,45 @@ def get_text(ids_list=[0.1,0.7], tokenizer=None):
     # # Here we simply return the IDs and loss values; colors or other styles can be added as needed
     # return [1, 2], [0.1, 0.7]
 
+def harness_eval(question, choices, answer_index, model=None,tokenizer=None):
+    '''
+    use harness to test one question, can specify the model, (extract or ppl)
+    '''
+    # TODO add the model and its score
+    # torch.nn.functional.softmax(output.logits, dim=0)
+    # topk = torch.topk(output.logits, 5)
+
+    return {'A':0.5, 'B':0.3, 'C':0.1, 'D':0.1}
+
+
+
+
+def plotly_plot():#(df, x, y, color,title, x_title, y_title):
+    # plotly_plot(sample_df, 'date', 'loss_mean_at_1000', 'model','ppl with time', 'time', 'ppl')
+    df=pd.read_csv('./data/tmp.csv')
+    df['date'] = pd.to_datetime(df['date'])
+    # sort by date
+    df.sort_values(by='date', inplace=True)
+
+    # use a dic to filter the dataframe
+    df = df[df['file_name'] == 'arxiv_computer_science']
+
+    x,y,color,title, x_title, y_title='date', 'loss_mean_at_1000', 'model','ppl with time', 'time', 'ppl'
+
+    fig = px.line(df, x=x, y=y, color=color,title=title)
+    fig.update_xaxes(title_text=x_title)
+    fig.update_yaxes(title_text=y_title)
+    # fig.update_layout()
+    return fig
+
+# def plotly_plot(df, x, y, color, title, x_title, y_title):
+#     fig = px.line(df, x=x, y=y, color=color, title=title)
+#     fig.update_xaxes(title_text=x_title)
+#     fig.update_yaxes(title_text=y_title)
+#     return fig
+
+
+
 
 def color_pipeline(texts=["Hi","FreshEval","!"], model=None):
     """
@@ -83,17 +124,19 @@ with gr.Blocks() as demo:
     with gr.Tab("color your text"):
         with gr.Row():
             text_input = gr.Textbox(label="input text", placeholder="input your text here...")
+            # file_input = gr.File(file_count="multiple",label='to add content')#
             # TODO craw and drop the file
 
             # loss_input = gr.Number(label="loss")
-            model_input = gr.Textbox(label="model name", placeholder="input your model name here... now I am trying phi-2...")
-            output_box=gr.HighlightedText(label="colored text")
-            # gr.Examples(
-            #     [
-            #         # ["Hi FreshEval !", "microsoft/phi-2"],
-            #         ["Hello FreshBench !", "/home/sribd/chenghao/models/phi-2"],
-            #     ],
-            #     [text_input, model_input],
+            model_input = gr.Textbox(label="model name", placeholder="input your model name here... now I am trying phi-2...")#TODO make a choice here
+            output_box=gr.HighlightedText(label="colored text")#,interactive=True
+
+            gr.Examples(
+                [
+                    ["Hi FreshEval !", "microsoft/phi-2"],
+                    ["Hello FreshBench !", "/home/sribd/chenghao/models/phi-2"],
+                ],
+                [text_input, model_input],)
             #     cache_examples=True,
             #     # cache_examples=False,
             #     fn=color_pipeline,
@@ -103,10 +146,15 @@ with gr.Blocks() as demo:
             # TODO maybe add our own models
 
 
-            color_text_output = gr.HTML(label="colored text")
-
+            color_text_output = gr.HTML(label="colored text")
             color_text_button = gr.Button("color the text").click(color_pipeline, inputs=[text_input, model_input], outputs=output_box)
 
+            # markdown
+            gr.Markdown('### How to use this app')
+
+
+
+
 
             date_time_input = gr.Textbox(label="the date when the text is generated")#TODO add date time input
             description_input = gr.Textbox(label="description of the text")
@@ -117,18 +165,37 @@ with gr.Blocks() as demo:
         '''
         use extract, or use ppl
        '''
-        question=gr.Textbox(placeholder='input your question here...')
-        answer=gr.Textbox(placeholder='input your answer here...')
-        other_choices=gr.Textbox(placeholder='input your other choices here...')
+        question=gr.Textbox(label="input question", placeholder='input your question here...')
+        answer_index=gr.Textbox(label="right answer index", placeholder='index for right anser here, start with 0')#TODO add multiple choices,
+        choices=gr.Textbox(placeholder='input your other choices here...')
 
-        test_button=gr.Button('test').click()
+        # test_button=gr.Button('test').click(harness_eval())# TODO figure out the input and output
+
+        answer_type=gr.Dropdown(label="answer type", choices=['extract', 'ppl'])
+        #TODO add the model and its score
+        answer_label=gr.Label('the answers\'s detail')# RETURN the answer and its score,in the form of dic{str: float}
+
+        test_question_button=gr.Button('test question').click(harness_eval,inputs=[question, choices, answer_index ,answer_type],outputs=[answer_label])
+
+        forecast_q='A Ukrainian counteroffensive began in 2023, though territorial gains by November 2023 were limited (Economist, BBC, Newsweek). The question will be suspended on 31 July 2024 and the outcome determined using data as reported in the Brookings Institution\'s "Ukraine Index" (Brookings Institution - Ukraine Index, see "Percentage of Ukraine held by Russia" chart). If there is a discrepancy between the chart data and the downloaded data (see "Get the data" within the "NET TERRITORIAL GAINS" chart border), the downloaded data will be used for resolution.'
+        answer_list=['Less than 5%','At least 5%, but less than 10%','At least 10%, but less than 15%','At least 15%, but less than 20%','20% or more' ]
+
+        gr.Examples([
+            [forecast_q, '&&&&&&'.join(answer_list), '0']
+        ],
+        [question, choices, answer_index])
+
+        date_time_input = gr.Textbox(label="the date when the text is generated")#TODO add date time input
+        description_input = gr.Textbox(label="description of the text")
+        submit_button = gr.Button("submit a post or record").click()
+
         #TODO add the model and its score
 
         def test_question(question, answer, other_choices):
             '''
             use extract, or use ppl
             '''
-            answer_ppl, other_choices_ppl = get_ppl(question, answer, other_choices)
+            answer_ppl, other_choices_ppl = (question, answer, other_choices)
             return answer_ppl, other_choices_ppl
 
 
@@ -139,6 +206,23 @@ with gr.Blocks() as demo:
         '''
         # load the json file with time,
 
+        # sample_df=pd.DataFrame({'time':pd.date_range('2021-01-01', periods=6), 'ppl': [1,2,3,4,5,6]})
+        pd_df=pd.read_csv('./data/tmp.csv')
+        pd_df['date'] = pd.to_datetime(pd_df['date'])
+        print(pd_df.head)
+        # gr_df=gr.Dataframe(pd_df)
+        gr_df=pd_df
+
+
+        # print(gr_df.head)
+        print('done')
+        # sample
+        plot=gr.Plot(label='model text ppl')
+        # plotly_plot(gr_df, 'date', 'loss_mean_at_1000', 'model','ppl with time', 'time', 'ppl')
+        # draw_pic_button=gr.Button('draw the pic').click(plotly_plot,inputs=['gr_df', 'date', 'loss_mean_at_1000', 'model','ppl with time', 'time', 'ppl'],outputs=[plot])
+        draw_pic_button=gr.Button('draw the pic').click(plotly_plot,inputs=[],outputs=[plot])
+
+
 
     with gr.Tab("model quesion acc with time"):
         '''
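
Note: the new harness_eval is a stub that returns hard-coded scores, and the test question button passes four inputs (question, choices, answer_index, answer_type) into a function whose fourth parameter is model, so answer_type currently lands in the model slot; the placeholder line in test_question also unpacks three values into two names and would raise a ValueError if called. Below is a minimal sketch of one possible ppl-style implementation (not part of the commit), assuming a Hugging Face causal LM and the '&&&&&&' choice separator used in the example above; the function and model names are illustrative only.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def harness_eval_sketch(question, choices, answer_index, model_name="microsoft/phi-2"):
    # Hypothetical sketch: score each choice by the mean token negative log-likelihood
    # of "question + choice" under a causal LM, then softmax over the choices.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    model.eval()

    choice_list = choices.split('&&&&&&')
    scores = []
    for choice in choice_list:
        inputs = tokenizer(question + ' ' + choice, return_tensors='pt')
        with torch.no_grad():
            # passing labels=input_ids makes the model return the mean token NLL as .loss
            loss = model(**inputs, labels=inputs['input_ids']).loss
        scores.append(-loss.item())  # higher (less negative) = more likely under the model

    probs = torch.softmax(torch.tensor(scores), dim=0)
    # same {choice: score} shape as the stub's return value
    return {c: p.item() for c, p in zip(choice_list, probs)}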
data/ob.csv ADDED
@@ -0,0 +1,32 @@
+,day,Mexico,UK
+0,60,98458.24943217951,39579.56206409874
+1,61,100786.91721556247,40515.671040143054
+2,62,103131.45192653117,41458.158415648744
+3,63,105491.7031365984,42406.96371936348
+4,64,107867.5242167411,43362.02800739426
+5,65,110258.77218336452,44323.29380128607
+6,66,112665.30755279331,45290.70502952821
+7,67,115086.99420369453,46264.206972249296
+8,68,117523.69924688818,47243.74620888196
+9,69,119975.29290204526,48229.270568596265
+10,70,122441.64838081415,49220.72908331732
+11,71,124922.64177595275,50218.07194315709
+12,72,127418.1519560776,51221.25045410429
+13,73,129928.06046567051,52230.21699782768
+14,74,132452.25143001205,53244.92499345999
+15,75,134990.61146473512,54265.32886123903
+16,76,137543.02958971576,55291.38398789239
+17,77,140109.39714703834,56323.046693659926
+18,78,142689.60772279178,57360.2742008565
+19,79,145283.55707247148,58403.02460388389
+20,80,147891.1430497762,59451.256840607704
+21,81,150512.26553860537,60504.930665020765
+22,82,153146.82638807516,61564.006621119916
+23,83,155794.7293503834,62628.4460179282
+24,84,158455.88002136638,63698.21090559912
+25,85,161130.1857835995,64773.26405254332
+26,86,163817.55575190464,65853.56892352313
+27,87,166517.90072113517,66939.08965866231
+28,88,169231.13311611864,68029.79105332344
+29,89,171957.1669436441,69125.63853880709
+30,90,174695.91774638867,70226.59816383067
data/ob.py ADDED
@@ -0,0 +1,15 @@
+import pandas as pd
+import plotly.express as px
+
+def plotly_plot(df, x, y, color, title, x_title, y_title):
+    fig = px.line(df, x=x, y=y, color=color, title=title)
+    fig.update_xaxes(title_text=x_title)
+    fig.update_yaxes(title_text=y_title)
+    return fig
+
+pd_df = pd.read_csv('./tmp.csv')
+pd_df['date'] = pd.to_datetime(pd_df['date'])
+
+fig=plotly_plot(pd_df, 'date', 'loss_mean_at_1000', 'model', 'ppl with time', 'time', 'ppl')
+fig.show()
+
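
Note: data/ob.py runs its read-and-plot code at module level and expects ./tmp.csv (whose diff is too large to render below) to provide date, model and loss_mean_at_1000 columns. A small self-contained sketch of that expected shape, with made-up rows and model names, for when tmp.csv is not at hand:

import pandas as pd
import plotly.express as px

# hypothetical sample rows mirroring the columns that ob.py plots
sample = pd.DataFrame({
    'date': pd.to_datetime(['2024-01-01', '2024-01-02', '2024-01-01', '2024-01-02']),
    'model': ['model-a', 'model-a', 'model-b', 'model-b'],
    'loss_mean_at_1000': [3.1, 3.0, 2.8, 2.7],
})

# same call pattern as plotly_plot(pd_df, 'date', 'loss_mean_at_1000', 'model', ...)
fig = px.line(sample, x='date', y='loss_mean_at_1000', color='model', title='ppl with time')
fig.update_xaxes(title_text='time')
fig.update_yaxes(title_text='ppl')
fig.show()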
data/tmp.csv ADDED
The diff for this file is too large to render. See raw diff
 
plot.py ADDED
@@ -0,0 +1,79 @@
+import altair
+
+import gradio as gr
+from math import sqrt
+import matplotlib.pyplot as plt
+import numpy as np
+import plotly.express as px
+import pandas as pd
+import pdb
+
+def outbreak(plot_type, r, month, countries, social_distancing):
+    months = ["January", "February", "March", "April", "May"]
+    m = months.index(month)
+    start_day = 30 * m
+    final_day = 30 * (m + 1)
+    x = np.arange(start_day, final_day + 1)
+    pop_count = {"USA": 350, "Canada": 40, "Mexico": 300, "UK": 120}
+    if social_distancing:
+        r = sqrt(r)
+    # df = pd.DataFrame({"day": x})
+    # for country in countries:
+    #     df[country] = x ** (r) * (pop_count[country] + 1)
+    df=pd.read_csv('./data/tmp.csv')
+    print(df.head())
+    # pdb.set_trace()
+
+    if plot_type == "Matplotlib":
+        fig = plt.figure()
+        plt.plot(df["day"], df[countries].to_numpy())
+        plt.title("Outbreak in " + month)
+        plt.ylabel("Cases")
+        plt.xlabel("Days since Day 0")
+        plt.legend(countries)
+        return fig
+    elif plot_type == "Plotly":
+        fig = px.line(df, x="day", y=countries)
+        fig.update_layout(
+            title="Outbreak in " + month,
+            xaxis_title="Cases",
+            yaxis_title="Days Since Day 0",
+        )
+        return fig
+    elif plot_type == "Altair":
+        df = df.melt(id_vars="day").rename(columns={"variable": "country"})
+        fig = altair.Chart(df).mark_line().encode(x="day", y='value', color='country')
+        return fig
+    else:
+        raise ValueError("A plot type must be selected")
+
+
+inputs = [
+    gr.Dropdown(["Matplotlib", "Plotly", "Altair"], label="Plot Type"),
+    gr.Slider(1, 4, 3.2, label="R"),
+    gr.Dropdown(["January", "February", "March", "April", "May"], label="Month"),
+    gr.CheckboxGroup(
+        ["USA", "Canada", "Mexico", "UK"], label="Countries", value=["USA", "Canada"]
+    ),
+    gr.Checkbox(label="Social Distancing?"),
+]
+outputs = gr.Plot()
+
+demo = gr.Interface(
+    fn=outbreak,
+    inputs=inputs,
+    outputs=outputs,
+    examples=[
+        ["Matplotlib", 2, "March", ["Mexico", "UK"], True],
+        ["Altair", 2, "March", ["Mexico", "Canada"], True],
+        ["Plotly", 3.6, "February", ["Canada", "Mexico", "UK"], False],
+    ],
+    cache_examples=True,
+)
+
+if __name__ == "__main__":
+    demo.launch()
+
+
+
+
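
Note: plot.py swaps its synthetic DataFrame for ./data/tmp.csv but still plots df["day"] and per-country columns, so it only works against a file with that layout (the new data/ob.csv has exactly the ,day,Mexico,UK header; the Plotly branch's axis titles are also swapped). A sketch that restores the commented-out synthetic data, with an illustrative helper name that is not part of the commit:

from math import sqrt
import numpy as np
import pandas as pd

def synthetic_outbreak_df(r=3.2, month_index=2, countries=("Mexico", "UK"), social_distancing=True):
    # mirrors the commented-out lines in outbreak(): a day column plus one column per country;
    # month_index=2 ("March") gives days 60..90, the range seen in data/ob.csv
    x = np.arange(30 * month_index, 30 * (month_index + 1) + 1)
    pop_count = {"USA": 350, "Canada": 40, "Mexico": 300, "UK": 120}
    if social_distancing:
        r = sqrt(r)
    df = pd.DataFrame({"day": x})
    for country in countries:
        df[country] = x ** r * (pop_count[country] + 1)
    return df

print(synthetic_outbreak_df().head())  # columns: day, Mexico, UK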