jijivski committed on
Commit
6fcbb68
1 Parent(s): 3a0a132

Add figure with solid and dashed lines to app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -5
app.py CHANGED
@@ -94,7 +94,7 @@ def plotly_plot_text():#(df, x, y, color,title, x_title, y_title):
94
  # fig.update_layout()
95
  return fig
96
 
97
- def plotly_plot_question():#(df, x, y, color,title, x_title, y_title):
98
  # plotly_plot(sample_df, 'date', 'loss_mean_at_1000', 'model','ppl with time', 'time', 'ppl')
99
  df=pd.read_csv('./data/meta_gjo_df.csv')
100
  df['date'] = pd.to_datetime(df['End Time'])
@@ -104,11 +104,83 @@ def plotly_plot_question():#(df, x, y, color,title, x_title, y_title):
104
  # use a dic to filter the dataframe
105
  # df = df[df['file_name'] == 'arxiv_computer_science']
106
 
107
- x,y,color,title, x_title, y_title='date', 'Right Possibility', 'model','Right Possibility with time', 'time', 'Right Possibility'
108
 
109
- fig = px.line(df, x=x, y=y, color=color,title=title)
110
- fig.update_xaxes(title_text=x_title)
111
- fig.update_yaxes(title_text=y_title)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  # fig.update_layout()
113
  return fig
114
 
 
94
  # fig.update_layout()
95
  return fig
96
 
97
def plotly_plot_question(use_start=True, time_diff=True, gjo=True):
    """Plot per-model 'Acc' and 'Right Possibility' against a bucketed time axis.

    Reads ``./data/meta_gjo_df.csv``, buckets every row by the distance
    between its 'Start Time' and a reference date, averages 'Acc' and
    'Right Possibility' per model and bucket, and draws two stacked subplots.
    For each model, buckets whose 'Release Date' is on or after the
    'Start Time' are drawn solid ("before"); the remaining buckets are drawn
    dashed ("after"), starting from the last "before" bucket so the two
    segments join.

    Args:
        use_start: When False, 'Start Time' is overwritten with 'End Time'
            before any date arithmetic.
        time_diff: When True, the reference date is each row's
            'Release Date'; when False it is the fixed date 2015-01-01.
            NOTE(review): the committed code read an undefined name
            ``time_diff``; the default chosen here is an assumption.
        gjo: Only used when ``time_diff`` is True. True selects 90-day
            buckets, False selects 365-day buckets.
            NOTE(review): the committed code read an undefined name ``gjo``;
            the default chosen here is an assumption.

    Returns:
        The plotly figure holding both subplots.
    """
    # Kept function-local, as in the original revision.
    import plotly.express as px
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots

    df = pd.read_csv('./data/meta_gjo_df.csv')
    df['date'] = pd.to_datetime(df['End Time'])
    # NOTE(review): app.py lines 101-103 fall between the two diff hunks and
    # are not visible in this view; re-insert them here when applying this.

    # The committed code referred to an undefined `data`; the frame loaded
    # above is the only candidate in scope.
    data = df
    if not use_start:
        data['Start Time'] = data['End Time']

    # Convert the 'Release Date' and 'Start Time' columns to datetime.
    data['Release Date'] = pd.to_datetime(data['Release Date'])
    data['Start Time'] = pd.to_datetime(data['Start Time'])

    # Copy so that adding the bucket column does not write into a view.
    data_cleaned = data.dropna(subset=['Release Date', 'Start Time']).copy()

    if time_diff:
        origin = data_cleaned['Release Date']
        bucket_days = 90 if gjo else 365
    else:
        origin = pd.Timestamp(2015, 1, 1)
        bucket_days = 90
    # The column keeps its historical "(Months)" label although the bucket
    # width is 90 or 365 days.
    data_cleaned['Time Difference (Months)'] = (
        (data_cleaned['Start Time'] - origin) / pd.Timedelta(days=bucket_days)
    ).round().astype(int)

    data_cleaned = data_cleaned.sort_values(by=['Model_x', 'Time Difference (Months)'])

    fig = make_subplots(
        rows=2,
        cols=1,
        subplot_titles=('Accuracy (Acc)', 'Right Possibility'),
        vertical_spacing=0.1,
    )
    colors = px.colors.qualitative.Plotly  # one colour per model, cycled

    for i, (model_name, group) in enumerate(data_cleaned.groupby('Model_x')):
        color = colors[i % len(colors)]

        # Mean accuracy and right possibility per bucket for this model.
        group = (
            group.groupby(['Model_x', 'Time Difference (Months)'])
            .agg({
                'Acc': 'mean',
                'Right Possibility': 'mean',
                'Release Date': 'first',
                'Start Time': 'first',
            })
            .reset_index()
        )

        before = group[group['Release Date'] >= group['Start Time']]
        after = group[group['Release Date'] < group['Start Time']]

        # Start the dashed segment at the last solid point so the lines join.
        if not before.empty:
            after = pd.concat([before.iloc[[-1]], after])

        segments = (
            (before, 'before', dict(color=color)),
            (after, 'after', dict(color=color, dash='dash')),
        )
        for row, metric in ((1, 'Acc'), (2, 'Right Possibility')):
            for frame, label, line_style in segments:
                # The committed code replaced `before`/`after` with spline
                # objects and then indexed them like DataFrames; here the
                # frames stay intact and only the plotted values are smoothed.
                xs, ys = _spline_curve(frame, metric)
                fig.add_trace(
                    go.Scatter(
                        x=xs,
                        y=ys,
                        mode='lines',
                        name=f'{model_name} ({metric} {label})',
                        line=line_style,
                    ),
                    row=row,
                    col=1,
                )

    fig.update_layout(height=600, width=800, title_text="Model Performance Over Time")
    return fig


def _spline_curve(frame, column, samples_per_step=10):
    """Return (x, y) arrays for plotting ``column`` over the bucket axis.

    A cubic spline is evaluated on a dense grid when ``frame`` has at least
    two rows, finite ``column`` values and strictly increasing bucket values;
    otherwise the raw points are returned unchanged so that empty or
    single-point segments do not raise.
    """
    import numpy as np
    from scipy.interpolate import CubicSpline

    x = frame['Time Difference (Months)'].to_numpy(dtype=float)
    y = frame[column].to_numpy(dtype=float)

    can_interpolate = (
        len(x) >= 2
        and bool(np.all(np.isfinite(y)))
        and bool(np.all(np.diff(x) > 0))
    )
    if not can_interpolate:
        return x, y

    grid = np.linspace(x[0], x[-1], (len(x) - 1) * samples_per_step + 1)
    # Keep the original bucket positions on the grid so the curve passes
    # through every measured point.
    dense_x = np.union1d(x, grid)
    return dense_x, CubicSpline(x, y)(dense_x)
186