Spaces:
Build error
Build error
jijivski
committed on
Commit
•
6fcbb68
1
Parent(s):
3a0a132
Add pic (solid and dashed lines) to app.py
Browse files
app.py
CHANGED
@@ -94,7 +94,7 @@ def plotly_plot_text():#(df, x, y, color,title, x_title, y_title):
|
|
94 |
# fig.update_layout()
|
95 |
return fig
|
96 |
|
97 |
-
def plotly_plot_question():#(df, x, y, color,title, x_title, y_title):
|
98 |
# plotly_plot(sample_df, 'date', 'loss_mean_at_1000', 'model','ppl with time', 'time', 'ppl')
|
99 |
df=pd.read_csv('./data/meta_gjo_df.csv')
|
100 |
df['date'] = pd.to_datetime(df['End Time'])
|
@@ -104,11 +104,83 @@ def plotly_plot_question():#(df, x, y, color,title, x_title, y_title):
|
|
104 |
# use a dic to filter the dataframe
|
105 |
# df = df[df['file_name'] == 'arxiv_computer_science']
|
106 |
|
107 |
-
x,y,color,title, x_title, y_title='date', 'Right Possibility', 'model','Right Possibility with time', 'time', 'Right Possibility'
|
108 |
|
109 |
-
fig = px.line(df, x=x, y=y, color=color,title=title)
|
110 |
-
fig.update_xaxes(title_text=x_title)
|
111 |
-
fig.update_yaxes(title_text=y_title)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
# fig.update_layout()
|
113 |
return fig
|
114 |
|
|
|
94 |
# fig.update_layout()
|
95 |
return fig
|
96 |
|
97 |
+
def plotly_plot_question(use_start=True):#(df, x, y, color,title, x_title, y_title):
|
98 |
# plotly_plot(sample_df, 'date', 'loss_mean_at_1000', 'model','ppl with time', 'time', 'ppl')
|
99 |
df=pd.read_csv('./data/meta_gjo_df.csv')
|
100 |
df['date'] = pd.to_datetime(df['End Time'])
|
|
|
104 |
# use a dic to filter the dataframe
|
105 |
# df = df[df['file_name'] == 'arxiv_computer_science']
|
106 |
|
107 |
+
# x,y,color,title, x_title, y_title='date', 'Right Possibility', 'model','Right Possibility with time', 'time', 'Right Possibility'
|
108 |
|
109 |
+
# fig = px.line(df, x=x, y=y, color=color,title=title)
|
110 |
+
# fig.update_xaxes(title_text=x_title)
|
111 |
+
# fig.update_yaxes(title_text=y_title)
|
112 |
+
if not use_start:
|
113 |
+
data['Start Time']=data['End Time']
|
114 |
+
|
115 |
+
# # Convert the 'Release Date' and 'Start Time' columns to datetime
|
116 |
+
data['Release Date'] = pd.to_datetime(data['Release Date'])
|
117 |
+
data['Start Time'] = pd.to_datetime(data['Start Time'])
|
118 |
+
|
119 |
+
data_cleaned = data.dropna(subset=['Release Date', 'Start Time'])
|
120 |
+
if time_diff:
|
121 |
+
if gjo:
|
122 |
+
data_cleaned['Time Difference (Months)'] = ((data_cleaned['Start Time'] - data_cleaned['Release Date']) / pd.Timedelta(days=90)).round().astype(int)
|
123 |
+
else:
|
124 |
+
data_cleaned['Time Difference (Months)'] = ((data_cleaned['Start Time'] - data_cleaned['Release Date']) / pd.Timedelta(days=365)).round().astype(int)
|
125 |
+
else:
|
126 |
+
time_point= datetime(2015, 1, 1)
|
127 |
+
data_cleaned['Time Difference (Months)'] = ((data_cleaned['Start Time'] - time_point) / pd.Timedelta(days=90)).round().astype(int)
|
128 |
+
# Step 1: Fill missing months with linear interpolation (if necessary)
|
129 |
+
# Note: This dataset might not have explicit missing months, but we will ensure continuity for plotting
|
130 |
+
# pdb.set_trace()
|
131 |
+
# data_cleaned
|
132 |
+
# data_cleaned['Time Difference (Months)'].value_counts()
|
133 |
+
# Ensure 'Time Difference (Months)' is sorted for each model before applying rolling mean
|
134 |
+
data_cleaned.sort_values(by=['Model_x', 'Time Difference (Months)'], inplace=True)
|
135 |
+
|
136 |
+
import plotly.graph_objects as go
|
137 |
+
from plotly.subplots import make_subplots
|
138 |
+
import plotly.express as px
|
139 |
+
from scipy.interpolate import CubicSpline
|
140 |
+
|
141 |
+
|
142 |
+
# Initialize figure with subplots
|
143 |
+
# fig = make_subplots(rows=2, cols=1, subplot_titles=('Accuracy (Acc)', 'Right Possibility'))
|
144 |
+
# make this pic large enough
|
145 |
+
fig = make_subplots(rows=2, cols=1, subplot_titles=('Accuracy (Acc)', 'Right Possibility'),vertical_spacing=0.1)
|
146 |
+
|
147 |
+
|
148 |
+
colors = px.colors.qualitative.Plotly # Use Plotly's qualitative colors for consistency
|
149 |
+
|
150 |
+
# Iterate over each unique model to plot their data
|
151 |
+
for i, (model_name, group) in enumerate(data_cleaned.groupby('Model_x')):
|
152 |
+
color = colors[i % len(colors)] # Cycle through colors
|
153 |
+
# mean accuracy and right possibility for each model
|
154 |
+
group=group.groupby(['Model_x', 'Time Difference (Months)'])\
|
155 |
+
.agg({'Acc':'mean','Right Possibility':'mean','Release Date':'first','Start Time':'first'}).reset_index()
|
156 |
+
|
157 |
+
# Divide the data into before and after based on 'Release Date' and 'Start Time'
|
158 |
+
before = group[group['Release Date'] >= group['Start Time']]
|
159 |
+
after = group[group['Release Date'] < group['Start Time']]
|
160 |
+
|
161 |
+
# Concat the last row of 'before' to 'after' if 'before' is not empty
|
162 |
+
if not before.empty:
|
163 |
+
after = pd.concat([before.iloc[[-1]], after])
|
164 |
+
|
165 |
+
# ================================================================================
|
166 |
+
before = CubicSpline(before['Time Difference (Months)'], before['Acc'])
|
167 |
+
after = CubicSpline(after['Time Difference (Months)'], after['Acc'])
|
168 |
+
|
169 |
+
before = CubicSpline(before['Time Difference (Months)'], before['Right Possibility'])
|
170 |
+
after = CubicSpline(after['Time Difference (Months)'], after['Right Possibility'])
|
171 |
+
# ================================================================================
|
172 |
+
|
173 |
+
|
174 |
+
# Plot 'Acc' on the first subplot
|
175 |
+
fig.add_trace(go.Scatter(x=before['Time Difference (Months)'], y=before['Acc'], mode='lines', name=model_name + ' (Acc before)', line=dict(color=color)), row=1, col=1)
|
176 |
+
fig.add_trace(go.Scatter(x=after['Time Difference (Months)'], y=after['Acc'], mode='lines', name=model_name + ' (Acc after)', line=dict(color=color, dash='dash')), row=1, col=1)
|
177 |
+
|
178 |
+
# Plot 'Right Possibility' on the second subplot
|
179 |
+
fig.add_trace(go.Scatter(x=before['Time Difference (Months)'], y=before['Right Possibility'], mode='lines', name=model_name + ' (Right Possibility before)', line=dict(color=color)), row=2, col=1)
|
180 |
+
fig.add_trace(go.Scatter(x=after['Time Difference (Months)'], y=after['Right Possibility'], mode='lines', name=model_name + ' (Right Possibility after)', line=dict(color=color, dash='dash')), row=2, col=1)
|
181 |
+
|
182 |
+
# Update layout if needed
|
183 |
+
fig.update_layout(height=600, width=800, title_text="Model Performance Over Time")
|
184 |
# fig.update_layout()
|
185 |
return fig
|
186 |
|