Spaces:

jijivski
/

FreshBench

Build error

App Files Files Community

jijivski committed on Mar 17, 2024

Commit

6fcbb68

1 Parent(s): 3a0a132

pic,about solid and dash into app.py

Browse files

Files changed (1) hide show

app.py +77 -5

app.py CHANGED Viewed

@@ -94,7 +94,7 @@ def plotly_plot_text():#(df, x, y, color,title, x_title, y_title):
     # fig.update_layout()
     return fig
-def plotly_plot_question():#(df, x, y, color,title, x_title, y_title):
         # plotly_plot(sample_df, 'date', 'loss_mean_at_1000', 'model','ppl with time', 'time', 'ppl')
     df=pd.read_csv('./data/meta_gjo_df.csv')
     df['date'] = pd.to_datetime(df['End Time'])
@@ -104,11 +104,83 @@ def plotly_plot_question():#(df, x, y, color,title, x_title, y_title):
     # use a dic to filter the dataframe
     # df = df[df['file_name'] == 'arxiv_computer_science']
-    x,y,color,title, x_title, y_title='date', 'Right Possibility', 'model','Right Possibility with time', 'time', 'Right Possibility'
-    fig = px.line(df, x=x, y=y, color=color,title=title)
-    fig.update_xaxes(title_text=x_title)
-    fig.update_yaxes(title_text=y_title)
     # fig.update_layout()
     return fig

     # fig.update_layout()
     return fig
def plotly_plot_question(use_start=True, *, time_diff=True, gjo=True):
    """Plot per-model mean 'Acc' and 'Right Possibility' against a bucketed time axis.

    Reads ``./data/meta_gjo_df.csv`` and builds a figure with two stacked
    subplots (row 1: 'Acc', row 2: 'Right Possibility'). For every value of
    the 'Model_x' column the rows are averaged per time bucket and drawn as
    two line segments in the same colour:

    * solid  -- buckets whose 'Release Date' is on or after 'Start Time';
    * dashed -- buckets whose 'Release Date' is before 'Start Time'.

    The last solid point is prepended to the dashed segment so the two
    segments join without a visual gap.

    Args:
        use_start: When False, 'Start Time' is overwritten with 'End Time'
            before any computation, so the end time drives the x axis and
            the solid/dashed split.
        time_diff: When True, the x axis is the offset of 'Start Time' from
            the row's 'Release Date'; when False it is the offset from the
            fixed origin 2015-01-01.
        gjo: Only consulted when ``time_diff`` is True. Selects the bucket
            width: 90 days when True, 365 days when False. With
            ``time_diff`` False the bucket width is always 90 days.

    Returns:
        The assembled ``plotly.graph_objects.Figure``.

    NOTE(review): ``time_diff`` and ``gjo`` were previously read as free
    names that this function never defined; the defaults chosen here are an
    assumption -- confirm against the intended behaviour of the app.
    """
    # Function-scope imports keep this block self-contained regardless of
    # what the top of the file imports.
    from datetime import datetime

    import plotly.express as px
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots

    # The x-axis column keeps its historical name even though the bucket is
    # 90 or 365 days rather than one month; it is only used internally here.
    bucket_col = 'Time Difference (Months)'

    data = pd.read_csv('./data/meta_gjo_df.csv')
    if not use_start:
        data['Start Time'] = data['End Time']

    data['Release Date'] = pd.to_datetime(data['Release Date'])
    data['Start Time'] = pd.to_datetime(data['Start Time'])
    # .copy() so the column assignment below targets an independent frame
    # instead of a possible view of `data`.
    data_cleaned = data.dropna(subset=['Release Date', 'Start Time']).copy()

    if time_diff:
        origin = data_cleaned['Release Date']
        bucket_width = pd.Timedelta(days=90 if gjo else 365)
    else:
        origin = datetime(2015, 1, 1)
        bucket_width = pd.Timedelta(days=90)
    data_cleaned[bucket_col] = (
        ((data_cleaned['Start Time'] - origin) / bucket_width).round().astype(int)
    )

    # Sort so that the 'first' aggregations below pick rows deterministically
    # and every plotted line runs left to right.
    data_cleaned = data_cleaned.sort_values(by=['Model_x', bucket_col])

    fig = make_subplots(
        rows=2,
        cols=1,
        subplot_titles=('Accuracy (Acc)', 'Right Possibility'),
        vertical_spacing=0.1,
    )
    colors = px.colors.qualitative.Plotly

    for i, (model_name, group) in enumerate(data_cleaned.groupby('Model_x')):
        color = colors[i % len(colors)]  # cycle when there are more models than colours

        # One point per (model, bucket): mean of both metrics, plus the dates
        # needed to decide which segment the point belongs to.
        group = (
            group.groupby(['Model_x', bucket_col])
            .agg({
                'Acc': 'mean',
                'Right Possibility': 'mean',
                'Release Date': 'first',
                'Start Time': 'first',
            })
            .reset_index()
        )

        released_later = group['Release Date'] >= group['Start Time']
        before = group[released_later]
        after = group[~released_later]
        if not before.empty:
            # Share the boundary point so solid and dashed segments connect.
            after = pd.concat([before.iloc[[-1]], after])

        # Trace order per model: Acc before, Acc after, then the same pair
        # for 'Right Possibility' on the second row.
        for row, metric in enumerate(('Acc', 'Right Possibility'), start=1):
            for segment, label, line in (
                (before, 'before', dict(color=color)),
                (after, 'after', dict(color=color, dash='dash')),
            ):
                fig.add_trace(
                    go.Scatter(
                        x=segment[bucket_col],
                        y=segment[metric],
                        mode='lines',
                        name=f'{model_name} ({metric} {label})',
                        line=line,
                    ),
                    row=row,
                    col=1,
                )

    fig.update_layout(height=600, width=800, title_text="Model Performance Over Time")
    return fig