|
import streamlit as st |
|
import plotly.express as px |
|
import numpy as np |
|
import plotly.graph_objects as go |
|
from sklearn.metrics import r2_score |
|
from collections import OrderedDict |
|
import plotly.express as px |
|
import plotly.graph_objects as go |
|
import pandas as pd |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
import streamlit as st |
|
import re |
|
from matplotlib.colors import ListedColormap |
|
|
|
|
|
|
|
|
|
def format_numbers(x):
    """Render a number as a compact string with one decimal place.

    Values whose magnitude is at least one million are divided by 1e6 and
    suffixed with ``M``; values whose magnitude is at least one thousand are
    divided by 1e3 and suffixed with ``K``; anything smaller is printed
    unscaled. A comma thousands separator is used in every case.
    """
    magnitude = abs(x)
    # Checked from the largest unit down so the first match wins.
    for threshold, suffix in ((1e6, 'M'), (1e3, 'K')):
        if magnitude >= threshold:
            return f'{x / threshold:,.1f}{suffix}'
    return f'{x:,.1f}'
|
|
|
|
|
|
|
def line_plot(data, x_col, y1_cols, y2_cols, title):
    """Build a Plotly line chart with an optional secondary y-axis.

    Args:
        data: Table-like object indexable by column name (``data[col]``).
        x_col: Name of the column used for the x values of every trace.
        y1_cols: Column names drawn as lines on the primary y-axis.
        y2_cols: Column names drawn as lines on the secondary (``y2``) axis;
            may be empty.
        title: Figure title; skipped when falsy.

    Returns:
        The assembled ``plotly.graph_objects.Figure``.
    """
    fig = go.Figure()

    for column in y1_cols:
        fig.add_trace(
            go.Scatter(
                x=data[x_col],
                y=data[column],
                mode='lines',
                name=column,
                line=dict(color='#11B6BD'),
            )
        )

    for column in y2_cols:
        fig.add_trace(
            go.Scatter(
                x=data[x_col],
                y=data[column],
                mode='lines',
                name=column,
                yaxis='y2',
                line=dict(color='#739FAE'),
            )
        )

    # The original had two identical if/else branches that always declared the
    # secondary axis. It is only needed when a trace is actually bound to it.
    # len() is used rather than truthiness so array-like inputs also work.
    if len(y2_cols) > 0:
        fig.update_layout(yaxis2=dict(overlaying='y', side='right'))

    if title:
        fig.update_layout(title=title)

    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)

    # Horizontal legend centred just above the plotting area.
    fig.update_layout(
        legend=dict(
            orientation="h",
            yanchor="top",
            y=1.1,
            xanchor="center",
            x=0.5,
        )
    )

    return fig
|
|
|
|
|
def line_plot_target(df, target, title):
    """Plot ``target`` over time together with a fitted linear trendline.

    Args:
        df: DataFrame with a datetime-like ``'date'`` column (the ``.dt``
            accessor is used on it) and a numeric ``target`` column.
        target: Name of the column to plot and to fit the trend on.
        title: Figure title.

    Returns:
        A ``plotly.graph_objects.Figure`` containing the target line, the
        degree-1 least-squares trendline, and a dashed grey vertical line at
        1 January of every distinct year in ``df['date']`` except the first
        one encountered.
    """
    dates = df['date']

    # Integer representation of the timestamps, used as the regression x.
    # Taken once via the underlying NumPy array: ``Series.view`` is deprecated
    # in recent pandas releases, and the original computed it twice.
    date_as_int = dates.values.astype('int64')

    trendline = np.poly1d(np.polyfit(date_as_int, df[target], 1))

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=dates,
            y=df[target],
            mode='lines',
            name=target,
            line=dict(color='#11B6BD'),
        )
    )
    fig.add_trace(
        go.Scatter(
            x=dates,
            y=trendline(date_as_int),
            mode='lines',
            name='Trendline',
            line=dict(color='#739FAE'),
        )
    )

    fig.update_layout(title=title, xaxis=dict(type='date'))

    # Year separators: the first year in the data gets no marker.
    for year in dates.dt.year.unique()[1:]:
        # Cast to a plain int so Timestamp never receives a NumPy scalar.
        january_1 = pd.Timestamp(year=int(year), month=1, day=1)
        fig.add_shape(
            go.layout.Shape(
                type="line",
                x0=january_1,
                x1=january_1,
                y0=0,
                y1=1,
                xref="x",
                yref="paper",  # span the full plot height regardless of y range
                line=dict(color="grey", width=1.5, dash="dash"),
            )
        )

    # Horizontal legend centred just above the plotting area.
    fig.update_layout(
        legend=dict(
            orientation="h",
            yanchor="top",
            y=1.1,
            xanchor="center",
            x=0.5,
        )
    )
    return fig
|
|
|
def correlation_plot(df, selected_features, target):
    """Draw a lower-triangle correlation heatmap of features plus target.

    Args:
        df: Source DataFrame.
        selected_features: List of column names to include.
        target: Name of the target column, appended after the features.

    Returns:
        The matplotlib ``Figure`` holding the annotated heatmap.
    """
    corr_matrix = pd.concat([df[selected_features], df[target]], axis=1).corr()

    # Mask the upper triangle (diagonal included) by position. The original
    # masked with the correlation values themselves, so a coefficient of
    # exactly 0 in the upper triangle was left visible.
    upper_triangle = np.triu(np.ones_like(corr_matrix, dtype=bool))

    fig, ax = plt.subplots(figsize=(16, 12))
    sns.heatmap(
        corr_matrix,
        annot=True,
        cmap='Blues',
        fmt=".2f",
        linewidths=0.5,
        mask=upper_triangle,
        ax=ax,  # draw on our axes explicitly instead of relying on pyplot state
    )

    plt.setp(ax.get_xticklabels(), rotation=45)
    plt.setp(ax.get_yticklabels(), rotation=0)
    return fig
|
|
|
def _format_summary_cells(frame):
    """Return ``frame`` with every cell passed through ``format_numbers``."""
    # Column-wise Series.map replaces DataFrame.applymap, which newer pandas
    # releases deprecate.
    return frame.apply(lambda column: column.map(format_numbers))


def _spend_like_columns(frame):
    """Return the column names of ``frame`` containing 'spends' or 'cost'."""
    return [
        col
        for col in frame.columns
        if any(keyword in col for keyword in ['spends', 'cost'])
    ]


def _add_dollar_prefix(frame, columns):
    """Prefix the formatted values of ``columns`` with ``$``, in place."""
    for col in columns:
        frame[col] = frame[col].map(lambda x: f'${x}')


def summary(data, selected_feature, spends, Target=None):
    """Build a per-year totals table with human-readable number formatting.

    Args:
        data: DataFrame with a datetime-like ``'date'`` column (its ``.dt.year``
            is used for grouping) and the numeric columns being summarised.
        selected_feature: List of column names to aggregate. It is no longer
            modified by this function.
        spends: Extra column name added to the aggregation when ``Target`` is
            falsy; ignored otherwise.
        Target: When truthy, only ``selected_feature`` is summed per year and
            a final ``'Total'`` row is appended.

    Returns:
        A DataFrame of formatted strings indexed by year.

        * ``Target`` truthy: index named ``Year`` with a trailing ``'Total'``
          row; columns whose name contains ``'spends'`` or ``'cost'`` are
          prefixed with ``$``.
        * ``Target`` falsy: a trailing ``'Grand Total'`` row; cells equal to
          ``'0.0'`` or ``'nan'`` are shown as ``'-'``. When more than one
          distinct column is involved, a ``'CPM/CPC'`` column is added,
          computed as second column / first column * 1000 of
          ``selected_feature + [spends]``, and only that second column gets
          the ``$`` prefix.
    """
    years = data['date'].dt.year

    if Target:
        features = list(selected_feature)
        # assign() works on a fresh frame, avoiding the chained-assignment
        # warning triggered by setting a column on a slice of ``data``.
        yearly = (
            data[features]
            .assign(Year=years)
            .groupby('Year')[features]
            .sum()
            .reset_index()
        )
        totals = yearly.sum(numeric_only=True)
        totals['Year'] = 'Total'
        table = pd.concat(
            [yearly, totals.to_frame().T], axis=0, ignore_index=True
        ).set_index('Year')

        table = _format_summary_cells(table)
        _add_dollar_prefix(table, _spend_like_columns(table))
        return table

    # Work on a new list: the original appended ``spends`` to the caller's
    # list, which kept growing across repeated calls.
    combined = list(selected_feature) + [spends]
    # Order-preserving de-duplication; ``set`` made column order arbitrary.
    features = list(dict.fromkeys(combined))

    table = data[features].assign(Year=years).groupby('Year')[features].agg('sum')

    if len(features) > 1:
        imp_clicks, spends_col = combined[0], combined[1]
        ratio = (table[spends_col] / table[imp_clicks]) * 1000
        # A year with zero impressions/clicks would otherwise render as "infM";
        # treat it as missing so it is displayed as '-'.
        table['CPM/CPC'] = ratio.replace([np.inf, -np.inf], np.nan)
        dollar_columns = [spends_col]
    else:
        dollar_columns = None  # resolved by keyword once the table is built

    # NOTE(review): the 'Grand Total' of 'CPM/CPC' is the sum of the yearly
    # ratios, not total spend / total volume -- confirm this is intended.
    table.loc['Grand Total'] = table.sum()

    table = _format_summary_cells(table)
    table = table.fillna('-').replace({"0.0": '-', 'nan': '-'})

    if dollar_columns is None:
        dollar_columns = _spend_like_columns(table)
    _add_dollar_prefix(table, dollar_columns)
    return table
|
|
|
|
|
def sanitize_key(key, prefix=""):
    """Return ``prefix`` followed by ``key`` with non-alphanumerics removed.

    Only ASCII letters and digits survive; every other character in ``key``
    (spaces, punctuation, underscores, non-ASCII letters) is dropped.
    """
    disallowed = re.compile(r'[^a-zA-Z0-9]')
    cleaned = disallowed.sub('', key)
    return "{}{}".format(prefix, cleaned)
|
|
|
|
|
|
|
|
|
|