|
import streamlit as st
|
|
import pandas as pd
|
|
import plotly.express as px
|
|
import plotly.graph_objects as go
|
|
import numpy as np
|
|
import plotly.express as px
|
|
import plotly.graph_objects as go
|
|
import pandas as pd
|
|
import seaborn as sns
|
|
import matplotlib.pyplot as plt
|
|
import datetime
|
|
from utilities import set_header,initialize_data,load_local_css
|
|
from scipy.optimize import curve_fit
|
|
import statsmodels.api as sm
|
|
from plotly.subplots import make_subplots
|
|
|
|
# Configure the Streamlit page before anything else is rendered: wide layout,
# sidebar collapsed on load.
st.set_page_config(
    page_title="Data Validation",
    page_icon=":shark:",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# Project helpers imported from `utilities`; presumably they inject the local
# stylesheet and draw the shared page header -- confirm against utilities.py.
load_local_css("styles.css")
set_header()
|
|
|
|
def format_numbers(x):
    """Format a number compactly with one decimal place and a K/M suffix.

    Values of a million or more are shown in millions (``1.5M``), values of a
    thousand or more in thousands (``2.5K``), anything smaller as-is
    (``12.3``). Thousands separators are used for the numeric part.

    Args:
        x: A real number (Python or NumPy scalar). Negative values keep
            their sign.

    Returns:
        The formatted string.
    """
    magnitude = abs(x)
    # Choose the unit from the value as it will be *displayed*: 999,960 is
    # 1000.0 when expressed in thousands with one decimal, so it has to be
    # promoted to "1.0M" instead of being rendered as "1,000.0K".
    if magnitude >= 1e6 or round(magnitude / 1e3, 1) >= 1000:
        return f'{x / 1e6:,.1f}M'
    if magnitude >= 1e3 or round(magnitude, 1) >= 1000:
        return f'{x / 1e3:,.1f}K'
    return f'{x:,.1f}'
|
|
|
|
def format_axis(x):
    """Format a number as a short, integer-rounded axis label.

    Millions get an ``M`` suffix, thousands a ``k`` suffix, smaller values
    are shown as whole numbers.

    Args:
        x: A real number, or a tuple whose first element is the number to
            format (the remaining elements are ignored).

    Returns:
        The formatted label, e.g. ``'3M'``, ``'45k'`` or ``'12'``.
    """
    if isinstance(x, tuple):
        x = x[0]
    magnitude = abs(x)
    # Decide the unit on the rounded value so that boundary cases are
    # promoted ("1M", "1k") instead of being printed as "1000k" / "1000".
    # round(..., 0) is used (not round(...)) because it tolerates NaN.
    if magnitude >= 1e6 or round(magnitude / 1e3, 0) >= 1000:
        return f'{x / 1e6:.0f}M'
    if magnitude >= 1e3 or round(magnitude, 0) >= 1000:
        return f'{x / 1e3:.0f}k'
    return f'{x:.0f}'
|
|
|
|
|
|
# Input tables, read from the working directory on every script run.
attributred_app_installs = pd.read_csv("attributed_app_installs.csv")
attributred_app_installs_tactic = pd.read_excel('attributed_app_installs_tactic.xlsx')
data = pd.read_excel('Channel_wise_imp_click_spends.xlsx')
data['Date'] = pd.to_datetime(data['Date'])

st.header('Saturation Curves')
st.markdown('Data QC')
st.markdown('Channel wise summary')

# Monthly totals per column. numeric_only=True keeps the datetime 'Date'
# column out of the aggregation; summing datetimes raises on pandas >= 2.0.
month_labels = data['Date'].dt.strftime('%B %Y')
summary_df = data.groupby(month_labels).sum(numeric_only=True)

# The index holds strings such as "March 2023"; sort chronologically rather
# than alphabetically.
summary_df = summary_df.sort_index(key=lambda idx: pd.to_datetime(idx, format='%B %Y'))

# DataFrame.map supersedes the deprecated applymap on pandas >= 2.1.
if hasattr(summary_df, 'map'):
    formatted_summary = summary_df.map(format_numbers)
else:
    formatted_summary = summary_df.applymap(format_numbers)
st.dataframe(formatted_summary)
|
|
|
|
|
|
|
|
def line_plot_target(df, target, title):
    """Build a line chart of ``df[target]`` over ``df['Date']`` with a trendline.

    The figure contains the raw series, a least-squares straight-line trend,
    and a dashed vertical marker at 1 January of every year after the first
    year that appears in the data.

    Args:
        df: DataFrame with a datetime ``'Date'`` column and a numeric
            ``target`` column. It is only read, never modified.
        target: Name of the column to plot; also used as the trace name.
        title: Figure title.

    Returns:
        A ``plotly.graph_objects.Figure``.
    """
    dates = df['Date']
    values = df[target]

    # Fit against seconds elapsed since the first date rather than raw
    # nanosecond epoch integers (~1e18): the fitted line is the same but the
    # least-squares problem is far better conditioned.
    elapsed_seconds = (dates - dates.min()).dt.total_seconds()
    trendline = np.poly1d(np.polyfit(elapsed_seconds, values, 1))

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=dates, y=values, mode='lines', name=target,
                             line=dict(color='#11B6BD')))
    fig.add_trace(go.Scatter(x=dates, y=trendline(elapsed_seconds), mode='lines',
                             name='Trendline', line=dict(color='#739FAE')))
    fig.update_layout(
        title=title,
        xaxis=dict(type='date'),
    )

    # Year separators: skip the first year seen, mark the start of the rest.
    for year in dates.dt.year.unique()[1:]:
        january_1 = pd.Timestamp(year=int(year), month=1, day=1)
        fig.add_shape(
            go.layout.Shape(
                type="line",
                x0=january_1,
                x1=january_1,
                y0=0,
                y1=1,
                xref="x",
                yref="paper",  # span the full plot height regardless of y range
                line=dict(color="grey", width=1.5, dash="dash"),
            )
        )

    return fig
|
|
# NOTE(review): only the first 28 columns are treated as channel metrics --
# confirm this still matches the layout of the spreadsheet.
channels_d = data.columns[:28]

# Channel name = metric column name with its metric suffix removed. Sorted so
# the option order (and the default selection) is the same on every run; a
# bare set has no stable order for strings.
channels = sorted({
    col.replace('_impressions', '').replace('_clicks', '').replace('_spend', '')
    for col in channels_d
    if col.lower() != 'date'
})

channel = st.selectbox('Select Channel_name', channels)

# Metric columns that belong to the chosen channel.
target_column = st.selectbox(
    'Select Channel',
    [col for col in data.columns if col.startswith(channel)],
)

fig = line_plot_target(data, target=str(target_column), title=f'{str(target_column)} Over Time')
st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
st.header('Build saturation curve')

# Two side-by-side toggles: outlier capping (left), attributed installs (right).
col = st.columns(2)
with col[0]:
    cap_outliers = st.checkbox('Cap Outliers')
with col[1]:
    use_attributed = st.checkbox('Attributed')

# Predictor: the selected metric, standardised to z-scores.
x = data[target_column]
x_mean = np.mean(x)
x_std = np.std(x)
if not x_std > 0:
    # A constant (or all-NaN) column cannot be standardised or fitted.
    st.error(f'{target_column} has no variation; cannot build a saturation curve.')
    st.stop()
x_scaled = (x - x_mean) / x_std

if cap_outliers:
    # Cap at +/- 2 standard deviations. The seasonal decomposition that used
    # to run here is not performed: its result was never used, and it raises
    # when the date index carries no frequency.
    lower_threshold = -2.0
    upper_threshold = 2.0
    x_scaled = np.clip(x_scaled, lower_threshold, upper_threshold)

# Response: app installs, either the attributed series for this metric or
# the raw total from the channel table.
if use_attributed:
    # Attributed columns are matched by name being contained in the metric name.
    column = [name for name in attributred_app_installs.columns if name in target_column]
    if len(column) != 1:
        st.error(
            f'Expected exactly one attributed-installs column matching '
            f'{target_column}, found {len(column)}.'
        )
        st.stop()
    # Align on the channel table's index without overwriting its own
    # 'app_installs_appsflyer' column, which later sections still read.
    y = (
        attributred_app_installs[column[0]]
        .reindex(data.index)
        .rename('app_installs_appsflyer')
    )
    title = 'Attributed-App_installs_appsflyer'
else:
    y = data["app_installs_appsflyer"]
    title = 'App_installs_appsflyer'
|
|
|
|
|
|
def sigmoid(x, K, a, x0):
    """Logistic saturation curve ``K / (1 + exp(-a * (x - x0)))``.

    Args:
        x: Scalar or array-like of predictor values.
        K: Upper asymptote (amplitude).
        a: Slope at the midpoint.
        x0: Location of the midpoint, where the curve equals ``K / 2``.

    Returns:
        The curve evaluated at ``x`` (same shape as ``x``).
    """
    # Local import: the file only imports scipy.optimize at module level.
    from scipy.special import expit

    # expit(z) is 1 / (1 + exp(-z)); using it avoids the overflow warnings
    # that a direct np.exp raises when the optimiser tries large |a*(x-x0)|.
    return K * expit(a * (x - x0))
|
|
|
|
# Starting values shown in the widgets and used as the optimiser's first guess.
initial_K = np.max(y)
initial_a = 1
initial_x0 = 0

# Plain float arrays: pandas index labels must not influence the fit or plot.
x_scaled_values = np.asarray(x_scaled, dtype=float)
y_values = np.asarray(y, dtype=float)
x_scaled_min = float(np.min(x_scaled_values))
x_scaled_max = float(np.max(x_scaled_values))

columns = st.columns(3)
with columns[0]:
    K = st.number_input('K (Amplitude)', min_value=0.01, max_value=float(2.0 * np.max(y)),
                        value=float(initial_K), step=5.0)
with columns[1]:
    a = st.number_input('a (Slope)', min_value=0.01, max_value=5.0,
                        value=float(initial_a), step=0.5)
with columns[2]:
    x0 = st.number_input('x0 (Center)', min_value=x_scaled_min, max_value=x_scaled_max,
                         value=float(initial_x0), step=2.0)

# curve_fit raises RuntimeError when it does not converge and ValueError on
# non-finite input; show that to the user instead of a stack trace.
try:
    params, _ = curve_fit(sigmoid, x_scaled_values, y_values, p0=[K, a, x0], maxfev=20000)
except (RuntimeError, ValueError) as fit_error:
    st.error(f'Could not fit a saturation curve: {fit_error}')
    st.stop()

# What-if slider in raw units; converted to the scaled space for prediction.
x_slider = st.slider('X Value', min_value=float(min(x)), max_value=float(max(x))+1,
                     value=float(x_mean), step=1.)
x_slider_scaled = (x_slider - x_mean) / x_std
y_slider_fit = sigmoid(x_slider_scaled, *params)

st.write(f'{target_column}: {format_numbers(x_slider)}')
st.write(f'Corresponding App_installs: {format_numbers(y_slider_fit)}')

# Observations. Array inputs are named 'x'/'y' by plotly express, which is
# what the `labels` mapping refers to.
fig = px.scatter(x=x_scaled_values, y=y_values,
                 labels={'x': f'{target_column}', 'y': 'App Installs'}, title=title)

# Fitted curve, coloured before it is added rather than via fig.data[1].
x_fit = np.linspace(x_scaled_min, x_scaled_max, 100)
y_fit = sigmoid(x_fit, *params)
fit_trace = px.line(x=x_fit, y=y_fit).data[0]
fit_trace.update(line=dict(color='orange'))
fig.add_trace(fit_trace)

fig.add_vline(x=x_slider_scaled, line_dash='dash', line_color='red',
              annotation_text=f'{format_numbers(x_slider)}')

# The x axis is in z-scores; put ticks at evenly spaced scaled positions and
# label each with the raw value it maps back to (z * std + mean).
num_ticks = 10
tick_positions = np.linspace(x_scaled_min, x_scaled_max, num_ticks)
tick_labels = [format_axis(position * x_std + x_mean) for position in tick_positions]
fig.update_xaxes(tickvals=tick_positions.tolist(), ticktext=tick_labels)

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.update_layout(
    width=600,
    height=600,
)
st.plotly_chart(fig)
|
|
|
|
|
|
|
|
|
|
st.markdown('Tactic level')

# One worksheet per channel; anything other than paid_social falls back to
# the digital app display sheet.
if channel == 'paid_social':
    tactic_sheet = 'paid_social_impressions'
else:
    tactic_sheet = 'digital_app_display_impressions'
tactic_data = pd.read_excel("Tatcic_paid.xlsx", sheet_name=tactic_sheet)

# An explicit key keeps this widget distinct from the channel-level selector.
target_column = st.selectbox(
    'Select Channel',
    [col for col in tactic_data.columns if col != 'Date' and col != 'app_installs_appsflyer'],
    key='tactic0',
)

fig = line_plot_target(tactic_data, target=str(target_column), title=f'{str(target_column)} Over Time')
st.plotly_chart(fig, use_container_width=True)

cap_tactic_outliers = st.checkbox('Cap Outliers', key='tactic1')

# Predictor: the selected tactic metric, standardised to z-scores.
x = tactic_data[target_column]
x_mean = np.mean(x)
x_std = np.std(x)
if not x_std > 0:
    # A constant (or all-NaN) column cannot be standardised or fitted.
    st.error(f'{target_column} has no variation; cannot build a saturation curve.')
    st.stop()
x_scaled = (x - x_mean) / x_std

if cap_tactic_outliers:
    # Cap at +/- 2 standard deviations.
    lower_threshold = -2.0
    upper_threshold = 2.0
    x_scaled = np.clip(x_scaled, lower_threshold, upper_threshold)

if st.checkbox('Attributed', key='tactic2'):
    # Attributed columns are matched by name being contained in the metric name.
    column = [name for name in attributred_app_installs_tactic.columns if name in target_column]
    if len(column) != 1:
        st.error(
            f'Expected exactly one attributed-installs column matching '
            f'{target_column}, found {len(column)}.'
        )
        st.stop()
    # Align on the tactic table's index without overwriting any of its columns.
    y = (
        attributred_app_installs_tactic[column[0]]
        .reindex(tactic_data.index)
        .rename('app_installs_appsflyer')
    )
    title = 'Attributed-App_installs_appsflyer'
else:
    # Use the installs recorded alongside the tactic metrics so x and y come
    # from the same rows; fall back to the channel table only when the
    # tactic sheet has no such column.
    if 'app_installs_appsflyer' in tactic_data.columns:
        y = tactic_data["app_installs_appsflyer"]
    else:
        y = data["app_installs_appsflyer"]
    title = 'App_installs_appsflyer'
|
|
|
|
|
|
def sigmoid(x, K, a, x0):
    """Logistic saturation curve ``K / (1 + exp(-a * (x - x0)))``.

    Args:
        x: Scalar or array-like of predictor values.
        K: Upper asymptote (amplitude).
        a: Slope at the midpoint.
        x0: Location of the midpoint, where the curve equals ``K / 2``.

    Returns:
        The curve evaluated at ``x`` (same shape as ``x``).
    """
    # Local import: the file only imports scipy.optimize at module level.
    from scipy.special import expit

    # expit(z) is 1 / (1 + exp(-z)); using it avoids the overflow warnings
    # that a direct np.exp raises when the optimiser tries large |a*(x-x0)|.
    return K * expit(a * (x - x0))
|
|
|
|
|
|
|
|
|
|
|
|
# Starting values shown in the widgets and used as the optimiser's first guess.
initial_K = np.max(y)
initial_a = 1
initial_x0 = 0

# Plain float arrays: pandas index labels must not influence the fit or plot.
x_scaled_values = np.asarray(x_scaled, dtype=float)
y_values = np.asarray(y, dtype=float)
x_scaled_min = float(np.min(x_scaled_values))
x_scaled_max = float(np.max(x_scaled_values))

K = st.number_input('K (Amplitude)', min_value=0.01, max_value=float(2.0 * np.max(y)),
                    value=float(initial_K), step=5.0, key='tactic3')
a = st.number_input('a (Slope)', min_value=0.01, max_value=5.0,
                    value=float(initial_a), step=2.0, key='tactic41')
x0 = st.number_input('x0 (Center)', min_value=x_scaled_min, max_value=x_scaled_max,
                     value=float(initial_x0), step=2.0, key='tactic4')

# curve_fit raises RuntimeError when it does not converge and ValueError on
# non-finite input; show that to the user instead of a stack trace.
try:
    params, _ = curve_fit(sigmoid, x_scaled_values, y_values, p0=[K, a, x0], maxfev=20000)
except (RuntimeError, ValueError) as fit_error:
    st.error(f'Could not fit a saturation curve: {fit_error}')
    st.stop()

# What-if slider in raw units; converted to the scaled space for prediction.
x_slider = st.slider('X Value', min_value=float(min(x)), max_value=float(max(x)),
                     value=float(x_mean), step=1., key='tactic7')
x_slider_scaled = (x_slider - x_mean) / x_std
y_slider_fit = sigmoid(x_slider_scaled, *params)

st.write(f'{target_column}: {format_axis(x_slider)}')
st.write(f'Corresponding App_installs: {format_axis(y_slider_fit)}')

# Observations. Array inputs are named 'x'/'y' by plotly express, which is
# what the `labels` mapping refers to.
fig = px.scatter(x=x_scaled_values, y=y_values,
                 labels={'x': f'{target_column}', 'y': 'App Installs'}, title=title)

# Fitted curve, coloured before it is added rather than via fig.data[1].
x_fit = np.linspace(x_scaled_min, x_scaled_max, 100)
y_fit = sigmoid(x_fit, *params)
fit_trace = px.line(x=x_fit, y=y_fit).data[0]
fit_trace.update(line=dict(color='orange'))
fig.add_trace(fit_trace)

fig.add_vline(x=x_slider_scaled, line_dash='dash', line_color='red',
              annotation_text=f'{format_numbers(x_slider)}')

# The x axis is in z-scores; put ticks at evenly spaced scaled positions and
# label each with the raw value it maps back to (z * std + mean).
num_ticks = 10
tick_positions = np.linspace(x_scaled_min, x_scaled_max, num_ticks)
tick_labels = [format_axis(position * x_std + x_mean) for position in tick_positions]
fig.update_xaxes(tickvals=tick_positions.tolist(), ticktext=tick_labels)

# NOTE(review): ".f" has no precision digits and does not look like a valid
# d3-format specifier -- confirm whether ".0f" was intended. Left unchanged.
fig.update_xaxes(tickformat=".f")
fig.update_yaxes(tickformat=".f")

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.update_layout(
    width=600,
    height=600,
)
st.plotly_chart(fig)