import streamlit as st
import pandas as pd
import json
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from collections import OrderedDict
import io
import plotly
from pathlib import Path
import pickle
import yaml
from yaml import SafeLoader
from streamlit.components.v1 import html
import smtplib
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score
import os
import glob
import base64
import sqlite3
import datetime

from classes import (
    Channel,
    Scenario,
    class_to_dict,
    class_from_dict,
    class_convert_to_dict,
    numerize,
)

color_palette = [
    "#F3F3F0",
    "#5E7D7E",
    "#2FA1FF",
    "#00EDED",
    "#00EAE4",
    "#304550",
    "#EDEBEB",
    "#7FBEFD",
    "#003059",
    "#A2F3F3",
    "#E1D6E2",
    "#B6B6B6",
]

CURRENCY_INDICATOR = "$"

database_file = r"DB/User.db"

conn = sqlite3.connect(database_file, check_same_thread=False)
c = conn.cursor()


def update_db(page_name):
    # Record which page the user last touched so the project list on the
    # Home page can surface the most recently edited project first.
    modified_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
    c.execute(
        "UPDATE sessions SET last_edited_page = ?, updated_time = ? WHERE project_name = ? AND owner = ?",
        (
            page_name,
            modified_time,
            st.session_state["project_name"],
            st.session_state["username"],
        ),
    )
    conn.commit()
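

# Hedged usage sketch: each page is assumed to call update_db with its own
# display name right after a user edit, e.g.
#
#     update_db("Scenario Planner")   # hypothetical page name
#
# so that sessions.last_edited_page tracks the latest activity per project.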


def nav_page(page_name, timeout_secs=3):
    # Programmatic navigation: inject a script that clicks the sidebar link
    # whose href ends with the requested page slug.
    nav_script = """
        <script type="text/javascript">
            function attempt_nav_page(page_name, start_time, timeout_secs) {
                var links = window.parent.document.getElementsByTagName("a");
                for (var i = 0; i < links.length; i++) {
                    if (links[i].href.toLowerCase().endsWith("/" + page_name.toLowerCase())) {
                        links[i].click();
                        return;
                    }
                }
                var elapsed = new Date() - start_time;
                if (elapsed < timeout_secs * 1000) {
                    setTimeout(attempt_nav_page, 100, page_name, start_time, timeout_secs);
                } else {
                    alert("Unable to navigate to page '" + page_name + "' after " + timeout_secs + " second(s).");
                }
            }
            window.addEventListener("load", function() {
                attempt_nav_page("%s", new Date(), %d);
            });
        </script>
    """ % (
        page_name,
        timeout_secs,
    )
    html(nav_script)
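
# Hedged usage sketch ("Scenario_Planner" is a hypothetical page slug):
#
#     nav_page("Scenario_Planner")
#
# Matching against the sidebar link href is case-insensitive and retries
# every 100 ms until timeout_secs elapses.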


# Read the logo once at import time and inline it as a base64 data URL so
# the header renders without a separate static-file route.
path = os.path.dirname(__file__)
with open(f"{path}/mastercard_logo.png", "rb") as file_:
    data_url = base64.b64encode(file_.read()).decode("utf-8")

DATA_PATH = "./data"
IMAGES_PATH = "./data/images_224_224"


def load_local_css(file_name):
    with open(file_name) as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)


path1 = os.path.dirname(__file__)

DATA_PATH1 = "./data"
IMAGES_PATH1 = "./data/images_224_224"


def set_header():
    return st.markdown(
        f"""<div class='main-header'>
            <!-- <h1></h1> -->
            <div>
                <img class='blend-logo' src="data:image/png;base64,{data_url}" alt="Logo">
            </div>
        </div>""",
        unsafe_allow_html=True,
    )


def s_curve(x, K, b, a, x0):
    # Generalised logistic response curve: K is the saturation ceiling, b
    # scales the initial offset, a sets the steepness and x0 the inflection
    # point on the (scaled) spend axis.
    return K / (1 + b * np.exp(-a * (x - x0)))
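

# Hedged sketch of how the parameters shape the curve (illustrative values,
# not fitted ones); this helper is not called anywhere in the app.
def _demo_s_curve():
    x = np.linspace(0, 10, 100)
    y = s_curve(x, K=100.0, b=50.0, a=1.5, x0=5.0)
    # y starts near zero, inflects around x0 = 5 and saturates toward K = 100
    return x, y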


def panel_level(input_df, date_column="Date"):
    # Collapse panel-level rows into one aggregated row per date by summing
    # every numeric column.
    if date_column not in input_df.index.names:
        input_df = input_df.set_index(date_column)

    numeric_columns_df = input_df.select_dtypes(include="number")
    aggregated_df = numeric_columns_df.groupby(input_df.index).sum()
    aggregated_df = aggregated_df.reset_index()

    return aggregated_df
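

# Hedged usage sketch (synthetic frame, not app data): two panels collapse
# into one row per date; this helper is not called anywhere in the app.
def _demo_panel_level():
    df = pd.DataFrame(
        {
            "Date": ["2023-01-01", "2023-01-01", "2023-01-08", "2023-01-08"],
            "Panel": ["A", "B", "A", "B"],
            "spend": [10.0, 20.0, 30.0, 40.0],
        }
    )
    return panel_level(df, date_column="Date")  # spend -> [30.0, 70.0]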


def fetch_actual_data(
    panel=None,
    target_file="Overview_data_test.xlsx",
    updated_rcs=None,
    metrics=None,
):
    excel = pd.read_excel(Path(target_file), sheet_name=None)

    raw_df = excel["RAW DATA MMM"]
    spend_df = excel["SPEND INPUT"]
    contri_df = excel["CONTRIBUTION MMM"]

    # Either filter to a single panel or aggregate across all panels.
    if panel is not None and panel != "Aggregated":
        raw_df = raw_df[raw_df["Panel"] == panel].drop(columns=["Panel"])
        spend_df = spend_df[spend_df["Panel"] == panel].drop(columns=["Panel"])
        contri_df = contri_df[contri_df["Panel"] == panel].drop(columns=["Panel"])
    elif panel == "Aggregated":
        raw_df = panel_level(raw_df, date_column="Date")
        spend_df = panel_level(spend_df, date_column="Week")
        contri_df = panel_level(contri_df, date_column="Date")

    unnamed_cols = [col for col in raw_df.columns if col.lower().startswith("unnamed")]

    # Control, seasonality and intervention columns that are not media
    # channels (names must match the workbook exactly, typos included).
    exclude_columns = [
        "Date",
        "Region",
        "Controls_Grammarly_Index_SeasonalAVG",
        "Controls_Quillbot_Index",
        "Daily_Positive_Outliers",
        "External_RemoteClass_Index",
        "Intervals ON 20190520-20190805 | 20200518-20200803 | 20210517-20210802",
        "Intervals ON 20190826-20191209 | 20200824-20201207 | 20210823-20211206",
        "Intervals ON 20201005-20201019",
        "Promotion_PercentOff",
        "Promotion_TimeBased",
        "Seasonality_Indicator_Chirstmas",
        "Seasonality_Indicator_NewYears_Days",
        "Seasonality_Indicator_Thanksgiving",
        "Trend 20200302 / 20200803",
    ] + unnamed_cols

    raw_df["Date"] = pd.to_datetime(raw_df["Date"])
    contri_df["Date"] = pd.to_datetime(contri_df["Date"])
    input_df = raw_df.sort_values(by="Date")
    output_df = contri_df.sort_values(by="Date")
    spend_df["Week"] = pd.to_datetime(
        spend_df["Week"], format="%Y-%m-%d", errors="coerce"
    )
    spend_df.sort_values(by="Week", inplace=True)

    channel_list = [col for col in input_df.columns if col not in exclude_columns]
    channel_list = list(set(channel_list) - set(["fb_level_achieved_tier_1", "ga_app"]))

    # Drop channels whose total contribution is zero or negative; they cannot
    # be fitted or optimized against.
    infeasible_channels = [
        c
        for c in contri_df.select_dtypes(include=["float", "int"]).columns
        if contri_df[c].sum() <= 0
    ]
    channel_list = list(set(channel_list) - set(infeasible_channels))

    upper_limits = {}
    output_cols = []
    actual_output_dic = {}
    actual_input_dic = {}

    for inp_col in channel_list:
        spends = input_df[inp_col].values
        x = spends.copy()
        upper_limits[inp_col] = 2 * x.max()

        out_col = inp_col
        y = output_df[out_col].values.copy()
        actual_output_dic[inp_col] = y.copy()
        actual_input_dic[inp_col] = x.copy()

        output_cols.append(out_col)

    return pd.DataFrame(actual_input_dic), pd.DataFrame(actual_output_dic)
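
# Hedged usage sketch (requires the metric workbook on disk):
#
#     actual_input_df, actual_contribution_df = fetch_actual_data(
#         panel="Aggregated", target_file="Overview_data_test.xlsx"
#     )
#
# Both frames share one column per feasible channel, one row per date.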


def initialize_data(
    panel=None,
    target_file="Overview_data_test.xlsx",
    updated_rcs=None,
    metrics=None,
):
    print(f"## [DEBUG] [UTILS]: {target_file}")
    excel = pd.read_excel(Path(target_file), sheet_name=None)

    raw_df = excel["RAW DATA MMM"]
    spend_df = excel["SPEND INPUT"]
    contri_df = excel["CONTRIBUTION MMM"]

    if panel is not None and panel != "Aggregated":
        raw_df = raw_df[raw_df["Panel"] == panel].drop(columns=["Panel"])
        spend_df = spend_df[spend_df["Panel"] == panel].drop(columns=["Panel"])
        contri_df = contri_df[contri_df["Panel"] == panel].drop(columns=["Panel"])
    elif panel == "Aggregated":
        raw_df = panel_level(raw_df, date_column="Date")
        spend_df = panel_level(spend_df, date_column="Week")
        contri_df = panel_level(contri_df, date_column="Date")

    unique_key = f"{metrics}-{panel}"

    unnamed_cols = [col for col in raw_df.columns if col.lower().startswith("unnamed")]

    exclude_columns = [
        "Date",
        "Region",
        "Controls_Grammarly_Index_SeasonalAVG",
        "Controls_Quillbot_Index",
        "Daily_Positive_Outliers",
        "External_RemoteClass_Index",
        "Intervals ON 20190520-20190805 | 20200518-20200803 | 20210517-20210802",
        "Intervals ON 20190826-20191209 | 20200824-20201207 | 20210823-20211206",
        "Intervals ON 20201005-20201019",
        "Promotion_PercentOff",
        "Promotion_TimeBased",
        "Seasonality_Indicator_Chirstmas",
        "Seasonality_Indicator_NewYears_Days",
        "Seasonality_Indicator_Thanksgiving",
        "Trend 20200302 / 20200803",
    ] + unnamed_cols

    raw_df["Date"] = pd.to_datetime(raw_df["Date"])
    contri_df["Date"] = pd.to_datetime(contri_df["Date"])
    input_df = raw_df.sort_values(by="Date")
    output_df = contri_df.sort_values(by="Date")
    spend_df["Week"] = pd.to_datetime(
        spend_df["Week"], format="%Y-%m-%d", errors="coerce"
    )
    spend_df.sort_values(by="Week", inplace=True)

    channel_list = [col for col in input_df.columns if col not in exclude_columns]

    infeasible_channels = [
        c
        for c in contri_df.select_dtypes(include=["float", "int"]).columns
        if contri_df[c].sum() <= 0
    ]
    channel_list = list(set(channel_list) - set(infeasible_channels))

    response_curves = {}
    mapes = {}
    rmses = {}
    upper_limits = {}
    powers = {}
    r2 = {}
    conv_rates = {}
    output_cols = []
    channels = {}
    sales = None
    dates = input_df.Date.values
    actual_output_dic = {}
    actual_input_dic = {}

    for inp_col in channel_list:
        spends = input_df[inp_col].values
        x = spends.copy()
        upper_limits[inp_col] = 2 * x.max()

        out_col = [_col for _col in output_df.columns if _col.startswith(inp_col)][0]
        y = output_df[out_col].values.copy()
        actual_output_dic[inp_col] = y.copy()
        actual_input_dic[inp_col] = x.copy()

        output_cols.append(out_col)

        # Rescale spends so curve_fit works on numbers of comparable
        # magnitude (roughly three significant digits before the decimal).
        power = np.ceil(np.log(x.max()) / np.log(10)) - 3
        if power >= 0:
            x = x / 10**power

        x = x.astype("float64")
        y = y.astype("float64")

        # Bound the s-curve parameters; degenerate channels (near-zero
        # output or spend) get tight ceilings so the fit cannot blow up.
        if y.max() <= 0.01:
            if x.max() <= 0.0:
                bounds = ((0, 0, 0, 0), (3 * 0.01, 1000, 1, 0.01))
            else:
                bounds = ((0, 0, 0, 0), (3 * 0.01, 1000, 1, x.max()))
        else:
            bounds = ((0, 0, 0, 0), (3 * y.max(), 1000, 1, x.max()))

        params, _ = curve_fit(
            s_curve,
            x,
            y,
            p0=(2 * y.max(), 0.01, 1e-5, x.max()),
            bounds=bounds,
            maxfev=int(1e5),
        )
        mape = (100 * abs(1 - s_curve(x, *params) / y.clip(min=1))).mean()
        rmse = np.sqrt(((y - s_curve(x, *params)) ** 2).mean())
        r2_ = r2_score(y, s_curve(x, *params))

        response_curves[inp_col] = {
            "K": params[0],
            "b": params[1],
            "a": params[2],
            "x0": params[3],
        }

        # A user-modified curve, if present, overrides the fitted one.
        updated_rcs_key = f"{metrics}#@{panel}#@{inp_col}"
        if updated_rcs is not None and updated_rcs_key in list(updated_rcs.keys()):
            response_curves[inp_col] = updated_rcs[updated_rcs_key]

        mapes[inp_col] = mape
        rmses[inp_col] = rmse
        r2[inp_col] = r2_
        powers[inp_col] = power

        # Average spend-to-input conversion rate: weekly spend divided by the
        # channel's raw input, clipped to avoid division by zero.
        conv = (
            spend_df.set_index("Week")[inp_col]
            / input_df.set_index("Date")[inp_col].clip(lower=1)
        ).reset_index()
        conv.rename(columns={"index": "Week"}, inplace=True)
        conv["year"] = conv.Week.dt.year
        conv_rates[inp_col] = list(conv.drop("Week", axis=1).mean().to_dict().values())[
            0
        ]

        channel = Channel(
            name=inp_col,
            dates=dates,
            spends=spends,
            conversion_rate=conv_rates[inp_col],
            response_curve_type="s-curve",
            response_curve_params={
                "K": params[0],
                "b": params[1],
                "a": params[2],
                "x0": params[3],
            },
            bounds=np.array([-10, 10]),
        )
        channels[inp_col] = channel
        if sales is None:
            sales = channel.actual_sales
        else:
            sales += channel.actual_sales

    # Whatever the model attributes to non-media drivers, plus a correction
    # so the channel total reconciles exactly with the modeled output.
    other_contributions = (
        output_df.drop([*output_cols], axis=1).sum(axis=1, numeric_only=True).values
    )
    correction = output_df.drop("Date", axis=1).sum(axis=1).values - (
        sales + other_contributions
    )
    scenario = Scenario(
        name="default",
        channels=channels,
        constant=other_contributions,
        correction=correction,
    )

    st.session_state["initialized"] = True
    st.session_state["actual_df"] = input_df
    st.session_state["raw_df"] = raw_df
    st.session_state["contri_df"] = output_df
    default_scenario_dict = class_to_dict(scenario)
    st.session_state["default_scenario_dict"] = default_scenario_dict
    st.session_state["scenario"] = scenario
    st.session_state["channels_list"] = channel_list
    st.session_state["optimization_channels"] = {
        channel_name: False for channel_name in channel_list
    }
    st.session_state["rcs"] = response_curves.copy()

    st.session_state["powers"] = powers
    st.session_state["actual_contribution_df"] = pd.DataFrame(actual_output_dic)
    st.session_state["actual_input_df"] = pd.DataFrame(actual_input_dic)

    for channel in channels.values():
        st.session_state[channel.name] = numerize(
            channel.actual_total_spends * channel.conversion_rate, 1
        )

    st.session_state["xlsx_buffer"] = io.BytesIO()

    if Path("../saved_scenarios.pkl").exists():
        with open("../saved_scenarios.pkl", "rb") as f:
            st.session_state["saved_scenarios"] = pickle.load(f)
    else:
        st.session_state["saved_scenarios"] = OrderedDict()

    st.session_state["optimization_channels"] = {
        channel_name: False for channel_name in channel_list
    }
    st.session_state["disable_download_button"] = True

    # Pack everything the response-curve page needs to draw each channel.
    rcs_data = {}
    for channel in st.session_state["rcs"]:
        x = list(st.session_state["actual_input_df"][channel].values.astype(float))
        y = list(
            st.session_state["actual_contribution_df"][channel].values.astype(float)
        )
        power = float(np.ceil(np.log(max(x)) / np.log(10)) - 3)
        x_plot = list(np.linspace(0, 5 * max(x), 100))

        rcs_data[channel] = {
            "K": float(st.session_state["rcs"][channel]["K"]),
            "b": float(st.session_state["rcs"][channel]["b"]),
            "a": float(st.session_state["rcs"][channel]["a"]),
            "x0": float(st.session_state["rcs"][channel]["x0"]),
            "power": power,
            "x": x,
            "y": y,
            "x_plot": x_plot,
        }

    return rcs_data, scenario
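
# Hedged usage sketch: initialize_data populates st.session_state as a side
# effect and returns the plottable curve data plus the default Scenario.
#
#     rcs_data, scenario = initialize_data(
#         panel=None,
#         target_file="Overview_data_test.xlsx",
#         updated_rcs={},
#         metrics="revenue",  # hypothetical metric name
#     )
#     # rcs_data[channel] -> {"K", "b", "a", "x0", "power", "x", "y", "x_plot"}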


def create_channel_summary(scenario):
    # Hard-coded demo summary table: Spends/Revenue start as display strings,
    # are parsed to numbers to compute ROI, then re-set as display strings.
    data = {
        "Channel": [
            "Paid Search",
            "Ga will cid baixo risco",
            "Digital tactic others",
            "Fb la tier 1",
            "Fb la tier 2",
            "Paid social others",
            "Programmatic",
            "Kwai",
            "Indicacao",
            "Infleux",
            "Influencer",
        ],
        "Spends": [
            "$ 11.3K",
            "$ 155.2K",
            "$ 50.7K",
            "$ 125.4K",
            "$ 125.2K",
            "$ 105K",
            "$ 3.3M",
            "$ 47.5K",
            "$ 55.9K",
            "$ 632.3K",
            "$ 48.3K",
        ],
        "Revenue": [
            "558.0K",
            "3.5M",
            "5.2M",
            "3.1M",
            "3.1M",
            "2.1M",
            "20.8M",
            "1.6M",
            "728.4K",
            "22.9M",
            "4.8M",
        ],
    }

    df = pd.DataFrame(data)

    # Convert "$ 1.2K" / "3.4M" style strings to integers for the ROI math.
    df["Spends"] = (
        df["Spends"]
        .replace({r"\$": "", "K": "*1e3", "M": "*1e6"}, regex=True)
        .map(pd.eval)
        .astype(int)
    )
    df["Revenue"] = (
        df["Revenue"]
        .replace({r"\$": "", "K": "*1e3", "M": "*1e6"}, regex=True)
        .map(pd.eval)
        .astype(int)
    )

    df["ROI"] = (df["Revenue"] - df["Spends"]) / df["Spends"]

    format_currency = lambda x: f"${x:,.1f}"
    format_roi = lambda x: f"{x:.1f}"

    df["Spends"] = [
        "$ 11.3K",
        "$ 155.2K",
        "$ 50.7K",
        "$ 125.4K",
        "$ 125.2K",
        "$ 105K",
        "$ 3.3M",
        "$ 47.5K",
        "$ 55.9K",
        "$ 632.3K",
        "$ 48.3K",
    ]
    df["Revenue"] = [
        "$ 536.3K",
        "$ 3.4M",
        "$ 5M",
        "$ 3M",
        "$ 3M",
        "$ 2M",
        "$ 20M",
        "$ 1.5M",
        "$ 7.1M",
        "$ 22M",
        "$ 4.6M",
    ]
    df["ROI"] = df["ROI"].apply(format_roi)

    return df


def create_contribution_pie():
    total_contribution_fig = make_subplots(
        rows=1,
        cols=2,
        subplot_titles=["Spends", "Revenue"],
        specs=[[{"type": "pie"}, {"type": "pie"}]],
    )

    channels_list = [
        "Paid Search",
        "Ga will cid baixo risco",
        "Digital tactic others",
        "Fb la tier 1",
        "Fb la tier 2",
        "Paid social others",
        "Programmatic",
        "Kwai",
        "Indicacao",
        "Infleux",
        "Influencer",
        "Non Media",
    ]

    # Cycle through the module-level palette, pinning "Non Media" to a fixed
    # colour so it is recognisable across charts.
    colors_map = {
        col: color_palette[i % len(color_palette)]
        for i, col in enumerate(channels_list)
    }
    colors_map["Non Media"] = color_palette[5]

    # Hard-coded demo shares (percent of total spend / revenue).
    spends_values = [0.5, 3.36, 1.1, 2.7, 2.7, 2.27, 70.6, 1, 1, 13.7, 1, 0]
    revenue_values = [1, 4, 5, 3, 3, 2, 50.8, 1.5, 0.7, 13, 0, 16]

    total_contribution_fig.add_trace(
        go.Pie(
            labels=[channel_name for channel_name in channels_list],
            values=spends_values,
            marker=dict(
                colors=[colors_map[channel_name] for channel_name in channels_list]
            ),
            hole=0.3,
        ),
        row=1,
        col=1,
    )

    total_contribution_fig.add_trace(
        go.Pie(
            labels=[channel_name for channel_name in channels_list],
            values=revenue_values,
            marker=dict(
                colors=[colors_map[channel_name] for channel_name in channels_list]
            ),
            hole=0.3,
        ),
        row=1,
        col=2,
    )

    total_contribution_fig.update_traces(
        textposition="inside", texttemplate="%{percent:.1%}"
    )
    total_contribution_fig.update_layout(
        uniformtext_minsize=12,
        title="Channel contribution",
        uniformtext_mode="hide",
    )
    return total_contribution_fig


def create_contribuion_stacked_plot(scenario):
    weekly_contribution_fig = make_subplots(
        rows=1,
        cols=2,
        subplot_titles=["Spends", "Revenue"],
        specs=[[{"type": "bar"}, {"type": "bar"}]],
    )
    raw_df = st.session_state["raw_df"]
    df = raw_df.sort_values(by="Date")
    x = df.Date
    weekly_spends_data = []
    weekly_sales_data = []

    for i, channel_name in enumerate(st.session_state["channels_list"]):
        color = color_palette[i % len(color_palette)]

        weekly_spends_data.append(
            go.Bar(
                x=x,
                y=scenario.channels[channel_name].actual_spends
                * scenario.channels[channel_name].conversion_rate,
                name=channel_name_formating(channel_name),
                hovertemplate="Date:%{x}<br>Spend:%{y:$.2s}",
                legendgroup=channel_name,
                marker_color=color,
            )
        )

        weekly_sales_data.append(
            go.Bar(
                x=x,
                y=scenario.channels[channel_name].actual_sales,
                name=channel_name_formating(channel_name),
                hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
                legendgroup=channel_name,
                showlegend=False,
                marker_color=color,
            )
        )

    for _d in weekly_spends_data:
        weekly_contribution_fig.add_trace(_d, row=1, col=1)
    for _d in weekly_sales_data:
        weekly_contribution_fig.add_trace(_d, row=1, col=2)

    # The non-media baseline (constant + reconciliation correction) only
    # appears on the revenue side.
    weekly_contribution_fig.add_trace(
        go.Bar(
            x=x,
            y=scenario.constant + scenario.correction,
            name="Non Media",
            hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
            marker_color=color_palette[-1],
        ),
        row=1,
        col=2,
    )

    weekly_contribution_fig.update_layout(
        barmode="stack",
        title="Channel contribution by week",
        xaxis_title="Date",
    )
    weekly_contribution_fig.update_xaxes(showgrid=False)
    weekly_contribution_fig.update_yaxes(showgrid=False)
    return weekly_contribution_fig


def create_channel_spends_sales_plot(channel):
    if channel is not None:
        x = channel.dates
        _spends = channel.actual_spends * channel.conversion_rate
        _sales = channel.actual_sales
        channel_sales_spends_fig = make_subplots(specs=[[{"secondary_y": True}]])
        channel_sales_spends_fig.add_trace(
            go.Bar(
                x=x,
                y=_sales,
                marker_color=color_palette[3],
                name="Revenue",
                hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
            ),
            secondary_y=False,
        )

        channel_sales_spends_fig.add_trace(
            go.Scatter(
                x=x,
                y=_spends,
                line=dict(color=color_palette[2]),
                name="Spends",
                hovertemplate="Date:%{x}<br>Spend:%{y:$.2s}",
            ),
            secondary_y=True,
        )

        channel_sales_spends_fig.update_layout(
            xaxis_title="Date",
            yaxis_title="Revenue",
            yaxis2_title="Spends ($)",
            title="Channel spends and Revenue week-wise",
        )
        channel_sales_spends_fig.update_xaxes(showgrid=False)
        channel_sales_spends_fig.update_yaxes(showgrid=False)
    else:
        # No channel selected: plot only the non-media baseline revenue.
        raw_df = st.session_state["raw_df"]
        df = raw_df.sort_values(by="Date")
        x = df.Date
        scenario = class_from_dict(st.session_state["default_scenario_dict"])
        _sales = scenario.constant + scenario.correction
        channel_sales_spends_fig = make_subplots(specs=[[{"secondary_y": True}]])
        channel_sales_spends_fig.add_trace(
            go.Bar(
                x=x,
                y=_sales,
                marker_color=color_palette[0],
                name="Revenue",
                hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
            ),
            secondary_y=False,
        )

        channel_sales_spends_fig.update_layout(
            xaxis_title="Date",
            yaxis_title="Revenue",
            yaxis2_title="Spends ($)",
            title="Channel spends and Revenue week-wise",
        )
        channel_sales_spends_fig.update_xaxes(showgrid=False)
        channel_sales_spends_fig.update_yaxes(showgrid=False)

    return channel_sales_spends_fig


def format_numbers(value, n_decimals=1, include_indicator=True):
    # Values below 1 are shown as-is; larger values are abbreviated (K/M/...).
    if value is None:
        return None
    _value = value if value < 1 else numerize(value, n_decimals)
    if include_indicator:
        return f"{CURRENCY_INDICATOR} {_value}"
    else:
        return f"{_value}"


def decimal_formater(num_string, n_decimals=1):
    # Pad a numeric string with trailing zeros to at least n_decimals places.
    parts = num_string.split(".")
    if len(parts) == 1:
        return num_string + "." + "0" * n_decimals
    else:
        to_be_padded = n_decimals - len(parts[-1])
        if to_be_padded > 0:
            return num_string + "0" * to_be_padded
        else:
            return num_string
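
# Examples:
#
#     decimal_formater("12")         # -> "12.0"
#     decimal_formater("12.5", 2)    # -> "12.50"
#     decimal_formater("12.345", 2)  # -> "12.345" (never truncates)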


def channel_name_formating(channel_name):
    # Turn a raw column name into a display label; impression/click channels
    # are relabelled as spend series (case-insensitively).
    name_mod = channel_name.replace("_", " ")
    if name_mod.lower().endswith(" imp"):
        name_mod = name_mod[: -len("imp")] + "Spend"
    elif name_mod.lower().endswith(" clicks"):
        name_mod = name_mod[: -len("clicks")] + "Spend"
    return name_mod
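
# Examples:
#
#     channel_name_formating("paid_search_Imp")      # -> "paid search Spend"
#     channel_name_formating("programmatic_Clicks")  # -> "programmatic Spend"
#     channel_name_formating("kwai")                 # -> "kwai"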


def send_email(email, message):
    s = smtplib.SMTP("smtp.gmail.com", 587)
    s.starttls()
    s.login("geethu4444@gmail.com", "jgydhpfusuremcol")
    s.sendmail("geethu4444@gmail.com", email, message)
    s.quit()


def project_selection():
    users = {
        "ioannis": "Ioannis Papadopoulos",
        "sharon": "Sharon Sheng",
        "herman": "Herman Kwong",
        "ismail": "Ismail Mohammed",
        "geetha": "Geetha Krishna",
        "srishti": "Srishti Verma",
        "samkeet": "Samkeet Sangai",
        "manoj": "Manoj P",
        "loveesh": "Loveesh Bhatt",
        "bhavya": "Bhavya Jayantilal Kanzariya",
        "pritisha": "Pritisha Punukollu",
        "ashish": "Ashish Sharma",
        "swarupa": "Swarupa Parepalli",
    }
    first_name = st.text_input("Enter Name").lower()

    if st.button("Load saved projects"):
        if len(first_name) == 0 or first_name not in users.keys():
            st.warning("Enter a valid name")
            st.stop()

        st.session_state["username"] = users[first_name]

        c.execute(
            "SELECT email, user_id, user_type FROM users WHERE username = ?",
            (st.session_state["username"],),
        )
        user_data = c.fetchone()
        email, user_id, user_type = user_data

        c.execute(
            "SELECT DISTINCT project_name, last_edited_page, updated_time AS last_updated FROM sessions WHERE owner = ?",
            (st.session_state["username"],),
        )
        session_summary = c.fetchall()

        folder_path = r"Users"
        user_folder_path = os.path.join(folder_path, email)

        session_summary_df = pd.DataFrame(
            session_summary,
            columns=["Project Name", "Last Page Edited", "Modified Date"],
        )
        session_summary_df["Modified Date"] = session_summary_df["Modified Date"].map(
            lambda x: pd.to_datetime(x)
        )
        session_summary_df = session_summary_df.sort_values(
            by=["Modified Date"], ascending=False
        )
        st.session_state["summary_df"] = session_summary_df

        if len(session_summary_df) == 0:
            st.warning("No projects found. Please create a project on the Home page.")
            st.stop()

        # Default to the most recently modified project.
        st.session_state["project_name"] = session_summary_df.iloc[0, 0]
        st.session_state["project_path"] = os.path.join(
            user_folder_path, st.session_state["project_name"]
        )
        project_dct_path = os.path.join(
            st.session_state["project_path"], "project_dct.pkl"
        )

        with open(project_dct_path, "rb") as f:
            try:
                st.session_state["project_dct"] = pickle.load(f)
                st.rerun()
            except Exception:
                st.warning(
                    "Something went wrong. Unable to load saved details, or data was lost due to an app refresh. Please go to the Home page and create a new project."
                )
                st.stop()


def get_panels_names(file_selected):
    # The panel column may be named "Panel" or "panel_1"; otherwise the
    # workbook has no panel split and is treated as a single aggregate.
    raw_data_mmm_df = pd.read_excel(file_selected, sheet_name="RAW DATA MMM")

    if "Panel" in raw_data_mmm_df.columns:
        panel = list(set(raw_data_mmm_df["Panel"]))
    elif "panel_1" in raw_data_mmm_df.columns:
        panel = list(set(raw_data_mmm_df["panel_1"]))
    else:
        panel = ["Aggregated"]

    return panel


def get_metrics_names(directory):
    # Metric workbooks follow the naming convention "<prefix>@#<metric>.xlsx";
    # everything after the "@#" separator is the metric name.
    last_portions = []

    patterns = [
        os.path.join(directory, "*@#*.xlsx"),
        os.path.join(directory, "*@#*.xls"),
    ]

    for pattern in patterns:
        files = glob.glob(pattern)

        for file in files:
            base_name = os.path.basename(file)
            last_portion = base_name.split("@#")[-1]
            last_portion = last_portion.replace(".xlsx", "").replace(".xls", "")
            last_portions.append(last_portion)

    return last_portions
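
# Example: a directory containing
#
#     data_test_overview_panel@#revenue.xlsx
#     data_test_overview_panel@#app_installs.xlsx
#
# yields ["revenue", "app_installs"].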


def load_json_files(original_path, modified_path):
    # Load the original and user-modified response-curve JSON files. On any
    # failure, delete whatever is on disk, regenerate both from the project's
    # metric workbooks, and retry.
    try:
        with open(original_path, "r") as json_file:
            original_data = json.load(json_file)
        print("Original RCS data loaded successfully.")
        with open(modified_path, "r") as json_file:
            modified_data = json.load(json_file)
        print("Modified RCS data loaded successfully.")
    except Exception:
        st.toast("Failed to Load/Update. Tool reset to default settings.", icon="⚠️")

        original_json_file_path = os.path.join(
            st.session_state["project_path"], "rcs_data_original.json"
        )
        modified_json_file_path = os.path.join(
            st.session_state["project_path"], "rcs_data_modified.json"
        )

        if os.path.exists(original_json_file_path):
            os.remove(original_json_file_path)
        if os.path.exists(modified_json_file_path):
            os.remove(modified_json_file_path)

        generate_rcs_data(original_path, modified_path)
        original_data, modified_data = load_json_files(original_path, modified_path)

    return original_data, modified_data


def name_formating(name):
    # "metric_name" -> "Metric Name" for display.
    name_mod = name.replace("_", " ")
    name_mod = name_mod.title()
    return name_mod
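
# Examples:
#
#     name_formating("paid_search")   # -> "Paid Search"
#     name_formating("app_installs")  # -> "App Installs"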


def load_pickle_files(original_path, modified_path):
    # Same pattern as load_json_files, but for pickled scenario data. On any
    # failure, delete what is on disk, regenerate the scenario pickles, and
    # retry.
    try:
        with open(original_path, "rb") as pickle_file:
            original_data = pickle.load(pickle_file)
        print("Original scenario data loaded successfully from pickle file.")

        with open(modified_path, "rb") as pickle_file:
            modified_data = pickle.load(pickle_file)
        print("Modified scenario data loaded successfully from pickle file.")
    except Exception:
        st.toast("Failed to Load/Update. Tool reset to default settings.", icon="⚠️")

        original_pickle_file_path = os.path.join(
            st.session_state["project_path"], "scenario_data_original.pkl"
        )
        modified_pickle_file_path = os.path.join(
            st.session_state["project_path"], "scenario_data_modified.pkl"
        )

        if os.path.exists(original_pickle_file_path):
            os.remove(original_pickle_file_path)
        if os.path.exists(modified_pickle_file_path):
            os.remove(modified_pickle_file_path)

        generate_scenario_data(original_path, modified_path)
        original_data, modified_data = load_pickle_files(original_path, modified_path)

    return original_data, modified_data


def generate_rcs_data(original_path, modified_path):
    # Fit response curves for every metric/panel combination in the project
    # and write two JSON files: the pristine fits and a user-editable copy.
    directory = os.path.join(st.session_state["project_path"], "metrics_level_data")

    metrics_list = get_metrics_names(directory)

    all_rcs_data_original = {}
    all_rcs_data_modified = {}

    for metric in metrics_list:
        file_selected = f"metrics_level_data/data_test_overview_panel@#{metric}.xlsx"
        file_selected_path = os.path.join(
            st.session_state["project_path"], file_selected
        )

        panel_list = get_panels_names(file_selected_path)

        # Carry over any previously saved user modifications.
        if os.path.exists(modified_path):
            with open(modified_path, "r") as json_file:
                modified_data = json.load(json_file)

        for panel in panel_list:
            rcs_dict_original, scenario = initialize_data(
                panel=panel if panel != "Aggregated" else None,
                target_file=file_selected_path,
                updated_rcs={},
                metrics=metric,
            )

            if metric not in all_rcs_data_original:
                all_rcs_data_original[metric] = {}
            all_rcs_data_original[metric][panel] = rcs_dict_original

            if metric not in all_rcs_data_modified:
                all_rcs_data_modified[metric] = {}

            for channel in rcs_dict_original:
                all_rcs_data_modified[metric][panel] = all_rcs_data_modified[
                    metric
                ].get(panel, {})

                try:
                    updated_rcs_dict = modified_data[metric][panel][channel]
                except Exception:
                    updated_rcs_dict = {
                        "K": rcs_dict_original[channel]["K"],
                        "b": rcs_dict_original[channel]["b"],
                        "a": rcs_dict_original[channel]["a"],
                        "x0": rcs_dict_original[channel]["x0"],
                    }

                all_rcs_data_modified[metric][panel][channel] = updated_rcs_dict

    with open(original_path, "w") as json_file:
        json.dump(all_rcs_data_original, json_file, indent=4)

    with open(modified_path, "w") as json_file:
        json.dump(all_rcs_data_modified, json_file, indent=4)
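
# Hedged sketch of the resulting JSON layout (metric/panel/channel names
# depend on the project's data):
#
#     {
#       "revenue": {
#         "Aggregated": {
#           "paid_search": {"K": ..., "b": ..., "a": ..., "x0": ...,
#                           "power": ..., "x": [...], "y": [...], "x_plot": [...]}
#         }
#       }
#     }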


def generate_scenario_data(original_path, modified_path):
    # Analogous to generate_rcs_data, but serialises whole Scenario objects
    # (as dicts) to pickle files instead of response-curve JSON.
    directory = os.path.join(st.session_state["project_path"], "metrics_level_data")

    metrics_list = get_metrics_names(directory)

    all_scenario_data_original = {}
    all_scenario_data_modified = {}

    for metric in metrics_list:
        file_selected = f"metrics_level_data/data_test_overview_panel@#{metric}.xlsx"
        file_selected_path = os.path.join(
            st.session_state["project_path"], file_selected
        )

        panel_list = get_panels_names(file_selected_path)

        # Carry over any previously saved user modifications.
        if os.path.exists(modified_path):
            with open(modified_path, "rb") as pickle_file:
                modified_data = pickle.load(pickle_file)

        for panel in panel_list:
            rcs_dict_original, scenario = initialize_data(
                panel=panel if panel != "Aggregated" else None,
                target_file=file_selected_path,
                updated_rcs={},
                metrics=metric,
            )

            if metric not in all_scenario_data_original:
                all_scenario_data_original[metric] = {}
            all_scenario_data_original[metric][panel] = class_convert_to_dict(scenario)

            if metric not in all_scenario_data_modified:
                all_scenario_data_modified[metric] = {}

            try:
                all_scenario_data_modified[metric][panel] = modified_data[metric][panel]
            except Exception:
                all_scenario_data_modified[metric][panel] = class_convert_to_dict(
                    scenario
                )

    with open(original_path, "wb") as pickle_file:
        pickle.dump(all_scenario_data_original, pickle_file)

    with open(modified_path, "wb") as pickle_file:
        pickle.dump(all_scenario_data_modified, pickle_file)