#

MMM LiME

#

H & M Recommendations

#

""", unsafe_allow_html=True, ) # def set_header(): # logo_path = "./path/to/your/local/LIME_logo.png" # Replace with the actual file path # text = "LiME" # return st.markdown(f"""

#

{text}

#

""", unsafe_allow_html=True) def s_curve(x, K, b, a, x0): return K / (1 + b * np.exp(-a * (x - x0))) def panel_level(input_df, date_column="Date"): # Ensure 'Date' is set as the index if date_column not in input_df.index.names: input_df = input_df.set_index(date_column) # Select numeric columns only (excluding 'Date' since it's now the index) numeric_columns_df = input_df.select_dtypes(include="number") # Group by 'Date' (which is the index) and sum the numeric columns aggregated_df = numeric_columns_df.groupby(input_df.index).sum() # Reset index if you want 'Date' back as a column aggregated_df = aggregated_df.reset_index() return aggregated_df def fetch_actual_data( panel=None, target_file="Overview_data_test.xlsx", updated_rcs=None, metrics=None, ): excel = pd.read_excel(Path(target_file), sheet_name=None) # Extract dataframes for raw data, spend input, and contribution MMM raw_df = excel["RAW DATA MMM"] spend_df = excel["SPEND INPUT"] contri_df = excel["CONTRIBUTION MMM"] # Check if the panel is not None if panel is not None and panel != "Aggregated": raw_df = raw_df[raw_df["Panel"] == panel].drop(columns=["Panel"]) spend_df = spend_df[spend_df["Panel"] == panel].drop(columns=["Panel"]) contri_df = contri_df[contri_df["Panel"] == panel].drop(columns=["Panel"]) elif panel == "Aggregated": raw_df = panel_level(raw_df, date_column="Date") spend_df = panel_level(spend_df, date_column="Week") contri_df = panel_level(contri_df, date_column="Date") # Revenue_df = excel['Revenue'] ## remove sesonalities, indices etc ... unnamed_cols = [col for col in raw_df.columns if col.lower().startswith("unnamed")] ## remove sesonalities, indices etc ... exclude_columns = [ "Date", "Region", "Controls_Grammarly_Index_SeasonalAVG", "Controls_Quillbot_Index", "Daily_Positive_Outliers", "External_RemoteClass_Index", "Intervals ON 20190520-20190805 | 20200518-20200803 | 20210517-20210802", "Intervals ON 20190826-20191209 | 20200824-20201207 | 20210823-20211206", "Intervals ON 20201005-20201019", "Promotion_PercentOff", "Promotion_TimeBased", "Seasonality_Indicator_Chirstmas", "Seasonality_Indicator_NewYears_Days", "Seasonality_Indicator_Thanksgiving", "Trend 20200302 / 20200803", ] + unnamed_cols raw_df["Date"] = pd.to_datetime(raw_df["Date"]) contri_df["Date"] = pd.to_datetime(contri_df["Date"]) input_df = raw_df.sort_values(by="Date") output_df = contri_df.sort_values(by="Date") spend_df["Week"] = pd.to_datetime( spend_df["Week"], format="%Y-%m-%d", errors="coerce" ) spend_df.sort_values(by="Week", inplace=True) # spend_df['Week'] = pd.to_datetime(spend_df['Week'], errors='coerce') # spend_df = spend_df.sort_values(by='Week') channel_list = [col for col in input_df.columns if col not in exclude_columns] channel_list = list(set(channel_list) - set(["fb_level_achieved_tier_1", "ga_app"])) # SRISHTI infeasible_channels = [ c for c in contri_df.select_dtypes(include=["float", "int"]).columns if contri_df[c].sum() <= 0 ] # st.write(channel_list) channel_list = list(set(channel_list) - set(infeasible_channels)) upper_limits = {} output_cols = [] actual_output_dic = {} actual_input_dic = {} for inp_col in channel_list: # st.write(inp_col) spends = input_df[inp_col].values x = spends.copy() # upper limit for penalty upper_limits[inp_col] = 2 * x.max() # contribution # out_col = [_col for _col in output_df.columns if _col.startswith(inp_col)][0] out_col = inp_col y = output_df[out_col].values.copy() actual_output_dic[inp_col] = y.copy() actual_input_dic[inp_col] = x.copy() ##output cols aggregation output_cols.append(out_col) print(actual_input_dic) return pd.DataFrame(actual_input_dic), pd.DataFrame(actual_output_dic) def initialize_data( panel=None, target_file="Overview_data_test.xlsx", updated_rcs=None, metrics=None, ): # uopx_conv_rates = {'streaming_impressions' : 0.007,'digital_impressions' : 0.007,'search_clicks' : 0.00719,'tv_impressions' : 0.000173, # "digital_clicks":0.005,"streaming_clicks":0.004,'streaming_spends':1,"tv_spends":1,"search_spends":1, # "digital_spends":1} # print('State initialized') print(f"## [DEBUG] [UTILS]: {target_file}") excel = pd.read_excel(Path(target_file), sheet_name=None) # Extract dataframes for raw data, spend input, and contribution MMM raw_df = excel["RAW DATA MMM"] spend_df = excel["SPEND INPUT"] contri_df = excel["CONTRIBUTION MMM"] # Check if the panel is not None if panel is not None and panel != "Aggregated": raw_df = raw_df[raw_df["Panel"] == panel].drop(columns=["Panel"]) spend_df = spend_df[spend_df["Panel"] == panel].drop(columns=["Panel"]) contri_df = contri_df[contri_df["Panel"] == panel].drop(columns=["Panel"]) elif panel == "Aggregated": raw_df = panel_level(raw_df, date_column="Date") spend_df = panel_level(spend_df, date_column="Week") contri_df = panel_level(contri_df, date_column="Date") # Revenue_df = excel['Revenue'] unique_key = f"{metrics}-{panel}" ## remove sesonalities, indices etc ... unnamed_cols = [col for col in raw_df.columns if col.lower().startswith("unnamed")] ## remove sesonalities, indices etc ... exclude_columns = [ "Date", "Region", "Controls_Grammarly_Index_SeasonalAVG", "Controls_Quillbot_Index", "Daily_Positive_Outliers", "External_RemoteClass_Index", "Intervals ON 20190520-20190805 | 20200518-20200803 | 20210517-20210802", "Intervals ON 20190826-20191209 | 20200824-20201207 | 20210823-20211206", "Intervals ON 20201005-20201019", "Promotion_PercentOff", "Promotion_TimeBased", "Seasonality_Indicator_Chirstmas", "Seasonality_Indicator_NewYears_Days", "Seasonality_Indicator_Thanksgiving", "Trend 20200302 / 20200803", ] + unnamed_cols raw_df["Date"] = pd.to_datetime(raw_df["Date"]) contri_df["Date"] = pd.to_datetime(contri_df["Date"]) input_df = raw_df.sort_values(by="Date") output_df = contri_df.sort_values(by="Date") spend_df["Week"] = pd.to_datetime( spend_df["Week"], format="%Y-%m-%d", errors="coerce" ) spend_df.sort_values(by="Week", inplace=True) # spend_df['Week'] = pd.to_datetime(spend_df['Week'], errors='coerce') # spend_df = spend_df.sort_values(by='Week') channel_list = [col for col in input_df.columns if col not in exclude_columns] # channel_list = list( # set(channel_list) - set(["fb_level_achieved_tier_1", "ga_app"]) # ) infeasible_channels = [ c for c in contri_df.select_dtypes(include=["float", "int"]).columns if contri_df[c].sum() <= 0 ] channel_list = list(set(channel_list) - set(infeasible_channels)) response_curves = {} mapes = {} rmses = {} upper_limits = {} powers = {} r2 = {} conv_rates = {} output_cols = [] channels = {} sales = None dates = input_df.Date.values actual_output_dic = {} actual_input_dic = {} for inp_col in channel_list: # st.write(inp_col) spends = input_df[inp_col].values x = spends.copy() # upper limit for penalty upper_limits[inp_col] = 2 * x.max() # contribution out_col = [_col for _col in output_df.columns if _col.startswith(inp_col)][0] y = output_df[out_col].values.copy() actual_output_dic[inp_col] = y.copy() actual_input_dic[inp_col] = x.copy() ##output cols aggregation output_cols.append(out_col) ## scale the input power = np.ceil(np.log(x.max()) / np.log(10)) - 3 if power >= 0: x = x / 10**power x = x.astype("float64") y = y.astype("float64") # print('#printing yyyyyyyyy') # print(inp_col) # print(x.max()) # print(y.max()) # SRISHTI if y.max() <= 0.01: if x.max() <= 0.0: # st.write("here-here") bounds = ((0, 0, 0, 0), (3 * 0.01, 1000, 1, 0.01)) else: # st.write("here") bounds = ((0, 0, 0, 0), (3 * 0.01, 1000, 1, x.max())) else: bounds = ((0, 0, 0, 0), (3 * y.max(), 1000, 1, x.max())) # bounds = ((y.max(), 3*y.max()),(0,1000),(0,1),(0,x.max())) # bounds = ((0, 0, 0, 0), (3 * y.max(), 1000, 1, x.max())) # bounds = ((y.max(), 3*y.max()),(0,1000),(0,1),(0,x.max())) params, _ = curve_fit( s_curve, x, y, p0=(2 * y.max(), 0.01, 1e-5, x.max()), bounds=bounds, maxfev=int(1e5), ) mape = (100 * abs(1 - s_curve(x, *params) / y.clip(min=1))).mean() rmse = np.sqrt(((y - s_curve(x, *params)) ** 2).mean()) r2_ = r2_score(y, s_curve(x, *params)) response_curves[inp_col] = { "K": params[0], "b": params[1], "a": params[2], "x0": params[3], } updated_rcs_key = f"{metrics}#@{panel}#@{inp_col}" if updated_rcs is not None and updated_rcs_key in list(updated_rcs.keys()): response_curves[inp_col] = updated_rcs[updated_rcs_key] mapes[inp_col] = mape rmses[inp_col] = rmse r2[inp_col] = r2_ powers[inp_col] = power ## conversion rates # spend_col = [ # _col # for _col in spend_df.columns # if _col.startswith(inp_col.rsplit("_", 1)[0]) # ][0] # print('#printing spendssss') # print(spend_col) conv = ( spend_df.set_index("Week")[inp_col] / input_df.set_index("Date")[inp_col].clip(lower=1) ).reset_index() conv.rename(columns={"index": "Week"}, inplace=True) conv["year"] = conv.Week.dt.year conv_rates[inp_col] = list(conv.drop("Week", axis=1).mean().to_dict().values())[ 0 ] ##print('Before',conv_rates[inp_col]) # conv_rates[inp_col] = uopx_conv_rates[inp_col] ##print('After',(conv_rates[inp_col])) channel = Channel( name=inp_col, dates=dates, spends=spends, # conversion_rate = np.mean(list(conv_rates[inp_col].values())), conversion_rate=conv_rates[inp_col], response_curve_type="s-curve", response_curve_params={ "K": params[0], "b": params[1], "a": params[2], "x0": params[3], }, bounds=np.array([-10, 10]), ) channels[inp_col] = channel if sales is None: sales = channel.actual_sales else: sales += channel.actual_sales other_contributions = ( output_df.drop([*output_cols], axis=1).sum(axis=1, numeric_only=True).values ) correction = output_df.drop("Date", axis=1).sum(axis=1).values - ( sales + other_contributions ) scenario = Scenario( name="default", channels=channels, constant=other_contributions, correction=correction, ) ## setting session variables st.session_state["initialized"] = True st.session_state["actual_df"] = input_df st.session_state["raw_df"] = raw_df st.session_state["contri_df"] = output_df default_scenario_dict = class_to_dict(scenario) st.session_state["default_scenario_dict"] = default_scenario_dict st.session_state["scenario"] = scenario st.session_state["channels_list"] = channel_list st.session_state["optimization_channels"] = { channel_name: False for channel_name in channel_list } st.session_state["rcs"] = response_curves.copy() # orig_rcs_path = os.path.join( # st.session_state["project_path"], f"orig_rcs_{metrics}_{panel}.json" # ) # print("##########################") # print(orig_rcs_path) # if Path(orig_rcs_path).exists(): # print("fetched"*100) # with open(orig_rcs_path, "r") as f: # st.session_state["orig_rcs"] = json.load(f) # else: # print("created"*100) # st.session_state["orig_rcs"] = response_curves.copy() # with open(orig_rcs_path, "w") as f: # json.dump(st.session_state["orig_rcs"], f) st.session_state["powers"] = powers st.session_state["actual_contribution_df"] = pd.DataFrame(actual_output_dic) st.session_state["actual_input_df"] = pd.DataFrame(actual_input_dic) for channel in channels.values(): st.session_state[channel.name] = numerize( channel.actual_total_spends * channel.conversion_rate, 1 ) st.session_state["xlsx_buffer"] = io.BytesIO() if Path("../saved_scenarios.pkl").exists(): with open("../saved_scenarios.pkl", "rb") as f: st.session_state["saved_scenarios"] = pickle.load(f) else: st.session_state["saved_scenarios"] = OrderedDict() # st.session_state["total_spends_change"] = 0 st.session_state["optimization_channels"] = { channel_name: False for channel_name in channel_list } st.session_state["disable_download_button"] = True rcs_data = {} for channel in st.session_state["rcs"]: # Convert to native Python lists and types x = list(st.session_state["actual_input_df"][channel].values.astype(float)) y = list( st.session_state["actual_contribution_df"][channel].values.astype(float) ) power = float(np.ceil(np.log(max(x)) / np.log(10)) - 3) x_plot = list(np.linspace(0, 5 * max(x), 100)) rcs_data[channel] = { "K": float(st.session_state["rcs"][channel]["K"]), "b": float(st.session_state["rcs"][channel]["b"]), "a": float(st.session_state["rcs"][channel]["a"]), "x0": float(st.session_state["rcs"][channel]["x0"]), "power": power, "x": x, "y": y, "x_plot": x_plot, } return rcs_data, scenario # def initialize_data(): # # fetch data from excel # output = pd.read_excel('data.xlsx',sheet_name=None) # raw_df = output['RAW DATA MMM'] # contribution_df = output['CONTRIBUTION MMM'] # Revenue_df = output['Revenue'] # ## channels to be shows # channel_list = [] # for col in raw_df.columns: # if 'click' in col.lower() or 'spend' in col.lower() or 'imp' in col.lower(): # ##print(col) # channel_list.append(col) # else: # pass # ## NOTE : Considered only Desktop spends for all calculations # acutal_df = raw_df[raw_df.Region == 'Desktop'].copy() # ## NOTE : Considered one year of data # acutal_df = acutal_df[acutal_df.Date>'2020-12-31'] # actual_df = acutal_df.drop('Region',axis=1).sort_values(by='Date')[[*channel_list,'Date']] # ##load response curves # with open('./grammarly_response_curves.json','r') as f: # response_curves = json.load(f) # ## create channel dict for scenario creation # dates = actual_df.Date.values # channels = {} # rcs = {} # constant = 0. # for i,info_dict in enumerate(response_curves): # name = info_dict.get('name') # response_curve_type = info_dict.get('response_curve') # response_curve_params = info_dict.get('params') # rcs[name] = response_curve_params # if name != 'constant': # spends = actual_df[name].values # channel = Channel(name=name,dates=dates, # spends=spends, # response_curve_type=response_curve_type, # response_curve_params=response_curve_params, # bounds=np.array([-30,30])) # channels[name] = channel # else: # constant = info_dict.get('value',0.) * len(dates) # ## create scenario # scenario = Scenario(name='default', channels=channels, constant=constant) # default_scenario_dict = class_to_dict(scenario) # ## setting session variables # st.session_state['initialized'] = True # st.session_state['actual_df'] = actual_df # st.session_state['raw_df'] = raw_df # st.session_state['default_scenario_dict'] = default_scenario_dict # st.session_state['scenario'] = scenario # st.session_state['channels_list'] = channel_list # st.session_state['optimization_channels'] = {channel_name : False for channel_name in channel_list} # st.session_state['rcs'] = rcs # for channel in channels.values(): # if channel.name not in st.session_state: # st.session_state[channel.name] = float(channel.actual_total_spends) # if 'xlsx_buffer' not in st.session_state: # st.session_state['xlsx_buffer'] = io.BytesIO() # ## for saving scenarios # if 'saved_scenarios' not in st.session_state: # if Path('../saved_scenarios.pkl').exists(): # with open('../saved_scenarios.pkl','rb') as f: # st.session_state['saved_scenarios'] = pickle.load(f) # else: # st.session_state['saved_scenarios'] = OrderedDict() # if 'total_spends_change' not in st.session_state: # st.session_state['total_spends_change'] = 0 # if 'optimization_channels' not in st.session_state: # st.session_state['optimization_channels'] = {channel_name : False for channel_name in channel_list} # if 'disable_download_button' not in st.session_state: # st.session_state['disable_download_button'] = True def create_channel_summary(scenario): # Provided data data = { "Channel": [ "Paid Search", "Ga will cid baixo risco", "Digital tactic others", "Fb la tier 1", "Fb la tier 2", "Paid social others", "Programmatic", "Kwai", "Indicacao", "Infleux", "Influencer", ], "Spends": [ "$ 11.3K", "$ 155.2K", "$ 50.7K", "$ 125.4K", "$ 125.2K", "$ 105K", "$ 3.3M", "$ 47.5K", "$ 55.9K", "$ 632.3K", "$ 48.3K", ], "Revenue": [ "558.0K", "3.5M", "5.2M", "3.1M", "3.1M", "2.1M", "20.8M", "1.6M", "728.4K", "22.9M", "4.8M", ], } # Create DataFrame df = pd.DataFrame(data) # Convert currency strings to numeric values df["Spends"] = ( df["Spends"] .replace({"\$": "", "K": "*1e3", "M": "*1e6"}, regex=True) .map(pd.eval) .astype(int) ) df["Revenue"] = ( df["Revenue"] .replace({"\$": "", "K": "*1e3", "M": "*1e6"}, regex=True) .map(pd.eval) .astype(int) ) # Calculate ROI df["ROI"] = (df["Revenue"] - df["Spends"]) / df["Spends"] # Format columns format_currency = lambda x: f"${x:,.1f}" format_roi = lambda x: f"{x:.1f}" df["Spends"] = [ "$ 11.3K", "$ 155.2K", "$ 50.7K", "$ 125.4K", "$ 125.2K", "$ 105K", "$ 3.3M", "$ 47.5K", "$ 55.9K", "$ 632.3K", "$ 48.3K", ] df["Revenue"] = [ "$ 536.3K", "$ 3.4M", "$ 5M", "$ 3M", "$ 3M", "$ 2M", "$ 20M", "$ 1.5M", "$ 7.1M", "$ 22M", "$ 4.6M", ] df["ROI"] = df["ROI"].apply(format_roi) return df # @st.cache(allow_output_mutation=True) # def create_contribution_pie(scenario): # #c1f7dc # colors_map = {col:color for col,color in zip(st.session_state['channels_list'],plotly.colors.n_colors(plotly.colors.hex_to_rgb('#BE6468'), plotly.colors.hex_to_rgb('#E7B8B7'),23))} # total_contribution_fig = make_subplots(rows=1, cols=2,subplot_titles=['Spends','Revenue'],specs=[[{"type": "pie"}, {"type": "pie"}]]) # total_contribution_fig.add_trace( # go.Pie(labels=[channel_name_formating(channel_name) for channel_name in st.session_state['channels_list']] + ['Non Media'], # values= [round(scenario.channels[channel_name].actual_total_spends * scenario.channels[channel_name].conversion_rate,1) for channel_name in st.session_state['channels_list']] + [0], # marker=dict(colors = [plotly.colors.label_rgb(colors_map[channel_name]) for channel_name in st.session_state['channels_list']] + ['#F0F0F0']), # hole=0.3), # row=1, col=1) # total_contribution_fig.add_trace( # go.Pie(labels=[channel_name_formating(channel_name) for channel_name in st.session_state['channels_list']] + ['Non Media'], # values= [scenario.channels[channel_name].actual_total_sales for channel_name in st.session_state['channels_list']] + [scenario.correction.sum() + scenario.constant.sum()], # hole=0.3), # row=1, col=2) # total_contribution_fig.update_traces(textposition='inside',texttemplate='%{percent:.1%}') # total_contribution_fig.update_layout(uniformtext_minsize=12,title='Channel contribution', uniformtext_mode='hide') # return total_contribution_fig # @st.cache(allow_output_mutation=True) # def create_contribuion_stacked_plot(scenario): # weekly_contribution_fig = make_subplots(rows=1, cols=2,subplot_titles=['Spends','Revenue'],specs=[[{"type": "bar"}, {"type": "bar"}]]) # raw_df = st.session_state['raw_df'] # df = raw_df.sort_values(by='Date') # x = df.Date # weekly_spends_data = [] # weekly_sales_data = [] # for channel_name in st.session_state['channels_list']: # weekly_spends_data.append((go.Bar(x=x, # y=scenario.channels[channel_name].actual_spends * scenario.channels[channel_name].conversion_rate, # name=channel_name_formating(channel_name), # hovertemplate="Date:%{x}
Spend:%{y:$.2s}", # legendgroup=channel_name))) # weekly_sales_data.append((go.Bar(x=x, # y=scenario.channels[channel_name].actual_sales, # name=channel_name_formating(channel_name), # hovertemplate="Date:%{x}
Revenue:%{y:$.2s}", # legendgroup=channel_name, showlegend=False))) # for _d in weekly_spends_data: # weekly_contribution_fig.add_trace(_d, row=1, col=1) # for _d in weekly_sales_data: # weekly_contribution_fig.add_trace(_d, row=1, col=2) # weekly_contribution_fig.add_trace(go.Bar(x=x, # y=scenario.constant + scenario.correction, # name='Non Media', # hovertemplate="Date:%{x}
Revenue:%{y:$.2s}"), row=1, col=2) # weekly_contribution_fig.update_layout(barmode='stack', title='Channel contribuion by week', xaxis_title='Date') # weekly_contribution_fig.update_xaxes(showgrid=False) # weekly_contribution_fig.update_yaxes(showgrid=False) # return weekly_contribution_fig # @st.cache(allow_output_mutation=True) # def create_channel_spends_sales_plot(channel): # if channel is not None: # x = channel.dates # _spends = channel.actual_spends * channel.conversion_rate # _sales = channel.actual_sales # channel_sales_spends_fig = make_subplots(specs=[[{"secondary_y": True}]]) # channel_sales_spends_fig.add_trace(go.Bar(x=x, y=_sales,marker_color='#c1f7dc',name='Revenue', hovertemplate="Date:%{x}
Revenue:%{y:$.2s}"), secondary_y = False) # channel_sales_spends_fig.add_trace(go.Scatter(x=x, y=_spends,line=dict(color='#005b96'),name='Spends',hovertemplate="Date:%{x}
Spend:%{y:$.2s}"), secondary_y = True) # channel_sales_spends_fig.update_layout(xaxis_title='Date',yaxis_title='Revenue',yaxis2_title='Spends ($)',title='Channel spends and Revenue week wise') # channel_sales_spends_fig.update_xaxes(showgrid=False) # channel_sales_spends_fig.update_yaxes(showgrid=False) # else: # raw_df = st.session_state['raw_df'] # df = raw_df.sort_values(by='Date') # x = df.Date # scenario = class_from_dict(st.session_state['default_scenario_dict']) # _sales = scenario.constant + scenario.correction # channel_sales_spends_fig = make_subplots(specs=[[{"secondary_y": True}]]) # channel_sales_spends_fig.add_trace(go.Bar(x=x, y=_sales,marker_color='#c1f7dc',name='Revenue', hovertemplate="Date:%{x}
Revenue:%{y:$.2s}"), secondary_y = False) # # channel_sales_spends_fig.add_trace(go.Scatter(x=x, y=_spends,line=dict(color='#15C39A'),name='Spends',hovertemplate="Date:%{x}
Spend:%{y:$.2s}"), secondary_y = True) # channel_sales_spends_fig.update_layout(xaxis_title='Date',yaxis_title='Revenue',yaxis2_title='Spends ($)',title='Channel spends and Revenue week wise') # channel_sales_spends_fig.update_xaxes(showgrid=False) # channel_sales_spends_fig.update_yaxes(showgrid=False) # return channel_sales_spends_fig # Define a shared color palette def create_contribution_pie(): color_palette = [ "#F3F3F0", "#5E7D7E", "#2FA1FF", "#00EDED", "#00EAE4", "#304550", "#EDEBEB", "#7FBEFD", "#003059", "#A2F3F3", "#E1D6E2", "#B6B6B6", ] total_contribution_fig = make_subplots( rows=1, cols=2, subplot_titles=["Spends", "Revenue"], specs=[[{"type": "pie"}, {"type": "pie"}]], ) channels_list = [ "Paid Search", "Ga will cid baixo risco", "Digital tactic others", "Fb la tier 1", "Fb la tier 2", "Paid social others", "Programmatic", "Kwai", "Indicacao", "Infleux", "Influencer", "Non Media", ] # Assign colors from the limited palette to channels colors_map = { col: color_palette[i % len(color_palette)] for i, col in enumerate(channels_list) } colors_map["Non Media"] = color_palette[ 5 ] # Assign fixed green color for 'Non Media' # Hardcoded values for Spends and Revenue spends_values = [0.5, 3.36, 1.1, 2.7, 2.7, 2.27, 70.6, 1, 1, 13.7, 1, 0] revenue_values = [1, 4, 5, 3, 3, 2, 50.8, 1.5, 0.7, 13, 0, 16] # Add trace for Spends pie chart total_contribution_fig.add_trace( go.Pie( labels=[channel_name for channel_name in channels_list], values=spends_values, marker=dict( colors=[colors_map[channel_name] for channel_name in channels_list] ), hole=0.3, ), row=1, col=1, ) # Add trace for Revenue pie chart total_contribution_fig.add_trace( go.Pie( labels=[channel_name for channel_name in channels_list], values=revenue_values, marker=dict( colors=[colors_map[channel_name] for channel_name in channels_list] ), hole=0.3, ), row=1, col=2, ) total_contribution_fig.update_traces( textposition="inside", texttemplate="%{percent:.1%}" ) total_contribution_fig.update_layout( uniformtext_minsize=12, title="Channel contribution", uniformtext_mode="hide", ) return total_contribution_fig def create_contribuion_stacked_plot(scenario): weekly_contribution_fig = make_subplots( rows=1, cols=2, subplot_titles=["Spends", "Revenue"], specs=[[{"type": "bar"}, {"type": "bar"}]], ) raw_df = st.session_state["raw_df"] df = raw_df.sort_values(by="Date") x = df.Date weekly_spends_data = [] weekly_sales_data = [] for i, channel_name in enumerate(st.session_state["channels_list"]): color = color_palette[i % len(color_palette)] weekly_spends_data.append( go.Bar( x=x, y=scenario.channels[channel_name].actual_spends * scenario.channels[channel_name].conversion_rate, name=channel_name_formating(channel_name), hovertemplate="Date:%{x}
Spend:%{y:$.2s}", legendgroup=channel_name, marker_color=color, ) ) weekly_sales_data.append( go.Bar( x=x, y=scenario.channels[channel_name].actual_sales, name=channel_name_formating(channel_name), hovertemplate="Date:%{x}
Revenue:%{y:$.2s}", legendgroup=channel_name, showlegend=False, marker_color=color, ) ) for _d in weekly_spends_data: weekly_contribution_fig.add_trace(_d, row=1, col=1) for _d in weekly_sales_data: weekly_contribution_fig.add_trace(_d, row=1, col=2) weekly_contribution_fig.add_trace( go.Bar( x=x, y=scenario.constant + scenario.correction, name="Non Media", hovertemplate="Date:%{x}
Revenue:%{y:$.2s}", marker_color=color_palette[-1], ), row=1, col=2, ) weekly_contribution_fig.update_layout( barmode="stack", title="Channel contribution by week", xaxis_title="Date", ) weekly_contribution_fig.update_xaxes(showgrid=False) weekly_contribution_fig.update_yaxes(showgrid=False) return weekly_contribution_fig def create_channel_spends_sales_plot(channel): if channel is not None: x = channel.dates _spends = channel.actual_spends * channel.conversion_rate _sales = channel.actual_sales channel_sales_spends_fig = make_subplots(specs=[[{"secondary_y": True}]]) channel_sales_spends_fig.add_trace( go.Bar( x=x, y=_sales, marker_color=color_palette[ 3 ], # You can choose a color from the palette name="Revenue", hovertemplate="Date:%{x}
Revenue:%{y:$.2s}", ), secondary_y=False, ) channel_sales_spends_fig.add_trace( go.Scatter( x=x, y=_spends, line=dict( color=color_palette[2] ), # You can choose another color from the palette name="Spends", hovertemplate="Date:%{x}
Spend:%{y:$.2s}", ), secondary_y=True, ) channel_sales_spends_fig.update_layout( xaxis_title="Date", yaxis_title="Revenue", yaxis2_title="Spends ($)", title="Channel spends and Revenue week-wise", ) channel_sales_spends_fig.update_xaxes(showgrid=False) channel_sales_spends_fig.update_yaxes(showgrid=False) else: raw_df = st.session_state["raw_df"] df = raw_df.sort_values(by="Date") x = df.Date scenario = class_from_dict(st.session_state["default_scenario_dict"]) _sales = scenario.constant + scenario.correction channel_sales_spends_fig = make_subplots(specs=[[{"secondary_y": True}]]) channel_sales_spends_fig.add_trace( go.Bar( x=x, y=_sales, marker_color=color_palette[ 0 ], # You can choose a color from the palette name="Revenue", hovertemplate="Date:%{x}
Revenue:%{y:$.2s}", ), secondary_y=False, ) channel_sales_spends_fig.update_layout( xaxis_title="Date", yaxis_title="Revenue", yaxis2_title="Spends ($)", title="Channel spends and Revenue week-wise", ) channel_sales_spends_fig.update_xaxes(showgrid=False) channel_sales_spends_fig.update_yaxes(showgrid=False) return channel_sales_spends_fig def format_numbers(value, n_decimals=1, include_indicator=True): if value is None: return None _value = value if value < 1 else numerize(value, n_decimals) if include_indicator: return f"{CURRENCY_INDICATOR} {_value}" else: return f"{_value}" def decimal_formater(num_string, n_decimals=1): parts = num_string.split(".") if len(parts) == 1: return num_string + "." + "0" * n_decimals else: to_be_padded = n_decimals - len(parts[-1]) if to_be_padded > 0: return num_string + "0" * to_be_padded else: return num_string def channel_name_formating(channel_name): name_mod = channel_name.replace("_", " ") if name_mod.lower().endswith(" imp"): name_mod = name_mod.replace("Imp", "Spend") elif name_mod.lower().endswith(" clicks"): name_mod = name_mod.replace("Clicks", "Spend") return name_mod def send_email(email, message): s = smtplib.SMTP("smtp.gmail.com", 587) s.starttls() s.login("geethu4444@gmail.com", "jgydhpfusuremcol") s.sendmail("geethu4444@gmail.com", email, message) s.quit() def project_selection(): users = { "ioannis": "Ioannis Papadopoulos", "sharon": "Sharon Sheng", "herman": "Herman Kwong", "ismail": "Ismail Mohammed", "geetha": "Geetha Krishna", "srishti": "Srishti Verma", "samkeet": "Samkeet Sangai", "manoj": "Manoj P", "loveesh": "Loveesh Bhatt", "bhavya": "Bhavya Jayantilal Kanzariya", "pritisha": "Pritisha Punukollu", "ashish": "Ashish Sharma", "swarupa": "Swarupa Parepalli", } first_name = st.text_input("Enter Name").lower() if st.button("Load saved projects"): if len(first_name) == 0 or first_name not in users.keys(): st.warning("Enter a valid name") st.stop() st.session_state["username"] = users[first_name] c.execute( "SELECT email, user_id, user_type FROM users WHERE username = ?", (st.session_state["username"],), ) user_data = c.fetchone() email, user_id, user_type = user_data c.execute( "SELECT Distinct project_name, last_edited_page, updated_time as last_updated FROM sessions WHERE owner=?", (st.session_state["username"],), ) session_summary = c.fetchall() folder_path = r"Users" user_folder_path = os.path.join(folder_path, email) session_summary_df = pd.DataFrame( session_summary, columns=["Project Name", "Last Page Edited", "Modified Date"], ) session_summary_df["Modified Date"] = session_summary_df["Modified Date"].map( lambda x: pd.to_datetime(x) ) session_summary_df = session_summary_df.sort_values( by=["Modified Date"], ascending=False ) st.session_state["summary_df"] = session_summary_df # st.write(st.session_state["project_name"][0]) if len(session_summary_df) == 0: st.warning("No projects found please create a project in home page") st.stop() st.session_state["project_name"] = session_summary_df.iloc[0][0] st.session_state["project_path"] = os.path.join( user_folder_path, st.session_state["project_name"] ) project_dct_path = os.path.join( st.session_state["project_path"], "project_dct.pkl" ) with open(project_dct_path, "rb") as f: try: st.session_state["project_dct"] = pickle.load(f) st.rerun() except Exception as e: st.warning( "Something went wrong Unable to load saved details / data is lost due to app refresh. Please go to Home page and create a new project." ) st.stop() # if __name__ == "__main__": # initialize_data() ############################################################################################################# import os import json import glob import pickle import streamlit as st # Function to get panels names def get_panels_names(file_selected): raw_data_mmm_df = pd.read_excel(file_selected, sheet_name="RAW DATA MMM") if "Panel" in raw_data_mmm_df.columns: panel = list(set(raw_data_mmm_df["Panel"])) elif "panel_1" in raw_data_mmm_df.columns: panel = list(set(raw_data_mmm_df["panel_1"])) else: panel = ["Aggregated"] return panel # Function to get metrics names def get_metrics_names(directory): # Create a list to hold the final parts of the filenames last_portions = [] # Patterns to match Excel files (.xlsx and .xls) that contain @# patterns = [ os.path.join(directory, "*@#*.xlsx"), os.path.join(directory, "*@#*.xls"), ] # Process each pattern for pattern in patterns: files = glob.glob(pattern) # Extracting the last portion after @# for each file for file in files: base_name = os.path.basename(file) last_portion = base_name.split("@#")[-1] last_portion = last_portion.replace(".xlsx", "").replace( ".xls", "" ) # Removing extensions last_portions.append(last_portion) return last_portions # Function to load the original and modified JSON files into dictionaries def load_json_files(original_path, modified_path): try: with open(original_path, "r") as json_file: original_data = json.load(json_file) print("Original RCS data loaded successfully.") with open(modified_path, "r") as json_file: modified_data = json.load(json_file) print("Modified RCS data loaded successfully.") except: st.toast("Failed to Load/Update. Tool reset to default settings.", icon="⚠️") # Define the paths to the RCS data files original_json_file_path = os.path.join( st.session_state["project_path"], "rcs_data_original.json" ) modified_json_file_path = os.path.join( st.session_state["project_path"], "rcs_data_modified.json" ) # Remove the original data file if it exists if os.path.exists(original_json_file_path): os.remove(original_json_file_path) # Remove the modified data file if it exists if os.path.exists(modified_json_file_path): os.remove(modified_json_file_path) # Generate a new file and try again generate_rcs_data(original_path, modified_path) original_data, modified_data = load_json_files(original_path, modified_path) return original_data, modified_data # Function to format name def name_formating(name): # Replace underscores with spaces name_mod = name.replace("_", " ") # Capitalize the first letter of each word name_mod = name_mod.title() return name_mod # Function to load the original and modified pickle files into dictionaries def load_pickle_files(original_path, modified_path): try: # Load the original data from the pickle file with open(original_path, "rb") as pickle_file: original_data = pickle.load(pickle_file) print("Original scenario data loaded successfully from pickle file.") # Load the modified data from the pickle file with open(modified_path, "rb") as pickle_file: modified_data = pickle.load(pickle_file) print("Modified scenario data loaded successfully from pickle file.") except: st.toast("Failed to Load/Update. Tool reset to default settings.", icon="⚠️") # Define the paths to the scenario files original_pickle_file_path = os.path.join( st.session_state["project_path"], "scenario_data_original.pkl" ) modified_pickle_file_path = os.path.join( st.session_state["project_path"], "scenario_data_modified.pkl" ) # Remove the original data file if it exists if os.path.exists(original_pickle_file_path): os.remove(original_pickle_file_path) # Remove the modified data file if it exists if os.path.exists(modified_pickle_file_path): os.remove(modified_pickle_file_path) # Generate a new file and try again generate_rcs_data(original_path, modified_path) original_data, modified_data = load_pickle_files(original_path, modified_path) return original_data, modified_data # Function to generate RCS data and store it as JSON files def generate_rcs_data(original_path, modified_path): # Define the directory where the metrics data is located directory = os.path.join(st.session_state["project_path"], "metrics_level_data") # Retrieve the list of all metric names from the specified directory metrics_list = get_metrics_names(directory) # Dictionary to store RCS data for all metrics and their respective panels all_rcs_data_original = {} all_rcs_data_modified = {} # Iterate over each metric in the metrics list for metric in metrics_list: # Define the path to the Excel file corresponding to the current metric file_selected = f"metrics_level_data/data_test_overview_panel@#{metric}.xlsx" file_selected_path = os.path.join( st.session_state["project_path"], file_selected ) # Retrieve the list of panel names from the current metric's Excel file panel_list = get_panels_names(file_selected_path) # Check if "rcs_data_modified.json" exist if os.path.exists(modified_path): with open(modified_path, "r") as json_file: modified_data = json.load(json_file) # Iterate over each panel in the panel list for panel in panel_list: # Initialize the original RCS data for the current panel and metric rcs_dict_original, scenario = initialize_data( panel=panel if panel != "Aggregated" else None, target_file=file_selected_path, updated_rcs={}, metrics=metric, ) # Ensure the dictionary has the metric as a key for original data if metric not in all_rcs_data_original: all_rcs_data_original[metric] = {} # Store the original RCS data under the corresponding panel for the current metric all_rcs_data_original[metric][panel] = rcs_dict_original # Ensure the dictionary has the metric as a key for modified data if metric not in all_rcs_data_modified: all_rcs_data_modified[metric] = {} # Store the modified RCS data under the corresponding panel for the current metric for channel in rcs_dict_original: all_rcs_data_modified[metric][panel] = all_rcs_data_modified[ metric ].get(panel, {}) try: updated_rcs_dict = modified_data[metric][panel][channel] except: updated_rcs_dict = { "K": rcs_dict_original[channel]["K"], "b": rcs_dict_original[channel]["b"], "a": rcs_dict_original[channel]["a"], "x0": rcs_dict_original[channel]["x0"], } all_rcs_data_modified[metric][panel][channel] = updated_rcs_dict # Write the original RCS data to a JSON file with open(original_path, "w") as json_file: json.dump(all_rcs_data_original, json_file, indent=4) # Write the modified RCS data to a separate JSON file with open(modified_path, "w") as json_file: json.dump(all_rcs_data_modified, json_file, indent=4) # Function to generate scenario data and store it as pickle files def generate_scenario_data(original_path, modified_path): # Define the directory where the metrics data is located directory = os.path.join(st.session_state["project_path"], "metrics_level_data") # Retrieve the list of all metric names from the specified directory metrics_list = get_metrics_names(directory) # Dictionary to store scenario data for all metrics and their respective panels all_scenario_data_original = {} all_scenario_data_modified = {} # Iterate over each metric in the metrics list for metric in metrics_list: # Define the path to the Excel file corresponding to the current metric file_selected = f"metrics_level_data/data_test_overview_panel@#{metric}.xlsx" file_selected_path = os.path.join( st.session_state["project_path"], file_selected ) # Retrieve the list of panel names from the current metric's Excel file panel_list = get_panels_names(file_selected_path) # Check if "scenario_data_modified.pkl" exist if os.path.exists(modified_path): with open(modified_path, "rb") as pickle_file: modified_data = pickle.load(pickle_file) # Iterate over each panel in the panel list for panel in panel_list: # Initialize the original scenario data for the current panel and metric rcs_dict_original, scenario = initialize_data( panel=panel if panel != "Aggregated" else None, target_file=file_selected_path, updated_rcs={}, metrics=metric, ) # Ensure the dictionary has the metric as a key for original data if metric not in all_scenario_data_original: all_scenario_data_original[metric] = {} # Store the original scenario data under the corresponding panel for the current metric all_scenario_data_original[metric][panel] = class_convert_to_dict(scenario) # Ensure the dictionary has the metric as a key for modified data if metric not in all_scenario_data_modified: all_scenario_data_modified[metric] = {} # Store the modified scenario data under the corresponding panel for the current metric try: all_scenario_data_modified[metric][panel] = modified_data[metric][panel] except: all_scenario_data_modified[metric][panel] = class_convert_to_dict( scenario ) # Write the original RCS data to a pickle file with open(original_path, "wb") as pickle_file: pickle.dump(all_scenario_data_original, pickle_file) # Write the modified RCS data to a separate pickle file with open(modified_path, "wb") as pickle_file: pickle.dump(all_scenario_data_modified, pickle_file) #############################################################################################################