File size: 15,649 Bytes
855db18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e6910d
855db18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
import streamlit as st
import joblib
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from PIL import Image
import time
import matplotlib.pyplot as plt
from io import BytesIO


# Load the serialized preprocessing objects and the classifier from the
# working directory, in the same order the rest of the script refers to them.
# NOTE(review): presumably these were fitted/saved by the training pipeline —
# confirm the artifacts match the feature list used below.
num_imputer, cat_imputer, encoder, scaler, lr_model = map(
    joblib.load,
    (
        'numerical_imputer.joblib',
        'cat_imputer.joblib',
        'encoder.joblib',
        'scaler.joblib',
        'lr_smote_model.joblib',
    ),
)


def preprocess_input(input_data):
    """Convert one customer's raw inputs into a single-row feature frame.

    Args:
        input_data: Mapping of feature name to a scalar value; it is wrapped
            into a one-row DataFrame.

    Returns:
        A one-row DataFrame whose columns are exactly
        ``original_feature_names`` (in that order); any column the
        transformers did not produce is filled with 0.
    """
    raw = pd.DataFrame(input_data, index=[0])

    # Object-dtype columns are treated as categorical, everything else as
    # numeric; the original column order is kept inside each group.
    cat_columns, num_columns = [], []
    for name in raw.columns:
        target = cat_columns if raw[name].dtype == 'object' else num_columns
        target.append(name)

    cat_values = cat_imputer.transform(raw[cat_columns])
    num_values = num_imputer.transform(raw[num_columns])

    # .toarray() assumes the encoder returns a sparse matrix.
    encoded = pd.DataFrame(
        encoder.transform(cat_values).toarray(),
        columns=encoder.get_feature_names_out(cat_columns),
    )
    scaled = pd.DataFrame(scaler.transform(num_values), columns=num_columns)

    combined = pd.concat([encoded, scaled], axis=1)
    # Align to the fixed column list so the model always sees the same layout.
    return combined.reindex(columns=original_feature_names, fill_value=0)


# Column layout that preprocess_input() reindexes to before prediction:
# the numeric features first, then the one-hot columns of each categorical
# feature (REGION, TENURE, TOP_PACK) named "<FEATURE>_<category>".
_NUMERIC_FEATURE_NAMES = [
    'MONTANT',
    'FREQUENCE_RECH',
    'REVENUE',
    'ARPU_SEGMENT',
    'FREQUENCE',
    'DATA_VOLUME',
    'ON_NET',
    'ORANGE',
    'TIGO',
    'ZONE1',
    'ZONE2',
    'REGULARITY',
    'FREQ_TOP_PACK',
]
_REGION_CATEGORIES = [
    'DAKAR', 'DIOURBEL', 'FATICK', 'KAFFRINE', 'KAOLACK', 'KEDOUGOU', 'KOLDA',
    'LOUGA', 'MATAM', 'SAINT-LOUIS', 'SEDHIOU', 'TAMBACOUNDA', 'THIES',
    'ZIGUINCHOR',
]
_TENURE_CATEGORIES = [
    'Long-term', 'Medium-term', 'Mid-term', 'Short-term', 'Very short-term',
]
_TOP_PACK_CATEGORIES = [
    'VAS', 'data', 'international', 'messaging', 'other_services',
    'social_media', 'voice',
]

original_feature_names = (
    list(_NUMERIC_FEATURE_NAMES)
    + [f'REGION_{category}' for category in _REGION_CATEGORIES]
    + [f'TENURE_{category}' for category in _TENURE_CATEGORIES]
    + [f'TOP_PACK_{category}' for category in _TOP_PACK_CATEGORIES]
)

# --- Page configuration and static header content -------------------------
st.set_page_config(layout="wide")

st.title('CUSTOMER CHURN PREDICTION APP (CCPA)')
st.markdown(
    "Churn is a one of the biggest problem in the telecom industry. "
    "Research has shown that the average monthly churn rate among the top 4 "
    "wireless carriers in the US is 1.9% - 2%"
)
st.image("bg.png", use_column_width=True)

# Sidebar banner shown above the input widgets.
st.sidebar.image("welcome.png", use_column_width=True)

# --- Model registry and selection -----------------------------------------
# Each entry maps a display name to the estimator (stored under the same
# display name) and a 'type' tag that predict_churn() uses to pick labels.
models = {
    'Logistic Regression': {
        'Logistic Regression': lr_model,
        'type': 'logistic_regression',
    },
}

# Sidebar picker; only one model is currently registered.
model_name = st.sidebar.selectbox('Logistic Regression', list(models))

# Unpack the chosen entry into the module-level names used further down.
_selected_entry = models[model_name]
model = _selected_entry['Logistic Regression']
model_type = _selected_entry['type']


# Collect the customer's details from sidebar widgets.
# Keys are the raw feature names passed to preprocess_input(); the dict order
# is also the order in which the widgets are rendered.
st.sidebar.title('ENTER CUSTOMER DETAILS')
input_features = {
    'MONTANT': st.sidebar.number_input('Top-up Amount (MONTANT)'),
    'FREQUENCE_RECH': st.sidebar.number_input('No. of Times the Customer Refilled (FREQUENCE_RECH)'),
    'REVENUE': st.sidebar.number_input('Monthly income of the client (REVENUE)'),
    'ARPU_SEGMENT': st.sidebar.number_input('Income over 90 days / 3 (ARPU_SEGMENT)'),
    'FREQUENCE': st.sidebar.number_input('Number of times the client has made an income (FREQUENCE)'),
    'DATA_VOLUME': st.sidebar.number_input('Number of Connections (DATA_VOLUME)'),
    'ON_NET': st.sidebar.number_input('Inter Expresso Call (ON_NET)'),
    'ORANGE': st.sidebar.number_input('Call to Orange (ORANGE)'),
    'TIGO': st.sidebar.number_input('Call to Tigo (TIGO)'),
    'ZONE1': st.sidebar.number_input('Call to Zone 1 (ZONE1)'),
    'ZONE2': st.sidebar.number_input('Call to Zone 2 (ZONE2)'),
    'REGULARITY': st.sidebar.number_input('Number of Times the Client is Active for 90 Days (REGULARITY)'),
    'FREQ_TOP_PACK': st.sidebar.number_input('Number of Times the Client has Activated the Top Packs (FREQ_TOP_PACK)'),
    # The option strings must match the category names behind the one-hot
    # columns (REGION_<name>). 'KAFFRINE' and 'THIES' were previously
    # misspelled as 'AFFRINE' and 'HIES', so those regions could never be
    # encoded to their REGION_KAFFRINE / REGION_THIES columns.
    'REGION': st.sidebar.selectbox(
        'Location of Each Client (REGION)',
        [
            'DAKAR', 'DIOURBEL', 'FATICK', 'KAFFRINE', 'KAOLACK',
            'KEDOUGOU', 'KOLDA', 'LOUGA', 'MATAM', 'SAINT-LOUIS',
            'SEDHIOU', 'TAMBACOUNDA', 'THIES', 'ZIGUINCHOR',
        ],
    ),
    'TENURE': st.sidebar.selectbox(
        'Duration in the Network (TENURE)',
        ['Long-term', 'Medium-term', 'Mid-term', 'Short-term', 'Very short-term'],
    ),
    'TOP_PACK': st.sidebar.selectbox(
        'Most Active Pack (TOP_PACK)',
        [
            'VAS', 'data', 'international', 'messaging', 'other_services',
            'social_media', 'voice',
        ],
    ),
}

# Input validation
# Inclusive [min, max] bounds accepted for each numeric input. Features that
# are absent from this table (the categorical select boxes) are not checked.
numeric_ranges = {
    'MONTANT': [0, 1000000],
    'FREQUENCE_RECH': [0, 100],
    'REVENUE': [0, 1000000],
    'ARPU_SEGMENT': [0, 100000],
    'FREQUENCE': [0, 100],
    'DATA_VOLUME': [0, 100000],
    'ON_NET': [0, 100000],
    'ORANGE': [0, 100000],
    'TIGO': [0, 100000],
    'ZONE1': [0, 100000],
    'ZONE2': [0, 100000],
    'REGULARITY': [0, 100],
    'FREQ_TOP_PACK': [0, 100]
}

# Collect one message per out-of-range value, in input order.
error_messages = []
for feature, value in input_features.items():
    bounds = numeric_ranges.get(feature)
    if bounds is None:
        continue
    range_min, range_max = bounds
    if not range_min <= value <= range_max:
        error_messages.append(f"{feature} should be between {range_min} and {range_max}.")

# The input is valid exactly when no range check failed.
valid_input = not error_messages

#Churn Prediction

def predict_churn(input_data, model):
    """Score one customer and return labelled class probabilities.

    Args:
        input_data: Mapping of raw feature name to value, as collected from
            the sidebar; it is passed through ``preprocess_input``.
        model: Estimator exposing ``predict_proba``.

    Returns:
        A 4-tuple ``(churn_labels, churn_probability, churn_indices,
        churn_index)``:
        ``churn_labels`` is the list of class labels in probability-column
        order, ``churn_probability`` is the probability row for the single
        input sample, ``churn_indices`` maps each label to its column index,
        and ``churn_index`` is the index of the most probable class.

    Raises:
        ValueError: If the module-level ``model_type`` has no label mapping.
    """
    preprocessed_data = preprocess_input(input_data)
    probabilities = model.predict_proba(preprocessed_data)

    # Map probability columns to human-readable labels. Previously a leftover
    # assignment from a commented-out ``elif`` branch overwrote this list with
    # the reversed order, inverting every prediction.
    # NOTE(review): assumes column 0 is the non-churn class (model.classes_
    # == [0, 1]) — confirm against the trained model.
    if model_type == "logistic_regression":
        churn_labels = ["No Churn", "Churn"]
    else:
        raise ValueError(f"Unsupported model type: {model_type!r}")

    # Only one sample is scored, so take its probability row.
    churn_probability = probabilities[0]

    churn_indices = {label: idx for idx, label in enumerate(churn_labels)}
    churn_index = np.argmax(churn_probability)

    return churn_labels, churn_probability, churn_indices, churn_index

# Advice panels displayed next to the verdict.
_CHURN_ADVICE = (
    "ADVICE TO EXPRESSOR MANAGEMENT:\n"
    "- Identify Reasons for Churn\n"
    "- Offer Incentives\n"
    "- Showcase Improvements\n"
    "- Gather Feedback\n"
    "- Customer Surveys\n"
    "- Personalized Recommendations\n"
    "- Reestablish Trust\n"
    "- Follow-Up Communication\n"
    "- Reactivation Campaigns\n"
    "- Improve product or service offerings based on customer feedback\n"
    " SUMMARY NOTE\n"
    "- Remember that winning back churning customers takes time and persistence.\n"
    "- It's crucial to genuinely address their concerns and provide value to rebuild their trust in your business\n"
    "- Regularly evaluate the effectiveness of your strategies and adjust them as needed based on customer responses and feedback\n"
)
_LOYAL_ADVICE = (
    "ADVICE TO EXPRESSOR MANAGEMENT\n"
    "- Quality Products/Services\n"
    "- Personalized Experience\n"
    "- Loyalty Programs\n"
    "- Excellent Customer Service\n"
    "- Exclusive Content\n"
    "- Early Access\n"
    "- Personal Thank-You Notes\n"
    "- Surprise Gifts or Discounts\n"
    "- Feedback Opportunities\n"
    "- Community Engagement\n"
    "- Anniversary Celebrations\n"
    "- Refer-a-Friend Programs\n"
    "SUMMARY NOTE\n"
    "- Remember that the key to building lasting loyalty is consistency.\n"
    "- Continuously demonstrate your commitment to meeting customers needs and enhancing their experience.\n"
    "- Regularly assess the effectiveness of your loyalty initiatives and adapt them based on customer feedback and preferences."
)

# Baseline churn rate in percent used for the comparison chart. This is a
# hard-coded placeholder; replace it with the rate of the real data set.
_AVERAGE_CHURN_RATE_PERCENT = 19


def _load_resized_image(path, size=(350, 300)):
    """Open the image at *path*, return a resized copy and close the file."""
    with Image.open(path) as source_image:
        return source_image.resize(size)


def _render_verdict(predicted_label, predicted_prob):
    """Show the headline verdict, an illustration and the matching advice."""
    col1, col2 = st.columns(2)

    if predicted_label == "Churn":
        with col1:
            st.error(f"DANGER! This customer is likely to churn with a probability of {predicted_prob * 100:.2f}% 😒")
            st.image(_load_resized_image('Churn.jpeg'))
        with col2:
            st.info(_CHURN_ADVICE)
    else:
        with col1:
            st.success(f"This customer is a loyal (not churn) with a probability of {predicted_prob * 100:.2f}% 😀")
            st.image(_load_resized_image('NotChurn.png'))
        with col2:
            st.info(_LOYAL_ADVICE)


def _render_probability_donut(churn_labels, churn_probability):
    """Draw a donut chart of the class probabilities."""
    fig = go.Figure(data=[go.Pie(
        labels=churn_labels,
        values=churn_probability,
        hole=0.5,
        textinfo='label+percent',
        marker=dict(colors=['#FFA07A', '#6495ED', '#FFD700', '#32CD32', '#FF69B4', '#8B008B']))])

    fig.update_traces(
        hoverinfo='label+percent',
        textfont_size=12,
        textposition='inside',
        # `percent` is a fraction in templates; the '%' format type scales it
        # to a percentage (the former '.2f' + literal '%' showed e.g. 0.81%).
        texttemplate='%{label}: %{percent:.2%}'
    )

    fig.update_layout(
        title='Churn Probability',
        title_x=0.5,
        showlegend=False,
        width=500,
        height=500
    )

    st.plotly_chart(fig, use_container_width=True)


def _render_average_comparison(predicted_label, predicted_churn_prob):
    """Compare the customer's probability with the baseline churn rate.

    ``predicted_churn_prob`` is always the probability of the "Churn" class,
    regardless of which class was predicted.
    """
    main_data_churn_probability = _AVERAGE_CHURN_RATE_PERCENT / 100

    if predicted_label == "Churn":
        labels = ['Churn Probability', 'Average Churn Probability']
        values = [predicted_churn_prob, main_data_churn_probability]

        if predicted_churn_prob > main_data_churn_probability:
            churn_comparison = "higher than"
        elif predicted_churn_prob < main_data_churn_probability:
            churn_comparison = "lower than"
        else:
            churn_comparison = "equal to"

        explanation = (
            f"This compares the churn probability of the selected customer "
            f"with the average churn rate of all customers. It provides insights into how the "
            f"individual customer's churn likelihood ({predicted_churn_prob:.2f}) compares to the "
            f"overall trend. The 'Churn Probability' represents the likelihood of churn "
            f"for the selected customer, while the 'Average Churn Rate' represents the average "
            f"churn rate across all customers ({main_data_churn_probability:.2f}).\n\n"
            f"The customer's churn rate is {churn_comparison} the average churn rate."
        )
    else:
        # Plot the complement of the churn probability so the bar really is
        # the no-churn probability its label announces.
        labels = ['No-Churn Probability', 'Average Churn Probability']
        values = [1 - predicted_churn_prob, main_data_churn_probability]

        explanation = (
            f"This bar chart compares the churn probability of the selected customer "
            f"with the average churn rate of all customers. It provides insights into how the "
            f"individual customer's churn likelihood ({predicted_churn_prob:.2f}) compares to the "
            f"overall trend. "
            f"The prediction indicates that the customer is not likely to churn. "
            f"The churn probability is lower than the no-churn probability."
        )

    fig = go.Figure(data=[go.Bar(x=labels, y=values)])
    fig.update_layout(
        xaxis_title='Churn Probability',
        yaxis_title='Probability',
        title='Comparison with Average Churn Rate',
        yaxis=dict(range=[0, 1])  # Probabilities live in [0, 1]
    )

    st.plotly_chart(fig)
    st.write(explanation)


# Predict churn based on user input
if st.sidebar.button('Predict Churn'):
    if not valid_input:
        # Do not score out-of-range inputs; report every failed range check.
        for message in error_messages:
            st.error(message)
    else:
        try:
            with st.spinner("Wait, Results loading..."):
                # Purely cosmetic progress animation before the prediction.
                progress_bar = st.progress(0)
                step = 20  # A big step will reduce the execution time
                for i in range(0, 100, step):
                    time.sleep(0.1)
                    progress_bar.progress(i + step)

            churn_labels, churn_probability, churn_indices, churn_index = predict_churn(input_features, model)
            predicted_label = churn_labels[churn_index]

            st.subheader('CHURN PREDICTION RESULTS')
            _render_verdict(predicted_label, churn_probability[churn_index])

            st.subheader('Churn Probability')
            _render_probability_donut(churn_labels, churn_probability)

            st.subheader('Customer Churn Probability Comparison')
            # Always read the "Churn" column, not the arg-max column, so the
            # comparison uses the churn probability even for loyal customers.
            _render_average_comparison(
                predicted_label,
                churn_probability[churn_indices["Churn"]],
            )
        except Exception as e:
            # Top-level UI boundary: surface any failure to the user instead
            # of crashing the app.
            st.error(f"An error occurred: {str(e)}")