Spaces:

montebello-642
/

Logistic-Resression

Sleeping

App Files Files Community

montebello-642 committed on Jan 14

Commit

6b561c6

•

1 Parent(s): 246de3f

Initial commit

Browse files

Files changed (1) hide show

app.py +120 -0

app.py ADDED Viewed

	@@ -0,0 +1,120 @@

+import pandas as pd
+from sklearn.model_selection import train_test_split, cross_val_score
+from sklearn.preprocessing import StandardScaler
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
+import seaborn as sns
+import matplotlib.pyplot as plt
+import gradio as gr
+#loading the dataset and select only the columns needed
+selected_columns = ['duration_mo', 'mos_ethnicity', 'complainant_ethnicity', 'is_force', 'is_abuse_of_authority', 'is_discourtesy', 'is_offensive_language', 'outcome_description']
+df = pd.read_csv('my_dataset_logistic.csv', usecols=selected_columns)
+print(df.columns)
+print(df.head())
+print(df.describe())
+print(df.isnull().sum())
+#set the name of the column to calculate accuracy
+X = df.drop('outcome_description', axis=1)
+y = df['outcome_description']
+X.fillna(0, inplace=True)
+#split into training and test set
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+#standardize the features
+scaler = StandardScaler()
+X_train_scaled = scaler.fit_transform(X_train)
+X_test_scaled = scaler.transform(X_test)
+#train the model
+model = LogisticRegression(random_state=42)
+model.fit(X_train_scaled, y_train)
+#make predictions and evaluate the model
+y_pred = model.predict(X_test_scaled)
+accuracy = accuracy_score(y_test, y_pred)
+print(f'Accuracy: {accuracy:.2f}')
+#classification report with confusion matrix, correlation graph and standard deviation of all the variables
+print(classification_report(y_test, y_pred))
+# Confusion Matrix
+conf_matrix = confusion_matrix(y_test, y_pred)
+plt.figure(figsize=(8, 6))
+sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", cbar=False,xticklabels=df['outcome_description'].unique(), yticklabels=df['outcome_description'].unique())
+plt.title("Confusion Matrix")
+plt.xlabel("Predicted")
+plt.ylabel("Actual")
+plt.show()
+#Correlation Matrix
+correlation_matrix = df.corr()
+plt.figure(figsize=(10, 8))
+sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=.5)
+plt.title('Correlation Matrix')
+plt.show()
+#plotting a bar chart to visualize better the correlation
+target_correlations = correlation_matrix['outcome_description'].sort_values(ascending=False)
+plt.figure(figsize=(10, 6))
+target_correlations.drop('outcome_description').plot(kind='bar', color='blue')
+plt.title('Correlations with Target Variable')
+plt.xlabel('Features')
+plt.ylabel('Correlation')
+plt.show()
+#Standard Deviation
+std_dev = df.std()
+print('\nStandard deviation')
+print(std_dev)
+#gradio implementation
+#create the available options for the ethnicities
+mos_ethnicity_options = ["Hispanic", "White", "Black", "Asian", "American Indian", "Other Race", "Refused", "Unknown"]
+complainant_ethnicity_options = ["Hispanic", "White", "Black", "Asian", "American Indian", "Other Race", "Refused", "Unknown"]
+#defining the function to make predictions using the model
+def predict_outcome_duration(mos_ethnicity, complainant_ethnicity, is_force, is_abuse_of_authority, is_discourtesy, is_offensive_language, duration_mo):
+    try:
+        #converting values from string to int
+        mos_ethnicity_encoded = mos_ethnicity_options.index(mos_ethnicity)
+        complainant_ethnicity_encoded = complainant_ethnicity_options.index(complainant_ethnicity)
+        #converting checkbox value to int
+        is_force = int(is_force)
+        is_abuse_of_authority = int(is_abuse_of_authority)
+        is_discourtesy = int(is_discourtesy)
+        is_offensive_language = int(is_offensive_language)
+        input_data = [[duration_mo, mos_ethnicity_encoded, complainant_ethnicity_encoded, is_force, is_abuse_of_authority, is_discourtesy, is_offensive_language]]
+        input_scaled = scaler.transform(input_data)
+        prediction = model.predict(input_scaled)[0]
+        #outputting the result
+        return "Arrest" if prediction == 1 else "No Arrest"
+    except Exception as e:
+        return f"Error: {str(e)}"
+#creating the gradio interface, using dropdowns to show the different ethnicities, checkbox to identify which type of allegation it was and a slider with the duration in months
+mos_ethnicity_dropdown = gr.Dropdown(choices=mos_ethnicity_options,label="Defendant Ethnicity")
+complainant_ethnicity_dropdown = gr.Dropdown(choices=complainant_ethnicity_options, label="Complainant Ethnicity")
+is_force_checkbox = gr.Checkbox()
+is_abuse_of_authority_checkbox = gr.Checkbox()
+is_discourtesy_checkbox = gr.Checkbox()
+is_offensive_language_checkbox = gr.Checkbox()
+duration_mo_slider = gr.Slider(minimum=0, maximum=20, label="Duration in months")
+iface = gr.Interface(
+    fn=predict_outcome_duration,
+    inputs=[complainant_ethnicity_dropdown, mos_ethnicity_dropdown, is_force_checkbox, is_abuse_of_authority_checkbox, is_discourtesy_checkbox, is_offensive_language_checkbox, duration_mo_slider],
+    outputs="text",
+    live=True,
+    title="Complaint Outcome Prediction"
+)
+# Launch the Gradio Interface
+iface.launch(share=True)