Spaces:
Sleeping
Sleeping
montebello-642
committed on
Commit
•
6b561c6
1
Parent(s):
246de3f
Initial commit
Browse files
app.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from sklearn.model_selection import train_test_split, cross_val_score
|
3 |
+
from sklearn.preprocessing import StandardScaler
|
4 |
+
from sklearn.linear_model import LogisticRegression
|
5 |
+
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
|
6 |
+
import seaborn as sns
|
7 |
+
import matplotlib.pyplot as plt
|
8 |
+
import gradio as gr
|
9 |
+
|
10 |
+
# Load the dataset, keeping only the columns the model needs.
selected_columns = ['duration_mo', 'mos_ethnicity', 'complainant_ethnicity', 'is_force', 'is_abuse_of_authority', 'is_discourtesy', 'is_offensive_language', 'outcome_description']
# read_csv ignores the order of `usecols` and returns the columns in file
# order. Re-index with the same list so the feature order is fixed by this
# script rather than by the CSV layout: the scaler and model are fitted on the
# columns positionally, and the prediction function builds its input row in
# this exact order.
df = pd.read_csv('my_dataset_logistic.csv', usecols=selected_columns)[selected_columns]

# Quick look at the data: column names, first rows, summary statistics and
# the number of missing values per column.
print(df.columns)
print(df.head())
print(df.describe())
print(df.isnull().sum())

# Split into features (X) and target (y). Missing feature values are replaced
# with 0; assigning the result avoids an in-place fill on a derived frame.
X = df.drop('outcome_description', axis=1).fillna(0)
y = df['outcome_description']
|
23 |
+
|
24 |
+
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling parameters from the training rows only, then apply the
# same transformation to both the training and the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit the logistic-regression classifier on the scaled training rows.
model = LogisticRegression(random_state=42).fit(X_train_scaled, y_train)

# Predict the held-out rows and report the share predicted correctly.
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
|
40 |
+
|
41 |
+
# Per-class precision, recall and F1 on the held-out rows.
print(classification_report(y_test, y_pred))

# Confusion matrix.
# Without an explicit `labels` argument confusion_matrix orders its rows and
# columns by sorted label value, whereas Series.unique() returns values in
# order of first appearance. Using one sorted label list for both the matrix
# and the heatmap ticks guarantees every tick names the row/column it sits on,
# and keeps the matrix square even if a class is absent from the test split.
class_labels = sorted(df['outcome_description'].unique())
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)
plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    cbar=False,
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()
|
52 |
+
|
53 |
+
# Pairwise correlation between all columns, drawn as an annotated heatmap.
correlation_matrix = df.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    fmt=".2f",
    linewidths=.5,
)
plt.title('Correlation Matrix')
plt.show()

# Bar chart of each feature's correlation with the target, strongest first.
# The target's correlation with itself is dropped before plotting.
target_correlations = correlation_matrix['outcome_description'].sort_values(ascending=False)
feature_correlations = target_correlations.drop('outcome_description')
plt.figure(figsize=(10, 6))
feature_correlations.plot(kind='bar', color='blue')
plt.title('Correlations with Target Variable')
plt.xlabel('Features')
plt.ylabel('Correlation')
plt.show()
|
68 |
+
|
69 |
+
# Standard deviation of every column, printed under a heading.
std_dev = df.std()
print('\nStandard deviation', std_dev, sep='\n')
|
73 |
+
|
74 |
+
#gradio implementation
|
75 |
+
# Labels offered in the two ethnicity dropdowns. A label's position in its
# list is the integer that is fed to the model.
# NOTE(review): this presumes the CSV encodes ethnicities with these same
# integers -- confirm against the dataset's preprocessing.
mos_ethnicity_options = ["Hispanic", "White", "Black", "Asian", "American Indian", "Other Race", "Refused", "Unknown"]
complainant_ethnicity_options = ["Hispanic", "White", "Black", "Asian", "American Indian", "Other Race", "Refused", "Unknown"]


def _encode_choice(value, options, field):
    """Return the position of ``value`` in ``options``.

    Raises:
        ValueError: if ``value`` is not one of ``options`` (for example
            ``None`` while a dropdown has no selection). The message names
            ``field`` so the user can tell which input to fix.
    """
    if value not in options:
        raise ValueError(f"invalid {field}: {value!r}")
    return options.index(value)


def predict_outcome_duration(mos_ethnicity, complainant_ethnicity, is_force, is_abuse_of_authority, is_discourtesy, is_offensive_language, duration_mo):
    """Predict the complaint outcome for one set of form inputs.

    Args:
        mos_ethnicity: Label chosen from ``mos_ethnicity_options``.
        complainant_ethnicity: Label chosen from
            ``complainant_ethnicity_options``.
        is_force: Checkbox value, converted to 0/1.
        is_abuse_of_authority: Checkbox value, converted to 0/1.
        is_discourtesy: Checkbox value, converted to 0/1.
        is_offensive_language: Checkbox value, converted to 0/1.
        duration_mo: Duration value from the slider, passed through unchanged.

    Returns:
        ``"Arrest"`` when the model predicts class ``1``, ``"No Arrest"`` for
        any other predicted class, or ``"Error: <message>"`` if the inputs are
        invalid or the prediction fails.
    """
    try:
        # Keyed by training column name; insertion order is the positional
        # order the features are expected in.
        features = {
            'duration_mo': duration_mo,
            'mos_ethnicity': _encode_choice(mos_ethnicity, mos_ethnicity_options, 'mos_ethnicity'),
            'complainant_ethnicity': _encode_choice(complainant_ethnicity, complainant_ethnicity_options, 'complainant_ethnicity'),
            'is_force': int(is_force),
            'is_abuse_of_authority': int(is_abuse_of_authority),
            'is_discourtesy': int(is_discourtesy),
            'is_offensive_language': int(is_offensive_language),
        }

        # When the scaler was fitted on a DataFrame it records the column
        # names it saw; align the row to those names so a different column
        # order in the training data cannot silently shuffle the features.
        # Otherwise fall back to the positional order above.
        feature_order = getattr(scaler, 'feature_names_in_', None)
        if feature_order is None:
            input_data = [list(features.values())]
        else:
            input_data = pd.DataFrame([features])[list(feature_order)]

        input_scaled = scaler.transform(input_data)
        prediction = model.predict(input_scaled)[0]

        # NOTE(review): assumes the target column encodes an arrest as 1 --
        # confirm against the dataset.
        return "Arrest" if prediction == 1 else "No Arrest"

    except Exception as e:
        # UI boundary: report the problem as text instead of raising, so the
        # live interface always has something to display.
        return f"Error: {str(e)}"
|
101 |
+
|
102 |
+
# Build the Gradio interface: dropdowns for the two ethnicities, one checkbox
# per allegation type, and a slider for the duration in months.
mos_ethnicity_dropdown = gr.Dropdown(choices=mos_ethnicity_options, label="Defendant Ethnicity")
complainant_ethnicity_dropdown = gr.Dropdown(choices=complainant_ethnicity_options, label="Complainant Ethnicity")
is_force_checkbox = gr.Checkbox()
is_abuse_of_authority_checkbox = gr.Checkbox()
is_discourtesy_checkbox = gr.Checkbox()
is_offensive_language_checkbox = gr.Checkbox()
duration_mo_slider = gr.Slider(minimum=0, maximum=20, label="Duration in months")

iface = gr.Interface(
    fn=predict_outcome_duration,
    # Component values are handed to `fn` positionally, so this list must
    # follow the parameter order of predict_outcome_duration: mos_ethnicity
    # first, then complainant_ethnicity. Listing them the other way round
    # feeds each ethnicity into the other one's feature.
    inputs=[
        mos_ethnicity_dropdown,
        complainant_ethnicity_dropdown,
        is_force_checkbox,
        is_abuse_of_authority_checkbox,
        is_discourtesy_checkbox,
        is_offensive_language_checkbox,
        duration_mo_slider,
    ],
    outputs="text",
    live=True,
    title="Complaint Outcome Prediction"
)

# Launch the Gradio Interface
iface.launch(share=True)
|