zaidmehdi committed
Commit f8b3be6
1 Parent(s): 46afa74

calculating metrics

Files changed (2)
  1. src/classifier.ipynb +120 -1
  2. src/utils.py +19 -0
src/classifier.ipynb CHANGED
@@ -35,7 +35,9 @@
  "from sklearn.preprocessing import LabelEncoder\n",
  "import torch\n",
  "from transformers import AutoModel, AutoTokenizer\n",
- "import xgboost as xgb"
+ "import xgboost as xgb\n",
+ "\n",
+ "from utils import evaluate_predictions"
  ]
  },
  {
@@ -910,6 +912,123 @@
  " learning_rate=0.1)\n",
  "xgb_model.fit(X_train, y_train_encoded)"
  ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now let's have a look at some metrics for our models."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Logistic Regression\n",
+ "\n",
+ "Train set:\n",
+ "Accuracy: 0.3448095238095238\n",
+ "F1 macro average: 0.30283202516650803\n",
+ "F1 weighted average: 0.35980803167526537\n",
+ "--------------------------------------------------\n",
+ "Test set:\n",
+ "Accuracy: 0.2324\n",
+ "F1 macro average: 0.15894661492139023\n",
+ "F1 weighted average: 0.2680459740545796\n"
+ ]
+ }
+ ],
+ "source": [
+ "lr_train_preds = lr_model.predict(X_train)\n",
+ "lr_test_preds = lr_model.predict(X_test)\n",
+ "\n",
+ "evaluate_predictions(model=\"Logistic Regression\", \n",
+ " train_preds= lr_train_preds, y_train=y_train,\n",
+ " test_preds=lr_test_preds, y_test=y_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Random Forest\n",
+ "\n",
+ "Train set:\n",
+ "Accuracy: 0.5817619047619048\n",
+ "F1 macro average: 0.6302920544396868\n",
+ "F1 weighted average: 0.5817320656440126\n",
+ "--------------------------------------------------\n",
+ "Test set:\n",
+ "Accuracy: 0.2188\n",
+ "F1 macro average: 0.11020842424166737\n",
+ "F1 weighted average: 0.2054551695522176\n"
+ ]
+ }
+ ],
+ "source": [
+ "rf_train_preds = rf_model.predict(X_train)\n",
+ "rf_test_preds = rf_model.predict(X_test)\n",
+ "\n",
+ "evaluate_predictions(model=\"Random Forest\", \n",
+ " train_preds= rf_train_preds, y_train=y_train,\n",
+ " test_preds=rf_test_preds, y_test=y_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/mehdi/miniconda3/envs/adc/lib/python3.10/site-packages/xgboost/core.py:160: UserWarning: [12:38:31] WARNING: /home/conda/feedstock_root/build_artifacts/xgboost-split_1705650282415/work/src/common/error_msg.cc:58: Falling back to prediction using DMatrix due to mismatched devices. This might lead to higher memory usage and slower performance. XGBoost is running on: cuda:0, while the input data is on: cpu.\n",
+ "Potential solutions:\n",
+ "- Use a data structure that matches the device ordinal in the booster.\n",
+ "- Set the device for booster before call to inplace_predict.\n",
+ "\n",
+ "This warning will only be shown once.\n",
+ "\n",
+ " warnings.warn(smsg, UserWarning)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "XgBoost\n",
+ "\n",
+ "Train set:\n",
+ "Accuracy: 0.9998571428571429\n",
+ "F1 macro average: 0.9998485323510583\n",
+ "F1 weighted average: 0.9998571499183782\n",
+ "--------------------------------------------------\n",
+ "Test set:\n",
+ "Accuracy: 0.3552\n",
+ "F1 macro average: 0.13665190979200587\n",
+ "F1 weighted average: 0.288613804297705\n"
+ ]
+ }
+ ],
+ "source": [
+ "xgb_train_preds = xgb_model.predict(X_train)\n",
+ "xgb_test_preds = xgb_model.predict(X_test)\n",
+ "\n",
+ "evaluate_predictions(model=\"XgBoost\", \n",
+ " train_preds= xgb_train_preds, y_train=y_train_encoded,\n",
+ " test_preds=xgb_test_preds, y_test=y_test_encoded)"
+ ]
  }
 ],
  "metadata": {
src/utils.py CHANGED
@@ -0,0 +1,19 @@
+ from sklearn.metrics import accuracy_score, f1_score
+
+
+ def get_metrics(y_true, y_preds):
+     accuracy = accuracy_score(y_true, y_preds)
+     f1_macro = f1_score(y_true, y_preds, average="macro")
+     f1_weighted = f1_score(y_true, y_preds, average="weighted")
+     print(f"Accuracy: {accuracy}")
+     print(f"F1 macro average: {f1_macro}")
+     print(f"F1 weighted average: {f1_weighted}")
+
+
+ def evaluate_predictions(model: str, train_preds, y_train, test_preds, y_test):
+     print(model)
+     print("\nTrain set:")
+     get_metrics(y_train, train_preds)
+     print("-" * 50)
+     print("Test set:")
+     get_metrics(y_test, test_preds)
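
The new helpers can be smoke-tested outside the notebook with something like the sketch below; it uses made-up labels, assumes it is run from src/ so that utils is importable, and is not part of the commit.

# Dummy smoke test for src/utils.py (hypothetical, illustration only).
from utils import evaluate_predictions

y_train = ["a", "b", "a", "c"]
train_preds = ["a", "b", "b", "c"]
y_test = ["a", "c"]
test_preds = ["c", "c"]

evaluate_predictions(model="Dummy model",
                     train_preds=train_preds, y_train=y_train,
                     test_preds=test_preds, y_test=y_test)

This prints the same Accuracy / F1 macro / F1 weighted block that appears in the notebook cells; get_metrics only prints the scores rather than returning them.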