calculating metrics
- src/classifier.ipynb +120 -1
- src/utils.py +19 -0
src/classifier.ipynb
CHANGED
@@ -35,7 +35,9 @@
     "from sklearn.preprocessing import LabelEncoder\n",
     "import torch\n",
     "from transformers import AutoModel, AutoTokenizer\n",
-    "import xgboost as xgb"
+    "import xgboost as xgb\n",
+    "\n",
+    "from utils import evaluate_predictions"
    ]
   },
   {
@@ -910,6 +912,123 @@
     " learning_rate=0.1)\n",
     "xgb_model.fit(X_train, y_train_encoded)"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now let's have a look at some metrics for our models."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Logistic Regression\n",
+      "\n",
+      "Train set:\n",
+      "Accuracy: 0.3448095238095238\n",
+      "F1 macro average: 0.30283202516650803\n",
+      "F1 weighted average: 0.35980803167526537\n",
+      "--------------------------------------------------\n",
+      "Test set:\n",
+      "Accuracy: 0.2324\n",
+      "F1 macro average: 0.15894661492139023\n",
+      "F1 weighted average: 0.2680459740545796\n"
+     ]
+    }
+   ],
+   "source": [
+    "lr_train_preds = lr_model.predict(X_train)\n",
+    "lr_test_preds = lr_model.predict(X_test)\n",
+    "\n",
+    "evaluate_predictions(model=\"Logistic Regression\", \n",
+    " train_preds= lr_train_preds, y_train=y_train,\n",
+    " test_preds=lr_test_preds, y_test=y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Random Forest\n",
+      "\n",
+      "Train set:\n",
+      "Accuracy: 0.5817619047619048\n",
+      "F1 macro average: 0.6302920544396868\n",
+      "F1 weighted average: 0.5817320656440126\n",
+      "--------------------------------------------------\n",
+      "Test set:\n",
+      "Accuracy: 0.2188\n",
+      "F1 macro average: 0.11020842424166737\n",
+      "F1 weighted average: 0.2054551695522176\n"
+     ]
+    }
+   ],
+   "source": [
+    "rf_train_preds = rf_model.predict(X_train)\n",
+    "rf_test_preds = rf_model.predict(X_test)\n",
+    "\n",
+    "evaluate_predictions(model=\"Random Forest\", \n",
+    " train_preds= rf_train_preds, y_train=y_train,\n",
+    " test_preds=rf_test_preds, y_test=y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/mehdi/miniconda3/envs/adc/lib/python3.10/site-packages/xgboost/core.py:160: UserWarning: [12:38:31] WARNING: /home/conda/feedstock_root/build_artifacts/xgboost-split_1705650282415/work/src/common/error_msg.cc:58: Falling back to prediction using DMatrix due to mismatched devices. This might lead to higher memory usage and slower performance. XGBoost is running on: cuda:0, while the input data is on: cpu.\n",
+      "Potential solutions:\n",
+      "- Use a data structure that matches the device ordinal in the booster.\n",
+      "- Set the device for booster before call to inplace_predict.\n",
+      "\n",
+      "This warning will only be shown once.\n",
+      "\n",
+      " warnings.warn(smsg, UserWarning)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "XgBoost\n",
+      "\n",
+      "Train set:\n",
+      "Accuracy: 0.9998571428571429\n",
+      "F1 macro average: 0.9998485323510583\n",
+      "F1 weighted average: 0.9998571499183782\n",
+      "--------------------------------------------------\n",
+      "Test set:\n",
+      "Accuracy: 0.3552\n",
+      "F1 macro average: 0.13665190979200587\n",
+      "F1 weighted average: 0.288613804297705\n"
+     ]
+    }
+   ],
+   "source": [
+    "xgb_train_preds = xgb_model.predict(X_train)\n",
+    "xgb_test_preds = xgb_model.predict(X_test)\n",
+    "\n",
+    "evaluate_predictions(model=\"XgBoost\", \n",
+    " train_preds= xgb_train_preds, y_train=y_train_encoded,\n",
+    " test_preds=xgb_test_preds, y_test=y_test_encoded)"
+   ]
   }
  ],
  "metadata": {
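Note on the stderr output in the XGBoost cell above: the booster lives on cuda:0 while X_train/X_test are CPU arrays, so XGBoost falls back to a DMatrix-based prediction and warns about it, exactly as the "Potential solutions" text suggests. Below is a minimal sketch of one way to avoid the warning; it assumes xgb_model is this notebook's xgboost.XGBClassifier and that device="cuda" was set at training time (the training parameters are not visible in this hunk), so treat it as illustrative rather than part of the commit.

# Illustrative sketch, not part of this commit: avoiding the mismatched-devices
# warning seen in the XGBoost cell. Assumes xgb_model is the notebook's
# xgboost.XGBClassifier (trained with device="cuda") and that X_train / X_test
# are CPU NumPy arrays.

# Option 1: move the booster to the CPU for inference so it matches the inputs.
xgb_model.set_params(device="cpu")
xgb_train_preds = xgb_model.predict(X_train)
xgb_test_preds = xgb_model.predict(X_test)

# Option 2 (keeps inference on the GPU): hand XGBoost device-resident data,
# e.g. CuPy arrays, so no fallback is needed. Requires cupy to be installed.
# import cupy as cp
# xgb_model.set_params(device="cuda")
# xgb_test_preds = xgb_model.predict(cp.asarray(X_test))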
src/utils.py
CHANGED
@@ -0,0 +1,19 @@
+from sklearn.metrics import accuracy_score, f1_score
+
+
+def get_metrics(y_true, y_preds):
+    accuracy = accuracy_score(y_true, y_preds)
+    f1_macro = f1_score(y_true, y_preds, average="macro")
+    f1_weighted = f1_score(y_true, y_preds, average="weighted")
+    print(f"Accuracy: {accuracy}")
+    print(f"F1 macro average: {f1_macro}")
+    print(f"F1 weighted average: {f1_weighted}")
+
+
+def evaluate_predictions(model:str, train_preds, y_train, test_preds, y_test):
+    print(model)
+    print("\nTrain set:")
+    get_metrics(y_train, train_preds)
+    print("-"*50)
+    print("Test set:")
+    get_metrics(y_test, test_preds)
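For a quick sanity check of the new helpers outside the notebook, they can be exercised with placeholder labels. The snippet below is illustrative only; the label values are made up and are not data from this repo.

# Toy usage of the helpers added in src/utils.py; the labels are placeholders.
from utils import evaluate_predictions

y_train = ["news", "sport", "sport", "tech"]
train_preds = ["news", "sport", "tech", "tech"]
y_test = ["news", "sport", "tech"]
test_preds = ["news", "tech", "sport"]

# Prints accuracy, macro F1 and weighted F1 for the train and test splits.
evaluate_predictions(model="Toy example",
                     train_preds=train_preds, y_train=y_train,
                     test_preds=test_preds, y_test=y_test)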