{ "cells": [
 { "cell_type": "markdown", "id": "6a013a36-e156-4212-8ade-5fee79e33680", "metadata": {}, "source": [ "Install dependencies" ] },
 { "cell_type": "code", "execution_count": null, "id": "acabbaee-35be-452b-8573-4d0974fa6340", "metadata": {}, "outputs": [], "source": [ "!pip3 install torch torchvision torchaudio\n", "!pip3 install matplotlib\n", "!pip3 install ultralytics roboflow" ] },
 { "cell_type": "code", "execution_count": null, "id": "fb8218b5-61c9-4fe3-b5c6-1643beb39e28", "metadata": {}, "outputs": [], "source": [ "import torch\n", "from ultralytics import YOLO\n", "from pathlib import Path\n", "import os\n", "import yaml\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import matplotlib.image as mpimg" ] },
 { "cell_type": "code", "execution_count": null, "id": "4bccbb25", "metadata": {}, "outputs": [], "source": [ "device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n", "\n", "print(f\"Using device: {device} ({'GPU' if device != 'cpu' else 'CPU'})\")" ] },
 { "cell_type": "code", "execution_count": null, "id": "9c3f1b2e", "metadata": {}, "outputs": [], "source": [ "CONFIG = {\n", "    'model': 'yolo11m.pt',  # Choose model size: n, s, m, l, x\n", "    'data': 'datasets/Hardhat-or-Hat.v1-without-hat.yolov11/data.yaml',\n", "    'epochs': 35,\n", "    'batch': 2 if device != 'cpu' else 4,  # Adjust batch size to available memory\n", "    'imgsz': 640,\n", "    'patience': 5,\n", "    'device': device,\n", "}\n", "\n", "# Reduce CUDA memory fragmentation during training\n", "os.environ[\"PYTORCH_CUDA_ALLOC_CONF\"] = \"expandable_segments:True\"" ] },
 { "cell_type": "code", "execution_count": null, "id": "d349b982", "metadata": {}, "outputs": [], "source": [ "save_dir = Path('runs/detect')\n", "save_dir.mkdir(parents=True, exist_ok=True)\n", "\n", "this_path = os.getcwd()\n", "os.environ['ULTRALYTICS_CONFIG_DIR'] = this_path\n", "\n", "# Rewrite the dataset YAML so train/val/test point to absolute paths\n", "data_file = f\"{this_path}/{CONFIG['data']}\"\n", "with open(data_file, 'r') as f:\n", "    data = yaml.safe_load(f)\n", "\n", "dataset_root = f\"{this_path}/{CONFIG['data'].rsplit('/', 1)[0]}\"\n", "data['train'] = f\"{dataset_root}/train/images\"\n", "data['val'] = f\"{dataset_root}/valid/images\"\n", "data['test'] = f\"{dataset_root}/test/images\"\n", "\n", "with open(data_file, 'w') as f:\n", "    yaml.safe_dump(data, f)" ] },
 { "cell_type": "code", "execution_count": null, "id": "4f831042", "metadata": {}, "outputs": [], "source": [ "model = YOLO(CONFIG['model'])" ] },
 { "cell_type": "code", "execution_count": null, "id": "20208cb5", "metadata": {}, "outputs": [], "source": [ "results = model.train(\n", "    data=CONFIG['data'],\n", "    epochs=CONFIG['epochs'],\n", "    batch=CONFIG['batch'],\n", "    imgsz=CONFIG['imgsz'],\n", "    patience=CONFIG['patience'],\n", "    device=CONFIG['device'],\n", "    verbose=True,\n", "\n", "    optimizer='SGD',\n", "    lr0=0.001,\n", "    lrf=0.01,\n", "    momentum=0.9,\n", "    weight_decay=0.0005,\n", "    warmup_epochs=3,\n", "    warmup_bias_lr=0.01,\n", "    warmup_momentum=0.8,\n", "    amp=False,\n", "\n", "    # Augmentations\n", "    augment=True,\n", "    hsv_h=0.015,      # Image HSV-Hue augmentation\n", "    hsv_s=0.7,        # Image HSV-Saturation augmentation\n", "    hsv_v=0.4,        # Image HSV-Value augmentation\n", "    degrees=10,       # Image rotation (+/- deg)\n", "    translate=0.1,    # Image translation (+/- fraction)\n", "    scale=0.3,        # Image scale (+/- gain)\n", "    shear=0.0,        # Image shear (+/- deg)\n", "    perspective=0.0,  # Image perspective\n", "    flipud=0.1,       # Image flip up-down (probability)\n", "    fliplr=0.1,       # Image flip left-right (probability)\n", "    mosaic=1.0,       # Image mosaic (probability)\n", "    mixup=0.0,        # Image mixup (probability)\n", ")" ] },
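 { "cell_type": "markdown", "id": "a1e5c9d0", "metadata": {}, "source": [ "Optionally evaluate the best checkpoint on the held-out test split. The next cell is a minimal sketch, assuming training above completed and wrote `weights/best.pt` under `results.save_dir`; `split='test'` makes `model.val()` read the `test` path written into `data.yaml` earlier." ] },
 { "cell_type": "code", "execution_count": null, "id": "b2f6d0e1", "metadata": {}, "outputs": [], "source": [ "# Sketch: score the best checkpoint on the test split (assumes training finished)\n", "best_model = YOLO(f\"{results.save_dir}/weights/best.pt\")\n", "test_metrics = best_model.val(\n", "    data=CONFIG['data'],\n", "    split='test',\n", "    imgsz=CONFIG['imgsz'],\n", "    device=CONFIG['device'],\n", ")\n", "print(f\"Test mAP@0.5: {test_metrics.box.map50:.4f}\")\n", "print(f\"Test mAP@0.5:0.95: {test_metrics.box.map:.4f}\")" ] },
 { "cell_type": "markdown", "id": "c3a7e1f2", "metadata": {}, "source": [ "A quick qualitative check: run the best checkpoint (reusing `best_model` from the cell above) on a few test images and plot the predictions. This is a minimal sketch, assuming the dataset layout written into `data.yaml` above and `.jpg` test images; `Results.plot()` returns a BGR array, so it is flipped to RGB for matplotlib." ] },
 { "cell_type": "code", "execution_count": null, "id": "d4b8f2a3", "metadata": {}, "outputs": [], "source": [ "# Sketch: visualize predictions on a few test images (assumes *.jpg files)\n", "test_dir = Path(CONFIG['data']).parent / 'test' / 'images'\n", "sample_images = sorted(test_dir.glob('*.jpg'))[:3]\n", "\n", "preds = best_model.predict(source=[str(p) for p in sample_images], conf=0.25, verbose=False)\n", "for r in preds:\n", "    plt.figure(figsize=(8, 8))\n", "    plt.imshow(r.plot()[..., ::-1])  # Results.plot() returns BGR; flip to RGB\n", "    plt.axis('off')\n", "    plt.show()" ] },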
 { "cell_type": "code", "execution_count": null, "id": "06211243", "metadata": {}, "outputs": [], "source": [ "file_path = str(results.save_dir)\n", "results_csv_path = f\"{file_path}/results.csv\"" ] },
 { "cell_type": "code", "execution_count": null, "id": "e67532ea", "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "\n", "if not Path(results_csv_path).exists():\n", "    raise FileNotFoundError(f\"File not found: {results_csv_path}\")\n", "result_metrics = pd.read_csv(results_csv_path)\n", "\n", "metrics = {\n", "    \"Train Box Loss\": \"train/box_loss\",\n", "    \"Train Class Loss\": \"train/cls_loss\",\n", "    \"Train DFL Loss\": \"train/dfl_loss\",\n", "    \"Validation Box Loss\": \"val/box_loss\",\n", "    \"Validation Class Loss\": \"val/cls_loss\",\n", "    \"Validation DFL Loss\": \"val/dfl_loss\",\n", "    \"Precision (B)\": \"metrics/precision(B)\",\n", "    \"Recall (B)\": \"metrics/recall(B)\",\n", "    \"mAP@0.5 (B)\": \"metrics/mAP50(B)\",\n", "    \"mAP@0.5:0.95 (B)\": \"metrics/mAP50-95(B)\",\n", "}\n", "\n", "available_metrics = {name: col for name, col in metrics.items() if col in result_metrics.columns}\n", "missing_metrics = [name for name in metrics if name not in available_metrics]\n", "\n", "if missing_metrics:\n", "    print(f\"Missing metrics: {', '.join(missing_metrics)}\")\n", "else:\n", "    print(\"All expected metrics are present.\")\n", "\n", "for metric_name, col in available_metrics.items():\n", "    plt.figure()\n", "    plt.plot(result_metrics[\"epoch\"], result_metrics[col], label=metric_name)\n", "    plt.title(metric_name)\n", "    plt.xlabel(\"Epoch\")\n", "    plt.ylabel(metric_name)\n", "    plt.legend()\n", "    plt.grid()\n", "    plt.show()\n", "\n", "final_epoch = result_metrics.iloc[-1]\n", "final_metrics = {name: final_epoch[col] for name, col in available_metrics.items()}\n", "\n", "print(\"\\nFinal Metrics Summary (Last Epoch):\")\n", "for name, value in final_metrics.items():\n", "    print(f\"{name}: {value:.4f}\")\n", "\n", "print(\"\\nImprovement Trends:\")\n", "for metric_name, col in available_metrics.items():\n", "    initial = result_metrics[col].iloc[0]\n", "    final = result_metrics[col].iloc[-1]\n", "    # Losses improve when they decrease; precision/recall/mAP improve when they increase\n", "    improved = final < initial if 'loss' in col else final > initial\n", "    trend = \"improved\" if improved else \"worsened\"\n", "    print(f\"{metric_name}: {trend} (Initial: {initial:.4f}, Final: {final:.4f})\")" ] },
 { "cell_type": "code", "execution_count": null, "id": "cd2fb43f", "metadata": {}, "outputs": [], "source": [ "# Display the evaluation plots Ultralytics writes into the run directory\n", "plot_names = [\n", "    \"confusion_matrix_normalized.png\",\n", "    \"F1_curve.png\",\n", "    \"P_curve.png\",\n", "    \"R_curve.png\",\n", "    \"PR_curve.png\",\n", "    \"results.png\",\n", "]\n", "\n", "for name in plot_names:\n", "    img = mpimg.imread(f\"{file_path}/{name}\")\n", "    plt.figure()\n", "    plt.imshow(img)\n", "    plt.axis('off')\n", "    plt.show()" ] }
 ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } },
"nbformat": 4, "nbformat_minor": 5 }