{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "A100", "machine_shape": "hm" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "a268227e95cc46cda6c40f88f76729b4": { "model_module": "@jupyter-widgets/controls", "model_name": "VBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "VBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "VBoxView", "box_style": "", "children": [ "IPY_MODEL_8d2fc30851ab4972a0fc9763f81ed088", "IPY_MODEL_6c9471e6455a4245a9fa731ef480041d", "IPY_MODEL_e04732f9aee643378919f97ceed29a79", "IPY_MODEL_865a67dd6e0c4b779a6bb41a8bd06882" ], "layout": "IPY_MODEL_c3b6d6efc0bb4cebb43e54f1cac15398" } }, "5d2b151866db434b9a97054136972c02": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_dfb07bd69a98467385a922d93dbbdf78", "placeholder": "", "style": "IPY_MODEL_b8c97b05b8cd41929982d3dfa4c0253e", "value": "
Epoch | \n", "Training Loss | \n", "Validation Loss | \n", "
---|---|---|
1 | \n", "No log | \n", "1.553022 | \n", "
2 | \n", "No log | \n", "1.460105 | \n", "
3 | \n", "No log | \n", "1.395301 | \n", "
4 | \n", "No log | \n", "1.355699 | \n", "
5 | \n", "No log | \n", "1.330109 | \n", "
6 | \n", "No log | \n", "1.311717 | \n", "
7 | \n", "No log | \n", "1.296761 | \n", "
8 | \n", "No log | \n", "1.283209 | \n", "
9 | \n", "No log | \n", "1.276882 | \n", "
10 | \n", "No log | \n", "1.280680 | \n", "
11 | \n", "No log | \n", "1.269814 | \n", "
12 | \n", "No log | \n", "1.270706 | \n", "
13 | \n", "No log | \n", "1.274694 | \n", "
14 | \n", "No log | \n", "1.269452 | \n", "
15 | \n", "1.055700 | \n", "1.269921 | \n", "
16 | \n", "1.055700 | \n", "1.265592 | \n", "
17 | \n", "1.055700 | \n", "1.271150 | \n", "
18 | \n", "1.055700 | \n", "1.269600 | \n", "
19 | \n", "1.055700 | \n", "1.269069 | \n", "
20 | \n", "1.055700 | \n", "1.269298 | \n", "
" ] }, "metadata": {} }, { "output_type": "execute_result", "data": { "text/plain": [ "TrainOutput(global_step=140, training_loss=0.9502299581255231, metrics={'train_runtime': 38.6687, 'train_samples_per_second': 27.93, 'train_steps_per_second': 3.621, 'total_flos': 282195394560000.0, 'train_loss': 0.9502299581255231, 'epoch': 20.0})" ] }, "metadata": {}, "execution_count": 36 } ] }, { "cell_type": "markdown", "source": [ "# Save Model" ], "metadata": { "id": "9RkdIyRfLNkW" } }, { "cell_type": "code", "source": [ "trainer.save_model()\n", "trainer.push_to_hub()\n", "tokenizer.push_to_hub(\"gpt2-evy\")" ], "metadata": { "id": "Mp0hvvAILPlQ", "colab": { "base_uri": "https://localhost:8080/", "height": 152, "referenced_widgets": [ "f403a8090a6646a5b24bd3f1a482a466", "7c5e45acf4f44fbbbe42ba6ae0d1a00b", "241d22a572d14deeb99cf23d32b15437", "833282adf0374866a2e923ba2d89ed54", "92893ea5925a416496dbb5f0f6cc5fe4", "b965c7efbf2c4bb59f6d6339bca638aa", "8543bc719912446a956bc1df364892dc", "7df787a07b5442cf9995c0e239c65c25", "1b93b372a091455b90d7aee81706868d", "dccf8ecf30d94f3ca449bf2f681683c7", "0ffd8ed4d4e84c2eae4837978e6dfdcf", "1a8e5de3f44b4deb9815fe7dcac10f71", "15f90e6de63647a291899befc0912db8", "288e7c00f27b421388f23f1255d01f2d", "f57174571a494c4cb2074a904f3b86ce", "8eb37b23513e49679dd5f12554a66295", "de0b0d803dde4c8fa8bcd9eda5210420", "3be10b8fc42749fdb522f3168952b3ea", "d2895d4001b24dc6bc52f8ead03f77a3", "99962b09abe44dc7977f067371cf65ef", "e1434108affa47fa94b2680e72319099", "91d6a93bbb374189be6d6b8a62c8ab9b" ] }, "outputId": "4b296348-4bf4-440d-eb74-219e15b5e032" }, "execution_count": 37, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "model.safetensors: 0%| | 0.00/498M [00:00, ?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "f403a8090a6646a5b24bd3f1a482a466" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "README.md: 0%| | 0.00/2.19k [00:00, ?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "1a8e5de3f44b4deb9815fe7dcac10f71" } }, "metadata": {} }, { "output_type": "execute_result", "data": { "text/plain": [ "CommitInfo(commit_url='https://huggingface.co/joshcarp/gpt2-evy/commit/c95869e260cd381e108a4f2ad1c6f3745a001a68', commit_message='Upload tokenizer', commit_description='', oid='c95869e260cd381e108a4f2ad1c6f3745a001a68', pr_url=None, pr_revision=None, pr_num=None)" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 37 } ] } ] }