diff --git "a/HSBv1.ipynb" "b/HSBv1.ipynb" new file mode 100644--- /dev/null +++ "b/HSBv1.ipynb" @@ -0,0 +1,4090 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "HSB_V1.ipynb", + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "7dcefcaa1c934adb9b61c524229917ca": { + "model_module": "@jupyter-widgets/controls", + "model_name": "VBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "VBoxView", + "_dom_classes": [], + "_model_name": "VBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_13cf470b9f9c4e7f962f0d790f534a2e", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_c8e44a79debe40c6b24e21341478e67b", + "IPY_MODEL_e5a8811bfe974e37bdc8fb204fe45a40", + "IPY_MODEL_2b179c9084914de5a3ed85b132802e74", + "IPY_MODEL_2bb80bb1221045b9a3944d86ef804244", + "IPY_MODEL_b2057ffad38845aba5ecdd294d82e336" + ] + } + }, + "13cf470b9f9c4e7f962f0d790f534a2e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": "column", + "width": "50%", + "min_width": null, + "border": null, + "align_items": "center", + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + 
"justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": "flex", + "left": null + } + }, + "c8e44a79debe40c6b24e21341478e67b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_d7460101911c4bf69205a5e3fdb857a3", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "
\nHugging Face\n
\nCopy a token from your Hugging Face tokens page and paste it below.\n
\nImmediately click login after copying your token or it might be stored in plain text in this notebook file.\n
", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_e92eff49a9b94929b91e69fffe6f45ab" + } + }, + "e5a8811bfe974e37bdc8fb204fe45a40": { + "model_module": "@jupyter-widgets/controls", + "model_name": "PasswordModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "PasswordView", + "style": "IPY_MODEL_84592c40621b4b7688455280546dd4bc", + "_dom_classes": [], + "description": "Token:", + "_model_name": "PasswordModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "", + "_view_count": null, + "disabled": false, + "_view_module_version": "1.5.0", + "continuous_update": true, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_2d3ec7d72c3c4f1eb29d3d12d2ee3972" + } + }, + "2b179c9084914de5a3ed85b132802e74": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ButtonModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ButtonView", + "style": "IPY_MODEL_4ad6df15532a45cea3e6809e3827b889", + "_dom_classes": [], + "description": "Login", + "_model_name": "ButtonModel", + "button_style": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "tooltip": "", + "_view_count": null, + "disabled": false, + "_view_module_version": "1.5.0", + "layout": "IPY_MODEL_02327dbb872a43fa9f38c659be0bc80d", + "_model_module": "@jupyter-widgets/controls", + "icon": "" + } + }, + "2bb80bb1221045b9a3944d86ef804244": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_0bd6ad5c7fc5409d98be3f37f6e59511", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": 
"1.5.0", + "value": "\nPro Tip: If you don't already have one, you can create a dedicated 'notebooks' token with 'write' access, that you can then easily reuse for all notebooks.\n
\nLogging in with your username and password is deprecated and won't be possible anymore in the near future. You can still use them for now by clicking below.\n", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_470b133751354196b59f1836f0c45fe3" + } + }, + "b2057ffad38845aba5ecdd294d82e336": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ButtonModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ButtonView", + "style": "IPY_MODEL_3a33f3e4c818461f906d2b450d32f803", + "_dom_classes": [], + "description": "Use password", + "_model_name": "ButtonModel", + "button_style": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "tooltip": "", + "_view_count": null, + "disabled": false, + "_view_module_version": "1.5.0", + "layout": "IPY_MODEL_0e3965ee7aa04bc8b25e81800d462ef7", + "_model_module": "@jupyter-widgets/controls", + "icon": "" + } + }, + "d7460101911c4bf69205a5e3fdb857a3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "e92eff49a9b94929b91e69fffe6f45ab": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + 
"bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "84592c40621b4b7688455280546dd4bc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "2d3ec7d72c3c4f1eb29d3d12d2ee3972": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": 
null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "4ad6df15532a45cea3e6809e3827b889": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ButtonStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ButtonStyleModel", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "button_color": null, + "font_weight": "", + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "02327dbb872a43fa9f38c659be0bc80d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + 
"_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "0bd6ad5c7fc5409d98be3f37f6e59511": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "470b133751354196b59f1836f0c45fe3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "3a33f3e4c818461f906d2b450d32f803": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "ButtonStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ButtonStyleModel", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "button_color": null, + "font_weight": "", + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "0e3965ee7aa04bc8b25e81800d462ef7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "22659a943af54f25916fc84a39def214": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_95121b6ac1674e4ab346bfd35a498a4f", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_88cb9f455ce64627994083b4148ce5a4", + "IPY_MODEL_933bc3126162426891d06c5c90e61718", + "IPY_MODEL_e0cb52f1a45948f1ae54f875bc81ca84" + ] + } + }, + "95121b6ac1674e4ab346bfd35a498a4f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "88cb9f455ce64627994083b4148ce5a4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_de30a075be8d496aaa7ecd82f6fe2e85", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "value": "100%", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_bd746c2c0d794303a98c65170c125ab0" + } + }, + "933bc3126162426891d06c5c90e61718": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_2927f23930e44acfb843897b497cd629", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_d00fea9f3bf54dd9ad20f1495a9e0eea" + } + }, + "e0cb52f1a45948f1ae54f875bc81ca84": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_e1446fbbacd047bfa080bb3331385b1e", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 1/1 [00:00<00:00, 8.05ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_bbc8d6348cef46e4ae136e685c1d8858" + } + }, + "de30a075be8d496aaa7ecd82f6fe2e85": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": 
"1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "bd746c2c0d794303a98c65170c125ab0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "2927f23930e44acfb843897b497cd629": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "d00fea9f3bf54dd9ad20f1495a9e0eea": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + 
"state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "e1446fbbacd047bfa080bb3331385b1e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "bbc8d6348cef46e4ae136e685c1d8858": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + 
"min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "93da0b8505664e5f9aa0e759a2b49741": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_7a259ca75fba4b6e85e62120ebaa53e7", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_62801d1d81624ee48c5146f384abf970", + "IPY_MODEL_107ed6a6a59d4389956bf93f02f1d9d8", + "IPY_MODEL_9bfcb6da24f043c4b0eeabb742e28847" + ] + } + }, + "7a259ca75fba4b6e85e62120ebaa53e7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": 
null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "62801d1d81624ee48c5146f384abf970": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_29ba185923c742cda185337657182dc8", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "100%", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_4a7fee6a1bb04660b3b6fa303d5d5fb9" + } + }, + "107ed6a6a59d4389956bf93f02f1d9d8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_dc92fc5167594340a5d233a7c780c9be", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": 
null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_0f71ffaaed274d52841b7c4563f83a4c" + } + }, + "9bfcb6da24f043c4b0eeabb742e28847": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_7eec39011c5740bcb85d1a3fd7251f5a", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 1/1 [00:00<00:00, 14.65ba/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_0a5cf47f3aa54a27853fb1906e31d74e" + } + }, + "29ba185923c742cda185337657182dc8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "4a7fee6a1bb04660b3b6fa303d5d5fb9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + 
"_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "dc92fc5167594340a5d233a7c780c9be": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "0f71ffaaed274d52841b7c4563f83a4c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + 
"max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "7eec39011c5740bcb85d1a3fd7251f5a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "0a5cf47f3aa54a27853fb1906e31d74e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "33e9bcd77cdd4182a98c135118a29a0d": { + 
"model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_5d7414d13c904bf9bd3c9900d9256c8b", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_4ec83b8fabba46a9a55327262a11523c", + "IPY_MODEL_ff82725e881b482b9e78199ea4469d85", + "IPY_MODEL_8ec76c12495b402cb16dd4a400d3e185" + ] + } + }, + "5d7414d13c904bf9bd3c9900d9256c8b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "4ec83b8fabba46a9a55327262a11523c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", 
+ "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_fab742e71b68410ebe2f8c2d2074a61e", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_7f71a32f7395417f8eb43852737061a4" + } + }, + "ff82725e881b482b9e78199ea4469d85": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_d539de5e75f647609659cc1c830b5a82", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_0bea115c4c1943608097893a383bc507" + } + }, + "8ec76c12495b402cb16dd4a400d3e185": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_1fdacd9886734bfbb5ebc664829e2a8f", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 980/? 
[00:18<00:00, 36.79ex/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_54a0a93cac8e4512ac213050caae0aad" + } + }, + "fab742e71b68410ebe2f8c2d2074a61e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "7f71a32f7395417f8eb43852737061a4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "d539de5e75f647609659cc1c830b5a82": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "0bea115c4c1943608097893a383bc507": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": "20px", + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "1fdacd9886734bfbb5ebc664829e2a8f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "54a0a93cac8e4512ac213050caae0aad": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "7002a26a8625455682af2d92b4f4f3b3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_b5c702735ce84403ad67216c3cb872a9", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_c0522846dd85473590e9e3fd2c522335", + 
"IPY_MODEL_84909a89eb6140339979ad3e58a708d1", + "IPY_MODEL_44108700a6bd4789b1ad62a01263bb11" + ] + } + }, + "b5c702735ce84403ad67216c3cb872a9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "c0522846dd85473590e9e3fd2c522335": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_a8f0555374264ba79f38d71d91f5ff6f", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "Upload file pytorch_model.bin: 100%", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": 
"IPY_MODEL_51846bf6f7b443d88f6c884df9bcfe12" + } + }, + "84909a89eb6140339979ad3e58a708d1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_0dabd4f119d34a8dafeefc125c46e52d", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 1262116337, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1262116337, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_0fd787cdff1a4096826d10efa3676aca" + } + }, + "44108700a6bd4789b1ad62a01263bb11": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_55d494df8f2d48d1ae4654d0833afedc", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 1.18G/1.18G [18:14<00:00, 1.08MB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_b4840ffc785d41c7b193cdd7c0055191" + } + }, + "a8f0555374264ba79f38d71d91f5ff6f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "51846bf6f7b443d88f6c884df9bcfe12": { + 
"model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "0dabd4f119d34a8dafeefc125c46e52d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "0fd787cdff1a4096826d10efa3676aca": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + 
"overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "55d494df8f2d48d1ae4654d0833afedc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "b4840ffc785d41c7b193cdd7c0055191": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + 
"overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "c1c8855ab8df474ea1e4bd078060f27b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_829a4957c2ee4795a1ae71015a7b3393", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_9f09da2d4fef4a08a8bb0cfdbd3ded58", + "IPY_MODEL_b244af9b23894dcd8d58624fc00e2f9d", + "IPY_MODEL_16b8dadd139f4a52b489c84be1f9942b" + ] + } + }, + "829a4957c2ee4795a1ae71015a7b3393": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + 
"grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "9f09da2d4fef4a08a8bb0cfdbd3ded58": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_69ac19f932d3483dabdf8a1c1b5d1554", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "Upload file runs/Jan31_00-26-59_70b8475e937e/events.out.tfevents.1643588864.70b8475e937e.72.2: 100%", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_4f8c1af0353942d9848378e0b79d6455" + } + }, + "b244af9b23894dcd8d58624fc00e2f9d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_70bd992691ea4075b98890b6e9f2e290", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 12241, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 12241, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + 
"layout": "IPY_MODEL_6c27d46d0c604325bde1e84735264896" + } + }, + "16b8dadd139f4a52b489c84be1f9942b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_4ada050cc71a43a8ad80cd6a07483ff7", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 12.0k/12.0k [18:13<00:00, 8.04B/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_4496fd5850ef4f32984f88959414f9bf" + } + }, + "69ac19f932d3483dabdf8a1c1b5d1554": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "4f8c1af0353942d9848378e0b79d6455": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + 
"grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "70bd992691ea4075b98890b6e9f2e290": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "6c27d46d0c604325bde1e84735264896": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + 
"_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "4ada050cc71a43a8ad80cd6a07483ff7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "4496fd5850ef4f32984f88959414f9bf": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + } + } + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pwqX3mdhXwx5", + "outputId": "2d521cc6-1f82-4120-c439-d9c3e9a1699e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sun Jan 30 23:57:28 2022 \n", + "+-----------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 495.46 Driver Version: 460.32.03 CUDA Version: 11.2 |\n", + "|-------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", + "| | | MIG M. |\n", + "|===============================+======================+======================|\n", + "| 0 Tesla P100-PCIE... Off | 00000000:00:04.0 Off | 0 |\n", + "| N/A 66C P0 41W / 250W | 15683MiB / 16280MiB | 0% Default |\n", + "| | | N/A |\n", + "+-------------------------------+----------------------+----------------------+\n", + " \n", + "+-----------------------------------------------------------------------------+\n", + "| Processes: |\n", + "| GPU GI CI PID Type Process name GPU Memory |\n", + "| ID ID Usage |\n", + "|=============================================================================|\n", + "| No running processes found |\n", + "+-----------------------------------------------------------------------------+\n" + ] + } + ], + "source": [ + "!nvidia-smi" + ] + }, + { + "cell_type": "code", + "source": [ + "%%capture\n", + "!pip install datasets\n", + "!pip install transformers\n", + "!pip install huggingface_hub\n", + "!pip install torchaudio\n", + "!pip install jiwer" + ], + "metadata": { + "id": "5SUbafEAX0CJ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from huggingface_hub import notebook_login\n", + "\n", + "notebook_login()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 388, + 
"referenced_widgets": [ + "7dcefcaa1c934adb9b61c524229917ca", + "13cf470b9f9c4e7f962f0d790f534a2e", + "c8e44a79debe40c6b24e21341478e67b", + "e5a8811bfe974e37bdc8fb204fe45a40", + "2b179c9084914de5a3ed85b132802e74", + "2bb80bb1221045b9a3944d86ef804244", + "b2057ffad38845aba5ecdd294d82e336", + "d7460101911c4bf69205a5e3fdb857a3", + "e92eff49a9b94929b91e69fffe6f45ab", + "84592c40621b4b7688455280546dd4bc", + "2d3ec7d72c3c4f1eb29d3d12d2ee3972", + "4ad6df15532a45cea3e6809e3827b889", + "02327dbb872a43fa9f38c659be0bc80d", + "0bd6ad5c7fc5409d98be3f37f6e59511", + "470b133751354196b59f1836f0c45fe3", + "3a33f3e4c818461f906d2b450d32f803", + "0e3965ee7aa04bc8b25e81800d462ef7" + ] + }, + "id": "rWmcNVb2X7Hd", + "outputId": "420a0c5e-9ec3-40d6-f0fb-4bb597e8f0e5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Login successful\n", + "Your token has been saved to /root/.huggingface/token\n", + "\u001b[1m\u001b[31mAuthenticated through git-credential store but this isn't the helper defined on your machine.\n", + "You might have to re-authenticate when pushing to the Hugging Face Hub. 
Run the following command in your terminal in case you want to set this credential helper as the default\n", + "\n", + "git config --global credential.helper store\u001b[0m\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%capture\n", + "!apt install git-lfs" + ], + "metadata": { + "id": "3X_6k7imX9L8" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "#Create Wav2Vec2CTCTokenizer" + ], + "metadata": { + "id": "pP37N8UPYJ9K" + } + }, + { + "cell_type": "code", + "source": [ + "from datasets import load_dataset, load_metric, Audio\n", + "\n", + "\n", + "common_voice_train = load_dataset(\"mozilla-foundation/common_voice_8_0\", \"hsb\", use_auth_token=True, split=\"train+validation\")\n", + "common_voice_test = load_dataset(\"mozilla-foundation/common_voice_8_0\", \"hsb\", use_auth_token=True, split=\"test\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "O2xxPBqwYH0Z", + "outputId": "0233942a-ce68-4129-cf04-bc843dcd5444" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Reusing dataset common_voice (/root/.cache/huggingface/datasets/mozilla-foundation___common_voice/hsb/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8)\n", + "Reusing dataset common_voice (/root/.cache/huggingface/datasets/mozilla-foundation___common_voice/hsb/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Many ASR datasets only provide the target text, 'sentence' for each audio array 'audio' and file 'path'. Common Voice actually provides much more information about each audio file, such as the 'accent', etc. Keeping the notebook as general as possible, we only consider the transcribed text for fine-tuning." 
+ ], + "metadata": { + "id": "4ApfA7IWYOro" + } + }, + { + "cell_type": "code", + "source": [ + "common_voice_train = common_voice_train.remove_columns([\"accent\", \"age\", \"client_id\", \"down_votes\", \"gender\", \"locale\", \"segment\", \"up_votes\"])\n", + "common_voice_test = common_voice_test.remove_columns([\"accent\", \"age\", \"client_id\", \"down_votes\", \"gender\", \"locale\", \"segment\", \"up_votes\"])" + ], + "metadata": { + "id": "T-M7qRWdYMII" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Let's write a short function to display some random samples of the dataset and run it a couple of times to get a feeling for the transcriptions." + ], + "metadata": { + "id": "c10yNHjNYU5q" + } + }, + { + "cell_type": "code", + "source": [ + "from datasets import ClassLabel\n", + "import random\n", + "import pandas as pd\n", + "from IPython.display import display, HTML\n", + "\n", + "def show_random_elements(dataset, num_examples=10):\n", + " assert num_examples <= len(dataset), \"Can't pick more elements than there are in the dataset.\"\n", + " picks = []\n", + " for _ in range(num_examples):\n", + " pick = random.randint(0, len(dataset)-1)\n", + " while pick in picks:\n", + " pick = random.randint(0, len(dataset)-1)\n", + " picks.append(pick)\n", + " \n", + " df = pd.DataFrame(dataset[picks])\n", + " display(HTML(df.to_html()))" + ], + "metadata": { + "id": "6SPbxreXYRTQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "show_random_elements(common_voice_train.remove_columns([\"path\", \"audio\"]), num_examples=10)" + ], + "metadata": { + "id": "FRsI225GYTps", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "outputId": "238aa5f4-ddb8-4960-eef8-98684bbc1f9a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sentence
0Na cyrkwinskim dnju w Budyšinje běchu wobdźělnicy z braniborskich kónčin.
1Chcemy hišće raz přepruwować, kak je k tomu dóšło.
2Ale poprawom je mi wšojedne, kotru druhu rěč moje dźěći nawuknu.
3Wučbnica wobsahuje wubrane fakty wo zažnej dobje wuwića čłowjeka, wo Egyptowskej a Grjekskej.
4Swój studij w Lipsku a Berlinje dyrbješe sej Marja Grólmusec wuwojować.
5Tute wotbłyšćuja gramatiske formy wšědneje, medijoweje a šulskeje rěče.
6Wój stej móhłoj wulkotnje dźiwadło hrać, ale docyła nic spěwać.
7Hladanje w internaće spěchuje wuwiće wosobiny kóždeho jednotliweho šulerja.
8Chcu rady přećelny swobodny stat měć, kotryž wobydlerjow zapřija.
9Připóznawamy tež prócowanja serbskeju radow a dalšich gremijow wo polěpšenje situacije serbskeho ludu.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "import re\n", + "chars_to_remove_regex = '[\\,\\?\\.\\!\\-\\;\\:\\\"\\“\\%\\‘\\”\\�\\'\\।\\&\\|\\’]'\n", + "\n", + "def remove_special_characters(batch):\n", + " batch[\"sentence\"] = re.sub(chars_to_remove_regex, '', batch[\"sentence\"]).lower()\n", + " return batch" + ], + "metadata": { + "id": "NRDcDNteYabw" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "common_voice_train = common_voice_train.map(remove_special_characters)\n", + "common_voice_test = common_voice_test.map(remove_special_characters)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "SyWGPpzNYciF", + "outputId": "4037174c-2afd-46da-f238-d2ff82f60669" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading cached processed dataset at /root/.cache/huggingface/datasets/mozilla-foundation___common_voice/hsb/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8/cache-81de7c76e8664d53.arrow\n", + "Loading cached processed dataset at /root/.cache/huggingface/datasets/mozilla-foundation___common_voice/hsb/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8/cache-d9f4d3280c75793e.arrow\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "show_random_elements(common_voice_train.remove_columns([\"path\",\"audio\"]))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "5XHPLLvAYeMY", + "outputId": "911c5d9a-03c8-4d83-ba70-8f5e8fc3b9ef" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sentence
0spěchowanje sporta ma so znajmjeńša na dotalnym niwowje dale wjesć
1šula ma nadawk kubłanja
2zahajili sće kulturne lěćo ze židowskim wuměłstwom
3hotowy tajki projekt ženje njeje přetož dwurěčny korpus rosće
4přez cyłe lěto dóstawaše wot swojeho předstajeneho listy
5cyrkwje a nabožne zhromadźenstwa su wažni partnerojo za spomóžne zhromadne žiwjenje w měsće
6woni stajichu mazane škleńčki preč a wzachu čiste z kamora
7abo su baterije jeničke woprawdźite móžne rozrisanje
8nimo zwěrjatow spózna tež čłowjek rjanosć tuteje rěčneje krajiny jako městno za wočerstwjenje
9tute poskitki maja po móžnosći bjez barjerow być
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "def extract_all_chars(batch):\n", + " all_text = \" \".join(batch[\"sentence\"])\n", + " vocab = list(set(all_text))\n", + " return {\"vocab\": [vocab], \"all_text\": [all_text]}" + ], + "metadata": { + "id": "z0UilCejYk66" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "vocab_train = common_voice_train.map(extract_all_chars, batched=True, batch_size=-1, keep_in_memory=True, remove_columns=common_voice_train.column_names)\n", + "vocab_test = common_voice_test.map(extract_all_chars, batched=True, batch_size=-1, keep_in_memory=True, remove_columns=common_voice_test.column_names)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 81, + "referenced_widgets": [ + "22659a943af54f25916fc84a39def214", + "95121b6ac1674e4ab346bfd35a498a4f", + "88cb9f455ce64627994083b4148ce5a4", + "933bc3126162426891d06c5c90e61718", + "e0cb52f1a45948f1ae54f875bc81ca84", + "de30a075be8d496aaa7ecd82f6fe2e85", + "bd746c2c0d794303a98c65170c125ab0", + "2927f23930e44acfb843897b497cd629", + "d00fea9f3bf54dd9ad20f1495a9e0eea", + "e1446fbbacd047bfa080bb3331385b1e", + "bbc8d6348cef46e4ae136e685c1d8858", + "93da0b8505664e5f9aa0e759a2b49741", + "7a259ca75fba4b6e85e62120ebaa53e7", + "62801d1d81624ee48c5146f384abf970", + "107ed6a6a59d4389956bf93f02f1d9d8", + "9bfcb6da24f043c4b0eeabb742e28847", + "29ba185923c742cda185337657182dc8", + "4a7fee6a1bb04660b3b6fa303d5d5fb9", + "dc92fc5167594340a5d233a7c780c9be", + "0f71ffaaed274d52841b7c4563f83a4c", + "7eec39011c5740bcb85d1a3fd7251f5a", + "0a5cf47f3aa54a27853fb1906e31d74e" + ] + }, + "id": "GbPSiLAXYmvK", + "outputId": "a7230585-d981-4483-f474-07d9d2da9838" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "22659a943af54f25916fc84a39def214", + "version_minor": 
0, + "version_major": 2 + }, + "text/plain": [ + " 0%| | 0/1 [00:00\n", + " \n", + " Your browser does not support the audio element.\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 106 + } + ] + }, + { + "cell_type": "code", + "source": [ + "rand_int = random.randint(0, len(common_voice_train)-1)\n", + "\n", + "print(\"Target text:\", common_voice_train[rand_int][\"sentence\"])\n", + "print(\"Input array shape:\", common_voice_train[rand_int][\"audio\"][\"array\"].shape)\n", + "print(\"Sampling rate:\", common_voice_train[rand_int][\"audio\"][\"sampling_rate\"])" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "EE5jG9PVZmWU", + "outputId": "53136991-5dda-4207-b3c5-8340110589fb" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Target text: pokazać ma so w tutym zwisku tež na prócowanja sakskeje\n", + "Input array shape: (87552,)\n", + "Sampling rate: 16000\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "def prepare_dataset(batch):\n", + " audio = batch[\"audio\"]\n", + "\n", + " # batched output is \"un-batched\"\n", + " batch[\"input_values\"] = processor(audio[\"array\"], sampling_rate=audio[\"sampling_rate\"]).input_values[0]\n", + " batch[\"input_length\"] = len(batch[\"input_values\"])\n", + " \n", + " with processor.as_target_processor():\n", + " batch[\"labels\"] = processor(batch[\"sentence\"]).input_ids\n", + " return batch" + ], + "metadata": { + "id": "XobzwvyNZpxv" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "common_voice_train = common_voice_train.map(prepare_dataset, remove_columns=common_voice_train.column_names)\n", + "common_voice_test = common_voice_test.map(prepare_dataset, remove_columns=common_voice_test.column_names)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 88, + 
"referenced_widgets": [ + "33e9bcd77cdd4182a98c135118a29a0d", + "5d7414d13c904bf9bd3c9900d9256c8b", + "4ec83b8fabba46a9a55327262a11523c", + "ff82725e881b482b9e78199ea4469d85", + "8ec76c12495b402cb16dd4a400d3e185", + "fab742e71b68410ebe2f8c2d2074a61e", + "7f71a32f7395417f8eb43852737061a4", + "d539de5e75f647609659cc1c830b5a82", + "0bea115c4c1943608097893a383bc507", + "1fdacd9886734bfbb5ebc664829e2a8f", + "54a0a93cac8e4512ac213050caae0aad" + ] + }, + "id": "A7gT1EQMZsvz", + "outputId": "f3b6a1e4-9f86-42a1-8ce4-482d31f03aaf" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "33e9bcd77cdd4182a98c135118a29a0d", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "0ex [00:00, ?ex/s]" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Loading cached processed dataset at /root/.cache/huggingface/datasets/mozilla-foundation___common_voice/hsb/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8/cache-2397f7d3fbbc5fa8.arrow\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Training" + ], + "metadata": { + "id": "Hu8imi4QZx3f" + } + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "\n", + "from dataclasses import dataclass, field\n", + "from typing import Any, Dict, List, Optional, Union\n", + "\n", + "@dataclass\n", + "class DataCollatorCTCWithPadding:\n", + " \"\"\"\n", + " Data collator that will dynamically pad the inputs received.\n", + " Args:\n", + " processor (:class:`~transformers.Wav2Vec2Processor`)\n", + " The processor used for processing the data.\n", + " padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):\n", + " Select a strategy to pad the returned sequences (according to the model's padding side and padding index)\n", + " among:\n", + " * :obj:`True` or
:obj:`'longest'` (default): Pad to the longest sequence in the batch (or no padding if only a single\n", + " sequence is provided).\n", + " * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the\n", + " maximum acceptable input length for the model if that argument is not provided.\n", + " * :obj:`False` or :obj:`'do_not_pad'`: No padding (i.e., can output a batch with sequences of\n", + " different lengths).\n", + " \"\"\"\n", + "\n", + " processor: Wav2Vec2Processor\n", + " padding: Union[bool, str] = True\n", + "\n", + " def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:\n", + " # split inputs and labels since they have to be of different lengths and need\n", + " # different padding methods\n", + " input_features = [{\"input_values\": feature[\"input_values\"]} for feature in features]\n", + " label_features = [{\"input_ids\": feature[\"labels\"]} for feature in features]\n", + "\n", + " batch = self.processor.pad(\n", + " input_features,\n", + " padding=self.padding,\n", + " return_tensors=\"pt\",\n", + " )\n", + " with self.processor.as_target_processor():\n", + " labels_batch = self.processor.pad(\n", + " label_features,\n", + " padding=self.padding,\n", + " return_tensors=\"pt\",\n", + " )\n", + "\n", + " # replace padding with -100 to ignore loss correctly\n", + " labels = labels_batch[\"input_ids\"].masked_fill(labels_batch.attention_mask.ne(1), -100)\n", + "\n", + " batch[\"labels\"] = labels\n", + "\n", + " return batch" + ], + "metadata": { + "id": "rAYCcrJFZubm" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True)" + ], + "metadata": { + "id": "k05sM3rsZzGS" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "wer_metric = load_metric(\"wer\")" + ], + "metadata": { + "id": "T1zgf2ZKZ1et"
+ }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def compute_metrics(pred):\n", + " pred_logits = pred.predictions\n", + " pred_ids = np.argmax(pred_logits, axis=-1)\n", + "\n", + " pred.label_ids[pred.label_ids == -100] = processor.tokenizer.pad_token_id\n", + "\n", + " pred_str = processor.batch_decode(pred_ids)\n", + " # we do not want to group tokens when computing the metrics\n", + " label_str = processor.batch_decode(pred.label_ids, group_tokens=False)\n", + "\n", + " wer = wer_metric.compute(predictions=pred_str, references=label_str)\n", + "\n", + " return {\"wer\": wer}" + ], + "metadata": { + "id": "2J_feilIZ3Jf" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from transformers import Wav2Vec2ForCTC\n", + "\n", + "model = Wav2Vec2ForCTC.from_pretrained(\n", + " \"facebook/wav2vec2-xls-r-300m\", \n", + " attention_dropout=0.04,\n", + " hidden_dropout=0.04,\n", + " feat_proj_dropout=0.04,\n", + " mask_time_prob=0.45,\n", + " layerdrop=0.0,\n", + " ctc_loss_reduction=\"mean\", \n", + " pad_token_id=processor.tokenizer.pad_token_id,\n", + " vocab_size=len(processor.tokenizer),\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fEzH8f_lZ5FG", + "outputId": "c5808890-dac7-4f9c-9a16-7a4118d409b6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/dabc27df63e37bd2a7a221c7774e35f36a280fbdf917cf54cadfc7df8c786f6f.a3e4c3c967d9985881e0ae550a5f6f668f897db5ab2e0802f9b97973b15970e6\n", + "Model config Wav2Vec2Config {\n", + " \"activation_dropout\": 0.0,\n", + " \"adapter_kernel_size\": 3,\n", + " \"adapter_stride\": 2,\n", + " \"add_adapter\": false,\n", + " \"apply_spec_augment\": true,\n", + " \"architectures\": 
[\n", + " \"Wav2Vec2ForPreTraining\"\n", + " ],\n", + " \"attention_dropout\": 0.04,\n", + " \"bos_token_id\": 1,\n", + " \"classifier_proj_size\": 256,\n", + " \"codevector_dim\": 768,\n", + " \"contrastive_logits_temperature\": 0.1,\n", + " \"conv_bias\": true,\n", + " \"conv_dim\": [\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512\n", + " ],\n", + " \"conv_kernel\": [\n", + " 10,\n", + " 3,\n", + " 3,\n", + " 3,\n", + " 3,\n", + " 2,\n", + " 2\n", + " ],\n", + " \"conv_stride\": [\n", + " 5,\n", + " 2,\n", + " 2,\n", + " 2,\n", + " 2,\n", + " 2,\n", + " 2\n", + " ],\n", + " \"ctc_loss_reduction\": \"mean\",\n", + " \"ctc_zero_infinity\": false,\n", + " \"diversity_loss_weight\": 0.1,\n", + " \"do_stable_layer_norm\": true,\n", + " \"eos_token_id\": 2,\n", + " \"feat_extract_activation\": \"gelu\",\n", + " \"feat_extract_dropout\": 0.0,\n", + " \"feat_extract_norm\": \"layer\",\n", + " \"feat_proj_dropout\": 0.04,\n", + " \"feat_quantizer_dropout\": 0.0,\n", + " \"final_dropout\": 0.0,\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout\": 0.04,\n", + " \"hidden_size\": 1024,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4096,\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"layerdrop\": 0.0,\n", + " \"mask_feature_length\": 10,\n", + " \"mask_feature_min_masks\": 0,\n", + " \"mask_feature_prob\": 0.0,\n", + " \"mask_time_length\": 10,\n", + " \"mask_time_min_masks\": 2,\n", + " \"mask_time_prob\": 0.45,\n", + " \"model_type\": \"wav2vec2\",\n", + " \"num_adapter_layers\": 3,\n", + " \"num_attention_heads\": 16,\n", + " \"num_codevector_groups\": 2,\n", + " \"num_codevectors_per_group\": 320,\n", + " \"num_conv_pos_embedding_groups\": 16,\n", + " \"num_conv_pos_embeddings\": 128,\n", + " \"num_feat_extract_layers\": 7,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_negatives\": 100,\n", + " \"output_hidden_size\": 1024,\n", + " \"pad_token_id\": 46,\n", 
+ " \"proj_codevector_dim\": 768,\n", + " \"tdnn_dilation\": [\n", + " 1,\n", + " 2,\n", + " 3,\n", + " 1,\n", + " 1\n", + " ],\n", + " \"tdnn_dim\": [\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 1500\n", + " ],\n", + " \"tdnn_kernel\": [\n", + " 5,\n", + " 3,\n", + " 3,\n", + " 1,\n", + " 1\n", + " ],\n", + " \"torch_dtype\": \"float32\",\n", + " \"transformers_version\": \"4.16.1\",\n", + " \"use_weighted_layer_sum\": false,\n", + " \"vocab_size\": 47,\n", + " \"xvector_output_dim\": 512\n", + "}\n", + "\n", + "loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/1e6a6507f3b689035cd4b247e2a37c154e27f39143f31357a49b4e38baeccc36.1edb32803799e27ed554eb7dd935f6745b1a0b17b0ea256442fe24db6eb546cd\n", + "Some weights of the model checkpoint at facebook/wav2vec2-xls-r-300m were not used when initializing Wav2Vec2ForCTC: ['project_hid.bias', 'project_q.bias', 'quantizer.codevectors', 'project_q.weight', 'quantizer.weight_proj.weight', 'quantizer.weight_proj.bias', 'project_hid.weight']\n", + "- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-300m and are newly initialized: ['lm_head.bias', 'lm_head.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "model.freeze_feature_extractor()" + ], + "metadata": { + "id": "h7MYlVI1Z-w8", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "7980d864-3a23-45cc-fd0e-2b5bc0a16a80" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:1703: FutureWarning: The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5.Please use the equivalent `freeze_feature_encoder` method instead.\n", + " FutureWarning,\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from transformers import TrainingArguments\n", + "\n", + "training_args = TrainingArguments(\n", + " output_dir=repo_name,\n", + " group_by_length=True,\n", + " per_device_train_batch_size=16,\n", + " gradient_accumulation_steps=2,\n", + " evaluation_strategy=\"steps\",\n", + " num_train_epochs=50,\n", + " gradient_checkpointing=True,\n", + " fp16=True,\n", + " save_steps=100,\n", + " eval_steps=100,\n", + " logging_steps=100,\n", + " learning_rate=4.5e-4,\n", + " warmup_steps=500,\n", + " save_total_limit=2,\n", + " push_to_hub=True,\n", + ")" + ], + "metadata": { + "id": "DapYDcW4Z_Z0", + "colab": { + "base_uri": "https://localhost:8080/" + }, + 
"outputId": "5277a6ae-bccb-4665-99e3-34470b0e087b" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "PyTorch: setting up devices\n", + "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from transformers import Trainer\n", + "\n", + "trainer = Trainer(\n", + " model=model,\n", + " data_collator=data_collator,\n", + " args=training_args,\n", + " compute_metrics=compute_metrics,\n", + " train_dataset=common_voice_train,\n", + " eval_dataset=common_voice_test,\n", + " tokenizer=processor.feature_extractor,\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jHiXuohDaD_5", + "outputId": "5e9a9de1-968a-47b3-b544-687cfa9923fb" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/content/wav2vec2-large-xls-r-300m-hsb-v1 is already a clone of https://huggingface.co/DrishtiSharma/wav2vec2-large-xls-r-300m-hsb-v1. 
Make sure you pull the latest changes with `repo.git_pull()`.\n", + "Using amp half precision backend\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "trainer.train()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "27TZ79d_aIgO", + "outputId": "25b28e4c-0ef6-48fc-f6c5-0e849b63be37" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", + " FutureWarning,\n", + "***** Running training *****\n", + " Num examples = 980\n", + " Num Epochs = 50\n", + " Instantaneous batch size per device = 16\n", + " Total train batch size (w. parallel, distributed & accumulation) = 32\n", + " Gradient Accumulation steps = 2\n", + " Total optimization steps = 1550\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " [1550/1550 3:17:32, Epoch 50/50]\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StepTraining LossValidation LossWer
1008.9720003.7497811.000000
2003.3401003.2320061.000000
3003.2046003.1740720.980611
4002.4031001.0579300.899577
5001.0427000.7989190.755655
6000.7410000.6405190.629878
7000.5699000.6129350.592841
8000.4607000.6548120.569476
9000.3827000.6267910.519016
10000.3282000.5918500.501616
11000.2764000.5953030.480487
12000.2335000.5717450.472782
13000.2106000.5674480.456873
14000.1859000.5684660.450162
15000.1592000.5683520.440219

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 418\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-100\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-100/config.json\n", + "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-100/pytorch_model.bin\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-100/preprocessor_config.json\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/preprocessor_config.json\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 418\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-200\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-200/config.json\n", + "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-200/pytorch_model.bin\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-200/preprocessor_config.json\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 418\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-300\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-300/config.json\n", + "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-300/pytorch_model.bin\n", + "Configuration saved in 
wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-300/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-100] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 418\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-400\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-400/config.json\n", + "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-400/pytorch_model.bin\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-400/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-200] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 418\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-500\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-500/config.json\n", + "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-500/pytorch_model.bin\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-500/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-300] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 418\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-600\n", + "Configuration saved in 
wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-600/config.json\n", + "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-600/pytorch_model.bin\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-600/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-400] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 418\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-700\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-700/config.json\n", + "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-700/pytorch_model.bin\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-700/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-500] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 418\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-800\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-800/config.json\n", + "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-800/pytorch_model.bin\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-800/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-600] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running 
Evaluation *****\n", + " Num examples = 418\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-900\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-900/config.json\n", + "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-900/pytorch_model.bin\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-900/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-700] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 418\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1000\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1000/config.json\n", + "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1000/pytorch_model.bin\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1000/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-800] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 418\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1100\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1100/config.json\n", + "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1100/pytorch_model.bin\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1100/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-900] due to 
args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 418\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1200\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1200/config.json\n", + "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1200/pytorch_model.bin\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1200/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1000] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 418\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1300\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1300/config.json\n", + "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1300/pytorch_model.bin\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1300/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1100] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 418\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1400\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1400/config.json\n", + "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1400/pytorch_model.bin\n", + 
"Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1400/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1200] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 418\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1500\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1500/config.json\n", + "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1500/pytorch_model.bin\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1500/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1300] due to args.save_total_limit\n", + "\n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "TrainOutput(global_step=1550, training_loss=1.4571432159792992, metrics={'train_runtime': 11862.6013, 'train_samples_per_second': 4.131, 'train_steps_per_second': 0.131, 'total_flos': 1.0091977567651906e+19, 'train_loss': 1.4571432159792992, 'epoch': 50.0})" + ] + }, + "metadata": {}, + "execution_count": 118 + } + ] + }, + { + "cell_type": "code", + "source": [ + "trainer.push_to_hub()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 356, + "referenced_widgets": [ + "7002a26a8625455682af2d92b4f4f3b3", + "b5c702735ce84403ad67216c3cb872a9", + "c0522846dd85473590e9e3fd2c522335", + "84909a89eb6140339979ad3e58a708d1", + "44108700a6bd4789b1ad62a01263bb11", + "a8f0555374264ba79f38d71d91f5ff6f", + "51846bf6f7b443d88f6c884df9bcfe12", + "0dabd4f119d34a8dafeefc125c46e52d", + "0fd787cdff1a4096826d10efa3676aca", + 
"55d494df8f2d48d1ae4654d0833afedc", + "b4840ffc785d41c7b193cdd7c0055191", + "c1c8855ab8df474ea1e4bd078060f27b", + "829a4957c2ee4795a1ae71015a7b3393", + "9f09da2d4fef4a08a8bb0cfdbd3ded58", + "b244af9b23894dcd8d58624fc00e2f9d", + "16b8dadd139f4a52b489c84be1f9942b", + "69ac19f932d3483dabdf8a1c1b5d1554", + "4f8c1af0353942d9848378e0b79d6455", + "70bd992691ea4075b98890b6e9f2e290", + "6c27d46d0c604325bde1e84735264896", + "4ada050cc71a43a8ad80cd6a07483ff7", + "4496fd5850ef4f32984f88959414f9bf" + ] + }, + "id": "qKKeV9gEaLHr", + "outputId": "26794641-f90d-4f24-81a1-8266ab1e9559" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/config.json\n", + "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/pytorch_model.bin\n", + "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/preprocessor_config.json\n", + "Several commits (2) will be pushed upstream.\n", + "The progress bars may be unreliable.\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7002a26a8625455682af2d92b4f4f3b3", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + "Upload file pytorch_model.bin: 0%| | 3.36k/1.18G [00:00 main\n", + "\n", + "Dropping the following result as it does not have all the necessary fields:\n", + "{'dataset': {'name': 'common_voice', 'type': 'common_voice', 'args': 'hsb'}}\n", + "To https://huggingface.co/DrishtiSharma/wav2vec2-large-xls-r-300m-hsb-v1\n", + " d251b08..f6c95b2 main -> main\n", + "\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'https://huggingface.co/DrishtiSharma/wav2vec2-large-xls-r-300m-hsb-v1/commit/d251b08a878f40381635bc9d1653edb51bb97ad4'" + ] + }, + "metadata": {}, + 
"execution_count": 119 + } + ] + }, + { + "cell_type": "code", + "source": [ + "" + ], + "metadata": { + "id": "vMeJybu4aPpa" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file