{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "3238b837634a47db899e9151534dbde8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "VBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "VBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "VBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_017feac2ec2c414a9d9fed06a0236450",
              "IPY_MODEL_f33a070eaa7a461799076726260c2810",
              "IPY_MODEL_d6b917dc13614bb5b98e352ba7af0ded",
              "IPY_MODEL_ef71f921a16541b0b5e29f6c0cddd1da"
            ],
            "layout": "IPY_MODEL_f28d5164aedb452db1c9e649b564ca06"
          }
        },
        "b0e8e35e9046496aaa5a7d2b1edf1958": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_9a00148607964bc6ab902316c6a94b3d",
            "placeholder": "​",
            "style": "IPY_MODEL_59f55019601f4a898ec0f094686916ff",
            "value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
          }
        },
        "0892f90286544cc8ac9de26eda52b897": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "PasswordModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "PasswordModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "PasswordView",
            "continuous_update": true,
            "description": "Token:",
            "description_tooltip": null,
            "disabled": false,
            "layout": "IPY_MODEL_b0c6fbf0c0454b8db44050c207752cb4",
            "placeholder": "​",
            "style": "IPY_MODEL_4996420f16784d53b3bdc02bc36cbe9e",
            "value": ""
          }
        },
        "fdc7e5d873af4530bf7598665bb66517": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "CheckboxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "CheckboxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "CheckboxView",
            "description": "Add token as git credential?",
            "description_tooltip": null,
            "disabled": false,
            "indent": true,
            "layout": "IPY_MODEL_66c75939b38f4f4b907abd5699f7cee4",
            "style": "IPY_MODEL_40f69a6162954f55839021035bce8863",
            "value": true
          }
        },
        "dec50c0ecd64421aa55077bb56a034dc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ButtonModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ButtonModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ButtonView",
            "button_style": "",
            "description": "Login",
            "disabled": false,
            "icon": "",
            "layout": "IPY_MODEL_1d03693f32af4e81a252a3ccfc3ec8c3",
            "style": "IPY_MODEL_fd3e2731e3494babb7d9d73c4178954a",
            "tooltip": ""
          }
        },
        "a98b2edf76074df9bd3c7a25705af31a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_342c460a4df64397b6c754fb99262932",
            "placeholder": "​",
            "style": "IPY_MODEL_703cd77af78b47fc90e6ea357d67abf3",
            "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
          }
        },
        "f28d5164aedb452db1c9e649b564ca06": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": "center",
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": "flex",
            "flex": null,
            "flex_flow": "column",
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": "50%"
          }
        },
        "9a00148607964bc6ab902316c6a94b3d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "59f55019601f4a898ec0f094686916ff": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "b0c6fbf0c0454b8db44050c207752cb4": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "4996420f16784d53b3bdc02bc36cbe9e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "66c75939b38f4f4b907abd5699f7cee4": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "40f69a6162954f55839021035bce8863": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "1d03693f32af4e81a252a3ccfc3ec8c3": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "fd3e2731e3494babb7d9d73c4178954a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ButtonStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ButtonStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "button_color": null,
            "font_weight": ""
          }
        },
        "342c460a4df64397b6c754fb99262932": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "703cd77af78b47fc90e6ea357d67abf3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "d8ffc35a02894ca183d4ed89b8bd3626": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "LabelModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "LabelModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "LabelView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_4913579fe7344f358604de28627841d3",
            "placeholder": "​",
            "style": "IPY_MODEL_03478eb3a7414966b67e054d8f0b13ec",
            "value": "Connecting..."
          }
        },
        "4913579fe7344f358604de28627841d3": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "03478eb3a7414966b67e054d8f0b13ec": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "017feac2ec2c414a9d9fed06a0236450": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "LabelModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "LabelModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "LabelView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_2339c46035e54936a11239cda588df52",
            "placeholder": "​",
            "style": "IPY_MODEL_317307d87d924722b1cca58c7bfefb62",
            "value": "Token is valid (permission: read)."
          }
        },
        "f33a070eaa7a461799076726260c2810": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "LabelModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "LabelModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "LabelView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_6ad25282973c4066a1bee09122582eb4",
            "placeholder": "​",
            "style": "IPY_MODEL_a9cf45485257457f82d79198464e9ba2",
            "value": "Your token has been saved in your configured git credential helpers (store)."
          }
        },
        "d6b917dc13614bb5b98e352ba7af0ded": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "LabelModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "LabelModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "LabelView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_fb04e3f6fa7c4344b2187a8325f6b2cf",
            "placeholder": "​",
            "style": "IPY_MODEL_1b644e3182564dd5bef2627e93ff03a2",
            "value": "Your token has been saved to /root/.cache/huggingface/token"
          }
        },
        "ef71f921a16541b0b5e29f6c0cddd1da": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "LabelModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "LabelModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "LabelView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e09e10fcc0a04a0d956f61a980b30dd6",
            "placeholder": "​",
            "style": "IPY_MODEL_3a692ba3aaee44a19ae981ad2d0798ce",
            "value": "Login successful"
          }
        },
        "2339c46035e54936a11239cda588df52": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "317307d87d924722b1cca58c7bfefb62": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "6ad25282973c4066a1bee09122582eb4": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a9cf45485257457f82d79198464e9ba2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "fb04e3f6fa7c4344b2187a8325f6b2cf": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "1b644e3182564dd5bef2627e93ff03a2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "e09e10fcc0a04a0d956f61a980b30dd6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3a692ba3aaee44a19ae981ad2d0798ce": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "cce630e45be646b190c6999a28733168": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_48c4b23d0c244633adab4180f74ab773",
              "IPY_MODEL_cb33700a1e5c4a3a9cdd98884e48f5d6",
              "IPY_MODEL_bd0d37d5c6dd4a4e9997715376a773ef"
            ],
            "layout": "IPY_MODEL_0589ff01b9df477fbad5ea797e96a4f0"
          }
        },
        "48c4b23d0c244633adab4180f74ab773": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_038bd2fc97074a8aa75a6f92c964c0e7",
            "placeholder": "​",
            "style": "IPY_MODEL_8fa04fcf0bf44b2ab38555aa1c6dc162",
            "value": "tokenizer_config.json: 100%"
          }
        },
        "cb33700a1e5c4a3a9cdd98884e48f5d6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_7f282cf480d548a3992e1f7c97afe9db",
            "max": 79,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_acf1b501048d42828f9fa50214ec1e5f",
            "value": 79
          }
        },
        "bd0d37d5c6dd4a4e9997715376a773ef": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_04d406e9202544559bcde33ebd2f7062",
            "placeholder": "​",
            "style": "IPY_MODEL_7e3170a7e1464ffc8a2ffce8a86e8c22",
            "value": " 79.0/79.0 [00:00&lt;00:00, 3.06kB/s]"
          }
        },
        "0589ff01b9df477fbad5ea797e96a4f0": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "038bd2fc97074a8aa75a6f92c964c0e7": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "8fa04fcf0bf44b2ab38555aa1c6dc162": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "7f282cf480d548a3992e1f7c97afe9db": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "acf1b501048d42828f9fa50214ec1e5f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "04d406e9202544559bcde33ebd2f7062": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7e3170a7e1464ffc8a2ffce8a86e8c22": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "40007c7ac41d4fa7bd1a1dfc8b797a28": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_25dc2ea3b0f34450897566ba285a5ef5",
              "IPY_MODEL_75106df5712d4790af9c7f38e09689ff",
              "IPY_MODEL_a251a0150af44299a9fe01ce885ba237"
            ],
            "layout": "IPY_MODEL_61402404f69a4135a2d78d1876afeb7f"
          }
        },
        "25dc2ea3b0f34450897566ba285a5ef5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_093e227b15034d5c9f92592eb9b26e3e",
            "placeholder": "​",
            "style": "IPY_MODEL_616f5dc3758c459cb9dc3aeda0454602",
            "value": "config.json: 100%"
          }
        },
        "75106df5712d4790af9c7f38e09689ff": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e18f8bba554d4789aa126b14ea259a6e",
            "max": 571,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_e9d96aa69c9f4678b09c61d9d5fa1c4e",
            "value": 571
          }
        },
        "a251a0150af44299a9fe01ce885ba237": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_ed3d3d7ea5094206a614533bb7a52397",
            "placeholder": "​",
            "style": "IPY_MODEL_65be10d75afe45b6948b677cc7d92f89",
            "value": " 571/571 [00:00&lt;00:00, 6.15kB/s]"
          }
        },
        "61402404f69a4135a2d78d1876afeb7f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "093e227b15034d5c9f92592eb9b26e3e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "616f5dc3758c459cb9dc3aeda0454602": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "e18f8bba554d4789aa126b14ea259a6e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e9d96aa69c9f4678b09c61d9d5fa1c4e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "ed3d3d7ea5094206a614533bb7a52397": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "65be10d75afe45b6948b677cc7d92f89": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "4cdd68881f934afab33c0ec54db86e91": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_765dc3a4f874418db757ee837848155d",
              "IPY_MODEL_9521e2f37eee45f2aa641ddbc57c5635",
              "IPY_MODEL_4d6b1531658143bab74c8d8fcdf1962d"
            ],
            "layout": "IPY_MODEL_8b36b1d06ec44d6a8bec1a540547eea8"
          }
        },
        "765dc3a4f874418db757ee837848155d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_0a56916da88948f7a538a8f66b6d6430",
            "placeholder": "​",
            "style": "IPY_MODEL_e681565ca6754ef8b4f7fdfe2fb6a360",
            "value": "vocab.json: 100%"
          }
        },
        "9521e2f37eee45f2aa641ddbc57c5635": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_bebc95e489aa4036b6012637912b70b4",
            "max": 898822,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_0b7a9cf3b56c402583531ee7d1844a43",
            "value": 898822
          }
        },
        "4d6b1531658143bab74c8d8fcdf1962d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_5bf79491604245bea82d61edd4d13560",
            "placeholder": "​",
            "style": "IPY_MODEL_0cdbbed010b44a43a4d8e457585d2db9",
            "value": " 899k/899k [00:00&lt;00:00, 8.17MB/s]"
          }
        },
        "8b36b1d06ec44d6a8bec1a540547eea8": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0a56916da88948f7a538a8f66b6d6430": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e681565ca6754ef8b4f7fdfe2fb6a360": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "bebc95e489aa4036b6012637912b70b4": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0b7a9cf3b56c402583531ee7d1844a43": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "5bf79491604245bea82d61edd4d13560": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0cdbbed010b44a43a4d8e457585d2db9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "ead7ac25cabe4c919ac9ee9744eeecba": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_43d01cb496374b57ba7c1e426d9c029a",
              "IPY_MODEL_691e362ce7814878a8e35dd1a2fcd7e8",
              "IPY_MODEL_b2df15de3b5e4456ab95456fa4fdd857"
            ],
            "layout": "IPY_MODEL_34e97833f77a4cecaa4d46c992490afc"
          }
        },
        "43d01cb496374b57ba7c1e426d9c029a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_59142ab104424e8fbd6459b9a3a0bd1e",
            "placeholder": "​",
            "style": "IPY_MODEL_d69195cd3bfd4cfd88e7d521eb46f4fb",
            "value": "merges.txt: 100%"
          }
        },
        "691e362ce7814878a8e35dd1a2fcd7e8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_929c8bbed3da465c989349f22c93cdb8",
            "max": 456318,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_69049fc2d6e345e58023fbf4f4dc1d81",
            "value": 456318
          }
        },
        "b2df15de3b5e4456ab95456fa4fdd857": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_03125a0b697d4acebdb3068fcf3352c4",
            "placeholder": "​",
            "style": "IPY_MODEL_a964666eb9a2428d9cdc87bf9ad9ca27",
            "value": " 456k/456k [00:00&lt;00:00, 6.73MB/s]"
          }
        },
        "34e97833f77a4cecaa4d46c992490afc": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "59142ab104424e8fbd6459b9a3a0bd1e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d69195cd3bfd4cfd88e7d521eb46f4fb": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "929c8bbed3da465c989349f22c93cdb8": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "69049fc2d6e345e58023fbf4f4dc1d81": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "03125a0b697d4acebdb3068fcf3352c4": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a964666eb9a2428d9cdc87bf9ad9ca27": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "e01af6c7bc014394a7a1438c66be1f36": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_51ecf681ef614164b97e2ce057b6763e",
              "IPY_MODEL_8450b81bca3f41969564d7352c783d07",
              "IPY_MODEL_333e7ed0a5a24e289a2775315336e1f7"
            ],
            "layout": "IPY_MODEL_8044dbc3b7f04c888b1e3a547c4f6eac"
          }
        },
        "51ecf681ef614164b97e2ce057b6763e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_156b5a82fdb14777819a290c1f638c3c",
            "placeholder": "​",
            "style": "IPY_MODEL_c7c412609a104adcbc1a3fcee55a3205",
            "value": "special_tokens_map.json: 100%"
          }
        },
        "8450b81bca3f41969564d7352c783d07": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_59afc8dd721849e8a06b9959ad1bc0fe",
            "max": 772,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_28133bfb9e1846239979aad0b0b1a912",
            "value": 772
          }
        },
        "333e7ed0a5a24e289a2775315336e1f7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_593946596d1448f3b1a485bac9d583df",
            "placeholder": "​",
            "style": "IPY_MODEL_e91a2991b3bc4a4e90d3928780c4eb34",
            "value": " 772/772 [00:00&lt;00:00, 14.2kB/s]"
          }
        },
        "8044dbc3b7f04c888b1e3a547c4f6eac": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "156b5a82fdb14777819a290c1f638c3c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "c7c412609a104adcbc1a3fcee55a3205": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "59afc8dd721849e8a06b9959ad1bc0fe": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "28133bfb9e1846239979aad0b0b1a912": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "593946596d1448f3b1a485bac9d583df": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e91a2991b3bc4a4e90d3928780c4eb34": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "33ba4c1c6ba046b78c6c0e5ef987de2a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_3dd07c7fd25f4a0cac9c6d79bf312c5b",
              "IPY_MODEL_68d6df7a72b84174a216d6804a422c00",
              "IPY_MODEL_02857e625d664d748f0411dbd9307285"
            ],
            "layout": "IPY_MODEL_adfc7c2afddb4979bd6e30d450d4efb6"
          }
        },
        "3dd07c7fd25f4a0cac9c6d79bf312c5b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_cbf6d52e02bb47c8a4c73208c8cf658a",
            "placeholder": "​",
            "style": "IPY_MODEL_20b683d9910440f882ca10ba0837c545",
            "value": "Map: 100%"
          }
        },
        "68d6df7a72b84174a216d6804a422c00": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_a39c78323e05487aa6c94e3c67b1d136",
            "max": 2501,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_27ee050eee1f479b8cd328158e52e497",
            "value": 2501
          }
        },
        "02857e625d664d748f0411dbd9307285": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_50478538cf9947c5a760c538eaeb6f46",
            "placeholder": "​",
            "style": "IPY_MODEL_735906f0d31049e4af3fae4f2462cc05",
            "value": " 2501/2501 [00:10&lt;00:00, 260.48 examples/s]"
          }
        },
        "adfc7c2afddb4979bd6e30d450d4efb6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "cbf6d52e02bb47c8a4c73208c8cf658a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "20b683d9910440f882ca10ba0837c545": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "a39c78323e05487aa6c94e3c67b1d136": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "27ee050eee1f479b8cd328158e52e497": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "50478538cf9947c5a760c538eaeb6f46": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "735906f0d31049e4af3fae4f2462cc05": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "## Data Collection 🛠\n",
        "\n",
        "The subjQA dataset is constructed based on publicly available review datasets. Specifically, the movies, books, electronics, and grocery categories are constructed using reviews from the Amazon Review dataset. The TripAdvisor category, as the name suggests, is constructed using reviews from TripAdvisor which can be found [here](link). Finally, the restaurants category is constructed using the Yelp Dataset which is also publicly available.\n",
        "\n",
        "The process of constructing SubjQA is discussed in detail in our paper. In a nutshell, the dataset construction consists of the following steps:\n",
        "\n",
        "1. First, all opinions expressed in reviews are extracted. In the pipeline, each opinion is modeled as a (modifier, aspect) pair which is a pair of spans where the former describes the latter. *(e.g., \"good, hotel\", and \"terrible, acting\" are a few examples of extracted opinions)*.\n",
        "2. Using Matrix Factorization techniques, implication relationships between different expressed opinions are mined. For instance, the system mines that \"responsive keys\" implies \"good keyboard\". In our pipeline, we refer to the conclusion of an implication (i.e., \"good keyboard\" in this example) as the query opinion, and we refer to the premise (i.e., \"responsive keys\") as its neighboring opinion.\n",
        "3. Annotators are then asked to write a question based on query opinions. For instance, given \"good keyboard\" as the query opinion, they might write \"Is this keyboard any good?\"\n",
        "4. Each question written based on a query opinion is then paired with a review that mentions its neighboring opinion. In our example, that would be a review that mentions \"responsive keys\".\n",
        "5. The question and review pairs are presented to annotators to select the correct answer span, and rate the subjectivity level of the question as well as the subjectivity level of the highlighted answer span."
      ],
      "metadata": {
        "id": "r32wNE8C5FeQ"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Data Format 📊\n",
        "\n",
        "All files are in standard CSV format, and they consist of the following columns:\n",
        "\n",
        "- **domain**: The category/domain of the review (e.g., hotels, books, ...).\n",
        "- **question**: The question (written based on a query opinion).\n",
        "- **review**: The review (that mentions the neighboring opinion).\n",
        "- **human_ans_spans**: The span labeled by annotators as the answer.\n",
        "- **human_ans_indices**: The (character-level) start and end indices of the answer span highlighted by annotators.\n",
        "- **question_subj_level**: The subjectivity level of the question (on a 1 to 5 scale with 1 being the most subjective).\n",
        "- **ques_subj_score**: The subjectivity score of the question computed using the TextBlob package.\n",
        "- **is_ques_subjective**: A boolean subjectivity label derived from question_subj_level (i.e., scores below 4 are considered as subjective).\n",
        "- **answer_subj_level**: The subjectivity level of the answer span (on a 1 to 5 scale with 5 being the most subjective).\n",
        "- **ans_subj_score**: The subjectivity score of the answer span computed using the TextBlob package.\n",
        "- **is_ans_subjective**: A boolean subjectivity label derived from answer_subj_level (i.e., scores below 4 are considered as subjective).\n",
        "- **nn_mod**: The modifier of the neighboring opinion (which appears in the review).\n",
        "- **nn_asp**: The aspect of the neighboring opinion (which appears in the review).\n",
        "- **query_mod**: The modifier of the query opinion (around which a question is manually written).\n",
        "- **query_asp**: The aspect of the query opinion (around which a question is manually written).\n",
        "- **item_id**: The id of the item/business discussed in the review.\n",
        "- **review_id**: A unique id associated with the review.\n",
        "- **q_review_id**: A unique id assigned to the question-review pair.\n",
        "- **q_reviews_id**: A unique id assigned to all question-review pairs with a shared question."
      ],
      "metadata": {
        "id": "GM783wxV4XQa"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "### Citation\n",
        "Johannes Bjerva, Nikita Bhutani, Behzad Golahn, Wang-Chiew Tan, and Isabelle Augenstein. (2020). SubjQA: A Dataset for Subjectivity and Review Comprehension. In *Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing*. Association for Computational Linguistics."
      ],
      "metadata": {
        "id": "1-kEwC-R5a14"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from google.colab import userdata\n",
        "userdata.get('HuggingFace')\n",
        "\n",
        "# Retrieve secret name\n",
        "secret_name = userdata.get('HuggingFace')\n",
        "\n",
        "# Set up Git configuration\n",
        "!git config --global user.email \"kagantimur@icloud.com\"\n",
        "!git config --global user.name \"kgntmr\"\n",
        "\n",
        "# Log in to the Hugging Face Hub\n",
        "!huggingface-cli login"
      ],
      "metadata": {
        "id": "f5HXB1xKmjIE"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from huggingface_hub import notebook_login\n",
        "\n",
        "notebook_login()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 145,
          "referenced_widgets": [
            "3238b837634a47db899e9151534dbde8",
            "b0e8e35e9046496aaa5a7d2b1edf1958",
            "0892f90286544cc8ac9de26eda52b897",
            "fdc7e5d873af4530bf7598665bb66517",
            "dec50c0ecd64421aa55077bb56a034dc",
            "a98b2edf76074df9bd3c7a25705af31a",
            "f28d5164aedb452db1c9e649b564ca06",
            "9a00148607964bc6ab902316c6a94b3d",
            "59f55019601f4a898ec0f094686916ff",
            "b0c6fbf0c0454b8db44050c207752cb4",
            "4996420f16784d53b3bdc02bc36cbe9e",
            "66c75939b38f4f4b907abd5699f7cee4",
            "40f69a6162954f55839021035bce8863",
            "1d03693f32af4e81a252a3ccfc3ec8c3",
            "fd3e2731e3494babb7d9d73c4178954a",
            "342c460a4df64397b6c754fb99262932",
            "703cd77af78b47fc90e6ea357d67abf3",
            "d8ffc35a02894ca183d4ed89b8bd3626",
            "4913579fe7344f358604de28627841d3",
            "03478eb3a7414966b67e054d8f0b13ec",
            "017feac2ec2c414a9d9fed06a0236450",
            "f33a070eaa7a461799076726260c2810",
            "d6b917dc13614bb5b98e352ba7af0ded",
            "ef71f921a16541b0b5e29f6c0cddd1da",
            "2339c46035e54936a11239cda588df52",
            "317307d87d924722b1cca58c7bfefb62",
            "6ad25282973c4066a1bee09122582eb4",
            "a9cf45485257457f82d79198464e9ba2",
            "fb04e3f6fa7c4344b2187a8325f6b2cf",
            "1b644e3182564dd5bef2627e93ff03a2",
            "e09e10fcc0a04a0d956f61a980b30dd6",
            "3a692ba3aaee44a19ae981ad2d0798ce"
          ]
        },
        "id": "yaTEJggLuZaK",
        "outputId": "7c00023e-a2c1-4579-d095-efbf04f3e266"
      },
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "3238b837634a47db899e9151534dbde8"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from datasets import load_dataset\n",
        "import datasets\n",
        "from transformers import AutoTokenizer"
      ],
      "metadata": {
        "id": "kZvfdiRsuhs2"
      },
      "execution_count": 15,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "model = \"deepset/roberta-base-squad2\"\n",
        "tokenizer = AutoTokenizer.from_pretrained(model)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 304,
          "referenced_widgets": [
            "cce630e45be646b190c6999a28733168",
            "48c4b23d0c244633adab4180f74ab773",
            "cb33700a1e5c4a3a9cdd98884e48f5d6",
            "bd0d37d5c6dd4a4e9997715376a773ef",
            "0589ff01b9df477fbad5ea797e96a4f0",
            "038bd2fc97074a8aa75a6f92c964c0e7",
            "8fa04fcf0bf44b2ab38555aa1c6dc162",
            "7f282cf480d548a3992e1f7c97afe9db",
            "acf1b501048d42828f9fa50214ec1e5f",
            "04d406e9202544559bcde33ebd2f7062",
            "7e3170a7e1464ffc8a2ffce8a86e8c22",
            "40007c7ac41d4fa7bd1a1dfc8b797a28",
            "25dc2ea3b0f34450897566ba285a5ef5",
            "75106df5712d4790af9c7f38e09689ff",
            "a251a0150af44299a9fe01ce885ba237",
            "61402404f69a4135a2d78d1876afeb7f",
            "093e227b15034d5c9f92592eb9b26e3e",
            "616f5dc3758c459cb9dc3aeda0454602",
            "e18f8bba554d4789aa126b14ea259a6e",
            "e9d96aa69c9f4678b09c61d9d5fa1c4e",
            "ed3d3d7ea5094206a614533bb7a52397",
            "65be10d75afe45b6948b677cc7d92f89",
            "4cdd68881f934afab33c0ec54db86e91",
            "765dc3a4f874418db757ee837848155d",
            "9521e2f37eee45f2aa641ddbc57c5635",
            "4d6b1531658143bab74c8d8fcdf1962d",
            "8b36b1d06ec44d6a8bec1a540547eea8",
            "0a56916da88948f7a538a8f66b6d6430",
            "e681565ca6754ef8b4f7fdfe2fb6a360",
            "bebc95e489aa4036b6012637912b70b4",
            "0b7a9cf3b56c402583531ee7d1844a43",
            "5bf79491604245bea82d61edd4d13560",
            "0cdbbed010b44a43a4d8e457585d2db9",
            "ead7ac25cabe4c919ac9ee9744eeecba",
            "43d01cb496374b57ba7c1e426d9c029a",
            "691e362ce7814878a8e35dd1a2fcd7e8",
            "b2df15de3b5e4456ab95456fa4fdd857",
            "34e97833f77a4cecaa4d46c992490afc",
            "59142ab104424e8fbd6459b9a3a0bd1e",
            "d69195cd3bfd4cfd88e7d521eb46f4fb",
            "929c8bbed3da465c989349f22c93cdb8",
            "69049fc2d6e345e58023fbf4f4dc1d81",
            "03125a0b697d4acebdb3068fcf3352c4",
            "a964666eb9a2428d9cdc87bf9ad9ca27",
            "e01af6c7bc014394a7a1438c66be1f36",
            "51ecf681ef614164b97e2ce057b6763e",
            "8450b81bca3f41969564d7352c783d07",
            "333e7ed0a5a24e289a2775315336e1f7",
            "8044dbc3b7f04c888b1e3a547c4f6eac",
            "156b5a82fdb14777819a290c1f638c3c",
            "c7c412609a104adcbc1a3fcee55a3205",
            "59afc8dd721849e8a06b9959ad1bc0fe",
            "28133bfb9e1846239979aad0b0b1a912",
            "593946596d1448f3b1a485bac9d583df",
            "e91a2991b3bc4a4e90d3928780c4eb34"
          ]
        },
        "id": "MQqXE-tPu_uG",
        "outputId": "63ea7de8-78b1-4f57-b020-00249c1ea864"
      },
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
            "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
            "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
            "You will be able to reuse this secret in all of your notebooks.\n",
            "Please note that authentication is recommended but still optional to access public models or datasets.\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "tokenizer_config.json:   0%|          | 0.00/79.0 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "cce630e45be646b190c6999a28733168"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "40007c7ac41d4fa7bd1a1dfc8b797a28"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "4cdd68881f934afab33c0ec54db86e91"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "ead7ac25cabe4c919ac9ee9744eeecba"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "e01af6c7bc014394a7a1438c66be1f36"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Is it a fast tokenizer or not?\n",
        "# A fast tokenizer is optimized for speed and efficiency in tokenizing text\n",
        "# Often implement faster processing, useful for large-scale NLP tasks.\n",
        "tokenizer.is_fast"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "i2ezr5hGvIvu",
        "outputId": "6a07c728-499e-4543-c998-06ac1a54e7d8"
      },
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 17
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# # Define the maximum length and stride parameters for tokenization\n",
        "# max_length = 384 # 384 is commonly used as it is sufficient to cover a significant portion of most input\n",
        "# stride = 128 # 128 is often used as it provides a good balance between capturing context and avoiding redundancy\n",
        "\n",
        "# # Define a function to preprocess training samples\n",
        "# def preprocess_training_samples(samples):\n",
        "#     # Extract questions from the samples dictionary and strip whitespace\n",
        "#     questions = [q.strip() for q in samples[\"question\"]]\n",
        "\n",
        "#     # Tokenize questions and contexts using the tokenizer\n",
        "#     inputs = tokenizer(\n",
        "#         questions,\n",
        "#         samples[\"context\"],\n",
        "#         max_length=max_length,\n",
        "#         truncation=\"only_second\",\n",
        "#         stride=stride,\n",
        "#         return_overflowing_tokens=True,\n",
        "#         return_offsets_mapping=True,\n",
        "#         padding=\"max_length\",\n",
        "#     )\n",
        "\n",
        "#     # Extract offset_mapping, sample_map, and answers from the tokenized inputs\n",
        "#     offset_mapping = inputs.pop(\"offset_mapping\")\n",
        "#     sample_map = inputs.pop(\"overflow_to_sample_mapping\")\n",
        "#     answers = samples[\"answers\"]\n",
        "\n",
        "#     # Initialize lists to store start and end positions of answers\n",
        "#     start_positions = []\n",
        "#     end_positions = []\n",
        "\n",
        "#     # Iterate over the offset_mapping to process each tokenized input\n",
        "#     for i, offset in enumerate(offset_mapping):\n",
        "#         # Get the sample index for the current tokenized input\n",
        "#         sample_idx = sample_map[i]\n",
        "\n",
        "#         # Get the answer text and start position from the answers dictionary\n",
        "#         answer = answers[sample_idx]\n",
        "#         start_char = answer[\"answer_start\"][0]\n",
        "#         end_char = answer[\"answer_start\"][0] + len(answer[\"text\"][0])\n",
        "\n",
        "#         # Get the sequence_ids to identify the start and end of the context\n",
        "#         sequence_ids = inputs.sequence_ids(i)\n",
        "\n",
        "#         # Find the start and end token positions of the context\n",
        "#         idx = 0\n",
        "#         while sequence_ids[idx] != 1:\n",
        "#             idx += 1\n",
        "#         context_start = idx\n",
        "#         while sequence_ids[idx] == 1:\n",
        "#             idx += 1\n",
        "#         context_end = idx - 1\n",
        "\n",
        "#         # If the answer is fully contained within the context, find its token positions\n",
        "#         if offset[context_start][0] > start_char or offset[context_end][1] < end_char:\n",
        "#             start_positions.append(0)\n",
        "#             end_positions.append(0)\n",
        "#         else:\n",
        "#             idx = context_start\n",
        "#             while idx <= context_end and offset[idx][0] <= start_char:\n",
        "#                 idx += 1\n",
        "#             start_positions.append(idx - 1)\n",
        "\n",
        "#             idx = context_end\n",
        "#             while idx >= context_start and offset[idx][1] >= end_char:\n",
        "#                 idx -= 1\n",
        "#             end_positions.append(idx + 1)\n",
        "\n",
        "#     # Add start and end positions to the inputs dictionary\n",
        "#     inputs[\"start_positions\"] = start_positions\n",
        "#     inputs[\"end_positions\"] = end_positions\n",
        "\n",
        "#     # Return the modified inputs dictionary\n",
        "#     return inputs"
      ],
      "metadata": {
        "id": "WqjndZe6vKwB"
      },
      "execution_count": 19,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "df_train=pd.read_csv('/content/drive/MyDrive/subjqa-train.csv')\n",
        "df_test=pd.read_csv('/content/drive/MyDrive/subjqa-test.csv')"
      ],
      "metadata": {
        "id": "O3WTknCj1qru"
      },
      "execution_count": 22,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Define the maximum length and stride parameters for tokenization\n",
        "max_length = 384  # Maximum length of tokenized sequences, commonly used for a balance between context and memory usage\n",
        "stride = 128  # Stride determines overlap between tokenized sequences, providing context while avoiding redundancy\n",
        "\n",
        "# Define a function to preprocess training samples\n",
        "def preprocess_training_samples(samples, batch_size=32):\n",
        "    # Get the total number of samples\n",
        "    num_samples = len(samples[\"question\"])\n",
        "    processed_samples = []\n",
        "\n",
        "    # Process samples in batches to optimize memory usage\n",
        "    for i in range(0, num_samples, batch_size):\n",
        "        # Extract questions, contexts, and answers for the current batch\n",
        "        batch_questions = [q.strip() for q in samples[\"question\"][i:i+batch_size]]\n",
        "        batch_contexts = samples[\"context\"][i:i+batch_size]\n",
        "        batch_answers = samples[\"answers\"][i:i+batch_size]\n",
        "\n",
        "        # Tokenize questions and contexts using the tokenizer\n",
        "        inputs = tokenizer(\n",
        "            batch_questions,\n",
        "            batch_contexts,\n",
        "            max_length=max_length,\n",
        "            truncation=\"only_second\",\n",
        "            stride=stride,\n",
        "            return_overflowing_tokens=True,\n",
        "            return_offsets_mapping=True,\n",
        "            padding=\"max_length\",\n",
        "        )\n",
        "\n",
        "        # Extract offset_mapping and sample_map from tokenized inputs\n",
        "        offset_mapping = inputs.pop(\"offset_mapping\")\n",
        "        sample_map = inputs.pop(\"overflow_to_sample_mapping\")\n",
        "\n",
        "        # Process each tokenized input in the batch\n",
        "        for j, offset in enumerate(offset_mapping):\n",
        "            # Get the sample index for the current tokenized input\n",
        "            sample_idx = sample_map[j]\n",
        "            answer = batch_answers[j]\n",
        "            start_char = answer[\"answer_start\"][0]\n",
        "            end_char = start_char + len(answer[\"text\"][0])\n",
        "            sequence_ids = inputs.sequence_ids(j)\n",
        "\n",
        "            # Find the start and end token positions of the context\n",
        "            context_start = next(idx for idx, seq_id in enumerate(sequence_ids) if seq_id == 1)\n",
        "            context_end = next(idx for idx, seq_id in enumerate(sequence_ids[::-1]) if seq_id == 1)\n",
        "\n",
        "            # Calculate start and end positions of the answer within the tokenized sequence\n",
        "            start_position = max(0, context_start - 1)\n",
        "            end_position = min(len(offset), context_end + 1)\n",
        "\n",
        "            # Initialize lists to store start and end positions of answers\n",
        "            start_positions = [0] * len(offset)\n",
        "            end_positions = [0] * len(offset)\n",
        "\n",
        "            # Set start and end positions if answer is fully contained within the context\n",
        "            if offset[start_position][0] <= start_char and offset[end_position][1] >= end_char:\n",
        "                start_positions[start_position] = 1\n",
        "                end_positions[end_position] = 1\n",
        "\n",
        "            # Add tokenized input and corresponding start/end positions to processed samples\n",
        "            processed_samples.append({\n",
        "                \"inputs\": inputs,\n",
        "                \"start_positions\": start_positions,\n",
        "                \"end_positions\": end_positions\n",
        "            })\n",
        "\n",
        "    return processed_samples"
      ],
      "metadata": {
        "id": "eisYCm-TvyL1"
      },
      "execution_count": 20,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "df_train.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 608
        },
        "id": "HzhybUfc1BxM",
        "outputId": "74d0ebc6-d59e-4ac8-a7d0-febb2080bf1b"
      },
      "execution_count": 23,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "      item_id  domain         nn_mod             nn_asp  query_mod  \\\n",
              "0  B00BVMXBDO  movies      addictive               show       full   \n",
              "1  1404918051  movies  enough simple               film   charming   \n",
              "2  B0000633ZP  movies           weak               plot        bad   \n",
              "3  B0000AQS0F  movies    outstanding               show  wonderful   \n",
              "4  B003Y5H5FG  movies          great  production design      great   \n",
              "\n",
              "        query_asp                       q_review_id  \\\n",
              "0          series  d9a9615d45df2f6e6108db4ca46bfded   \n",
              "1           movie  06ffe37a8023636a3ce00b020a517e87   \n",
              "2             one  3b625c68e91b9e6987a08b84a9a9d234   \n",
              "3          series  f3abfa98b011127e7cb49bcd07f8deeb   \n",
              "4  costume design  1b03744e764b257592c2c768345c14bc   \n",
              "\n",
              "                       q_reviews_id  \\\n",
              "0  399f1046fe6bd97990107f9d7aa86f4a   \n",
              "1  42d9dd5b0c67150cac1e13308811cbb5   \n",
              "2  32d06ccf2132cda644aea791fa688c53   \n",
              "3  e546636f0bb9f93d5f24b4ade9ebab45   \n",
              "4  a0a97e460a194bcb3286fe68d20aadc2   \n",
              "\n",
              "                                         question  question_subj_level  \\\n",
              "0               Who is the author of this series?                    1   \n",
              "1  Can we enjoy the movie along with our family ?                    1   \n",
              "2                             Does this one good?                    5   \n",
              "3               Is this series good and excelent?                    1   \n",
              "4                      How is the costume design?                    1   \n",
              "\n",
              "   ques_subj_score  is_ques_subjective                         review_id  \\\n",
              "0              0.0               False  090671369dddfeb02db9bf7125a47c79   \n",
              "1              0.5               False  a29821121e74d319cb93f77101e99c88   \n",
              "2              0.6                True  12a1b821f761bd19a75be7b16cef4a7c   \n",
              "3              0.6                True  cd0f92322e67cc9d70de6674caace78c   \n",
              "4              0.0               False  f6b5024393ebc70287befdaf47a50b75   \n",
              "\n",
              "                                              review  \\\n",
              "0  Whether it be in her portrayal of a nerdy lesb...   \n",
              "1  An outstanding romantic comedy, 13 Going on 30...   \n",
              "2  To let the truth be known, I watched this movi...   \n",
              "3  At the time of my review, there had been 910 c...   \n",
              "4  \"Fright Night\" is great! This is how the story...   \n",
              "\n",
              "                                 human_ans_spans human_ans_indices  \\\n",
              "0                                 ANSWERNOTFOUND        (251, 265)   \n",
              "1                                 ANSWERNOTFOUND      (1195, 1209)   \n",
              "2                                 ANSWERNOTFOUND      (1476, 1490)   \n",
              "3                       this show is OUTSTANDING        (296, 320)   \n",
              "4  The costume design by Susan Matheson is great      (1254, 1299)   \n",
              "\n",
              "   answer_subj_level  ans_subj_score  is_ans_subjective  \n",
              "0                  1           0.000              False  \n",
              "1                  1           0.000              False  \n",
              "2                  5           0.000              False  \n",
              "3                  1           0.875               True  \n",
              "4                  1           0.750               True  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-7c4de625-0638-44a8-b8f2-755b0a20e0e1\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>item_id</th>\n",
              "      <th>domain</th>\n",
              "      <th>nn_mod</th>\n",
              "      <th>nn_asp</th>\n",
              "      <th>query_mod</th>\n",
              "      <th>query_asp</th>\n",
              "      <th>q_review_id</th>\n",
              "      <th>q_reviews_id</th>\n",
              "      <th>question</th>\n",
              "      <th>question_subj_level</th>\n",
              "      <th>ques_subj_score</th>\n",
              "      <th>is_ques_subjective</th>\n",
              "      <th>review_id</th>\n",
              "      <th>review</th>\n",
              "      <th>human_ans_spans</th>\n",
              "      <th>human_ans_indices</th>\n",
              "      <th>answer_subj_level</th>\n",
              "      <th>ans_subj_score</th>\n",
              "      <th>is_ans_subjective</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>B00BVMXBDO</td>\n",
              "      <td>movies</td>\n",
              "      <td>addictive</td>\n",
              "      <td>show</td>\n",
              "      <td>full</td>\n",
              "      <td>series</td>\n",
              "      <td>d9a9615d45df2f6e6108db4ca46bfded</td>\n",
              "      <td>399f1046fe6bd97990107f9d7aa86f4a</td>\n",
              "      <td>Who is the author of this series?</td>\n",
              "      <td>1</td>\n",
              "      <td>0.0</td>\n",
              "      <td>False</td>\n",
              "      <td>090671369dddfeb02db9bf7125a47c79</td>\n",
              "      <td>Whether it be in her portrayal of a nerdy lesb...</td>\n",
              "      <td>ANSWERNOTFOUND</td>\n",
              "      <td>(251, 265)</td>\n",
              "      <td>1</td>\n",
              "      <td>0.000</td>\n",
              "      <td>False</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1404918051</td>\n",
              "      <td>movies</td>\n",
              "      <td>enough simple</td>\n",
              "      <td>film</td>\n",
              "      <td>charming</td>\n",
              "      <td>movie</td>\n",
              "      <td>06ffe37a8023636a3ce00b020a517e87</td>\n",
              "      <td>42d9dd5b0c67150cac1e13308811cbb5</td>\n",
              "      <td>Can we enjoy the movie along with our family ?</td>\n",
              "      <td>1</td>\n",
              "      <td>0.5</td>\n",
              "      <td>False</td>\n",
              "      <td>a29821121e74d319cb93f77101e99c88</td>\n",
              "      <td>An outstanding romantic comedy, 13 Going on 30...</td>\n",
              "      <td>ANSWERNOTFOUND</td>\n",
              "      <td>(1195, 1209)</td>\n",
              "      <td>1</td>\n",
              "      <td>0.000</td>\n",
              "      <td>False</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>B0000633ZP</td>\n",
              "      <td>movies</td>\n",
              "      <td>weak</td>\n",
              "      <td>plot</td>\n",
              "      <td>bad</td>\n",
              "      <td>one</td>\n",
              "      <td>3b625c68e91b9e6987a08b84a9a9d234</td>\n",
              "      <td>32d06ccf2132cda644aea791fa688c53</td>\n",
              "      <td>Does this one good?</td>\n",
              "      <td>5</td>\n",
              "      <td>0.6</td>\n",
              "      <td>True</td>\n",
              "      <td>12a1b821f761bd19a75be7b16cef4a7c</td>\n",
              "      <td>To let the truth be known, I watched this movi...</td>\n",
              "      <td>ANSWERNOTFOUND</td>\n",
              "      <td>(1476, 1490)</td>\n",
              "      <td>5</td>\n",
              "      <td>0.000</td>\n",
              "      <td>False</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>B0000AQS0F</td>\n",
              "      <td>movies</td>\n",
              "      <td>outstanding</td>\n",
              "      <td>show</td>\n",
              "      <td>wonderful</td>\n",
              "      <td>series</td>\n",
              "      <td>f3abfa98b011127e7cb49bcd07f8deeb</td>\n",
              "      <td>e546636f0bb9f93d5f24b4ade9ebab45</td>\n",
              "      <td>Is this series good and excelent?</td>\n",
              "      <td>1</td>\n",
              "      <td>0.6</td>\n",
              "      <td>True</td>\n",
              "      <td>cd0f92322e67cc9d70de6674caace78c</td>\n",
              "      <td>At the time of my review, there had been 910 c...</td>\n",
              "      <td>this show is OUTSTANDING</td>\n",
              "      <td>(296, 320)</td>\n",
              "      <td>1</td>\n",
              "      <td>0.875</td>\n",
              "      <td>True</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>B003Y5H5FG</td>\n",
              "      <td>movies</td>\n",
              "      <td>great</td>\n",
              "      <td>production design</td>\n",
              "      <td>great</td>\n",
              "      <td>costume design</td>\n",
              "      <td>1b03744e764b257592c2c768345c14bc</td>\n",
              "      <td>a0a97e460a194bcb3286fe68d20aadc2</td>\n",
              "      <td>How is the costume design?</td>\n",
              "      <td>1</td>\n",
              "      <td>0.0</td>\n",
              "      <td>False</td>\n",
              "      <td>f6b5024393ebc70287befdaf47a50b75</td>\n",
              "      <td>\"Fright Night\" is great! This is how the story...</td>\n",
              "      <td>The costume design by Susan Matheson is great</td>\n",
              "      <td>(1254, 1299)</td>\n",
              "      <td>1</td>\n",
              "      <td>0.750</td>\n",
              "      <td>True</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-7c4de625-0638-44a8-b8f2-755b0a20e0e1')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-7c4de625-0638-44a8-b8f2-755b0a20e0e1 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-7c4de625-0638-44a8-b8f2-755b0a20e0e1');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-45e29ce9-3f4e-4fcd-b916-993b6e2d8b3b\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-45e29ce9-3f4e-4fcd-b916-993b6e2d8b3b')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-45e29ce9-3f4e-4fcd-b916-993b6e2d8b3b button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "df_train",
              "summary": "{\n  \"name\": \"df_train\",\n  \"rows\": 2501,\n  \"fields\": [\n    {\n      \"column\": \"item_id\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 503,\n        \"samples\": [\n          \"B00005JNTI\",\n          \"B000EQ5PUA\",\n          \"B0000BWVCJ\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"domain\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 1,\n        \"samples\": [\n          \"movies\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"nn_mod\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 277,\n        \"samples\": [\n          \"watchable\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"nn_asp\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 188,\n        \"samples\": [\n          \"emotion\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"query_mod\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 314,\n        \"samples\": [\n          \"believable\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"query_asp\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 181,\n        \"samples\": [\n          \"thing\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"q_review_id\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 1369,\n        \"samples\": [\n          \"8563d7cf4c4f5432ec5cd62d06d436c1\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"q_reviews_id\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 1265,\n        \"samples\": [\n          \"c8c5527688a2acd208ba8d62e2dad23b\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"question\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 798,\n        \"samples\": [\n          \"How can I read the set story at this hotel?\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"question_subj_level\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 1,\n        \"min\": 1,\n        \"max\": 5,\n        \"num_unique_values\": 5,\n        \"samples\": [\n          5\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"ques_subj_score\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.29416550579019396,\n        \"min\": 0.0,\n        \"max\": 1.0,\n        \"num_unique_values\": 50,\n        \"samples\": [\n          0.1\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"is_ques_subjective\",\n      \"properties\": {\n        \"dtype\": \"boolean\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          true\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"review_id\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 1330,\n        \"samples\": [\n          \"24ad2780d2984f734476f57a5d412537\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"review\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 1330,\n        \"samples\": [\n          \"An outstanding animated romantic comedy, Shrek, brings to the screen the love story between, an ogre (Shrek), and a beautiful princess (Fiona), with all the ups and downs that that entails!  In addition, the couple finds itself in the company of adorable characters from classic fairy tales, with the cherry on the cake being the hilarious talking donkey.It is a film about human relations, hope and second chances, but most importantly about trust, love, and inner strength.Mike Myers, Eddie Murphy, Cameron Diaz, and the rest of the cast, have truly outdone themselves with their performances, which are exceptional to say the least!  All the actors, without exceptions, give it their 100% and it really shows (the animation does come ALIVE)! Very well written and very well presented, the movie is without a doubt guaranteed to provide more than just a few laughs.  The film is simple enough, but does a great job of describing people's every day lives and the problems they face.  It just goes to show that simplicity is often far better than complexity, when trying to present issues of a human nature.The setting, the plot, the dialogues, the HUMOR (!!!) and the music are all wonderful!In short, Shrek is a movie definitely worth watching and one to seriously consider adding to your movie collection!  Strongly recommended along with Shrek 2. ANSWERNOTFOUND\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"human_ans_spans\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 906,\n        \"samples\": [\n          \"my review of Christopher Nolan 's \\\" Memento \\\"\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"human_ans_indices\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 1744,\n        \"samples\": [\n          \"(985, 999)\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"answer_subj_level\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 1,\n        \"min\": 1,\n        \"max\": 5,\n        \"num_unique_values\": 5,\n        \"samples\": [\n          5\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"ans_subj_score\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.33359523274248176,\n        \"min\": 0.0,\n        \"max\": 1.0,\n        \"num_unique_values\": 148,\n        \"samples\": [\n          0.6749999999999999\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"is_ans_subjective\",\n      \"properties\": {\n        \"dtype\": \"boolean\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          true\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
            }
          },
          "metadata": {},
          "execution_count": 23
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [],
      "metadata": {
        "id": "GMK9xMMf4V3p"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "df_train.info"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 221
        },
        "id": "PRe8FNYw133V",
        "outputId": "75d5d576-55fd-48e0-d87a-0338ff0e41aa"
      },
      "execution_count": 29,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<bound method DataFrame.info of          item_id  domain         nn_mod             nn_asp  query_mod  \\\n",
              "0     B00BVMXBDO  movies      addictive               show       full   \n",
              "1     1404918051  movies  enough simple               film   charming   \n",
              "2     B0000633ZP  movies           weak               plot        bad   \n",
              "3     B0000AQS0F  movies    outstanding               show  wonderful   \n",
              "4     B003Y5H5FG  movies          great  production design      great   \n",
              "...          ...     ...            ...                ...        ...   \n",
              "2496  B00FZM8Z7I  movies        awesome         soundtrack       more   \n",
              "2497  B00005JNTI  movies           good             moment      great   \n",
              "2498  0790731487  movies            few              laugh      funny   \n",
              "2499  630575067X  movies      excellent              story       fine   \n",
              "2500  B001GCUNYO  movies           good    action sequence      great   \n",
              "\n",
              "           query_asp                       q_review_id  \\\n",
              "0             series  d9a9615d45df2f6e6108db4ca46bfded   \n",
              "1              movie  06ffe37a8023636a3ce00b020a517e87   \n",
              "2                one  3b625c68e91b9e6987a08b84a9a9d234   \n",
              "3             series  f3abfa98b011127e7cb49bcd07f8deeb   \n",
              "4     costume design  1b03744e764b257592c2c768345c14bc   \n",
              "...              ...                               ...   \n",
              "2496         respect  41b28a42a25d9bbf466bee35db18fda8   \n",
              "2497       potential  4aad1be24631680e40b174f2cba620d9   \n",
              "2498             bit  a4d1520852e799ab1b0830956fcead5b   \n",
              "2499           story  59036d77f1a9e034ec1c2ba46aeede9b   \n",
              "2500           stunt  d0dcbca6dc554306896943f606f3782b   \n",
              "\n",
              "                          q_reviews_id  \\\n",
              "0     399f1046fe6bd97990107f9d7aa86f4a   \n",
              "1     42d9dd5b0c67150cac1e13308811cbb5   \n",
              "2     32d06ccf2132cda644aea791fa688c53   \n",
              "3     e546636f0bb9f93d5f24b4ade9ebab45   \n",
              "4     a0a97e460a194bcb3286fe68d20aadc2   \n",
              "...                                ...   \n",
              "2496  0b61aca21a7a7a7330281087e837ab65   \n",
              "2497  27a4780e3330a6f47e71ce952d7bfe76   \n",
              "2498  1cc8f6187a6cc2e3a264538479ed42d2   \n",
              "2499  9fc9edc022f6eba0a00ed228636c8d84   \n",
              "2500  2b58d9a0052d92ea60fd7a9366644b23   \n",
              "\n",
              "                                            question  question_subj_level  \\\n",
              "0                  Who is the author of this series?                    1   \n",
              "1     Can we enjoy the movie along with our family ?                    1   \n",
              "2                                Does this one good?                    5   \n",
              "3                  Is this series good and excelent?                    1   \n",
              "4                         How is the costume design?                    1   \n",
              "...                                              ...                  ...   \n",
              "2496               Who do you have more respect for?                    1   \n",
              "2497                           How is the potential?                    1   \n",
              "2498                                 How is the bit?                    1   \n",
              "2499                      Do you know Maria's story?                    5   \n",
              "2500                            How were the stunts?                    1   \n",
              "\n",
              "      ques_subj_score  is_ques_subjective                         review_id  \\\n",
              "0                 0.0               False  090671369dddfeb02db9bf7125a47c79   \n",
              "1                 0.5               False  a29821121e74d319cb93f77101e99c88   \n",
              "2                 0.6                True  12a1b821f761bd19a75be7b16cef4a7c   \n",
              "3                 0.6                True  cd0f92322e67cc9d70de6674caace78c   \n",
              "4                 0.0               False  f6b5024393ebc70287befdaf47a50b75   \n",
              "...               ...                 ...                               ...   \n",
              "2496              0.5               False  5f4940e66691b0414fb4abbfa15dcc78   \n",
              "2497              1.0                True  765f22bb7d2b7e8b98f905e8a4c1a442   \n",
              "2498              0.0               False  705900af450309f7328ed78e812092bd   \n",
              "2499              0.0               False  26caf6113662f003c0a1bc46509bcb58   \n",
              "2500              0.0               False  9a78c0a2520e174e56cf4baf54ee9f46   \n",
              "\n",
              "                                                 review  \\\n",
              "0     Whether it be in her portrayal of a nerdy lesb...   \n",
              "1     An outstanding romantic comedy, 13 Going on 30...   \n",
              "2     To let the truth be known, I watched this movi...   \n",
              "3     At the time of my review, there had been 910 c...   \n",
              "4     \"Fright Night\" is great! This is how the story...   \n",
              "...                                                 ...   \n",
              "2496  This movie is just great. The soundtrack is aw...   \n",
              "2497  Tom Cruise (Collateral, Vanilla Sky) stars as ...   \n",
              "2498  This review is for the Blu Ray version of Blaz...   \n",
              "2499  Hey, i know youve already seen this movie, if ...   \n",
              "2500  The movie Taken was very good. The action sequ...   \n",
              "\n",
              "                                    human_ans_spans human_ans_indices  \\\n",
              "0                                    ANSWERNOTFOUND        (251, 265)   \n",
              "1                                    ANSWERNOTFOUND      (1195, 1209)   \n",
              "2                                    ANSWERNOTFOUND      (1476, 1490)   \n",
              "3                          this show is OUTSTANDING        (296, 320)   \n",
              "4     The costume design by Susan Matheson is great      (1254, 1299)   \n",
              "...                                             ...               ...   \n",
              "2496                                 ANSWERNOTFOUND        (371, 385)   \n",
              "2497                                 ANSWERNOTFOUND      (1787, 1801)   \n",
              "2498                                 ANSWERNOTFOUND      (1339, 1353)   \n",
              "2499                                 ANSWERNOTFOUND      (1063, 1077)   \n",
              "2500                                 ANSWERNOTFOUND        (801, 815)   \n",
              "\n",
              "      answer_subj_level  ans_subj_score  is_ans_subjective  \n",
              "0                     1           0.000              False  \n",
              "1                     1           0.000              False  \n",
              "2                     5           0.000              False  \n",
              "3                     1           0.875               True  \n",
              "4                     1           0.750               True  \n",
              "...                 ...             ...                ...  \n",
              "2496                  1           0.000              False  \n",
              "2497                  1           0.000              False  \n",
              "2498                  1           0.000              False  \n",
              "2499                  5           0.000              False  \n",
              "2500                  1           0.000              False  \n",
              "\n",
              "[2501 rows x 19 columns]>"
            ],
            "text/html": [
              "<div style=\"max-width:800px; border: 1px solid var(--colab-border-color);\"><style>\n",
              "      pre.function-repr-contents {\n",
              "        overflow-x: auto;\n",
              "        padding: 8px 12px;\n",
              "        max-height: 500px;\n",
              "      }\n",
              "\n",
              "      pre.function-repr-contents.function-repr-contents-collapsed {\n",
              "        cursor: pointer;\n",
              "        max-height: 100px;\n",
              "      }\n",
              "    </style>\n",
              "    <pre style=\"white-space: initial; background:\n",
              "         var(--colab-secondary-surface-color); padding: 8px 12px;\n",
              "         border-bottom: 1px solid var(--colab-border-color);\"><b>pandas.core.frame.DataFrame.info</b><br/>def info(verbose: bool | None=None, buf: WriteBuffer[str] | None=None, max_cols: int | None=None, memory_usage: bool | str | None=None, show_counts: bool | None=None, null_counts: bool | None=None) -&gt; None</pre><pre class=\"function-repr-contents function-repr-contents-collapsed\" style=\"\"><a class=\"filepath\" style=\"display:none\" href=\"#\">/usr/local/lib/python3.10/dist-packages/pandas/core/frame.py</a>Print a concise summary of a DataFrame.\n",
              "\n",
              "This method prints information about a DataFrame including\n",
              "the index dtype and columns, non-null values and memory usage.\n",
              "\n",
              "Parameters\n",
              "----------\n",
              "verbose : bool, optional\n",
              "    Whether to print the full summary. By default, the setting in\n",
              "    ``pandas.options.display.max_info_columns`` is followed.\n",
              "buf : writable buffer, defaults to sys.stdout\n",
              "    Where to send the output. By default, the output is printed to\n",
              "    sys.stdout. Pass a writable buffer if you need to further process\n",
              "    the output.    max_cols : int, optional\n",
              "    When to switch from the verbose to the truncated output. If the\n",
              "    DataFrame has more than `max_cols` columns, the truncated output\n",
              "    is used. By default, the setting in\n",
              "    ``pandas.options.display.max_info_columns`` is used.\n",
              "memory_usage : bool, str, optional\n",
              "    Specifies whether total memory usage of the DataFrame\n",
              "    elements (including the index) should be displayed. By default,\n",
              "    this follows the ``pandas.options.display.memory_usage`` setting.\n",
              "\n",
              "    True always show memory usage. False never shows memory usage.\n",
              "    A value of &#x27;deep&#x27; is equivalent to &quot;True with deep introspection&quot;.\n",
              "    Memory usage is shown in human-readable units (base-2\n",
              "    representation). Without deep introspection a memory estimation is\n",
              "    made based in column dtype and number of rows assuming values\n",
              "    consume the same memory amount for corresponding dtypes. With deep\n",
              "    memory introspection, a real memory usage calculation is performed\n",
              "    at the cost of computational resources. See the\n",
              "    :ref:`Frequently Asked Questions &lt;df-memory-usage&gt;` for more\n",
              "    details.\n",
              "show_counts : bool, optional\n",
              "    Whether to show the non-null counts. By default, this is shown\n",
              "    only if the DataFrame is smaller than\n",
              "    ``pandas.options.display.max_info_rows`` and\n",
              "    ``pandas.options.display.max_info_columns``. A value of True always\n",
              "    shows the counts, and False never shows the counts.\n",
              "null_counts : bool, optional\n",
              "    .. deprecated:: 1.2.0\n",
              "        Use show_counts instead.\n",
              "\n",
              "Returns\n",
              "-------\n",
              "None\n",
              "    This method prints a summary of a DataFrame and returns None.\n",
              "\n",
              "See Also\n",
              "--------\n",
              "DataFrame.describe: Generate descriptive statistics of DataFrame\n",
              "    columns.\n",
              "DataFrame.memory_usage: Memory usage of DataFrame columns.\n",
              "\n",
              "Examples\n",
              "--------\n",
              "&gt;&gt;&gt; int_values = [1, 2, 3, 4, 5]\n",
              "&gt;&gt;&gt; text_values = [&#x27;alpha&#x27;, &#x27;beta&#x27;, &#x27;gamma&#x27;, &#x27;delta&#x27;, &#x27;epsilon&#x27;]\n",
              "&gt;&gt;&gt; float_values = [0.0, 0.25, 0.5, 0.75, 1.0]\n",
              "&gt;&gt;&gt; df = pd.DataFrame({&quot;int_col&quot;: int_values, &quot;text_col&quot;: text_values,\n",
              "...                   &quot;float_col&quot;: float_values})\n",
              "&gt;&gt;&gt; df\n",
              "    int_col text_col  float_col\n",
              "0        1    alpha       0.00\n",
              "1        2     beta       0.25\n",
              "2        3    gamma       0.50\n",
              "3        4    delta       0.75\n",
              "4        5  epsilon       1.00\n",
              "\n",
              "Prints information of all columns:\n",
              "\n",
              "&gt;&gt;&gt; df.info(verbose=True)\n",
              "&lt;class &#x27;pandas.core.frame.DataFrame&#x27;&gt;\n",
              "RangeIndex: 5 entries, 0 to 4\n",
              "Data columns (total 3 columns):\n",
              " #   Column     Non-Null Count  Dtype\n",
              "---  ------     --------------  -----\n",
              " 0   int_col    5 non-null      int64\n",
              " 1   text_col   5 non-null      object\n",
              " 2   float_col  5 non-null      float64\n",
              "dtypes: float64(1), int64(1), object(1)\n",
              "memory usage: 248.0+ bytes\n",
              "\n",
              "Prints a summary of columns count and its dtypes but not per column\n",
              "information:\n",
              "\n",
              "&gt;&gt;&gt; df.info(verbose=False)\n",
              "&lt;class &#x27;pandas.core.frame.DataFrame&#x27;&gt;\n",
              "RangeIndex: 5 entries, 0 to 4\n",
              "Columns: 3 entries, int_col to float_col\n",
              "dtypes: float64(1), int64(1), object(1)\n",
              "memory usage: 248.0+ bytes\n",
              "\n",
              "Pipe output of DataFrame.info to buffer instead of sys.stdout, get\n",
              "buffer content and writes to a text file:\n",
              "\n",
              "&gt;&gt;&gt; import io\n",
              "&gt;&gt;&gt; buffer = io.StringIO()\n",
              "&gt;&gt;&gt; df.info(buf=buffer)\n",
              "&gt;&gt;&gt; s = buffer.getvalue()\n",
              "&gt;&gt;&gt; with open(&quot;df_info.txt&quot;, &quot;w&quot;,\n",
              "...           encoding=&quot;utf-8&quot;) as f:  # doctest: +SKIP\n",
              "...     f.write(s)\n",
              "260\n",
              "\n",
              "The `memory_usage` parameter allows deep introspection mode, specially\n",
              "useful for big DataFrames and fine-tune memory optimization:\n",
              "\n",
              "&gt;&gt;&gt; random_strings_array = np.random.choice([&#x27;a&#x27;, &#x27;b&#x27;, &#x27;c&#x27;], 10 ** 6)\n",
              "&gt;&gt;&gt; df = pd.DataFrame({\n",
              "...     &#x27;column_1&#x27;: np.random.choice([&#x27;a&#x27;, &#x27;b&#x27;, &#x27;c&#x27;], 10 ** 6),\n",
              "...     &#x27;column_2&#x27;: np.random.choice([&#x27;a&#x27;, &#x27;b&#x27;, &#x27;c&#x27;], 10 ** 6),\n",
              "...     &#x27;column_3&#x27;: np.random.choice([&#x27;a&#x27;, &#x27;b&#x27;, &#x27;c&#x27;], 10 ** 6)\n",
              "... })\n",
              "&gt;&gt;&gt; df.info()\n",
              "&lt;class &#x27;pandas.core.frame.DataFrame&#x27;&gt;\n",
              "RangeIndex: 1000000 entries, 0 to 999999\n",
              "Data columns (total 3 columns):\n",
              " #   Column    Non-Null Count    Dtype\n",
              "---  ------    --------------    -----\n",
              " 0   column_1  1000000 non-null  object\n",
              " 1   column_2  1000000 non-null  object\n",
              " 2   column_3  1000000 non-null  object\n",
              "dtypes: object(3)\n",
              "memory usage: 22.9+ MB\n",
              "\n",
              "&gt;&gt;&gt; df.info(memory_usage=&#x27;deep&#x27;)\n",
              "&lt;class &#x27;pandas.core.frame.DataFrame&#x27;&gt;\n",
              "RangeIndex: 1000000 entries, 0 to 999999\n",
              "Data columns (total 3 columns):\n",
              " #   Column    Non-Null Count    Dtype\n",
              "---  ------    --------------    -----\n",
              " 0   column_1  1000000 non-null  object\n",
              " 1   column_2  1000000 non-null  object\n",
              " 2   column_3  1000000 non-null  object\n",
              "dtypes: object(3)\n",
              "memory usage: 165.9 MB</pre>\n",
              "      <script>\n",
              "      if (google.colab.kernel.accessAllowed && google.colab.files && google.colab.files.view) {\n",
              "        for (const element of document.querySelectorAll('.filepath')) {\n",
              "          element.style.display = 'block'\n",
              "          element.onclick = (event) => {\n",
              "            event.preventDefault();\n",
              "            event.stopPropagation();\n",
              "            google.colab.files.view(element.textContent, 3434);\n",
              "          };\n",
              "        }\n",
              "      }\n",
              "      for (const element of document.querySelectorAll('.function-repr-contents')) {\n",
              "        element.onclick = (event) => {\n",
              "          event.preventDefault();\n",
              "          event.stopPropagation();\n",
              "          element.classList.toggle('function-repr-contents-collapsed');\n",
              "        };\n",
              "      }\n",
              "      </script>\n",
              "      </div>"
            ]
          },
          "metadata": {},
          "execution_count": 29
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df_train.columns"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "5X6yTrJ-2Fmo",
        "outputId": "ca4ad115-d9d0-4ef9-dd94-37c0d926036c"
      },
      "execution_count": 30,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Index(['item_id', 'domain', 'nn_mod', 'nn_asp', 'query_mod', 'query_asp',\n",
              "       'q_review_id', 'q_reviews_id', 'question', 'question_subj_level',\n",
              "       'ques_subj_score', 'is_ques_subjective', 'review_id', 'review',\n",
              "       'human_ans_spans', 'human_ans_indices', 'answer_subj_level',\n",
              "       'ans_subj_score', 'is_ans_subjective'],\n",
              "      dtype='object')"
            ]
          },
          "metadata": {},
          "execution_count": 30
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df_train.iloc[0:10].question"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "6nnoYQyK2MBe",
        "outputId": "cd521ee7-69b9-47b3-83c0-5b36947aeb37"
      },
      "execution_count": 34,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0                    Who is the author of this series?\n",
              "1       Can we enjoy the movie along with our family ?\n",
              "2                                  Does this one good?\n",
              "3                    Is this series good and excelent?\n",
              "4                           How is the costume design?\n",
              "5                         How are the special effects?\n",
              "6                              Do you have any credit?\n",
              "7                           How do you like the story?\n",
              "8    What criticism deserves the movie Passion of C...\n",
              "9             How much is missing from the collection?\n",
              "Name: question, dtype: object"
            ]
          },
          "metadata": {},
          "execution_count": 34
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df_train.iloc[2].review"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 178
        },
        "id": "0lpkVFjm26FR",
        "outputId": "79204717-1a1f-4f8d-cb5b-5795cf5e4e9d"
      },
      "execution_count": 56,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "\"To let the truth be known, I watched this movie with a mix of anticipation and fear. Being an avid Star Wars fan, I was excited to see any Star Wars movie, but I suspected this would be as disappointing as the Phantom Menace. WRONG! Although this doesn't even come close to the great casting and story lines and sheer art of the first three Star Wars series, it was WAY better than Phantom Menace for the following reasons: 1) This movie included LESS Jar-Jar, which, despite initial heavy marketing for the first movie, the character was found by the general consensus to be REALLY annoying. 2) This movie demonstrated some of the political turmoil behind the original Star Wars movies. 3) You get to see some of what led Anakin to turn over to the Dark Side. Finally, the special effects were really good!It was not 4 or 5 stars because the actors that were cast in this movie (as well as The Phantom Menace) are all well known for other cinematic accomplishments, and it was hard to believe that they were supposed to be these other characters.  They should have casted lesser-known actors, in my opinion.  Also, the plot about the clones was weak, to me.But, note well- the fight-scene with Yoda by itself makes the movie worth watching. It was action packed, entertaining, and even a little bit funny.  I do recommend this movie to any Star Wars fan, way way better than the Phantom Menace, but do not go into it expecting it to be as good as the first Star Wars series. ANSWERNOTFOUND\""
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 56
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df_train.iloc[2].human_ans_indices"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "id": "CNxpn9lv3QRx",
        "outputId": "0072592a-3de1-4d67-ca7a-57437419618e"
      },
      "execution_count": 57,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'(1476, 1490)'"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 57
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df_train.iloc[2].review[1476:1490]"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "id": "mi0231lB3cjJ",
        "outputId": "e3145726-3527-48b0-dc07-63d1ebee6da0"
      },
      "execution_count": 59,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'ANSWERNOTFOUND'"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 59
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Picking the necessary columns for further analysis\n",
        "df_train=df_train[['question','human_ans_indices','review','human_ans_spans']]\n",
        "df_test=df_test[['question','human_ans_indices','review','human_ans_spans']]"
      ],
      "metadata": {
        "id": "GYTknC6D3zZQ"
      },
      "execution_count": 60,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Generate a sequence evenly spaced numbers\n",
        "import numpy as np\n",
        "df_train['id']=np.linspace(0,len(df_train)-1,len(df_train)) # Generates a sequence of IDs from 0 to the length of the training data minus 1\n",
        "df_test['id']=np.linspace(0,len(df_test)-1,len(df_test)) # Same\n",
        "\n",
        "# Convert to strings\n",
        "df_train['id']=df_train['id'].astype(str)\n",
        "df_test['id']=df_test['id'].astype(str)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Ginf6f7C330b",
        "outputId": "5bb1df99-c9d1-47aa-c417-98163f3e7e5f"
      },
      "execution_count": 61,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-61-5eb43ae72060>:2: SettingWithCopyWarning: \n",
            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
            "Try using .loc[row_indexer,col_indexer] = value instead\n",
            "\n",
            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
            "  df_train['id']=np.linspace(0,len(df_train)-1,len(df_train))\n",
            "<ipython-input-61-5eb43ae72060>:5: SettingWithCopyWarning: \n",
            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
            "Try using .loc[row_indexer,col_indexer] = value instead\n",
            "\n",
            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
            "  df_train['id']=df_train['id'].astype(str)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df_train.info"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 221
        },
        "id": "IA1O56Rt5m7m",
        "outputId": "1c782044-f9f5-4833-c907-24595fb5ad3d"
      },
      "execution_count": 63,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<bound method DataFrame.info of                                             question human_ans_indices  \\\n",
              "0                  Who is the author of this series?        (251, 265)   \n",
              "1     Can we enjoy the movie along with our family ?      (1195, 1209)   \n",
              "2                                Does this one good?      (1476, 1490)   \n",
              "3                  Is this series good and excelent?        (296, 320)   \n",
              "4                         How is the costume design?      (1254, 1299)   \n",
              "...                                              ...               ...   \n",
              "2496               Who do you have more respect for?        (371, 385)   \n",
              "2497                           How is the potential?      (1787, 1801)   \n",
              "2498                                 How is the bit?      (1339, 1353)   \n",
              "2499                      Do you know Maria's story?      (1063, 1077)   \n",
              "2500                            How were the stunts?        (801, 815)   \n",
              "\n",
              "                                                 review  \\\n",
              "0     Whether it be in her portrayal of a nerdy lesb...   \n",
              "1     An outstanding romantic comedy, 13 Going on 30...   \n",
              "2     To let the truth be known, I watched this movi...   \n",
              "3     At the time of my review, there had been 910 c...   \n",
              "4     \"Fright Night\" is great! This is how the story...   \n",
              "...                                                 ...   \n",
              "2496  This movie is just great. The soundtrack is aw...   \n",
              "2497  Tom Cruise (Collateral, Vanilla Sky) stars as ...   \n",
              "2498  This review is for the Blu Ray version of Blaz...   \n",
              "2499  Hey, i know youve already seen this movie, if ...   \n",
              "2500  The movie Taken was very good. The action sequ...   \n",
              "\n",
              "                                    human_ans_spans      id  \n",
              "0                                    ANSWERNOTFOUND     0.0  \n",
              "1                                    ANSWERNOTFOUND     1.0  \n",
              "2                                    ANSWERNOTFOUND     2.0  \n",
              "3                          this show is OUTSTANDING     3.0  \n",
              "4     The costume design by Susan Matheson is great     4.0  \n",
              "...                                             ...     ...  \n",
              "2496                                 ANSWERNOTFOUND  2496.0  \n",
              "2497                                 ANSWERNOTFOUND  2497.0  \n",
              "2498                                 ANSWERNOTFOUND  2498.0  \n",
              "2499                                 ANSWERNOTFOUND  2499.0  \n",
              "2500                                 ANSWERNOTFOUND  2500.0  \n",
              "\n",
              "[2501 rows x 5 columns]>"
            ],
            "text/html": [
              "<div style=\"max-width:800px; border: 1px solid var(--colab-border-color);\"><style>\n",
              "      pre.function-repr-contents {\n",
              "        overflow-x: auto;\n",
              "        padding: 8px 12px;\n",
              "        max-height: 500px;\n",
              "      }\n",
              "\n",
              "      pre.function-repr-contents.function-repr-contents-collapsed {\n",
              "        cursor: pointer;\n",
              "        max-height: 100px;\n",
              "      }\n",
              "    </style>\n",
              "    <pre style=\"white-space: initial; background:\n",
              "         var(--colab-secondary-surface-color); padding: 8px 12px;\n",
              "         border-bottom: 1px solid var(--colab-border-color);\"><b>pandas.core.frame.DataFrame.info</b><br/>def info(verbose: bool | None=None, buf: WriteBuffer[str] | None=None, max_cols: int | None=None, memory_usage: bool | str | None=None, show_counts: bool | None=None, null_counts: bool | None=None) -&gt; None</pre><pre class=\"function-repr-contents function-repr-contents-collapsed\" style=\"\"><a class=\"filepath\" style=\"display:none\" href=\"#\">/usr/local/lib/python3.10/dist-packages/pandas/core/frame.py</a>Print a concise summary of a DataFrame.\n",
              "\n",
              "This method prints information about a DataFrame including\n",
              "the index dtype and columns, non-null values and memory usage.\n",
              "\n",
              "Parameters\n",
              "----------\n",
              "verbose : bool, optional\n",
              "    Whether to print the full summary. By default, the setting in\n",
              "    ``pandas.options.display.max_info_columns`` is followed.\n",
              "buf : writable buffer, defaults to sys.stdout\n",
              "    Where to send the output. By default, the output is printed to\n",
              "    sys.stdout. Pass a writable buffer if you need to further process\n",
              "    the output.    max_cols : int, optional\n",
              "    When to switch from the verbose to the truncated output. If the\n",
              "    DataFrame has more than `max_cols` columns, the truncated output\n",
              "    is used. By default, the setting in\n",
              "    ``pandas.options.display.max_info_columns`` is used.\n",
              "memory_usage : bool, str, optional\n",
              "    Specifies whether total memory usage of the DataFrame\n",
              "    elements (including the index) should be displayed. By default,\n",
              "    this follows the ``pandas.options.display.memory_usage`` setting.\n",
              "\n",
              "    True always show memory usage. False never shows memory usage.\n",
              "    A value of &#x27;deep&#x27; is equivalent to &quot;True with deep introspection&quot;.\n",
              "    Memory usage is shown in human-readable units (base-2\n",
              "    representation). Without deep introspection a memory estimation is\n",
              "    made based in column dtype and number of rows assuming values\n",
              "    consume the same memory amount for corresponding dtypes. With deep\n",
              "    memory introspection, a real memory usage calculation is performed\n",
              "    at the cost of computational resources. See the\n",
              "    :ref:`Frequently Asked Questions &lt;df-memory-usage&gt;` for more\n",
              "    details.\n",
              "show_counts : bool, optional\n",
              "    Whether to show the non-null counts. By default, this is shown\n",
              "    only if the DataFrame is smaller than\n",
              "    ``pandas.options.display.max_info_rows`` and\n",
              "    ``pandas.options.display.max_info_columns``. A value of True always\n",
              "    shows the counts, and False never shows the counts.\n",
              "null_counts : bool, optional\n",
              "    .. deprecated:: 1.2.0\n",
              "        Use show_counts instead.\n",
              "\n",
              "Returns\n",
              "-------\n",
              "None\n",
              "    This method prints a summary of a DataFrame and returns None.\n",
              "\n",
              "See Also\n",
              "--------\n",
              "DataFrame.describe: Generate descriptive statistics of DataFrame\n",
              "    columns.\n",
              "DataFrame.memory_usage: Memory usage of DataFrame columns.\n",
              "\n",
              "Examples\n",
              "--------\n",
              "&gt;&gt;&gt; int_values = [1, 2, 3, 4, 5]\n",
              "&gt;&gt;&gt; text_values = [&#x27;alpha&#x27;, &#x27;beta&#x27;, &#x27;gamma&#x27;, &#x27;delta&#x27;, &#x27;epsilon&#x27;]\n",
              "&gt;&gt;&gt; float_values = [0.0, 0.25, 0.5, 0.75, 1.0]\n",
              "&gt;&gt;&gt; df = pd.DataFrame({&quot;int_col&quot;: int_values, &quot;text_col&quot;: text_values,\n",
              "...                   &quot;float_col&quot;: float_values})\n",
              "&gt;&gt;&gt; df\n",
              "    int_col text_col  float_col\n",
              "0        1    alpha       0.00\n",
              "1        2     beta       0.25\n",
              "2        3    gamma       0.50\n",
              "3        4    delta       0.75\n",
              "4        5  epsilon       1.00\n",
              "\n",
              "Prints information of all columns:\n",
              "\n",
              "&gt;&gt;&gt; df.info(verbose=True)\n",
              "&lt;class &#x27;pandas.core.frame.DataFrame&#x27;&gt;\n",
              "RangeIndex: 5 entries, 0 to 4\n",
              "Data columns (total 3 columns):\n",
              " #   Column     Non-Null Count  Dtype\n",
              "---  ------     --------------  -----\n",
              " 0   int_col    5 non-null      int64\n",
              " 1   text_col   5 non-null      object\n",
              " 2   float_col  5 non-null      float64\n",
              "dtypes: float64(1), int64(1), object(1)\n",
              "memory usage: 248.0+ bytes\n",
              "\n",
              "Prints a summary of columns count and its dtypes but not per column\n",
              "information:\n",
              "\n",
              "&gt;&gt;&gt; df.info(verbose=False)\n",
              "&lt;class &#x27;pandas.core.frame.DataFrame&#x27;&gt;\n",
              "RangeIndex: 5 entries, 0 to 4\n",
              "Columns: 3 entries, int_col to float_col\n",
              "dtypes: float64(1), int64(1), object(1)\n",
              "memory usage: 248.0+ bytes\n",
              "\n",
              "Pipe output of DataFrame.info to buffer instead of sys.stdout, get\n",
              "buffer content and writes to a text file:\n",
              "\n",
              "&gt;&gt;&gt; import io\n",
              "&gt;&gt;&gt; buffer = io.StringIO()\n",
              "&gt;&gt;&gt; df.info(buf=buffer)\n",
              "&gt;&gt;&gt; s = buffer.getvalue()\n",
              "&gt;&gt;&gt; with open(&quot;df_info.txt&quot;, &quot;w&quot;,\n",
              "...           encoding=&quot;utf-8&quot;) as f:  # doctest: +SKIP\n",
              "...     f.write(s)\n",
              "260\n",
              "\n",
              "The `memory_usage` parameter allows deep introspection mode, specially\n",
              "useful for big DataFrames and fine-tune memory optimization:\n",
              "\n",
              "&gt;&gt;&gt; random_strings_array = np.random.choice([&#x27;a&#x27;, &#x27;b&#x27;, &#x27;c&#x27;], 10 ** 6)\n",
              "&gt;&gt;&gt; df = pd.DataFrame({\n",
              "...     &#x27;column_1&#x27;: np.random.choice([&#x27;a&#x27;, &#x27;b&#x27;, &#x27;c&#x27;], 10 ** 6),\n",
              "...     &#x27;column_2&#x27;: np.random.choice([&#x27;a&#x27;, &#x27;b&#x27;, &#x27;c&#x27;], 10 ** 6),\n",
              "...     &#x27;column_3&#x27;: np.random.choice([&#x27;a&#x27;, &#x27;b&#x27;, &#x27;c&#x27;], 10 ** 6)\n",
              "... })\n",
              "&gt;&gt;&gt; df.info()\n",
              "&lt;class &#x27;pandas.core.frame.DataFrame&#x27;&gt;\n",
              "RangeIndex: 1000000 entries, 0 to 999999\n",
              "Data columns (total 3 columns):\n",
              " #   Column    Non-Null Count    Dtype\n",
              "---  ------    --------------    -----\n",
              " 0   column_1  1000000 non-null  object\n",
              " 1   column_2  1000000 non-null  object\n",
              " 2   column_3  1000000 non-null  object\n",
              "dtypes: object(3)\n",
              "memory usage: 22.9+ MB\n",
              "\n",
              "&gt;&gt;&gt; df.info(memory_usage=&#x27;deep&#x27;)\n",
              "&lt;class &#x27;pandas.core.frame.DataFrame&#x27;&gt;\n",
              "RangeIndex: 1000000 entries, 0 to 999999\n",
              "Data columns (total 3 columns):\n",
              " #   Column    Non-Null Count    Dtype\n",
              "---  ------    --------------    -----\n",
              " 0   column_1  1000000 non-null  object\n",
              " 1   column_2  1000000 non-null  object\n",
              " 2   column_3  1000000 non-null  object\n",
              "dtypes: object(3)\n",
              "memory usage: 165.9 MB</pre>\n",
              "      <script>\n",
              "      if (google.colab.kernel.accessAllowed && google.colab.files && google.colab.files.view) {\n",
              "        for (const element of document.querySelectorAll('.filepath')) {\n",
              "          element.style.display = 'block'\n",
              "          element.onclick = (event) => {\n",
              "            event.preventDefault();\n",
              "            event.stopPropagation();\n",
              "            google.colab.files.view(element.textContent, 3434);\n",
              "          };\n",
              "        }\n",
              "      }\n",
              "      for (const element of document.querySelectorAll('.function-repr-contents')) {\n",
              "        element.onclick = (event) => {\n",
              "          event.preventDefault();\n",
              "          event.stopPropagation();\n",
              "          element.classList.toggle('function-repr-contents-collapsed');\n",
              "        };\n",
              "      }\n",
              "      </script>\n",
              "      </div>"
            ]
          },
          "metadata": {},
          "execution_count": 63
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df_test.info"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 221
        },
        "id": "6PuUHxJ75tI9",
        "outputId": "231377e9-d714-4b80-97d5-79ad13b7df0a"
      },
      "execution_count": 64,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<bound method DataFrame.info of                                               question human_ans_indices  \\\n",
              "0             Is this storyline interesting or strong?      (1850, 1886)   \n",
              "1             Is this storyline interesting or strong?        (240, 249)   \n",
              "2    Is the sound of the movie a reason to recommen...        (111, 135)   \n",
              "3    Is the sound of the movie a reason to recommen...        (394, 408)   \n",
              "4                           How do you rate the sound?      (5806, 5820)   \n",
              "..                                                 ...               ...   \n",
              "577                                    How is it act ?        (539, 561)   \n",
              "578                                   How is the view?      (2034, 2048)   \n",
              "579                                   How is the view?      (2034, 2048)   \n",
              "580                                How is the costume?          (26, 55)   \n",
              "581                                How is the costume?          (26, 55)   \n",
              "\n",
              "                                                review  \\\n",
              "0    Spoilers thar be, Maytees.Is a man created by ...   \n",
              "1    Spoilers thar be, Maytees.Is a man created by ...   \n",
              "2    This was my first bluray of a Disney classic a...   \n",
              "3    This was my first bluray of a Disney classic a...   \n",
              "4    In the realm of big Hollywood filmmaking, a fe...   \n",
              "..                                                 ...   \n",
              "577  del Toro's visual imagination is present in Pa...   \n",
              "578  Now I am not a religious person, even tough I ...   \n",
              "579  Now I am not a religious person, even tough I ...   \n",
              "580  The sets are spectucular, the costumes are so ...   \n",
              "581  The sets are spectucular, the costumes are so ...   \n",
              "\n",
              "                          human_ans_spans     id  \n",
              "0    just barely above mildly interesting    0.0  \n",
              "1                               important    1.0  \n",
              "2                sound were crystal clear    2.0  \n",
              "3                          ANSWERNOTFOUND    3.0  \n",
              "4                          ANSWERNOTFOUND    4.0  \n",
              "..                                    ...    ...  \n",
              "577                the dialogue is clunky  577.0  \n",
              "578                        ANSWERNOTFOUND  578.0  \n",
              "579                        ANSWERNOTFOUND  579.0  \n",
              "580         the costumes are so authentic  580.0  \n",
              "581         the costumes are so authentic  581.0  \n",
              "\n",
              "[582 rows x 5 columns]>"
            ],
            "text/html": [
              "<div style=\"max-width:800px; border: 1px solid var(--colab-border-color);\"><style>\n",
              "      pre.function-repr-contents {\n",
              "        overflow-x: auto;\n",
              "        padding: 8px 12px;\n",
              "        max-height: 500px;\n",
              "      }\n",
              "\n",
              "      pre.function-repr-contents.function-repr-contents-collapsed {\n",
              "        cursor: pointer;\n",
              "        max-height: 100px;\n",
              "      }\n",
              "    </style>\n",
              "    <pre style=\"white-space: initial; background:\n",
              "         var(--colab-secondary-surface-color); padding: 8px 12px;\n",
              "         border-bottom: 1px solid var(--colab-border-color);\"><b>pandas.core.frame.DataFrame.info</b><br/>def info(verbose: bool | None=None, buf: WriteBuffer[str] | None=None, max_cols: int | None=None, memory_usage: bool | str | None=None, show_counts: bool | None=None, null_counts: bool | None=None) -&gt; None</pre><pre class=\"function-repr-contents function-repr-contents-collapsed\" style=\"\"><a class=\"filepath\" style=\"display:none\" href=\"#\">/usr/local/lib/python3.10/dist-packages/pandas/core/frame.py</a>Print a concise summary of a DataFrame.\n",
              "\n",
              "This method prints information about a DataFrame including\n",
              "the index dtype and columns, non-null values and memory usage.\n",
              "\n",
              "Parameters\n",
              "----------\n",
              "verbose : bool, optional\n",
              "    Whether to print the full summary. By default, the setting in\n",
              "    ``pandas.options.display.max_info_columns`` is followed.\n",
              "buf : writable buffer, defaults to sys.stdout\n",
              "    Where to send the output. By default, the output is printed to\n",
              "    sys.stdout. Pass a writable buffer if you need to further process\n",
              "    the output.    max_cols : int, optional\n",
              "    When to switch from the verbose to the truncated output. If the\n",
              "    DataFrame has more than `max_cols` columns, the truncated output\n",
              "    is used. By default, the setting in\n",
              "    ``pandas.options.display.max_info_columns`` is used.\n",
              "memory_usage : bool, str, optional\n",
              "    Specifies whether total memory usage of the DataFrame\n",
              "    elements (including the index) should be displayed. By default,\n",
              "    this follows the ``pandas.options.display.memory_usage`` setting.\n",
              "\n",
              "    True always show memory usage. False never shows memory usage.\n",
              "    A value of &#x27;deep&#x27; is equivalent to &quot;True with deep introspection&quot;.\n",
              "    Memory usage is shown in human-readable units (base-2\n",
              "    representation). Without deep introspection a memory estimation is\n",
              "    made based in column dtype and number of rows assuming values\n",
              "    consume the same memory amount for corresponding dtypes. With deep\n",
              "    memory introspection, a real memory usage calculation is performed\n",
              "    at the cost of computational resources. See the\n",
              "    :ref:`Frequently Asked Questions &lt;df-memory-usage&gt;` for more\n",
              "    details.\n",
              "show_counts : bool, optional\n",
              "    Whether to show the non-null counts. By default, this is shown\n",
              "    only if the DataFrame is smaller than\n",
              "    ``pandas.options.display.max_info_rows`` and\n",
              "    ``pandas.options.display.max_info_columns``. A value of True always\n",
              "    shows the counts, and False never shows the counts.\n",
              "null_counts : bool, optional\n",
              "    .. deprecated:: 1.2.0\n",
              "        Use show_counts instead.\n",
              "\n",
              "Returns\n",
              "-------\n",
              "None\n",
              "    This method prints a summary of a DataFrame and returns None.\n",
              "\n",
              "See Also\n",
              "--------\n",
              "DataFrame.describe: Generate descriptive statistics of DataFrame\n",
              "    columns.\n",
              "DataFrame.memory_usage: Memory usage of DataFrame columns.\n",
              "\n",
              "Examples\n",
              "--------\n",
              "&gt;&gt;&gt; int_values = [1, 2, 3, 4, 5]\n",
              "&gt;&gt;&gt; text_values = [&#x27;alpha&#x27;, &#x27;beta&#x27;, &#x27;gamma&#x27;, &#x27;delta&#x27;, &#x27;epsilon&#x27;]\n",
              "&gt;&gt;&gt; float_values = [0.0, 0.25, 0.5, 0.75, 1.0]\n",
              "&gt;&gt;&gt; df = pd.DataFrame({&quot;int_col&quot;: int_values, &quot;text_col&quot;: text_values,\n",
              "...                   &quot;float_col&quot;: float_values})\n",
              "&gt;&gt;&gt; df\n",
              "    int_col text_col  float_col\n",
              "0        1    alpha       0.00\n",
              "1        2     beta       0.25\n",
              "2        3    gamma       0.50\n",
              "3        4    delta       0.75\n",
              "4        5  epsilon       1.00\n",
              "\n",
              "Prints information of all columns:\n",
              "\n",
              "&gt;&gt;&gt; df.info(verbose=True)\n",
              "&lt;class &#x27;pandas.core.frame.DataFrame&#x27;&gt;\n",
              "RangeIndex: 5 entries, 0 to 4\n",
              "Data columns (total 3 columns):\n",
              " #   Column     Non-Null Count  Dtype\n",
              "---  ------     --------------  -----\n",
              " 0   int_col    5 non-null      int64\n",
              " 1   text_col   5 non-null      object\n",
              " 2   float_col  5 non-null      float64\n",
              "dtypes: float64(1), int64(1), object(1)\n",
              "memory usage: 248.0+ bytes\n",
              "\n",
              "Prints a summary of columns count and its dtypes but not per column\n",
              "information:\n",
              "\n",
              "&gt;&gt;&gt; df.info(verbose=False)\n",
              "&lt;class &#x27;pandas.core.frame.DataFrame&#x27;&gt;\n",
              "RangeIndex: 5 entries, 0 to 4\n",
              "Columns: 3 entries, int_col to float_col\n",
              "dtypes: float64(1), int64(1), object(1)\n",
              "memory usage: 248.0+ bytes\n",
              "\n",
              "Pipe output of DataFrame.info to buffer instead of sys.stdout, get\n",
              "buffer content and writes to a text file:\n",
              "\n",
              "&gt;&gt;&gt; import io\n",
              "&gt;&gt;&gt; buffer = io.StringIO()\n",
              "&gt;&gt;&gt; df.info(buf=buffer)\n",
              "&gt;&gt;&gt; s = buffer.getvalue()\n",
              "&gt;&gt;&gt; with open(&quot;df_info.txt&quot;, &quot;w&quot;,\n",
              "...           encoding=&quot;utf-8&quot;) as f:  # doctest: +SKIP\n",
              "...     f.write(s)\n",
              "260\n",
              "\n",
              "The `memory_usage` parameter allows deep introspection mode, specially\n",
              "useful for big DataFrames and fine-tune memory optimization:\n",
              "\n",
              "&gt;&gt;&gt; random_strings_array = np.random.choice([&#x27;a&#x27;, &#x27;b&#x27;, &#x27;c&#x27;], 10 ** 6)\n",
              "&gt;&gt;&gt; df = pd.DataFrame({\n",
              "...     &#x27;column_1&#x27;: np.random.choice([&#x27;a&#x27;, &#x27;b&#x27;, &#x27;c&#x27;], 10 ** 6),\n",
              "...     &#x27;column_2&#x27;: np.random.choice([&#x27;a&#x27;, &#x27;b&#x27;, &#x27;c&#x27;], 10 ** 6),\n",
              "...     &#x27;column_3&#x27;: np.random.choice([&#x27;a&#x27;, &#x27;b&#x27;, &#x27;c&#x27;], 10 ** 6)\n",
              "... })\n",
              "&gt;&gt;&gt; df.info()\n",
              "&lt;class &#x27;pandas.core.frame.DataFrame&#x27;&gt;\n",
              "RangeIndex: 1000000 entries, 0 to 999999\n",
              "Data columns (total 3 columns):\n",
              " #   Column    Non-Null Count    Dtype\n",
              "---  ------    --------------    -----\n",
              " 0   column_1  1000000 non-null  object\n",
              " 1   column_2  1000000 non-null  object\n",
              " 2   column_3  1000000 non-null  object\n",
              "dtypes: object(3)\n",
              "memory usage: 22.9+ MB\n",
              "\n",
              "&gt;&gt;&gt; df.info(memory_usage=&#x27;deep&#x27;)\n",
              "&lt;class &#x27;pandas.core.frame.DataFrame&#x27;&gt;\n",
              "RangeIndex: 1000000 entries, 0 to 999999\n",
              "Data columns (total 3 columns):\n",
              " #   Column    Non-Null Count    Dtype\n",
              "---  ------    --------------    -----\n",
              " 0   column_1  1000000 non-null  object\n",
              " 1   column_2  1000000 non-null  object\n",
              " 2   column_3  1000000 non-null  object\n",
              "dtypes: object(3)\n",
              "memory usage: 165.9 MB</pre>\n",
              "      <script>\n",
              "      if (google.colab.kernel.accessAllowed && google.colab.files && google.colab.files.view) {\n",
              "        for (const element of document.querySelectorAll('.filepath')) {\n",
              "          element.style.display = 'block'\n",
              "          element.onclick = (event) => {\n",
              "            event.preventDefault();\n",
              "            event.stopPropagation();\n",
              "            google.colab.files.view(element.textContent, 3434);\n",
              "          };\n",
              "        }\n",
              "      }\n",
              "      for (const element of document.querySelectorAll('.function-repr-contents')) {\n",
              "        element.onclick = (event) => {\n",
              "          event.preventDefault();\n",
              "          event.stopPropagation();\n",
              "          element.classList.toggle('function-repr-contents-collapsed');\n",
              "        };\n",
              "      }\n",
              "      </script>\n",
              "      </div>"
            ]
          },
          "metadata": {},
          "execution_count": 64
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "int(df_train.iloc[0].human_ans_indices.split('(')[1].split(',')[0])"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "dDGXgO925wRW",
        "outputId": "96b23f6e-6300-4c92-9aac-74bc7377ad7e"
      },
      "execution_count": 68,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "251"
            ]
          },
          "metadata": {},
          "execution_count": 68
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "float(df_train.iloc[0].human_ans_indices.split('(')[1].split(',')[1].split(' ')[1].split(')')[0])"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "NCXUJLI09hjC",
        "outputId": "9f08eadf-f05c-4be8-b9c6-d2630b824462"
      },
      "execution_count": 67,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "265.0"
            ]
          },
          "metadata": {},
          "execution_count": 67
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Where the answers are\n",
        "df_train['answers']=df_train['human_ans_spans']\n",
        "# Actual answer text itself, right answer where should be\n",
        "df_test['answers']=df_test['human_ans_spans']"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_2AIdMOg_E9_",
        "outputId": "39e7aa3d-fe13-47d6-9312-ab26048a9cdb"
      },
      "execution_count": 70,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-70-612decbcb18c>:2: SettingWithCopyWarning: \n",
            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
            "Try using .loc[row_indexer,col_indexer] = value instead\n",
            "\n",
            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
            "  df_train['answers']=df_train['human_ans_spans']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Extract answer data and adds it to a new column\n",
        "for i in range(0,len(df_train)):\n",
        "  answer1={}\n",
        "  si=int(df_train.iloc[i].human_ans_indices.split('(')[1].split(',')[0])\n",
        "  ei=int(df_train.iloc[i].human_ans_indices.split('(')[1].split(',')[1].split(' ')[1].split(')')[0])\n",
        "  answer1['text']=[df_train.iloc[i].review[si:ei]]\n",
        "  answer1['answer_start']=[si]\n",
        "  df_train.at[i, 'answers']=answer1"
      ],
      "metadata": {
        "id": "NBY1eCL9-wNR"
      },
      "execution_count": 71,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(df_train.iloc[i].answers,df_train.iloc[i].human_ans_spans)"
      ],
      "metadata": {
        "id": "XEPxIoLK-yg_"
      },
      "execution_count": 72,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Same for the test data\n",
        "for i in range(0,len(df_test)):\n",
        "  answer1={}\n",
        "  si=int(df_test.iloc[i].human_ans_indices.split('(')[1].split(',')[0])\n",
        "  ei=int(df_test.iloc[i].human_ans_indices.split('(')[1].split(',')[1].split(' ')[1].split(')')[0])\n",
        "  answer1['text']=[df_test.iloc[i].review[si:ei]]\n",
        "  answer1['answer_start']=[si]\n",
        "  df_test.at[i, 'answers']=answer1"
      ],
      "metadata": {
        "id": "M5vNLyBe_Oac"
      },
      "execution_count": 73,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(df_train.iloc[i].answers,df_train.iloc[i].human_ans_spans)"
      ],
      "metadata": {
        "id": "FDsJrxf5_SPi"
      },
      "execution_count": 74,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "df_train.columns"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "QZMKSb7d_UG1",
        "outputId": "f71e43ec-6df8-43e9-a5c3-1f1f04bd5a64"
      },
      "execution_count": 75,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Index(['question', 'human_ans_indices', 'review', 'human_ans_spans', 'id',\n",
              "       'answers'],\n",
              "      dtype='object')"
            ]
          },
          "metadata": {},
          "execution_count": 75
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Standardizing the columns for clarity (context)\n",
        "df_train.columns=['question', 'human_ans_indices', 'context', 'human_ans_spans', 'id',\n",
        "       'answers']\n",
        "\n",
        "df_test.columns=['question', 'human_ans_indices', 'context', 'human_ans_spans','id',\n",
        "       'answers']"
      ],
      "metadata": {
        "id": "dAWno84T_zo0"
      },
      "execution_count": 76,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Creating Datasets from Pandas DataFrames for Validation and Training\n",
        "val_dataset2 = datasets.Dataset.from_pandas(df_test)\n",
        "train_dataset2 = datasets.Dataset.from_pandas(df_train)"
      ],
      "metadata": {
        "id": "f_m7XTH4_1ez"
      },
      "execution_count": 78,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Preprocess the training examples .map() function on training dataset with the preprocessing function\n",
        "train_dataset = train_dataset2.map(\n",
        "    preprocess_training_examples,\n",
        "    batched=True,\n",
        "    remove_columns=train_dataset2.column_names,\n",
        ")\n",
        "len(train_dataset2), len(train_dataset) # compare the lengths of the original dataset (train_dataset2) and the preprocessed dataset (train_dataset)."
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 67,
          "referenced_widgets": [
            "33ba4c1c6ba046b78c6c0e5ef987de2a",
            "3dd07c7fd25f4a0cac9c6d79bf312c5b",
            "68d6df7a72b84174a216d6804a422c00",
            "02857e625d664d748f0411dbd9307285",
            "adfc7c2afddb4979bd6e30d450d4efb6",
            "cbf6d52e02bb47c8a4c73208c8cf658a",
            "20b683d9910440f882ca10ba0837c545",
            "a39c78323e05487aa6c94e3c67b1d136",
            "27ee050eee1f479b8cd328158e52e497",
            "50478538cf9947c5a760c538eaeb6f46",
            "735906f0d31049e4af3fae4f2462cc05"
          ]
        },
        "id": "ShRJOEGxAgU0",
        "outputId": "8db75a97-189a-4bdc-b64a-5f52ab020729"
      },
      "execution_count": 81,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Map:   0%|          | 0/2501 [00:00<?, ? examples/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "33ba4c1c6ba046b78c6c0e5ef987de2a"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(2501, 4862)"
            ]
          },
          "metadata": {},
          "execution_count": 81
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "It shows that all 2501 examples were processed in 10 seconds at a speed of 260.48 examples per second. The resulting dataset has 4862 examples."
      ],
      "metadata": {
        "id": "rWUvAhSNDS2U"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "train_dataset2.shape"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "pNx3qxpADXaY",
        "outputId": "1de1f758-d574-4c6a-e22f-725c634c59f4"
      },
      "execution_count": 82,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(2501, 6)"
            ]
          },
          "metadata": {},
          "execution_count": 82
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "def preprocess_validation_examples(examples):\n",
        "    questions = [q.strip() for q in examples[\"question\"]] # Cleaning the questions by stripping leading and trailing whitespace for consistency\n",
        "    inputs = tokenizer( # Tokenization; converting questions and contexts into numerical IDs, enabling the model to understand\n",
        "        questions,\n",
        "        examples[\"context\"],\n",
        "        max_length=max_length, # Total length of the input sequence\n",
        "        truncation=\"only_second\", # If the total length exceeds max_length, only the context will be truncated\n",
        "        stride=stride, # Overlap between the chunks\n",
        "        return_overflowing_tokens=True,\n",
        "        return_offsets_mapping=True,\n",
        "        padding=\"max_length\",\n",
        "    )\n",
        "\n",
        "    sample_map = inputs.pop(\"overflow_to_sample_mapping\")\n",
        "    example_ids = []\n",
        "\n",
        "    for i in range(len(inputs[\"input_ids\"])):\n",
        "        sample_idx = sample_map[i]\n",
        "        example_ids.append(examples[\"id\"][sample_idx])\n",
        "\n",
        "        sequence_ids = inputs.sequence_ids(i)\n",
        "        offset = inputs[\"offset_mapping\"][i]\n",
        "        inputs[\"offset_mapping\"][i] = [\n",
        "            o if sequence_ids[k] == 1 else None for k, o in enumerate(offset)\n",
        "        ]\n",
        "\n",
        "    inputs[\"example_id\"] = example_ids\n",
        "    return inputs"
      ],
      "metadata": {
        "id": "FMNSy4J6DaWF"
      },
      "execution_count": 83,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!git init"
      ],
      "metadata": {
        "id": "XrPqAtnkFL-v"
      },
      "execution_count": 84,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!git add Capstone-1-SubjQATransformer.ipynb"
      ],
      "metadata": {
        "id": "3omq_wy0FMoS"
      },
      "execution_count": 85,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "ZbeWMl5GFRdm"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}