{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "7b073d9c166a45539c651ecd0b27285c": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_ffba5f49ba214984b8329b4e06e40dcf", "IPY_MODEL_e69b98b68527488cbb7c1ee5f6011a92", "IPY_MODEL_983145e29ab84636b1e266c73eeb6889" ], "layout": "IPY_MODEL_278c2c021faf4f2696c03070fc3511bf" } }, "ffba5f49ba214984b8329b4e06e40dcf": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3e9df4ac699843df8ae361aba1337b06", "placeholder": "​", "style": "IPY_MODEL_8e9cc6914fc14e56945716902fb11fe9", "value": "tokenizer_config.json: 100%" } }, "e69b98b68527488cbb7c1ee5f6011a92": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", 
"description_tooltip": null, "layout": "IPY_MODEL_0ca945d6f8a5457d9706c211869983c8", "max": 905, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_69cf2c7a4ecf4c3188586c86c7056518", "value": 905 } }, "983145e29ab84636b1e266c73eeb6889": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1d60ac5b02de4cc7b0d51ed57165ff64", "placeholder": "​", "style": "IPY_MODEL_e34074fc893d47b8bf75a8089cd8df37", "value": " 905/905 [00:00<00:00, 32.4kB/s]" } }, "278c2c021faf4f2696c03070fc3511bf": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, 
"3e9df4ac699843df8ae361aba1337b06": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8e9cc6914fc14e56945716902fb11fe9": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "0ca945d6f8a5457d9706c211869983c8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": 
null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "69cf2c7a4ecf4c3188586c86c7056518": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "1d60ac5b02de4cc7b0d51ed57165ff64": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": 
null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e34074fc893d47b8bf75a8089cd8df37": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "26cb34af09544ea1b17526891fd319e3": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_71627d67c8ec4993a8b1e4a93d53e8db", "IPY_MODEL_ec11e5b722dd4d69bcd758775edd61b6", "IPY_MODEL_63f760d16a1249b6a80f7a73989299ac" ], "layout": "IPY_MODEL_ba3dae52e6bc4a48945ebde7464a5277" } }, "71627d67c8ec4993a8b1e4a93d53e8db": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e914f0b293b4410a82e02f990b8a9a1d", "placeholder": "​", "style": "IPY_MODEL_7e18f5165da84f60bea9d9a1819e17df", "value": "vocab.json: 
100%" } }, "ec11e5b722dd4d69bcd758775edd61b6": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_ba1b3dc7d77b4fee9addea98f60277bc", "max": 961143, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_b40d206858bd46e19ef99aa1de302060", "value": 961143 } }, "63f760d16a1249b6a80f7a73989299ac": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_92113b912eea41b4b2bfadd3f65cd835", "placeholder": "​", "style": "IPY_MODEL_bfca51dacc6a4762810309006fc6eca5", "value": " 961k/961k [00:00<00:00, 5.41MB/s]" } }, "ba3dae52e6bc4a48945ebde7464a5277": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, 
"grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e914f0b293b4410a82e02f990b8a9a1d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7e18f5165da84f60bea9d9a1819e17df": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": 
"1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ba1b3dc7d77b4fee9addea98f60277bc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b40d206858bd46e19ef99aa1de302060": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "92113b912eea41b4b2bfadd3f65cd835": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": 
"1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bfca51dacc6a4762810309006fc6eca5": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "a7a66681148748ae94788ee276925bec": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_438a2db97b8649cfbd5b6d87aeb4b13a", "IPY_MODEL_8608253bb3cc4e93a379b05648e1e35c", "IPY_MODEL_aa84ccf805b3476ea155e64ed31edef6" ], "layout": "IPY_MODEL_851b1acc7d524ec1aabb04012c469432" } }, "438a2db97b8649cfbd5b6d87aeb4b13a": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", 
"model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2277a0b29cd841a2a1ad5526f39c28a6", "placeholder": "​", "style": "IPY_MODEL_5f7a4ea2308d4cb2b6e4a0f2f925e2b1", "value": "merges.txt: 100%" } }, "8608253bb3cc4e93a379b05648e1e35c": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_dc23a6ac81894fe484247273be9d6a83", "max": 524619, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_d37f42e8924a4b0fafd2e407b1db865b", "value": 524619 } }, "aa84ccf805b3476ea155e64ed31edef6": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_03fa9da8e5084d688bc7b0f4d8ba3fb5", "placeholder": "​", "style": "IPY_MODEL_613fa346a05441828356976ea941a7c7", "value": " 525k/525k [00:00<00:00, 13.6MB/s]" } }, "851b1acc7d524ec1aabb04012c469432": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { 
"_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2277a0b29cd841a2a1ad5526f39c28a6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": 
null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5f7a4ea2308d4cb2b6e4a0f2f925e2b1": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "dc23a6ac81894fe484247273be9d6a83": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d37f42e8924a4b0fafd2e407b1db865b": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "03fa9da8e5084d688bc7b0f4d8ba3fb5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "613fa346a05441828356976ea941a7c7": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f580a1015d334b398d024357adf898cf": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": 
"1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_866f75c147c747f5b9f3d2993440823e", "IPY_MODEL_fe4842b3b96a4c1db17abe9a90e026c3", "IPY_MODEL_69561cfd572c4e0fbc107e25a1e50654" ], "layout": "IPY_MODEL_00bc783a0f344176af0a760569c25ffd" } }, "866f75c147c747f5b9f3d2993440823e": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_dd0a347823244e3a8dbd2f8005f378ac", "placeholder": "​", "style": "IPY_MODEL_84b3abe67c4745fab0d67831a7a3c3d7", "value": "tokenizer.json: 100%" } }, "fe4842b3b96a4c1db17abe9a90e026c3": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e4bf0e2ea3904cc68b5dcdabf38169b4", "max": 2224003, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_8f338304e7e44b5f90c156dddc1a129b", "value": 2224003 } }, "69561cfd572c4e0fbc107e25a1e50654": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c314dafdd3e1472789af4307ed170085", "placeholder": "​", "style": "IPY_MODEL_3311af848aac451e8e9b21fae5e89422", "value": " 2.22M/2.22M [00:00<00:00, 16.3MB/s]" } }, "00bc783a0f344176af0a760569c25ffd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "dd0a347823244e3a8dbd2f8005f378ac": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": 
null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "84b3abe67c4745fab0d67831a7a3c3d7": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e4bf0e2ea3904cc68b5dcdabf38169b4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, 
"min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8f338304e7e44b5f90c156dddc1a129b": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "c314dafdd3e1472789af4307ed170085": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3311af848aac451e8e9b21fae5e89422": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f5dbb938e5f542ef8f2a4a92c98b6ed7": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_ee0d678d5b694781a419c4277306204b", "IPY_MODEL_c2d90be349784a16975a07e8aace7567", "IPY_MODEL_a7e7ba1a8e954d71a27210b91456c760" ], "layout": "IPY_MODEL_3b341c3a37414a10832b1dc9fad77298" } }, "ee0d678d5b694781a419c4277306204b": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2790e327bfd94b66b1ef411489f21d6f", "placeholder": "​", "style": "IPY_MODEL_88a5e37f2eb24ded9e3ec4a7835c34c4", "value": "special_tokens_map.json: 100%" } }, "c2d90be349784a16975a07e8aace7567": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", 
"description_tooltip": null, "layout": "IPY_MODEL_b541e9cead9d43179b72d3f966d156bc", "max": 389, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_9d8b4e12f6fa478eba0624457d8b7804", "value": 389 } }, "a7e7ba1a8e954d71a27210b91456c760": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_21311ed9e97e42eab18d6eb58497f125", "placeholder": "​", "style": "IPY_MODEL_65c49dcd741b4101a349752fb9b2db49", "value": " 389/389 [00:00<00:00, 5.78kB/s]" } }, "3b341c3a37414a10832b1dc9fad77298": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, 
"2790e327bfd94b66b1ef411489f21d6f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "88a5e37f2eb24ded9e3ec4a7835c34c4": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b541e9cead9d43179b72d3f966d156bc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": 
null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9d8b4e12f6fa478eba0624457d8b7804": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "21311ed9e97e42eab18d6eb58497f125": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": 
null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "65c49dcd741b4101a349752fb9b2db49": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "markdown", "source": [ "This notebook is a Stable Diffusion tool that lets you find similar tokens from the SD 1.5 vocab.json that you can use for text-to-image generation." ], "metadata": { "id": "L7JTcbOdBPfh" } }, { "cell_type": "code", "source": [ "# Load the tokens into the colab\n", "!git clone https://huggingface.co/datasets/codeShare/sd_tokens\n", "import torch\n", "from torch import linalg as LA\n", "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", "%cd /content/sd_tokens\n", "token = torch.load('sd15_tensors.pt', map_location=device, weights_only=True)\n", "#-----#\n", "\n", "#Import the vocab.json\n", "import json\n", "import pandas as pd\n", "with open('vocab.json', 'r') as f:\n", " data = json.load(f)\n", "\n", "_df = pd.DataFrame({'count': data})['count']\n", "\n", "vocab = {\n", " value: key for key, value in _df.items()\n", "}\n", "#-----#\n", "\n", "# Define functions/constants\n", "NUM_TOKENS = 49407\n", "\n", "def absolute_value(x):\n", " return max(x, -x)\n", "\n", "def similarity(id_A , id_B):\n", " #Tensors\n", " A = token[id_A]\n", " B = token[id_B]\n", " #Tensor vector length (2nd order, i.e (a^2 + b^2 + ....)^(1/2)\n", " _A = 
LA.vector_norm(A, ord=2)\n", " _B = LA.vector_norm(B, ord=2)\n", " #----#\n", " result = torch.dot(A,B)/(_A*_B)\n", " similarity_pcnt = absolute_value(result.item()*100)\n", " similarity_pcnt_aprox = round(similarity_pcnt, 3)\n", " result = f'{similarity_pcnt_aprox} %'\n", " return result\n", "#----#\n", "\n", "mix_with = \"\"\n", "mix_method = \"None\"" ], "metadata": { "id": "Ch9puvwKH1s3", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "46175dd6-a875-4208-a128-fddab8752406" }, "execution_count": 1, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Cloning into 'sd_tokens'...\n", "remote: Enumerating objects: 7, done.\u001b[K\n", "remote: Counting objects: 100% (4/4), done.\u001b[K\n", "remote: Compressing objects: 100% (4/4), done.\u001b[K\n", "remote: Total 7 (delta 0), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n", "Unpacking objects: 100% (7/7), 305.93 KiB | 5.67 MiB/s, done.\n", "/content/sd_tokens\n" ] } ] }, { "cell_type": "code", "source": [ "print(vocab[8922]) #the vocab item for ID 8922\n", "print(token[8922].shape) #dimension of the token" ], "metadata": { "id": "S_Yh9gH_OUA1" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Get the token IDs for a prompt text.\n", "\n", "The tokenizer encloses the prompt in the `<|startoftext|>` and `<|endoftext|>` tokens." ], "metadata": { "id": "f1-jS7YJApiO" } }, { "cell_type": "code", "source": [ "from transformers import AutoTokenizer\n", "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n", "prompt= \"banana\" # @param {type:'string'}\n", "tokenizer_output = tokenizer(text = prompt)\n", "input_ids = tokenizer_output['input_ids']\n", "print(input_ids)\n", "id_A = input_ids[1]\n", "A = token[id_A]\n", "_A = LA.vector_norm(A, ord=2)" ], "metadata": { "id": "RPdkYzT2_X85", "colab": { "base_uri": "https://localhost:8080/", "height": 194, "referenced_widgets": [ 
"7b073d9c166a45539c651ecd0b27285c", "ffba5f49ba214984b8329b4e06e40dcf", "e69b98b68527488cbb7c1ee5f6011a92", "983145e29ab84636b1e266c73eeb6889", "278c2c021faf4f2696c03070fc3511bf", "3e9df4ac699843df8ae361aba1337b06", "8e9cc6914fc14e56945716902fb11fe9", "0ca945d6f8a5457d9706c211869983c8", "69cf2c7a4ecf4c3188586c86c7056518", "1d60ac5b02de4cc7b0d51ed57165ff64", "e34074fc893d47b8bf75a8089cd8df37", "26cb34af09544ea1b17526891fd319e3", "71627d67c8ec4993a8b1e4a93d53e8db", "ec11e5b722dd4d69bcd758775edd61b6", "63f760d16a1249b6a80f7a73989299ac", "ba3dae52e6bc4a48945ebde7464a5277", "e914f0b293b4410a82e02f990b8a9a1d", "7e18f5165da84f60bea9d9a1819e17df", "ba1b3dc7d77b4fee9addea98f60277bc", "b40d206858bd46e19ef99aa1de302060", "92113b912eea41b4b2bfadd3f65cd835", "bfca51dacc6a4762810309006fc6eca5", "a7a66681148748ae94788ee276925bec", "438a2db97b8649cfbd5b6d87aeb4b13a", "8608253bb3cc4e93a379b05648e1e35c", "aa84ccf805b3476ea155e64ed31edef6", "851b1acc7d524ec1aabb04012c469432", "2277a0b29cd841a2a1ad5526f39c28a6", "5f7a4ea2308d4cb2b6e4a0f2f925e2b1", "dc23a6ac81894fe484247273be9d6a83", "d37f42e8924a4b0fafd2e407b1db865b", "03fa9da8e5084d688bc7b0f4d8ba3fb5", "613fa346a05441828356976ea941a7c7", "f580a1015d334b398d024357adf898cf", "866f75c147c747f5b9f3d2993440823e", "fe4842b3b96a4c1db17abe9a90e026c3", "69561cfd572c4e0fbc107e25a1e50654", "00bc783a0f344176af0a760569c25ffd", "dd0a347823244e3a8dbd2f8005f378ac", "84b3abe67c4745fab0d67831a7a3c3d7", "e4bf0e2ea3904cc68b5dcdabf38169b4", "8f338304e7e44b5f90c156dddc1a129b", "c314dafdd3e1472789af4307ed170085", "3311af848aac451e8e9b21fae5e89422", "f5dbb938e5f542ef8f2a4a92c98b6ed7", "ee0d678d5b694781a419c4277306204b", "c2d90be349784a16975a07e8aace7567", "a7e7ba1a8e954d71a27210b91456c760", "3b341c3a37414a10832b1dc9fad77298", "2790e327bfd94b66b1ef411489f21d6f", "88a5e37f2eb24ded9e3ec4a7835c34c4", "b541e9cead9d43179b72d3f966d156bc", "9d8b4e12f6fa478eba0624457d8b7804", "21311ed9e97e42eab18d6eb58497f125", "65c49dcd741b4101a349752fb9b2db49" ] }, "outputId": 
"4c2d8236-8af4-4d96-e4f0-c6a4da2d0da2" }, "execution_count": 2, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "tokenizer_config.json: 0%| | 0.00/905 [00:00 with ID = 8922 the rest of the 49407 tokens as a 1x49407 tensor\n" ] } ] }, { "cell_type": "markdown", "source": [ "Print the sorted list from above result" ], "metadata": { "id": "y-Ig3glrVQC3" } }, { "cell_type": "code", "source": [ "list_size = 100 # @param {type:'number'}\n", "\n", "print_ID = False # @param {type:\"boolean\"}\n", "print_Similarity = True # @param {type:\"boolean\"}\n", "print_Name = True # @param {type:\"boolean\"}\n", "print_Divider = True # @param {type:\"boolean\"}\n", "\n", "for index in range(list_size):\n", " id = indices[index].item()\n", " if (print_Name):\n", " print(f'{vocab[id]}') # vocab item\n", " if (print_ID):\n", " print(f'ID = {id}') # IDs\n", " if (print_Similarity):\n", " print(f'similiarity = {round(sorted[index].item()*100,2)} %') # % value\n", " if (print_Divider):\n", " print('--------')" ], "metadata": { "id": "YIEmLAzbHeuo", "colab": { "base_uri": "https://localhost:8080/" }, "collapsed": true, "outputId": "a03b49b8-a6ec-457b-f78e-be3cb3029eae" }, "execution_count": 7, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "banana\n", "similiarity = 100.0 %\n", "--------\n", "bananas\n", "similiarity = 38.93 %\n", "--------\n", "banan\n", "similiarity = 30.8 %\n", "--------\n", "ðŁįĮ\n", "similiarity = 27.12 %\n", "--------\n", "pineapple\n", "similiarity = 19.7 %\n", "--------\n", "chicken\n", "similiarity = 19.24 %\n", "--------\n", "potassium\n", "similiarity = 19.21 %\n", "--------\n", "sausage\n", "similiarity = 19.07 %\n", "--------\n", "lemon\n", "similiarity = 18.82 %\n", "--------\n", "orange\n", "similiarity = 18.42 %\n", "--------\n", "peanut\n", "similiarity = 17.84 %\n", "--------\n", "parachute\n", "similiarity = 17.19 %\n", "--------\n", "duck\n", "similiarity = 16.8 %\n", "--------\n", "yellow\n", "similiarity = 
16.21 %\n", "--------\n", "grape\n", "similiarity = 16.19 %\n", "--------\n", "kangaroo\n", "similiarity = 16.13 %\n", "--------\n", "apple\n", "similiarity = 16.13 %\n", "--------\n", "tangerine\n", "similiarity = 16.08 %\n", "--------\n", "giraffe\n", "similiarity = 16.04 %\n", "--------\n", "mango\n", "similiarity = 16.03 %\n", "--------\n", "rubber\n", "similiarity = 15.95 %\n", "--------\n", "bamboo\n", "similiarity = 15.88 %\n", "--------\n", "umbrella\n", "similiarity = 15.82 %\n", "--------\n", "nutella\n", "similiarity = 15.69 %\n", "--------\n", "ferrari\n", "similiarity = 15.69 %\n", "--------\n", "oranges\n", "similiarity = 15.65 %\n", "--------\n", "peanuts\n", "similiarity = 15.62 %\n", "--------\n", "ali\n", "similiarity = 15.49 %\n", "--------\n", "dae\n", "similiarity = 15.45 %\n", "--------\n", "apd\n", "similiarity = 15.43 %\n", "--------\n", "cucumber\n", "similiarity = 15.32 %\n", "--------\n", "potato\n", "similiarity = 15.22 %\n", "--------\n", "monkey\n", "similiarity = 15.2 %\n", "--------\n", "croissant\n", "similiarity = 15.18 %\n", "--------\n", "papaya\n", "similiarity = 15.17 %\n", "--------\n", "christmas\n", "similiarity = 15.12 %\n", "--------\n", "sandwich\n", "similiarity = 15.0 %\n", "--------\n", "rainbow\n", "similiarity = 14.98 %\n", "--------\n", "tomato\n", "similiarity = 14.96 %\n", "--------\n", "martini\n", "similiarity = 14.93 %\n", "--------\n", "cabaret\n", "similiarity = 14.83 %\n", "--------\n", "ginger\n", "similiarity = 14.82 %\n", "--------\n", "µ\n", "similiarity = 14.8 %\n", "--------\n", "animal\n", "similiarity = 14.76 %\n", "--------\n", "vanilla\n", "similiarity = 14.73 %\n", "--------\n", "ells\n", "similiarity = 14.66 %\n", "--------\n", "mustache\n", "similiarity = 14.64 %\n", "--------\n", "lime\n", "similiarity = 14.62 %\n", "--------\n", "sickle\n", "similiarity = 14.6 %\n", "--------\n", "vista\n", "similiarity = 14.53 %\n", "--------\n", "coconut\n", "similiarity = 14.52 %\n", "--------\n", "kara\n", 
"similiarity = 14.46 %\n", "--------\n", "alligator\n", "similiarity = 14.39 %\n", "--------\n", "blueberry\n", "similiarity = 14.34 %\n", "--------\n", "ðŁĻĤ\n", "similiarity = 14.3 %\n", "--------\n", "squirrel\n", "similiarity = 14.29 %\n", "--------\n", "atore\n", "similiarity = 14.19 %\n", "--------\n", "watermelon\n", "similiarity = 14.13 %\n", "--------\n", "nana\n", "similiarity = 14.09 %\n", "--------\n", "latex\n", "similiarity = 14.08 %\n", "--------\n", "reas\n", "similiarity = 14.07 %\n", "--------\n", "agricultural\n", "similiarity = 14.02 %\n", "--------\n", "davis\n", "similiarity = 14.0 %\n", "--------\n", "zucchini\n", "similiarity = 14.0 %\n", "--------\n", "saxophone\n", "similiarity = 13.93 %\n", "--------\n", "rough\n", "similiarity = 13.92 %\n", "--------\n", "mozzarella\n", "similiarity = 13.91 %\n", "--------\n", "eggplant\n", "similiarity = 13.9 %\n", "--------\n", "pickle\n", "similiarity = 13.89 %\n", "--------\n", "tortilla\n", "similiarity = 13.88 %\n", "--------\n", "maniac\n", "similiarity = 13.84 %\n", "--------\n", "milk\n", "similiarity = 13.83 %\n", "--------\n", "ylde\n", "similiarity = 13.82 %\n", "--------\n", "cellphone\n", "similiarity = 13.78 %\n", "--------\n", "duck\n", "similiarity = 13.73 %\n", "--------\n", "finely\n", "similiarity = 13.72 %\n", "--------\n", "umbrel\n", "similiarity = 13.71 %\n", "--------\n", "fanny\n", "similiarity = 13.69 %\n", "--------\n", "navis\n", "similiarity = 13.68 %\n", "--------\n", "twister\n", "similiarity = 13.67 %\n", "--------\n", "moustache\n", "similiarity = 13.66 %\n", "--------\n", "manafort\n", "similiarity = 13.66 %\n", "--------\n", "nod\n", "similiarity = 13.62 %\n", "--------\n", "dros\n", "similiarity = 13.6 %\n", "--------\n", "grapefruit\n", "similiarity = 13.6 %\n", "--------\n", "broom\n", "similiarity = 13.59 %\n", "--------\n", "scorpion\n", "similiarity = 13.59 %\n", "--------\n", "د\n", "similiarity = 13.58 %\n", "--------\n", "fruit\n", "similiarity = 13.57 %\n", 
"--------\n", "dv\n", "similiarity = 13.55 %\n", "--------\n", "agan\n", "similiarity = 13.53 %\n", "--------\n", "sunflower\n", "similiarity = 13.49 %\n", "--------\n", "banc\n", "similiarity = 13.46 %\n", "--------\n", "literature\n", "similiarity = 13.45 %\n", "--------\n", "pelican\n", "similiarity = 13.43 %\n", "--------\n", "breakfast\n", "similiarity = 13.42 %\n", "--------\n", "pear\n", "similiarity = 13.42 %\n", "--------\n", "gri\n", "similiarity = 13.42 %\n", "--------\n", "orange\n", "similiarity = 13.4 %\n", "--------\n", "monet\n", "similiarity = 13.4 %\n", "--------\n" ] } ] }, { "cell_type": "markdown", "source": [ "Find the most similiar Tokens for given input" ], "metadata": { "id": "qqZ5DvfLBJnw" } }, { "cell_type": "markdown", "source": [ "Valid ID ranges for id_for_token_A / id_for_token_B are between 0 and 49407" ], "metadata": { "id": "kX72bAuhOtlT" } }, { "cell_type": "code", "source": [ "id_for_token_A = 4567 # @param {type:'number'}\n", "id_for_token_B = 4343 # @param {type:'number'}\n", "\n", "similarity_str = 'The similarity between tokens A and B is ' + similarity(id_for_token_A , id_for_token_B)\n", "\n", "print(similarity_str)" ], "metadata": { "id": "MwmOdC9cNZty" }, "execution_count": null, "outputs": [] } ] }