diff --git "a/wav2vec2/wav2vec2-xls-r-ukrainian-cv-10.ipynb" "b/wav2vec2/wav2vec2-xls-r-ukrainian-cv-10.ipynb"
new file mode 100644
--- /dev/null
+++ "b/wav2vec2/wav2vec2-xls-r-ukrainian-cv-10.ipynb"
@@ -0,0 +1,4969 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "executionInfo": {
+ "elapsed": 829,
+ "status": "ok",
+ "timestamp": 1641588786523,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "YELVqGxMxnbG",
+ "outputId": "876761c1-2e03-411b-e61b-07ac4ad61377"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fri Sep 2 01:31:23 2022 \n",
+ "+-----------------------------------------------------------------------------+\n",
+ "| NVIDIA-SMI 515.65.01 Driver Version: 515.65.01 CUDA Version: 11.7 |\n",
+ "|-------------------------------+----------------------+----------------------+\n",
+ "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
+ "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
+ "| | | MIG M. |\n",
+ "|===============================+======================+======================|\n",
+ "| 0 NVIDIA GeForce ... Off | 00000000:0A:00.0 On | N/A |\n",
+ "| 0% 35C P5 52W / 390W | 1231MiB / 24576MiB | 34% Default |\n",
+ "| | | N/A |\n",
+ "+-------------------------------+----------------------+----------------------+\n",
+ " \n",
+ "+-----------------------------------------------------------------------------+\n",
+ "| Processes: |\n",
+ "| GPU GI CI PID Type Process name GPU Memory |\n",
+ "| ID ID Usage |\n",
+ "|=============================================================================|\n",
+ "| 0 N/A N/A 1216 G /usr/lib/xorg/Xorg 485MiB |\n",
+ "| 0 N/A N/A 1601 G /usr/bin/kwin_x11 97MiB |\n",
+ "| 0 N/A N/A 1650 G /usr/bin/plasmashell 64MiB |\n",
+ "| 0 N/A N/A 1747 G telegram-desktop 4MiB |\n",
+ "| 0 N/A N/A 4701 G ...5/usr/lib/firefox/firefox 175MiB |\n",
+ "| 0 N/A N/A 804722 G ...RendererForSitePerProcess 363MiB |\n",
+ "| 0 N/A N/A 867357 G ...996071496053229024,131072 35MiB |\n",
+ "+-----------------------------------------------------------------------------+\n"
+ ]
+ }
+ ],
+ "source": [
+ "gpu_info = !nvidia-smi\n",
+ "gpu_info = '\\n'.join(gpu_info)\n",
+ "if gpu_info.find('failed') >= 0:\n",
+ " print('Not connected to a GPU')\n",
+ "else:\n",
+ " print(gpu_info)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "id": "c8eh87Hoee5d"
+ },
+ "outputs": [],
+ "source": [
+ "#%%capture\n",
+ "#!pip install datasets==1.13.3\n",
+ "#!pip install transformers==4.11.3\n",
+ "#!pip install huggingface_hub==0.1\n",
+ "#!pip install torchaudio==0.10.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html\n",
+ "#!pip install jiwer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "executionInfo": {
+ "elapsed": 5334,
+ "status": "ok",
+ "timestamp": 1641588811766,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "2MMXcWFFgCXU",
+ "outputId": "be9fd72e-4395-4cd0-ff87-631dad046e71"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Reusing dataset common_voice_10_0 (/home/robinhad/.cache/huggingface/datasets/mozilla-foundation___common_voice_10_0/uk/10.0.0/27df768ab1b5cac48a7616f145b79b62599167b0ffa2e054bf4c3e74e9619e5e)\n",
+ "Reusing dataset common_voice_10_0 (/home/robinhad/.cache/huggingface/datasets/mozilla-foundation___common_voice_10_0/uk/10.0.0/27df768ab1b5cac48a7616f145b79b62599167b0ffa2e054bf4c3e74e9619e5e)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from datasets import load_dataset, load_metric, Audio\n",
+ "\n",
+ "common_voice_train = load_dataset(\"mozilla-foundation/common_voice_10_0\", \"uk\", split=\"train\", use_auth_token=True)\n",
+ "common_voice_test = load_dataset(\"mozilla-foundation/common_voice_10_0\", \"uk\", split=\"test\", use_auth_token=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Dataset({\n",
+ " features: ['client_id', 'path', 'audio', 'sentence', 'up_votes', 'down_votes', 'age', 'gender', 'accent', 'locale', 'segment'],\n",
+ " num_rows: 11463\n",
+ "})"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "common_voice_train"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "common_voice_train.cleanup_cache_files()\n",
+ "common_voice_test.cleanup_cache_files()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "id": "kbyq6lDgQc2a"
+ },
+ "outputs": [],
+ "source": [
+ "common_voice_train = common_voice_train.remove_columns([\"accent\", \"age\", \"client_id\", \"down_votes\", \"gender\", \"locale\", \"segment\", \"up_votes\"])\n",
+ "common_voice_test = common_voice_test.remove_columns([\"accent\", \"age\", \"client_id\", \"down_votes\", \"gender\", \"locale\", \"segment\", \"up_votes\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "id": "72737oog2F6U"
+ },
+ "outputs": [],
+ "source": [
+ "from datasets import ClassLabel\n",
+ "import random\n",
+ "import pandas as pd\n",
+ "from IPython.display import display, HTML\n",
+ "\n",
+ "def show_random_elements(dataset, num_examples=10):\n",
+ " assert num_examples <= len(dataset), \"Can't pick more elements than there are in the dataset.\"\n",
+ " picks = []\n",
+ " for _ in range(num_examples):\n",
+ " pick = random.randint(0, len(dataset)-1)\n",
+ " while pick in picks:\n",
+ " pick = random.randint(0, len(dataset)-1)\n",
+ " picks.append(pick)\n",
+ " \n",
+ " df = pd.DataFrame(dataset[picks])\n",
+ " display(HTML(df.to_html()))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 363
+ },
+ "executionInfo": {
+ "elapsed": 39,
+ "status": "ok",
+ "timestamp": 1641588811771,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "K_JUmf3G3b9S",
+ "outputId": "8603c909-09e1-43ae-f7c2-b27b25d795a3"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sentence | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Вона нас не лякає. | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Бейнбрідж затримався, готуючи екіпажі, й фактично не встиг узяти участі у війні. | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " А тепер. | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Наші \"будьонівці\", ніби з цікавості, зібралися й оточили червоні шеренги. | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Серед квітів я вмирав, Серед хмар я воскресав. | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Сьогодні виробництво полімерів найбільша галузь хімічної промисловості. | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Хмельницький заплатив за все на цілий рік наперед. | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Соловій же залишився підпалити бікфордів шнур. | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Тоді його слово буде хвилювати, захоплювати, піднімати людську душу. | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Тут були яблуні, сливи, вишні, — вишень найбільше. | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "show_random_elements(common_voice_train.remove_columns([\"path\", \"audio\"]), num_examples=10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "executionInfo": {
+ "elapsed": 30,
+ "status": "ok",
+ "timestamp": 1641588811775,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "XIHocAuTQbBR",
+ "outputId": "e8392853-e0d1-45ba-df74-065c50565654"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.00995326042175293,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 12,
+ "unit": "ba",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "4c0c81459dfb4ede8f0ec6fe25a0807e",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/12 [00:00, ?ba/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007294893264770508,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 7,
+ "unit": "ba",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "66c16ae632444339ae8ec80070398586",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/7 [00:00, ?ba/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "filter_func = lambda x: not (\"joki\" in x or \"ы\" in x)\n",
+ "common_voice_train = common_voice_train.filter(filter_func, input_columns=[\"sentence\"])\n",
+ "common_voice_test = common_voice_test.filter(filter_func, input_columns=[\"sentence\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "id": "ZcVsD0ETElrR"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'sentence': \"привіт як у тебе справи загалом м'якотілий друже\"}"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def cleaner(batch):\n",
+ " replace_as_space = \"!:;,—…–“”?\\\"«»\"\n",
+ " special_words = {\n",
+ " \"ХIХ\": \"дев'ятнадцятого\",\n",
+ " \"Linux\": \"Лінукс\",\n",
+ " \"Maace\": \"Маасе\",\n",
+ " \"м 'ясо\": \"м'ясо\",\n",
+ " \"'іде\": \"іде\",\n",
+ " \"Д'Аламбер\": \"даламбер\",\n",
+ " \" - \": \" \",\n",
+ " \"--\": \" \",\n",
+ " \"....\": \" \",\n",
+ " \"...\": \" \",\n",
+ " \"..\": \" \",\n",
+ " \" '\": \" \",\n",
+ " \"О'\": \"о\",\n",
+ " \"-\": \"\" #further check needed\n",
+ " }\n",
+ " # check abbreviations later\n",
+ " abbreviations = {\n",
+ " 'ЧК': \"чека\",\n",
+ " 'ҐПУ': \"ґепеу\",\n",
+ " 'ЄС.': \"єес\",\n",
+ " 'УНР': \"уенер\",\n",
+ " 'ДТП.': \"детепе\",\n",
+ " 'РНБО': \"еренбео\",\n",
+ " 'СРСР': \"есересер\",\n",
+ " 'ДБР': \"дебеер\",\n",
+ " 'КП': \"капе\",\n",
+ " 'ОС': \"оес\",\n",
+ " } \n",
+ " chars_dict = {\n",
+ " \"C\": \"С\",\n",
+ " \"I\": \"І\",\n",
+ " \"P\": \"Р\",\n",
+ " \"a\": \"а\",\n",
+ " \"e\": \"е\",\n",
+ " \"x\": \"х\",\n",
+ " \"y\": \"у\",\n",
+ " \"p\": \"р\",\n",
+ " \"o\": \"о\",\n",
+ " \"i\": \"і\",\n",
+ " \"\\u0301\": \"\",\n",
+ " \"`\": \"'\",\n",
+ " \"՚\": \"'\",\n",
+ " \".\": \" \",\n",
+ " \"’\": \"'\"\n",
+ " \n",
+ " }\n",
+ " for word in special_words.keys():\n",
+ " batch[\"sentence\"] = batch[\"sentence\"].replace(word, special_words[word])\n",
+ " for word in abbreviations.keys():\n",
+ " batch[\"sentence\"] = batch[\"sentence\"].replace(word, abbreviations[word])\n",
+ " for char in chars_dict.keys():\n",
+ " batch[\"sentence\"] = batch[\"sentence\"].replace(char, chars_dict[char])\n",
+ " for char in replace_as_space:\n",
+ " batch[\"sentence\"] = batch[\"sentence\"].replace(char, \" \")\n",
+ " batch[\"sentence\"] = \" \".join(filter(lambda x: x != \"\", batch[\"sentence\"].strip().lower().split(\" \")))\n",
+ " return batch\n",
+ "\n",
+ "sentence = {\"sentence\": \"Привіт, - як у тебе справи загалом, м'якотілий друже?\"}\n",
+ "cleaner(sentence)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 401
+ },
+ "executionInfo": {
+ "elapsed": 32,
+ "status": "ok",
+ "timestamp": 1641588811774,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "6falIJSBED65",
+ "outputId": "2f0ca829-dbfa-4d70-ee4a-ded2ae342117"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.00716710090637207,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 11463,
+ "unit": "ex",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ca29db891c7f4d0cbd328a65477d2392",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/11463 [00:00, ?ex/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sentence | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " так так усе на світі кінчається | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " комуністів тільки й є що воєнком та два ротні політруки | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " уже й убитих чимало | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " трупів не закопували | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " до фастова дісталася з якимось польським обозом | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " невже то ви були | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " при отій купці отої нещасної духовної братії | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " вирішуємо напасти на бригаду по дорозі не допустивши до села | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " де то хто таке видав аби хлопи купували панські маєтки | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " коні пішли з коноводами в балку | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "show_random_elements(common_voice_train.map(cleaner).remove_columns([\"path\", \"audio\"]), num_examples=10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Loading cached processed dataset at /home/robinhad/.cache/huggingface/datasets/mozilla-foundation___common_voice_10_0/uk/10.0.0/27df768ab1b5cac48a7616f145b79b62599167b0ffa2e054bf4c3e74e9619e5e/cache-890587fbc5f83609.arrow\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007320880889892578,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 6783,
+ "unit": "ex",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "8261bf1a7bd747fb88f7e063c24273d4",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/6783 [00:00, ?ex/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "common_voice_train = common_voice_train.map(cleaner)\n",
+ "common_voice_test = common_voice_test.map(cleaner)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 363
+ },
+ "executionInfo": {
+ "elapsed": 24,
+ "status": "ok",
+ "timestamp": 1641588811775,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "RBDRAAYxRE6n",
+ "outputId": "a16beae1-84e6-4388-d601-2ed3a92bf451"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sentence | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " я замилувався маневруванням тачанок на полях | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " андрій із бугаєм вилізли на близький горб роздивилися | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " вона нам потрібна як щоденний хліб | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " олесеві ще більше захотілось чаю | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " вирішуємо тут поснідати і з годину відпочити бо люди й коні потомлені | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " воротилову сотню найдужче боявся він дрібного дощу який почав сіятись удосвіта й міг зашкодити | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " люта злість піднялася в душі хлопця при вигляді оцього свого тирана | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " також цього року в столиці виникла низка профспілкових організацій і був створений робочий клуб | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " накидав того літа а вони в наших плавнях затрималися всю січ мені засмерділи | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " їздять коло нас | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "show_random_elements(common_voice_train.remove_columns([\"path\",\"audio\"]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "id": "LwCshNbbeRZR"
+ },
+ "outputs": [],
+ "source": [
+ "def extract_all_chars(batch):\n",
+ " all_text = \" \".join(batch[\"sentence\"])\n",
+ " vocab = list(set(all_text))\n",
+ " return {\"vocab\": [vocab], \"all_text\": [all_text]}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 81,
+ "referenced_widgets": [
+ "116786d9364a4a57b521cddaabeda688",
+ "9baa2f69aa9c4387bf1086a04ed78420",
+ "a1e2c04dc2cb45ea80bec125e3dbf56f",
+ "b6d46d40efa14b21814f41531f5a2f41",
+ "d8bf8dc5d6c84140a4e96c9c435b8f17",
+ "04ec68b059df4c628839c3ac29e2ebdd",
+ "427056895c674c428400bee0f5b43995",
+ "d518f2c2ab6945b78a6d336dad6262bd",
+ "77f1a51099b24831ad8b2be3d2dc833a",
+ "5815ae1348994bfebba4a8e968489a96",
+ "22ba979142074f1d976e1a905544fd2d",
+ "8b6b7f28751c45c8869aa86eb2a0ab26",
+ "445c84e1e2e541f2a54fb989def386ae",
+ "68502fb433564eee8dfdf272ed7e4f56",
+ "1f3abdf2e0f6459da4179a94d691c4c4",
+ "48c60be3ca9349a295b83f65769c7f27",
+ "6c80bd8a8fe14a5989fe27445c14650f",
+ "5c2a7fea8c434d51ada69a0854b88baf",
+ "414efa8a08cd491cb78af8a95a151daa",
+ "c31a747e18df4b4aa4449a30e387448c",
+ "3dedffa30b774426bd474072a3a0d591",
+ "05d8496d54174ae298c319b0194fc710"
+ ]
+ },
+ "executionInfo": {
+ "elapsed": 560,
+ "status": "ok",
+ "timestamp": 1641588812313,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "_m6uUjjcfbjH",
+ "outputId": "4cc94e18-9295-4414-c611-c98916fe3d4d"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.009069681167602539,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 1,
+ "unit": "ba",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "21abcee2f4f6401096ebfcc2b283f704",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/1 [00:00, ?ba/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007071256637573242,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 1,
+ "unit": "ba",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "82e2b6e9482345ba913c5800eab41275",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/1 [00:00, ?ba/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "vocab_train = common_voice_train.map(extract_all_chars, batched=True, batch_size=-1, keep_in_memory=True, remove_columns=common_voice_train.column_names)\n",
+ "vocab_test = common_voice_test.map(extract_all_chars, batched=True, batch_size=-1, keep_in_memory=True, remove_columns=common_voice_test.column_names)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "id": "aQfneNsmlJI0"
+ },
+ "outputs": [],
+ "source": [
+ "vocab_list = list(set(vocab_train[\"vocab\"][0]) | set(vocab_test[\"vocab\"][0]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "executionInfo": {
+ "elapsed": 18,
+ "status": "ok",
+ "timestamp": 1641588812314,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "_0kRndSvqaKk",
+ "outputId": "35c48e76-5060-470b-8405-bd6d288296ea"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{' ': 0,\n",
+ " \"'\": 1,\n",
+ " 'а': 2,\n",
+ " 'б': 3,\n",
+ " 'в': 4,\n",
+ " 'г': 5,\n",
+ " 'д': 6,\n",
+ " 'е': 7,\n",
+ " 'ж': 8,\n",
+ " 'з': 9,\n",
+ " 'и': 10,\n",
+ " 'й': 11,\n",
+ " 'к': 12,\n",
+ " 'л': 13,\n",
+ " 'м': 14,\n",
+ " 'н': 15,\n",
+ " 'о': 16,\n",
+ " 'п': 17,\n",
+ " 'р': 18,\n",
+ " 'с': 19,\n",
+ " 'т': 20,\n",
+ " 'у': 21,\n",
+ " 'ф': 22,\n",
+ " 'х': 23,\n",
+ " 'ц': 24,\n",
+ " 'ч': 25,\n",
+ " 'ш': 26,\n",
+ " 'щ': 27,\n",
+ " 'ь': 28,\n",
+ " 'ю': 29,\n",
+ " 'я': 30,\n",
+ " 'є': 31,\n",
+ " 'і': 32,\n",
+ " 'ї': 33,\n",
+ " 'ґ': 34}"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "vocab_dict = {v: k for k, v in enumerate(sorted(vocab_list))}\n",
+ "vocab_dict"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "id": "npbIbBoLgaFX"
+ },
+ "outputs": [],
+ "source": [
+ "vocab_dict[\"|\"] = vocab_dict[\" \"]\n",
+ "del vocab_dict[\" \"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "executionInfo": {
+ "elapsed": 15,
+ "status": "ok",
+ "timestamp": 1641588812316,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "znF0bNunsjbl",
+ "outputId": "480da4c9-b3d4-41c6-fc5c-b87b8b66202e"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "37"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "vocab_dict[\"[UNK]\"] = len(vocab_dict)\n",
+ "vocab_dict[\"[PAD]\"] = len(vocab_dict)\n",
+ "len(vocab_dict)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "id": "ehyUoh9vk191"
+ },
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "with open('vocab.json', 'w') as vocab_file:\n",
+ " json.dump(vocab_dict, vocab_file)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "executionInfo": {
+ "elapsed": 8013,
+ "status": "ok",
+ "timestamp": 1641588820318,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "xriFGEWQkO4M",
+ "outputId": "a4497f75-d6f5-411a-d983-2ad519f65b8b"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
+ ]
+ }
+ ],
+ "source": [
+ "from transformers import Wav2Vec2CTCTokenizer\n",
+ "\n",
+ "tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(\"./\", unk_token=\"[UNK]\", pad_token=\"[PAD]\", word_delimiter_token=\"|\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "id": "A1XApZBAF2zr"
+ },
+ "outputs": [],
+ "source": [
+ "repo_name = \"wav2vec2-xls-r-300m-uk\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {
+ "id": "kAR0-2KLkopp"
+ },
+ "outputs": [],
+ "source": [
+ "from transformers import Wav2Vec2FeatureExtractor\n",
+ "\n",
+ "feature_extractor = Wav2Vec2FeatureExtractor(feature_size=1, sampling_rate=16000, padding_value=0.0, do_normalize=True, return_attention_mask=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {
+ "id": "KYZtoW-tlZgl"
+ },
+ "outputs": [],
+ "source": [
+ "from transformers import Wav2Vec2Processor\n",
+ "\n",
+ "processor = Wav2Vec2Processor(feature_extractor=feature_extractor, tokenizer=tokenizer)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# save tokenizer to folder\n",
+ "processor.save_pretrained(repo_name)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 54
+ },
+ "executionInfo": {
+ "elapsed": 18,
+ "status": "ok",
+ "timestamp": 1641588820325,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "TTCS7W6XJ9BG",
+ "outputId": "18b0d44f-a498-4a79-f0a7-984fae48cad1"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'/home/robinhad/.cache/huggingface/datasets/downloads/extracted/ee7155196e5d51620d53e48cf58eb693b7839b8ff183604c8bb948d3e0aad92d/cv-corpus-10.0-2022-07-04/uk/clips/common_voice_uk_20907128.mp3'"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "common_voice_train[0][\"path\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "executionInfo": {
+ "elapsed": 863,
+ "status": "ok",
+ "timestamp": 1641588821172,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "qj_z5Zc3GAs9",
+ "outputId": "ace70f42-dcf0-445c-9b81-b23d4089c90d"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'path': '/home/robinhad/.cache/huggingface/datasets/downloads/extracted/ee7155196e5d51620d53e48cf58eb693b7839b8ff183604c8bb948d3e0aad92d/cv-corpus-10.0-2022-07-04/uk/clips/common_voice_uk_20907128.mp3',\n",
+ " 'array': array([ 0.0000000e+00, -3.5002383e-14, 9.4785833e-15, ...,\n",
+ " -5.0386465e-08, -4.4114326e-08, -1.9402206e-08], dtype=float32),\n",
+ " 'sampling_rate': 48000}"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "common_voice_train[0][\"audio\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {
+ "id": "rrv65aj7G95i"
+ },
+ "outputs": [],
+ "source": [
+ "common_voice_train = common_voice_train.cast_column(\"audio\", Audio(sampling_rate=16_000))\n",
+ "common_voice_test = common_voice_test.cast_column(\"audio\", Audio(sampling_rate=16_000))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "executionInfo": {
+ "elapsed": 31,
+ "status": "ok",
+ "timestamp": 1641588821174,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "aKtkc1o_HWHC",
+ "outputId": "55538536-b8c6-484f-d695-5c8e0492747a"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'path': '/home/robinhad/.cache/huggingface/datasets/downloads/extracted/ee7155196e5d51620d53e48cf58eb693b7839b8ff183604c8bb948d3e0aad92d/cv-corpus-10.0-2022-07-04/uk/clips/common_voice_uk_20907128.mp3',\n",
+ " 'array': array([ 1.00456624e-13, -1.54340042e-13, 7.00158518e-13, ...,\n",
+ " -1.50335762e-08, -1.92623926e-08, -2.21930367e-08], dtype=float32),\n",
+ " 'sampling_rate': 16000}"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "common_voice_train[0][\"audio\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 80
+ },
+ "executionInfo": {
+ "elapsed": 27,
+ "status": "ok",
+ "timestamp": 1641588821175,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "dueM6U7Ev0OA",
+ "outputId": "8f8e14bf-6d59-43e2-ae2d-525bac8e5097"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "от би була рада\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import IPython.display as ipd\n",
+ "import numpy as np\n",
+ "import random\n",
+ "\n",
+ "rand_int = random.randint(0, len(common_voice_train)-1)\n",
+ "\n",
+ "print(common_voice_train[rand_int][\"sentence\"])\n",
+ "ipd.Audio(data=common_voice_train[rand_int][\"audio\"][\"array\"], autoplay=True, rate=16000)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "executionInfo": {
+ "elapsed": 22,
+ "status": "ok",
+ "timestamp": 1641588821176,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "1Po2g7YPuRTx",
+ "outputId": "ad79ec8a-ab5a-4c52-edfa-a20d0eec2282"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Target text: от їхало якихось із десять на конях назустріч\n",
+ "Input array shape: (73152,)\n",
+ "Sampling rate: 16000\n"
+ ]
+ }
+ ],
+ "source": [
+ "rand_int = random.randint(0, len(common_voice_train)-1)\n",
+ "\n",
+ "print(\"Target text:\", common_voice_train[rand_int][\"sentence\"])\n",
+ "print(\"Input array shape:\", common_voice_train[rand_int][\"audio\"][\"array\"].shape)\n",
+ "print(\"Sampling rate:\", common_voice_train[rand_int][\"audio\"][\"sampling_rate\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {
+ "id": "eJY7I0XAwe9p"
+ },
+ "outputs": [],
+ "source": [
+ "def prepare_dataset(batch):\n",
+ " audio = batch[\"audio\"]\n",
+ "\n",
+ " # batched output is \"un-batched\"\n",
+ " batch[\"input_values\"] = processor(audio[\"array\"], sampling_rate=audio[\"sampling_rate\"]).input_values[0]\n",
+ " batch[\"input_length\"] = len(batch[\"input_values\"])\n",
+ " \n",
+ " with processor.as_target_processor():\n",
+ " batch[\"labels\"] = processor(batch[\"sentence\"]).input_ids\n",
+ " return batch"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 87,
+ "referenced_widgets": [
+ "a29f88f174f8499082fbb36a36c47fa4",
+ "efc3bc0c48124ebeb79d245216eaf0fe",
+ "d45747150d0b434593a3a7c98399599a",
+ "ea73f7deb1c643f7b81de7fb7acaaf1b",
+ "18bc63944343440f837cdff76db004fc",
+ "9c875952cdd649a5bab87de9bb3f5200",
+ "aa329cb93df44a6da6012c7cc49d7489",
+ "b39b6e9131ca4ce3b31e84ceb04e1b83",
+ "c5eed102ef134a4e8ca41713b82ff6a4",
+ "e6e50da6516847878309fdc5c463edb3",
+ "a4ae510b4f3845f891a796cf844fc2bb"
+ ]
+ },
+ "executionInfo": {
+ "elapsed": 107521,
+ "status": "ok",
+ "timestamp": 1641588928679,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "-np9xYK-wl8q",
+ "outputId": "779b4637-0606-4cc8-be3c-16c1c4241e63"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.00739741325378418,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 11463,
+ "unit": "ex",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "c51a283b0cf149d7a84ade53f6eb40d9",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/11463 [00:00, ?ex/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.010543107986450195,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 6783,
+ "unit": "ex",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9666ba72472f47db816037cec309e7ed",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/6783 [00:00, ?ex/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "common_voice_train = common_voice_train.map(prepare_dataset, remove_columns=common_voice_train.column_names)\n",
+ "common_voice_test = common_voice_test.map(prepare_dataset, remove_columns=common_voice_test.column_names)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {
+ "id": "tdHfbUJ_09iA"
+ },
+ "outputs": [],
+ "source": [
+ "#max_input_length_in_sec = 5.0\n",
+ "#common_voice_train = common_voice_train.filter(lambda x: x < max_input_length_in_sec * processor.feature_extractor.sampling_rate, input_columns=[\"input_length\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {
+ "id": "tborvC9hx88e"
+ },
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "\n",
+ "from dataclasses import dataclass, field\n",
+ "from typing import Any, Dict, List, Optional, Union\n",
+ "\n",
+ "@dataclass\n",
+ "class DataCollatorCTCWithPadding:\n",
+ " \"\"\"\n",
+ " Data collator that will dynamically pad the inputs received.\n",
+ " Args:\n",
+ " processor (:class:`~transformers.Wav2Vec2Processor`)\n",
+ " The processor used for proccessing the data.\n",
+ " padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):\n",
+ " Select a strategy to pad the returned sequences (according to the model's padding side and padding index)\n",
+ " among:\n",
+ " * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single\n",
+ " sequence if provided).\n",
+ " * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the\n",
+ " maximum acceptable input length for the model if that argument is not provided.\n",
+ " * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of\n",
+ " different lengths).\n",
+ " \"\"\"\n",
+ "\n",
+ " processor: Wav2Vec2Processor\n",
+ " padding: Union[bool, str] = True\n",
+ "\n",
+ " def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:\n",
+ " # split inputs and labels since they have to be of different lenghts and need\n",
+ " # different padding methods\n",
+ " input_features = [{\"input_values\": feature[\"input_values\"]} for feature in features]\n",
+ " label_features = [{\"input_ids\": feature[\"labels\"]} for feature in features]\n",
+ "\n",
+ " batch = self.processor.pad(\n",
+ " input_features,\n",
+ " padding=self.padding,\n",
+ " return_tensors=\"pt\",\n",
+ " )\n",
+ " with self.processor.as_target_processor():\n",
+ " labels_batch = self.processor.pad(\n",
+ " label_features,\n",
+ " padding=self.padding,\n",
+ " return_tensors=\"pt\",\n",
+ " )\n",
+ "\n",
+ " # replace padding with -100 to ignore loss correctly\n",
+ " labels = labels_batch[\"input_ids\"].masked_fill(labels_batch.attention_mask.ne(1), -100)\n",
+ "\n",
+ " batch[\"labels\"] = labels\n",
+ "\n",
+ " return batch"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {
+ "id": "lbQf5GuZyQ4_"
+ },
+ "outputs": [],
+ "source": [
+ "data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {
+ "id": "9Xsux2gmyXso"
+ },
+ "outputs": [],
+ "source": [
+ "wer_metric = load_metric(\"wer\")\n",
+ "cer_metric = load_metric(\"cer\")\n",
+ "metrics = [wer_metric, cer_metric]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {
+ "id": "1XZ-kjweyTy_"
+ },
+ "outputs": [],
+ "source": [
+ "def compute_metrics(pred):\n",
+ " pred_logits = pred.predictions\n",
+ " pred_ids = np.argmax(pred_logits, axis=-1)\n",
+ "\n",
+ " pred.label_ids[pred.label_ids == -100] = processor.tokenizer.pad_token_id\n",
+ "\n",
+ " pred_str = processor.batch_decode(pred_ids)\n",
+ " # we do not want to group tokens when computing the metrics\n",
+ " label_str = processor.batch_decode(pred.label_ids, group_tokens=False)\n",
+ "\n",
+ " wer = wer_metric.compute(predictions=pred_str, references=label_str)\n",
+ " cer = cer_metric.compute(predictions=pred_str, references=label_str)\n",
+ "\n",
+ " return {\"wer\": wer, \"cer\": cer}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "executionInfo": {
+ "elapsed": 9496,
+ "status": "ok",
+ "timestamp": 1641588938616,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "e7cqAWIayn6w",
+ "outputId": "b7b20ce9-e1b2-473f-8032-2a75f98dfa9e"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Some weights of the model checkpoint at facebook/wav2vec2-xls-r-300m were not used when initializing Wav2Vec2ForCTC: ['quantizer.codevectors', 'project_hid.bias', 'project_q.weight', 'quantizer.weight_proj.weight', 'quantizer.weight_proj.bias', 'project_q.bias', 'project_hid.weight']\n",
+ "- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-300m and are newly initialized: ['lm_head.bias', 'lm_head.weight']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+ ]
+ }
+ ],
+ "source": [
+ "from transformers import Wav2Vec2ForCTC\n",
+ "\n",
+ "model = Wav2Vec2ForCTC.from_pretrained(\n",
+ " \"facebook/wav2vec2-xls-r-300m\", \n",
+ " attention_dropout=0.3,\n",
+ " hidden_dropout=0.3,\n",
+ " feat_proj_dropout=0.3,\n",
+ " mask_time_prob=0.05,\n",
+ " layerdrop=0.3,\n",
+ " ctc_loss_reduction=\"mean\", \n",
+ " pad_token_id=processor.tokenizer.pad_token_id,\n",
+ " vocab_size=len(processor.tokenizer),\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {
+ "id": "oGI8zObtZ3V0"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/robinhad/Projects/Speech/wav2vec2-xls-r-ukrainian/.venv/lib/python3.9/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:1618: FutureWarning: The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5.Please use the equivalent `freeze_feature_encoder` method instead.\n",
+ " warnings.warn(\n"
+ ]
+ }
+ ],
+ "source": [
+ "model.freeze_feature_extractor()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {
+ "id": "KbeKSV7uzGPP"
+ },
+ "outputs": [],
+ "source": [
+ "from transformers import TrainingArguments\n",
+ "\n",
+ "training_args = TrainingArguments(\n",
+ " output_dir=repo_name,\n",
+ " group_by_length=True,\n",
+ " per_device_train_batch_size=16,\n",
+ " gradient_accumulation_steps=6,\n",
+ " eval_accumulation_steps=4,\n",
+ " evaluation_strategy=\"steps\",\n",
+ " num_train_epochs=100,\n",
+ " gradient_checkpointing=True,\n",
+ " fp16=True,\n",
+ " save_steps=400,\n",
+ " eval_steps=400,\n",
+ " logging_steps=400,\n",
+ " learning_rate=3e-4,\n",
+ " warmup_steps=500,\n",
+ " save_total_limit=2,\n",
+ " report_to=\"tensorboard\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "executionInfo": {
+ "elapsed": 11063,
+ "status": "ok",
+ "timestamp": 1641588949674,
+ "user": {
+ "displayName": "Yurii Paniv",
+ "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
+ "userId": "13095662915325887123"
+ },
+ "user_tz": -120
+ },
+ "id": "rY7vBmFCPFgC",
+ "outputId": "2e89d5ea-5b25-44bf-8492-a6220b0b1c38"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using cuda_amp half precision backend\n"
+ ]
+ }
+ ],
+ "source": [
+ "from transformers import Trainer\n",
+ "\n",
+ "trainer = Trainer(\n",
+ " model=model,\n",
+ " data_collator=data_collator,\n",
+ " args=training_args,\n",
+ " compute_metrics=compute_metrics,\n",
+ " train_dataset=common_voice_train,\n",
+ " eval_dataset=common_voice_test,\n",
+ " tokenizer=processor.feature_extractor,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 409
+ },
+ "id": "9fRr9TG5pGBl",
+ "outputId": "c2a7c797-326c-4bd2-b167-9d2f41d77def"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "/home/robinhad/Projects/Speech/wav2vec2-xls-r-ukrainian/.venv/lib/python3.9/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+ " warnings.warn(\n",
+ "***** Running training *****\n",
+ " Num examples = 11463\n",
+ " Num Epochs = 100\n",
+ " Instantaneous batch size per device = 16\n",
+ " Total train batch size (w. parallel, distributed & accumulation) = 96\n",
+ " Gradient Accumulation steps = 6\n",
+ " Total optimization steps = 11900\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007272958755493164,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 11900,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "deebda57b25f4f95b4915d7a8d479a62",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/11900 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 5.4923, 'learning_rate': 0.0002388, 'epoch': 3.36}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.0072269439697265625,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "19ecc3e6ca434f5789950aeaad863a08",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-400\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-400/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 3.336439609527588, 'eval_wer': 1.0, 'eval_cer': 1.0, 'eval_runtime': 211.1144, 'eval_samples_per_second': 32.13, 'eval_steps_per_second': 4.017, 'epoch': 3.36}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-400/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-400/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-6800] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 3.3451, 'learning_rate': 0.00029215789473684205, 'epoch': 6.72}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007369041442871094,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "775a551a1cec442a965bada0af3b83e6",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-800\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-800/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 3.3383235931396484, 'eval_wer': 1.0, 'eval_cer': 1.0, 'eval_runtime': 211.2077, 'eval_samples_per_second': 32.115, 'eval_steps_per_second': 4.015, 'epoch': 6.72}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-800/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-800/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-7200] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 3.329, 'learning_rate': 0.00028163157894736836, 'epoch': 10.08}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007276296615600586,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "20b67f8d1d164f4ea9f7668d619e6e7b",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-1200\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-1200/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 3.3315727710723877, 'eval_wer': 1.0, 'eval_cer': 1.0, 'eval_runtime': 211.0671, 'eval_samples_per_second': 32.137, 'eval_steps_per_second': 4.018, 'epoch': 10.08}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-1200/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-1200/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-400] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 3.1437, 'learning_rate': 0.00027110526315789473, 'epoch': 13.44}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.006963014602661133,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "70c7241390304e4888951e93cdc4ca41",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-1600\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-1600/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 2.463456392288208, 'eval_wer': 0.9999594238182187, 'eval_cer': 0.9103366773973774, 'eval_runtime': 208.2248, 'eval_samples_per_second': 32.575, 'eval_steps_per_second': 4.073, 'epoch': 13.44}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-1600/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-1600/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-800] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 1.5745, 'learning_rate': 0.00026057894736842104, 'epoch': 16.8}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.0073626041412353516,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a78f719c4e7c4b1d974da356c2390432",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-2000\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-2000/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 1.1160292625427246, 'eval_wer': 0.9768512882937715, 'eval_cer': 0.36920856433421695, 'eval_runtime': 211.5684, 'eval_samples_per_second': 32.061, 'eval_steps_per_second': 4.008, 'epoch': 16.8}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-2000/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-2000/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-1200] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 1.0611, 'learning_rate': 0.00025005263157894735, 'epoch': 20.17}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.0070798397064208984,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b30e2ef5c27c4c71ad0d7027bfef821d",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-2400\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-2400/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.9386810064315796, 'eval_wer': 0.891316697098803, 'eval_cer': 0.28140508810053505, 'eval_runtime': 210.3288, 'eval_samples_per_second': 32.25, 'eval_steps_per_second': 4.032, 'epoch': 20.17}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-2400/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-2400/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-1600] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.8741, 'learning_rate': 0.00023952631578947364, 'epoch': 23.53}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.0073435306549072266,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b340f47813b74d458a3e18fb15da89be",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-2800\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-2800/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.9212636351585388, 'eval_wer': 0.8704402515723271, 'eval_cer': 0.26904808800689695, 'eval_runtime': 212.3022, 'eval_samples_per_second': 31.95, 'eval_steps_per_second': 3.994, 'epoch': 23.53}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-2800/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-2800/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-2000] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.7685, 'learning_rate': 0.00022899999999999998, 'epoch': 26.89}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007165431976318359,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a190b8f6eb8e434ea38065c7331ca229",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-3200\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-3200/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8420816659927368, 'eval_wer': 0.8330898762426455, 'eval_cer': 0.24242257905154294, 'eval_runtime': 208.9367, 'eval_samples_per_second': 32.464, 'eval_steps_per_second': 4.059, 'epoch': 26.89}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-3200/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-3200/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-2400] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.6913, 'learning_rate': 0.0002184736842105263, 'epoch': 30.25}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007306575775146484,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "99146f2067db4c7e80bc098c573138de",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-3600\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-3600/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8760627508163452, 'eval_wer': 0.80908906471901, 'eval_cer': 0.23088894844415456, 'eval_runtime': 211.4392, 'eval_samples_per_second': 32.08, 'eval_steps_per_second': 4.011, 'epoch': 30.25}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-3600/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-3600/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-2800] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.6387, 'learning_rate': 0.0002079473684210526, 'epoch': 33.61}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007359981536865234,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "5cdc1f646ea4449c884719317058ad6d",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-4000\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-4000/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.7959503531455994, 'eval_wer': 0.7825725299249341, 'eval_cer': 0.21715966587343358, 'eval_runtime': 212.6087, 'eval_samples_per_second': 31.904, 'eval_steps_per_second': 3.989, 'epoch': 33.61}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-4000/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-4000/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-3200] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.5837, 'learning_rate': 0.00019742105263157892, 'epoch': 36.97}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007372379302978516,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "6a1d3419c50d409c9bc2b6b239bda3b5",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-4400\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-4400/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8115484714508057, 'eval_wer': 0.7656725502130249, 'eval_cer': 0.2103047112879113, 'eval_runtime': 213.1592, 'eval_samples_per_second': 31.821, 'eval_steps_per_second': 3.978, 'epoch': 36.97}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-4400/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-4400/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-3600] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.5456, 'learning_rate': 0.00018689473684210524, 'epoch': 40.33}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007362842559814453,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9844a70e8695435094b62a58a5301f28",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-4800\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-4800/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8416911959648132, 'eval_wer': 0.7579833637654697, 'eval_cer': 0.21142836846914626, 'eval_runtime': 212.6331, 'eval_samples_per_second': 31.9, 'eval_steps_per_second': 3.988, 'epoch': 40.33}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-4800/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-4800/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-4000] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.5179, 'learning_rate': 0.00017636842105263155, 'epoch': 43.69}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.006958484649658203,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "e5e46905e0f04ab6a6129ba61a1d80ba",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-5200\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-5200/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8144774436950684, 'eval_wer': 0.7585311422195171, 'eval_cer': 0.20418917479004078, 'eval_runtime': 209.912, 'eval_samples_per_second': 32.314, 'eval_steps_per_second': 4.04, 'epoch': 43.69}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-5200/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-5200/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-4400] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.4861, 'learning_rate': 0.00016584210526315787, 'epoch': 47.06}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007211446762084961,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "eb38365a17bb4efab96f3dd48a145d42",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-5600\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-5600/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8354366421699524, 'eval_wer': 0.739216879691621, 'eval_cer': 0.20429572848826133, 'eval_runtime': 208.2486, 'eval_samples_per_second': 32.572, 'eval_steps_per_second': 4.072, 'epoch': 47.06}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-5600/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-5600/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-4800] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.4641, 'learning_rate': 0.0001553157894736842, 'epoch': 50.42}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.00705265998840332,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "129886198a2b49e3a62732bef0b76110",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-6000\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-6000/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8092752695083618, 'eval_wer': 0.7217691215256644, 'eval_cer': 0.19502232784312712, 'eval_runtime': 210.6162, 'eval_samples_per_second': 32.206, 'eval_steps_per_second': 4.026, 'epoch': 50.42}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-6000/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-6000/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-5200] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.4368, 'learning_rate': 0.00014478947368421052, 'epoch': 53.78}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007249355316162109,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "518e42e24b7d479ca5a8fe5247ad01f5",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-6400\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-6400/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8010208010673523, 'eval_wer': 0.7055589369040374, 'eval_cer': 0.1889358514447713, 'eval_runtime': 211.7021, 'eval_samples_per_second': 32.04, 'eval_steps_per_second': 4.006, 'epoch': 53.78}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-6400/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-6400/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-5600] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.4207, 'learning_rate': 0.00013426315789473683, 'epoch': 57.14}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007280111312866211,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "310968ba59f84d3899c3fcfc37555a72",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-6800\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-6800/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8339575529098511, 'eval_wer': 0.7027388922702373, 'eval_cer': 0.18708892067561503, 'eval_runtime': 212.1919, 'eval_samples_per_second': 31.966, 'eval_steps_per_second': 3.996, 'epoch': 57.14}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-6800/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-6800/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-6000] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.3972, 'learning_rate': 0.00012376315789473682, 'epoch': 60.5}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.0073511600494384766,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "70c026d4141f4d41a79bef25278385a9",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-7200\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-7200/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8393998742103577, 'eval_wer': 0.6940961655508217, 'eval_cer': 0.18495784671120397, 'eval_runtime': 211.3395, 'eval_samples_per_second': 32.095, 'eval_steps_per_second': 4.013, 'epoch': 60.5}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-7200/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-7200/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-6400] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.3825, 'learning_rate': 0.00011323684210526315, 'epoch': 63.86}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.0074388980865478516,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "edc0cd14d08a4236ac788f2f71f3d547",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-7600\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-7600/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8245963454246521, 'eval_wer': 0.6890647190099412, 'eval_cer': 0.18329173433902804, 'eval_runtime': 212.3966, 'eval_samples_per_second': 31.936, 'eval_steps_per_second': 3.993, 'epoch': 63.86}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-7600/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-7600/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-6800] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.3719, 'learning_rate': 0.00010271052631578946, 'epoch': 67.23}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007239818572998047,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "8daa262ce82f405fb2f3c6c7c09873ad",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-8000\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-8000/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8630526065826416, 'eval_wer': 0.6835463582876852, 'eval_cer': 0.17931372960546071, 'eval_runtime': 209.1605, 'eval_samples_per_second': 32.43, 'eval_steps_per_second': 4.054, 'epoch': 67.23}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-8000/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-8000/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-7200] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.3601, 'learning_rate': 9.218421052631579e-05, 'epoch': 70.59}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.006955146789550781,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "da7b7cf93dab401c8db40d62be638e02",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-8400\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-8400/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8463570475578308, 'eval_wer': 0.6926354230066951, 'eval_cer': 0.18325944533956726, 'eval_runtime': 208.5926, 'eval_samples_per_second': 32.518, 'eval_steps_per_second': 4.065, 'epoch': 70.59}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-8400/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-8400/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-7600] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.3468, 'learning_rate': 8.16578947368421e-05, 'epoch': 73.95}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007674217224121094,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9c1914cd880b4ff1a88cc4907460108f",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-8800\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-8800/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8564673662185669, 'eval_wer': 0.6686751876648407, 'eval_cer': 0.17537124277130026, 'eval_runtime': 221.3871, 'eval_samples_per_second': 30.639, 'eval_steps_per_second': 3.83, 'epoch': 73.95}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-8800/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-8800/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-8000] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.3326, 'learning_rate': 7.113157894736842e-05, 'epoch': 77.31}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007187604904174805,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a0d41f2cff1744aba4aa9d61f2b141c1",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-9200\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-9200/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8846502900123596, 'eval_wer': 0.665672550213025, 'eval_cer': 0.1759911915609471, 'eval_runtime': 209.6762, 'eval_samples_per_second': 32.35, 'eval_steps_per_second': 4.044, 'epoch': 77.31}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-9200/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-9200/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-8400] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.3272, 'learning_rate': 6.0605263157894733e-05, 'epoch': 80.67}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007404804229736328,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "0fe22fe7b78943afa6eaab5153dde314",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-9600\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-9600/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8954363465309143, 'eval_wer': 0.6707242848447961, 'eval_cer': 0.17582328876375108, 'eval_runtime': 211.264, 'eval_samples_per_second': 32.107, 'eval_steps_per_second': 4.014, 'epoch': 80.67}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-9600/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-9600/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-8800] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.3213, 'learning_rate': 5.007894736842105e-05, 'epoch': 84.03}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.006964445114135742,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "65019eafe06d4894a31fd4b0c6503218",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-10000\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-10000/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8902531266212463, 'eval_wer': 0.6653885169405559, 'eval_cer': 0.17506772617636898, 'eval_runtime': 209.0979, 'eval_samples_per_second': 32.439, 'eval_steps_per_second': 4.056, 'epoch': 84.03}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-10000/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-10000/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-9200] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.3125, 'learning_rate': 3.955263157894737e-05, 'epoch': 87.39}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007245779037475586,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "f23bf08feef0414e844f231b57c88c3b",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-10400\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-10400/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.8979071974754333, 'eval_wer': 0.6685940353012781, 'eval_cer': 0.17526791797302577, 'eval_runtime': 209.5385, 'eval_samples_per_second': 32.371, 'eval_steps_per_second': 4.047, 'epoch': 87.39}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-10400/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-10400/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-9600] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.3095, 'learning_rate': 2.9026315789473685e-05, 'epoch': 90.75}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007394552230834961,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "3e53e86fd8a242a2b8ff424210dde90d",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-10800\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-10800/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.9134525060653687, 'eval_wer': 0.6639886386691012, 'eval_cer': 0.17337578260462444, 'eval_runtime': 214.8656, 'eval_samples_per_second': 31.569, 'eval_steps_per_second': 3.947, 'epoch': 90.75}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-10800/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-10800/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-10000] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.3009, 'learning_rate': 1.8526315789473684e-05, 'epoch': 94.12}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007333517074584961,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "4eee8fdf4ce940c6bf475a70a09d87c0",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-11200\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-11200/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.9179993867874146, 'eval_wer': 0.6621829985798336, 'eval_cer': 0.1734791074028989, 'eval_runtime': 209.042, 'eval_samples_per_second': 32.448, 'eval_steps_per_second': 4.057, 'epoch': 94.12}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-11200/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-11200/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-10400] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 6783\n",
+ " Batch size = 8\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'loss': 0.2967, 'learning_rate': 8e-06, 'epoch': 97.48}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/json": {
+ "ascii": false,
+ "bar_format": null,
+ "colour": null,
+ "elapsed": 0.007021665573120117,
+ "initial": 0,
+ "n": 0,
+ "ncols": null,
+ "nrows": null,
+ "postfix": null,
+ "prefix": "",
+ "rate": null,
+ "total": 848,
+ "unit": "it",
+ "unit_divisor": 1000,
+ "unit_scale": false
+ },
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "0dfbd82a81e24bec865be87a87258dfd",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/848 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-11600\n",
+ "Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-11600/config.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 0.9188246130943298, 'eval_wer': 0.660722256035707, 'eval_cer': 0.17280426731416873, 'eval_runtime': 211.3614, 'eval_samples_per_second': 32.092, 'eval_steps_per_second': 4.012, 'epoch': 97.48}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-11600/pytorch_model.bin\n",
+ "Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-11600/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-10800] due to args.save_total_limit\n",
+ "\n",
+ "\n",
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'train_runtime': 40015.4, 'train_samples_per_second': 28.646, 'train_steps_per_second': 0.297, 'train_loss': 0.9631941716811236, 'epoch': 100.0}\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "TrainOutput(global_step=11900, training_loss=0.9631941716811236, metrics={'train_runtime': 40015.4, 'train_samples_per_second': 28.646, 'train_steps_per_second': 0.297, 'train_loss': 0.9631941716811236, 'epoch': 100.0})"
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "trainer.train()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Dropping the following result as it does not have all the necessary fields:\n",
+ "{'dataset': {'name': 'common_voice', 'type': 'common_voice', 'args': 'uk'}}\n"
+ ]
+ }
+ ],
+ "source": [
+ "trainer.create_model_card()"
+ ]
+ },
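+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The warning above shows that the evaluation result was dropped from the generated model card because the result entry does not carry all the fields (for example the task) that the card generator expects. Below is a hedged sketch of passing richer metadata instead: the keyword names follow the `Trainer.create_model_card` arguments of this transformers generation and may differ in other versions, and the tag and dataset strings are assumptions to adapt to your own run."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch only: the keyword names below are assumptions based on Trainer.create_model_card;\n",
+ "# edit the values to match your own fine-tuning run before using them.\n",
+ "card_kwargs = {\n",
+ "    \"language\": \"uk\",\n",
+ "    \"tags\": [\"automatic-speech-recognition\", \"mozilla-foundation/common_voice_10_0\"],\n",
+ "    \"dataset_tags\": \"mozilla-foundation/common_voice_10_0\",\n",
+ "    \"dataset\": \"Common Voice 10.0 (uk)\",\n",
+ "    \"dataset_args\": \"Config: uk, Training split: train+validation, Eval split: test\",\n",
+ "    \"finetuned_from\": \"facebook/wav2vec2-xls-r-300m\",\n",
+ "    \"tasks\": \"automatic-speech-recognition\",\n",
+ "    \"model_name\": \"wav2vec2-xls-r-300m-uk\",\n",
+ "}\n",
+ "\n",
+ "trainer.create_model_card(**card_kwargs)"
+ ]
+ },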
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "R351I9IQp_9D"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "loading configuration file wav2vec2-xls-r-300m-uk/checkpoint-5600/config.json\n",
+ "Model config Wav2Vec2Config {\n",
+ " \"_name_or_path\": \"facebook/wav2vec2-xls-r-300m\",\n",
+ " \"activation_dropout\": 0.0,\n",
+ " \"adapter_kernel_size\": 3,\n",
+ " \"adapter_stride\": 2,\n",
+ " \"add_adapter\": false,\n",
+ " \"apply_spec_augment\": true,\n",
+ " \"architectures\": [\n",
+ " \"Wav2Vec2ForCTC\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 1,\n",
+ " \"classifier_proj_size\": 256,\n",
+ " \"codevector_dim\": 768,\n",
+ " \"contrastive_logits_temperature\": 0.1,\n",
+ " \"conv_bias\": true,\n",
+ " \"conv_dim\": [\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512\n",
+ " ],\n",
+ " \"conv_kernel\": [\n",
+ " 10,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"conv_stride\": [\n",
+ " 5,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"ctc_loss_reduction\": \"mean\",\n",
+ " \"ctc_zero_infinity\": false,\n",
+ " \"diversity_loss_weight\": 0.1,\n",
+ " \"do_stable_layer_norm\": true,\n",
+ " \"eos_token_id\": 2,\n",
+ " \"feat_extract_activation\": \"gelu\",\n",
+ " \"feat_extract_dropout\": 0.0,\n",
+ " \"feat_extract_norm\": \"layer\",\n",
+ " \"feat_proj_dropout\": 0.0,\n",
+ " \"feat_quantizer_dropout\": 0.0,\n",
+ " \"final_dropout\": 0.0,\n",
+ " \"gradient_checkpointing\": false,\n",
+ " \"hidden_act\": \"gelu\",\n",
+ " \"hidden_dropout\": 0.0,\n",
+ " \"hidden_size\": 1024,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4096,\n",
+ " \"layer_norm_eps\": 1e-05,\n",
+ " \"layerdrop\": 0.0,\n",
+ " \"mask_feature_length\": 10,\n",
+ " \"mask_feature_min_masks\": 0,\n",
+ " \"mask_feature_prob\": 0.0,\n",
+ " \"mask_time_length\": 10,\n",
+ " \"mask_time_min_masks\": 2,\n",
+ " \"mask_time_prob\": 0.05,\n",
+ " \"model_type\": \"wav2vec2\",\n",
+ " \"num_adapter_layers\": 3,\n",
+ " \"num_attention_heads\": 16,\n",
+ " \"num_codevector_groups\": 2,\n",
+ " \"num_codevectors_per_group\": 320,\n",
+ " \"num_conv_pos_embedding_groups\": 16,\n",
+ " \"num_conv_pos_embeddings\": 128,\n",
+ " \"num_feat_extract_layers\": 7,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_negatives\": 100,\n",
+ " \"output_hidden_size\": 1024,\n",
+ " \"pad_token_id\": 37,\n",
+ " \"proj_codevector_dim\": 768,\n",
+ " \"torch_dtype\": \"float32\",\n",
+ " \"transformers_version\": \"4.14.1\",\n",
+ " \"use_weighted_layer_sum\": false,\n",
+ " \"vocab_size\": 40\n",
+ "}\n",
+ "\n",
+ "loading weights file wav2vec2-xls-r-300m-uk/checkpoint-5600/pytorch_model.bin\n",
+ "All model checkpoint weights were used when initializing Wav2Vec2ForCTC.\n",
+ "\n",
+ "All the weights of Wav2Vec2ForCTC were initialized from the model checkpoint at wav2vec2-xls-r-300m-uk/checkpoint-5600.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use Wav2Vec2ForCTC for predictions without further training.\n",
+ "loading feature extractor configuration file wav2vec2-xls-r-300m-uk/preprocessor_config.json\n",
+ "Feature extractor Wav2Vec2FeatureExtractor {\n",
+ " \"do_normalize\": true,\n",
+ " \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n",
+ " \"feature_size\": 1,\n",
+ " \"padding_side\": \"right\",\n",
+ " \"padding_value\": 0.0,\n",
+ " \"return_attention_mask\": true,\n",
+ " \"sampling_rate\": 16000\n",
+ "}\n",
+ "\n",
+ "Didn't find file wav2vec2-xls-r-300m-uk/tokenizer.json. We won't load it.\n",
+ "loading file wav2vec2-xls-r-300m-uk/vocab.json\n",
+ "loading file wav2vec2-xls-r-300m-uk/tokenizer_config.json\n",
+ "loading file wav2vec2-xls-r-300m-uk/added_tokens.json\n",
+ "loading file wav2vec2-xls-r-300m-uk/special_tokens_map.json\n",
+ "loading file None\n",
+ "Adding to the vocabulary\n",
+ "Adding to the vocabulary\n"
+ ]
+ }
+ ],
+ "source": [
+ "from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor\n",
+ "model = Wav2Vec2ForCTC.from_pretrained(repo_name + \"/checkpoint-11200\").to(\"cuda\")\n",
+ "processor = Wav2Vec2Processor.from_pretrained(repo_name)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "jD7TZ1YS3S_K"
+ },
+ "source": [
+ "\n",
+ "Now, we will just take the first example of the test set, run it through the model and take the `argmax(...)` of the logits to retrieve the predicted token ids."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "pax07TnL3WZn"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "It is strongly recommended to pass the ``sampling_rate`` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
+ ]
+ }
+ ],
+ "source": [
+ "audio_id = 10\n",
+ "\n",
+ "input_dict = processor(common_voice_test[\"input_values\"], return_tensors=\"pt\", padding=True)\n",
+ "\n",
+ "logits = model(input_dict.input_values.to(\"cuda\")).logits\n",
+ "\n",
+ "pred_ids = torch.argmax(logits, dim=-1)[audio_id]\n",
+ "\n",
+ "common_voice_test_transcription = load_dataset(\"common_voice\", \"uk\", split=\"test\")\n",
+ "\n",
+ "print(\"Prediction:\")\n",
+ "print(processor.decode(pred_ids))\n",
+ "\n",
+ "print(\"\\nReference:\")\n",
+ "print(common_voice_test_transcription[audio_id][\"sentence\"].lower())"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "collapsed_sections": [],
+ "machine_shape": "hm",
+ "name": "Копія записника \"Fine-Tune XLS-R on Common Voice.ipynb\"",
+ "provenance": [
+ {
+ "file_id": "https://github.com/patrickvonplaten/notebooks/blob/master/Fine_Tune_XLS_R_on_Common_Voice.ipynb",
+ "timestamp": 1641583715050
+ }
+ ]
+ },
+ "kernelspec": {
+ "display_name": "Python 3.9.13 (conda)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.13"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "a5cdd9abf8df3af0fd61fdb3838d6c6f2f66a9ba4bf4484f45cd88abf9f04fe9"
+ }
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "04ec68b059df4c628839c3ac29e2ebdd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "05d8496d54174ae298c319b0194fc710": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "116786d9364a4a57b521cddaabeda688": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_a1e2c04dc2cb45ea80bec125e3dbf56f",
+ "IPY_MODEL_b6d46d40efa14b21814f41531f5a2f41",
+ "IPY_MODEL_d8bf8dc5d6c84140a4e96c9c435b8f17"
+ ],
+ "layout": "IPY_MODEL_9baa2f69aa9c4387bf1086a04ed78420"
+ }
+ },
+ "18bc63944343440f837cdff76db004fc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a4ae510b4f3845f891a796cf844fc2bb",
+ "placeholder": "",
+ "style": "IPY_MODEL_e6e50da6516847878309fdc5c463edb3",
+ "value": " 6962/6962 [01:46<00:00, 78.15ex/s]"
+ }
+ },
+ "1f3abdf2e0f6459da4179a94d691c4c4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_c31a747e18df4b4aa4449a30e387448c",
+ "max": 1,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_414efa8a08cd491cb78af8a95a151daa",
+ "value": 1
+ }
+ },
+ "22ba979142074f1d976e1a905544fd2d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3dedffa30b774426bd474072a3a0d591": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "414efa8a08cd491cb78af8a95a151daa": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "427056895c674c428400bee0f5b43995": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "445c84e1e2e541f2a54fb989def386ae": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "48c60be3ca9349a295b83f65769c7f27": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_05d8496d54174ae298c319b0194fc710",
+ "placeholder": "",
+ "style": "IPY_MODEL_3dedffa30b774426bd474072a3a0d591",
+ "value": " 1/1 [00:00<00:00, 11.09ba/s]"
+ }
+ },
+ "5815ae1348994bfebba4a8e968489a96": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "5c2a7fea8c434d51ada69a0854b88baf": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "68502fb433564eee8dfdf272ed7e4f56": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_5c2a7fea8c434d51ada69a0854b88baf",
+ "placeholder": "",
+ "style": "IPY_MODEL_6c80bd8a8fe14a5989fe27445c14650f",
+ "value": "100%"
+ }
+ },
+ "6c80bd8a8fe14a5989fe27445c14650f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "77f1a51099b24831ad8b2be3d2dc833a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "8b6b7f28751c45c8869aa86eb2a0ab26": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_68502fb433564eee8dfdf272ed7e4f56",
+ "IPY_MODEL_1f3abdf2e0f6459da4179a94d691c4c4",
+ "IPY_MODEL_48c60be3ca9349a295b83f65769c7f27"
+ ],
+ "layout": "IPY_MODEL_445c84e1e2e541f2a54fb989def386ae"
+ }
+ },
+ "9baa2f69aa9c4387bf1086a04ed78420": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "9c875952cdd649a5bab87de9bb3f5200": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a1e2c04dc2cb45ea80bec125e3dbf56f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_427056895c674c428400bee0f5b43995",
+ "placeholder": "",
+ "style": "IPY_MODEL_04ec68b059df4c628839c3ac29e2ebdd",
+ "value": "100%"
+ }
+ },
+ "a29f88f174f8499082fbb36a36c47fa4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_d45747150d0b434593a3a7c98399599a",
+ "IPY_MODEL_ea73f7deb1c643f7b81de7fb7acaaf1b",
+ "IPY_MODEL_18bc63944343440f837cdff76db004fc"
+ ],
+ "layout": "IPY_MODEL_efc3bc0c48124ebeb79d245216eaf0fe"
+ }
+ },
+ "a4ae510b4f3845f891a796cf844fc2bb": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "aa329cb93df44a6da6012c7cc49d7489": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b39b6e9131ca4ce3b31e84ceb04e1b83": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "b6d46d40efa14b21814f41531f5a2f41": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_77f1a51099b24831ad8b2be3d2dc833a",
+ "max": 1,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_d518f2c2ab6945b78a6d336dad6262bd",
+ "value": 1
+ }
+ },
+ "c31a747e18df4b4aa4449a30e387448c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c5eed102ef134a4e8ca41713b82ff6a4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d45747150d0b434593a3a7c98399599a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_aa329cb93df44a6da6012c7cc49d7489",
+ "placeholder": "",
+ "style": "IPY_MODEL_9c875952cdd649a5bab87de9bb3f5200",
+ "value": "100%"
+ }
+ },
+ "d518f2c2ab6945b78a6d336dad6262bd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "d8bf8dc5d6c84140a4e96c9c435b8f17": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_22ba979142074f1d976e1a905544fd2d",
+ "placeholder": "",
+ "style": "IPY_MODEL_5815ae1348994bfebba4a8e968489a96",
+ "value": " 1/1 [00:00<00:00, 7.95ba/s]"
+ }
+ },
+ "e6e50da6516847878309fdc5c463edb3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "ea73f7deb1c643f7b81de7fb7acaaf1b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_c5eed102ef134a4e8ca41713b82ff6a4",
+ "max": 6962,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_b39b6e9131ca4ce3b31e84ceb04e1b83",
+ "value": 6962
+ }
+ },
+ "efc3bc0c48124ebeb79d245216eaf0fe": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}