Upload fine-tune-whisper-streaming-cf11-el.ipynb
Browse files
fine-tune-whisper-streaming-cf11-el.ipynb
CHANGED
@@ -661,7 +661,8 @@
|
|
661 |
],
|
662 |
"source": [
|
663 |
"from datasets import IterableDatasetDict\n",
|
664 |
-
"
|
|
|
665 |
"raw_datasets = IterableDatasetDict()\n",
|
666 |
"\n",
|
667 |
"raw_datasets[\"train\"] = load_whole_dataset(\"mozilla-foundation/common_voice_11_0\", \"el\", split=\"train+validation\", use_auth_token=access_token) \n",
|
@@ -2014,27 +2015,6 @@
|
|
2014 |
"We can label our checkpoint with the `whisper-event` tag on push by setting the appropriate key-word arguments (kwargs):"
|
2015 |
]
|
2016 |
},
|
2017 |
-
{
|
2018 |
-
"cell_type": "code",
|
2019 |
-
"execution_count": 45,
|
2020 |
-
"id": "4128bee3",
|
2021 |
-
"metadata": {},
|
2022 |
-
"outputs": [
|
2023 |
-
{
|
2024 |
-
"data": {
|
2025 |
-
"text/plain": [
|
2026 |
-
"'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'"
|
2027 |
-
]
|
2028 |
-
},
|
2029 |
-
"execution_count": 45,
|
2030 |
-
"metadata": {},
|
2031 |
-
"output_type": "execute_result"
|
2032 |
-
}
|
2033 |
-
],
|
2034 |
-
"source": [
|
2035 |
-
"access_token"
|
2036 |
-
]
|
2037 |
-
},
|
2038 |
{
|
2039 |
"cell_type": "code",
|
2040 |
"execution_count": 50,
|
@@ -2085,44 +2065,7 @@
|
|
2085 |
],
|
2086 |
"source": [
|
2087 |
"from huggingface_hub import notebook_login\n",
|
2088 |
-
"notebook_login()\n"
|
2089 |
-
]
|
2090 |
-
},
|
2091 |
-
{
|
2092 |
-
"cell_type": "code",
|
2093 |
-
"execution_count": 53,
|
2094 |
-
"id": "8f26ce24",
|
2095 |
-
"metadata": {},
|
2096 |
-
"outputs": [
|
2097 |
-
{
|
2098 |
-
"name": "stderr",
|
2099 |
-
"output_type": "stream",
|
2100 |
-
"text": [
|
2101 |
-
"/home/farsipal/miniconda3/envs/whisper/lib/python3.10/site-packages/huggingface_hub/repository.py:725: FutureWarning: Creating a repository through 'clone_from' is deprecated and will be removed in v0.12. Please create the repository first using `create_repo(..., exists_ok=True)`.\n",
|
2102 |
-
" warnings.warn(\n"
|
2103 |
-
]
|
2104 |
-
},
|
2105 |
-
{
|
2106 |
-
"ename": "OSError",
|
2107 |
-
"evalue": "Tried to clone a repository in a non-empty folder that isn't a git repository. If you really want to do this, do it manually:\ngit init && git remote add origin && git pull origin main\n or clone repo to a new folder and move your existing files there afterwards.",
|
2108 |
-
"output_type": "error",
|
2109 |
-
"traceback": [
|
2110 |
-
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
2111 |
-
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
|
2112 |
-
"Cell \u001b[0;32mIn [53], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39m#access_token = \"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\"\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m trainer\u001b[39m.\u001b[39;49mpush_to_hub(\u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
|
2113 |
-
"File \u001b[0;32m~/miniconda3/envs/whisper/lib/python3.10/site-packages/transformers/trainer.py:3456\u001b[0m, in \u001b[0;36mTrainer.push_to_hub\u001b[0;34m(self, commit_message, blocking, **kwargs)\u001b[0m\n\u001b[1;32m 3453\u001b[0m \u001b[39m# If a user calls manually `push_to_hub` with `self.args.push_to_hub = False`, we try to create the repo but\u001b[39;00m\n\u001b[1;32m 3454\u001b[0m \u001b[39m# it might fail.\u001b[39;00m\n\u001b[1;32m 3455\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mhasattr\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mrepo\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[0;32m-> 3456\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49minit_git_repo()\n\u001b[1;32m 3458\u001b[0m model_name \u001b[39m=\u001b[39m kwargs\u001b[39m.\u001b[39mpop(\u001b[39m\"\u001b[39m\u001b[39mmodel_name\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m)\n\u001b[1;32m 3459\u001b[0m \u001b[39mif\u001b[39;00m model_name \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39margs\u001b[39m.\u001b[39mshould_save:\n",
|
2114 |
-
"File \u001b[0;32m~/miniconda3/envs/whisper/lib/python3.10/site-packages/transformers/trainer.py:3309\u001b[0m, in \u001b[0;36mTrainer.init_git_repo\u001b[0;34m(self, at_init)\u001b[0m\n\u001b[1;32m 3306\u001b[0m repo_name \u001b[39m=\u001b[39m get_full_repo_name(repo_name, token\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39margs\u001b[39m.\u001b[39mhub_token)\n\u001b[1;32m 3308\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 3309\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mrepo \u001b[39m=\u001b[39m Repository(\n\u001b[1;32m 3310\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49margs\u001b[39m.\u001b[39;49moutput_dir,\n\u001b[1;32m 3311\u001b[0m clone_from\u001b[39m=\u001b[39;49mrepo_name,\n\u001b[1;32m 3312\u001b[0m use_auth_token\u001b[39m=\u001b[39;49muse_auth_token,\n\u001b[1;32m 3313\u001b[0m private\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49margs\u001b[39m.\u001b[39;49mhub_private_repo,\n\u001b[1;32m 3314\u001b[0m )\n\u001b[1;32m 3315\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mEnvironmentError\u001b[39;00m:\n\u001b[1;32m 3316\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39margs\u001b[39m.\u001b[39moverwrite_output_dir \u001b[39mand\u001b[39;00m at_init:\n\u001b[1;32m 3317\u001b[0m \u001b[39m# Try again after wiping output_dir\u001b[39;00m\n",
|
2115 |
-
"File \u001b[0;32m~/miniconda3/envs/whisper/lib/python3.10/site-packages/huggingface_hub/utils/_deprecation.py:101\u001b[0m, in \u001b[0;36m_deprecate_arguments.<locals>._inner_deprecate_positional_args.<locals>.inner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 99\u001b[0m message \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m+\u001b[39m custom_message\n\u001b[1;32m 100\u001b[0m warnings\u001b[39m.\u001b[39mwarn(message, \u001b[39mFutureWarning\u001b[39;00m)\n\u001b[0;32m--> 101\u001b[0m \u001b[39mreturn\u001b[39;00m f(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
|
2116 |
-
"File \u001b[0;32m~/miniconda3/envs/whisper/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:124\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[39mif\u001b[39;00m check_use_auth_token:\n\u001b[1;32m 120\u001b[0m kwargs \u001b[39m=\u001b[39m smoothly_deprecate_use_auth_token(\n\u001b[1;32m 121\u001b[0m fn_name\u001b[39m=\u001b[39mfn\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m, has_token\u001b[39m=\u001b[39mhas_token, kwargs\u001b[39m=\u001b[39mkwargs\n\u001b[1;32m 122\u001b[0m )\n\u001b[0;32m--> 124\u001b[0m \u001b[39mreturn\u001b[39;00m fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
|
2117 |
-
"File \u001b[0;32m~/miniconda3/envs/whisper/lib/python3.10/site-packages/huggingface_hub/repository.py:528\u001b[0m, in \u001b[0;36mRepository.__init__\u001b[0;34m(self, local_dir, clone_from, repo_type, token, git_user, git_email, revision, private, skip_lfs_files, client)\u001b[0m\n\u001b[1;32m 525\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhuggingface_token \u001b[39m=\u001b[39m HfFolder\u001b[39m.\u001b[39mget_token()\n\u001b[1;32m 527\u001b[0m \u001b[39mif\u001b[39;00m clone_from \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 528\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mclone_from(repo_url\u001b[39m=\u001b[39;49mclone_from)\n\u001b[1;32m 529\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 530\u001b[0m \u001b[39mif\u001b[39;00m is_git_repo(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlocal_dir):\n",
|
2118 |
-
"File \u001b[0;32m~/miniconda3/envs/whisper/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:124\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[39mif\u001b[39;00m check_use_auth_token:\n\u001b[1;32m 120\u001b[0m kwargs \u001b[39m=\u001b[39m smoothly_deprecate_use_auth_token(\n\u001b[1;32m 121\u001b[0m fn_name\u001b[39m=\u001b[39mfn\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m, has_token\u001b[39m=\u001b[39mhas_token, kwargs\u001b[39m=\u001b[39mkwargs\n\u001b[1;32m 122\u001b[0m )\n\u001b[0;32m--> 124\u001b[0m \u001b[39mreturn\u001b[39;00m fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
|
2119 |
-
"File \u001b[0;32m~/miniconda3/envs/whisper/lib/python3.10/site-packages/huggingface_hub/repository.py:762\u001b[0m, in \u001b[0;36mRepository.clone_from\u001b[0;34m(self, repo_url, token)\u001b[0m\n\u001b[1;32m 759\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 760\u001b[0m \u001b[39m# Check if the folder is the root of a git repository\u001b[39;00m\n\u001b[1;32m 761\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m is_git_repo(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlocal_dir):\n\u001b[0;32m--> 762\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 763\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mTried to clone a repository in a non-empty folder that isn\u001b[39m\u001b[39m'\u001b[39m\u001b[39mt a\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 764\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m git repository. If you really want to do this, do it\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 765\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m manually:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39mgit init && git remote add origin && git pull\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 766\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m origin main\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m or clone repo to a new folder and move your\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 767\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m existing files there afterwards.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 768\u001b[0m )\n\u001b[1;32m 770\u001b[0m \u001b[39mif\u001b[39;00m is_local_clone(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlocal_dir, repo_url):\n\u001b[1;32m 771\u001b[0m logger\u001b[39m.\u001b[39mwarning(\n\u001b[1;32m 772\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlocal_dir\u001b[39m}\u001b[39;00m\u001b[39m is already a clone of 
\u001b[39m\u001b[39m{\u001b[39;00mclean_repo_url\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 773\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m Make sure you pull the latest changes with\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 774\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m `repo.git_pull()`.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 775\u001b[0m )\n",
|
2120 |
-
"\u001b[0;31mOSError\u001b[0m: Tried to clone a repository in a non-empty folder that isn't a git repository. If you really want to do this, do it manually:\ngit init && git remote add origin && git pull origin main\n or clone repo to a new folder and move your existing files there afterwards."
|
2121 |
-
]
|
2122 |
-
}
|
2123 |
-
],
|
2124 |
-
"source": [
|
2125 |
-
"\n",
|
2126 |
"trainer.push_to_hub(**kwargs)"
|
2127 |
]
|
2128 |
}
|
|
|
661 |
],
|
662 |
"source": [
|
663 |
"from datasets import IterableDatasetDict\n",
|
664 |
+
"# Please use a read access token below\n",
|
665 |
+
"access_token = 'use your token here'\n",
|
666 |
"raw_datasets = IterableDatasetDict()\n",
|
667 |
"\n",
|
668 |
"raw_datasets[\"train\"] = load_whole_dataset(\"mozilla-foundation/common_voice_11_0\", \"el\", split=\"train+validation\", use_auth_token=access_token) \n",
|
|
|
2015 |
"We can label our checkpoint with the `whisper-event` tag on push by setting the appropriate key-word arguments (kwargs):"
|
2016 |
]
|
2017 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2018 |
{
|
2019 |
"cell_type": "code",
|
2020 |
"execution_count": 50,
|
|
|
2065 |
],
|
2066 |
"source": [
|
2067 |
"from huggingface_hub import notebook_login\n",
|
2068 |
+
"notebook_login()\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2069 |
"trainer.push_to_hub(**kwargs)"
|
2070 |
]
|
2071 |
}
|