{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4",
"authorship_tag": "ABX9TyOZhPcZe61RhDjhEFQv0vrl",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "7O5JSosg5-rx"
},
"outputs": [],
"source": [
"# Pinned install of the wrapper package. %pip (rather than !pip) installs into\n",
"# the environment of the running kernel instead of whatever pip is on the shell PATH.\n",
"%pip install -U llama2-wrapper==0.1.12"
]
},
{
"cell_type": "code",
"source": [
"# Fetch the web UI sources. The clone is skipped when the directory already exists,\n",
"# so re-running this cell does not abort with a 'destination path already exists' error.\n",
"%cd /content\n",
"!test -d llama2-webui || git clone https://github.com/liltom-eth/llama2-webui\n",
"\n",
"# Download the GPTQ model; the downloader writes to ./models/ relative to the repo root,\n",
"# which is why the working directory is switched first.\n",
"%cd /content/llama2-webui\n",
"!python -m llama2_wrapper.download --repo_id TheBloke/CodeLlama-7B-Instruct-GPTQ\n",
"\n",
"# Start the Gradio app (still inside the repo directory). --share True asks Gradio for a\n",
"# temporary public URL; this call blocks until the cell is interrupted.\n",
"!python app.py --backend_type gptq --model_path ./models/CodeLlama-7B-Instruct-GPTQ/ --share True"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Y6A7bJdkmzY8",
"outputId": "0d702a7d-68ab-4747-f012-246d4dee3718"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content\n",
"fatal: destination path 'llama2-webui' already exists and is not an empty directory.\n",
"/content/llama2-webui\n",
"Start downloading model TheBloke/CodeLlama-7B-Instruct-GPTQ to: ./models/CodeLlama-7B-Instruct-GPTQ\n",
"Fetching 15 files: 0% 0/15 [00:00, ?it/s]\n",
"Downloading (…)d0d05/.gitattributes: 100% 1.52k/1.52k [00:00<00:00, 7.94MB/s]\n",
"Fetching 15 files: 7% 1/15 [00:01<00:16, 1.15s/it]\n",
"Downloading (…)478d0d05/LICENSE.txt: 100% 7.02k/7.02k [00:00<00:00, 31.6MB/s]\n",
"\n",
"Downloading (…)478d0d05/config.json: 100% 1.25k/1.25k [00:00<00:00, 7.95MB/s]\n",
"\n",
"Downloading (…)nfiguration_llama.py: 100% 8.56k/8.56k [00:00<00:00, 41.7MB/s]\n",
"\n",
"Downloading (…)81b84478d0d05/Notice: 100% 112/112 [00:00<00:00, 750kB/s]\n",
"\n",
"Downloading (…)neration_config.json: 100% 132/132 [00:00<00:00, 836kB/s]\n",
"\n",
"Downloading (…)8d0d05/USE_POLICY.md: 100% 105/105 [00:00<00:00, 686kB/s]\n",
"\n",
"Downloading (…)84478d0d05/README.md: 100% 22.0k/22.0k [00:00<00:00, 59.5MB/s]\n",
"\n",
"Downloading (…)05/modeling_llama.py: 100% 45.9k/45.9k [00:00<00:00, 27.5MB/s]\n",
"\n",
"Downloading (…)quantize_config.json: 100% 187/187 [00:00<00:00, 1.34MB/s]\n",
"\n",
"Downloading (…)cial_tokens_map.json: 100% 411/411 [00:00<00:00, 2.82MB/s]\n",
"\n",
"Downloading (…)d0d05/tokenizer.json: 0% 0.00/1.84M [00:00, ?B/s]\u001b[A\n",
"\n",
"Downloading (…)okenizer_config.json: 100% 824/824 [00:00<00:00, 5.75MB/s]\n",
"\n",
"\n",
"Downloading model.safetensors: 0% 0.00/3.90G [00:00, ?B/s]\u001b[A\u001b[A\n",
"\n",
"\n",
"Downloading tokenizer.model: 100% 500k/500k [00:00<00:00, 16.3MB/s]\n",
"\n",
"Downloading (…)d0d05/tokenizer.json: 100% 1.84M/1.84M [00:00<00:00, 5.47MB/s]\n",
"\n",
"\n",
"Downloading model.safetensors: 0% 10.5M/3.90G [00:00<01:08, 56.4MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 1% 21.0M/3.90G [00:00<00:57, 67.1MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 1% 31.5M/3.90G [00:00<00:51, 75.5MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 1% 52.4M/3.90G [00:00<00:40, 94.5MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 2% 73.4M/3.90G [00:00<00:33, 113MB/s] \u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 2% 94.4M/3.90G [00:00<00:28, 133MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 3% 115M/3.90G [00:00<00:25, 148MB/s] \u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 3% 136M/3.90G [00:01<00:24, 156MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 4% 157M/3.90G [00:01<00:22, 167MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 5% 178M/3.90G [00:01<00:22, 168MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 5% 199M/3.90G [00:01<00:21, 169MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 6% 220M/3.90G [00:01<00:21, 170MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 6% 241M/3.90G [00:01<00:21, 174MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 7% 262M/3.90G [00:01<00:20, 177MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 7% 283M/3.90G [00:02<01:08, 52.9MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 8% 315M/3.90G [00:02<00:47, 75.6MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 9% 346M/3.90G [00:03<00:36, 97.8MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 9% 367M/3.90G [00:03<00:31, 111MB/s] \u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 10% 388M/3.90G [00:03<00:28, 122MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 10% 409M/3.90G [00:03<00:26, 134MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 11% 430M/3.90G [00:03<00:24, 141MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 12% 461M/3.90G [00:03<00:21, 160MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 12% 482M/3.90G [00:03<00:20, 165MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 13% 503M/3.90G [00:04<00:20, 166MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 13% 524M/3.90G [00:04<00:19, 170MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 14% 556M/3.90G [00:04<00:18, 181MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 15% 577M/3.90G [00:04<00:18, 182MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 15% 598M/3.90G [00:04<00:18, 183MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 16% 619M/3.90G [00:04<00:17, 184MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 16% 640M/3.90G [00:04<00:17, 184MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 17% 661M/3.90G [00:04<00:18, 178MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 17% 682M/3.90G [00:04<00:17, 180MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 18% 703M/3.90G [00:05<00:17, 180MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 19% 724M/3.90G [00:05<00:17, 181MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 19% 744M/3.90G [00:05<00:18, 171MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 20% 765M/3.90G [00:05<00:18, 173MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 20% 786M/3.90G [00:05<00:17, 175MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 21% 807M/3.90G [00:05<00:17, 178MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 21% 828M/3.90G [00:05<00:17, 180MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 22% 849M/3.90G [00:05<00:16, 182MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 22% 870M/3.90G [00:07<01:37, 30.9MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 23% 891M/3.90G [00:08<01:13, 40.8MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 24% 923M/3.90G [00:08<00:50, 59.3MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 24% 944M/3.90G [00:08<00:42, 70.2MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 25% 975M/3.90G [00:08<00:30, 94.3MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 26% 996M/3.90G [00:08<00:27, 107MB/s] \u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 26% 1.02G/3.90G [00:08<00:23, 121MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 27% 1.04G/3.90G [00:08<00:21, 134MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 27% 1.06G/3.90G [00:08<00:20, 141MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 28% 1.08G/3.90G [00:09<00:18, 151MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 28% 1.10G/3.90G [00:09<00:17, 160MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 29% 1.12G/3.90G [00:09<00:16, 166MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 29% 1.14G/3.90G [00:09<00:16, 171MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 30% 1.16G/3.90G [00:09<00:15, 175MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 30% 1.18G/3.90G [00:09<00:15, 178MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 31% 1.21G/3.90G [00:09<00:15, 179MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 31% 1.23G/3.90G [00:09<00:14, 181MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 32% 1.25G/3.90G [00:09<00:14, 182MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 33% 1.27G/3.90G [00:10<00:23, 113MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 33% 1.29G/3.90G [00:10<00:20, 128MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 34% 1.31G/3.90G [00:10<00:18, 139MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 34% 1.33G/3.90G [00:10<00:17, 150MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 35% 1.35G/3.90G [00:10<00:16, 158MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 35% 1.37G/3.90G [00:12<01:24, 29.9MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 36% 1.41G/3.90G [00:12<00:55, 45.3MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 37% 1.44G/3.90G [00:13<00:39, 63.0MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 37% 1.46G/3.90G [00:13<00:33, 72.6MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 38% 1.48G/3.90G [00:13<00:29, 82.0MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 38% 1.50G/3.90G [00:13<00:24, 98.6MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 39% 1.53G/3.90G [00:13<00:19, 124MB/s] \u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 40% 1.55G/3.90G [00:13<00:17, 132MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 40% 1.57G/3.90G [00:13<00:16, 143MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 41% 1.59G/3.90G [00:14<00:15, 153MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 41% 1.61G/3.90G [00:14<00:14, 160MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 42% 1.64G/3.90G [00:14<00:13, 167MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 43% 1.66G/3.90G [00:14<00:13, 171MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 43% 1.68G/3.90G [00:14<00:12, 177MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 44% 1.70G/3.90G [00:14<00:12, 174MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 44% 1.72G/3.90G [00:14<00:12, 173MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 45% 1.74G/3.90G [00:14<00:12, 175MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 45% 1.76G/3.90G [00:14<00:11, 179MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 46% 1.78G/3.90G [00:15<00:12, 172MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 46% 1.80G/3.90G [00:15<00:12, 174MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 47% 1.82G/3.90G [00:15<00:11, 177MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 47% 1.85G/3.90G [00:16<00:28, 71.9MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 48% 1.87G/3.90G [00:16<00:23, 87.4MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 49% 1.90G/3.90G [00:16<00:16, 118MB/s] \u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 49% 1.92G/3.90G [00:16<00:14, 132MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 50% 1.94G/3.90G [00:16<00:13, 143MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 50% 1.96G/3.90G [00:16<00:12, 152MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 51% 1.98G/3.90G [00:16<00:13, 142MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 51% 2.00G/3.90G [00:16<00:13, 144MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 52% 2.02G/3.90G [00:17<00:12, 144MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 52% 2.04G/3.90G [00:17<00:12, 148MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 53% 2.07G/3.90G [00:17<00:12, 152MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 54% 2.09G/3.90G [00:17<00:22, 81.2MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 54% 2.12G/3.90G [00:18<00:16, 107MB/s] \u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 55% 2.14G/3.90G [00:18<00:14, 119MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 55% 2.16G/3.90G [00:18<00:14, 123MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 56% 2.18G/3.90G [00:18<00:13, 131MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 57% 2.21G/3.90G [00:18<00:10, 156MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 57% 2.23G/3.90G [00:18<00:10, 162MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 58% 2.25G/3.90G [00:18<00:10, 160MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 59% 2.29G/3.90G [00:18<00:09, 174MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 59% 2.31G/3.90G [00:19<00:08, 178MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 60% 2.33G/3.90G [00:19<00:08, 180MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 60% 2.35G/3.90G [00:19<00:08, 181MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 61% 2.37G/3.90G [00:19<00:08, 181MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 61% 2.39G/3.90G [00:19<00:08, 181MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 62% 2.41G/3.90G [00:19<00:08, 182MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 62% 2.43G/3.90G [00:19<00:08, 182MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 63% 2.45G/3.90G [00:19<00:08, 177MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 64% 2.47G/3.90G [00:20<00:11, 124MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 64% 2.51G/3.90G [00:20<00:09, 149MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 65% 2.53G/3.90G [00:22<00:40, 34.2MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 66% 2.56G/3.90G [00:22<00:26, 50.1MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 66% 2.58G/3.90G [00:22<00:21, 60.1MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 67% 2.60G/3.90G [00:22<00:18, 69.4MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 67% 2.62G/3.90G [00:22<00:15, 84.0MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 68% 2.64G/3.90G [00:22<00:12, 99.4MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 68% 2.66G/3.90G [00:23<00:12, 96.0MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 69% 2.68G/3.90G [00:23<00:12, 95.4MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 69% 2.71G/3.90G [00:23<00:14, 84.2MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 70% 2.73G/3.90G [00:23<00:14, 82.0MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 70% 2.74G/3.90G [00:24<00:14, 80.9MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 70% 2.75G/3.90G [00:24<00:15, 75.8MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 71% 2.76G/3.90G [00:24<00:15, 75.3MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 71% 2.77G/3.90G [00:24<00:15, 72.2MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 71% 2.78G/3.90G [00:24<00:14, 74.9MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 72% 2.79G/3.90G [00:24<00:14, 74.7MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 72% 2.80G/3.90G [00:25<00:15, 69.4MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 72% 2.81G/3.90G [00:25<00:15, 71.3MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 72% 2.82G/3.90G [00:25<00:13, 77.5MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 73% 2.84G/3.90G [00:25<00:12, 84.6MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 73% 2.85G/3.90G [00:25<00:12, 83.8MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 73% 2.86G/3.90G [00:25<00:12, 81.6MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 74% 2.88G/3.90G [00:25<00:10, 97.2MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 75% 2.90G/3.90G [00:26<00:08, 118MB/s] \u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 75% 2.93G/3.90G [00:26<00:07, 134MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 76% 2.95G/3.90G [00:26<00:06, 149MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 76% 2.97G/3.90G [00:26<00:05, 159MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 77% 2.99G/3.90G [00:27<00:23, 37.9MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 77% 3.02G/3.90G [00:27<00:15, 57.4MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 78% 3.04G/3.90G [00:28<00:12, 67.9MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 79% 3.06G/3.90G [00:28<00:10, 78.8MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 79% 3.08G/3.90G [00:28<00:08, 92.9MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 80% 3.10G/3.90G [00:28<00:07, 109MB/s] \u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 80% 3.14G/3.90G [00:28<00:05, 138MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 81% 3.16G/3.90G [00:28<00:05, 146MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 82% 3.18G/3.90G [00:28<00:04, 152MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 82% 3.20G/3.90G [00:29<00:04, 161MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 83% 3.22G/3.90G [00:29<00:03, 170MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 83% 3.24G/3.90G [00:29<00:04, 158MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 84% 3.26G/3.90G [00:29<00:04, 156MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 84% 3.28G/3.90G [00:29<00:03, 160MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 85% 3.30G/3.90G [00:29<00:03, 162MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 85% 3.32G/3.90G [00:29<00:03, 160MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 86% 3.34G/3.90G [00:29<00:03, 171MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 87% 3.38G/3.90G [00:30<00:02, 191MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 87% 3.40G/3.90G [00:30<00:02, 188MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 88% 3.42G/3.90G [00:30<00:02, 187MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 88% 3.44G/3.90G [00:30<00:02, 182MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 89% 3.46G/3.90G [00:30<00:02, 183MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 89% 3.48G/3.90G [00:30<00:02, 183MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 90% 3.50G/3.90G [00:30<00:02, 184MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 90% 3.52G/3.90G [00:30<00:02, 185MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 91% 3.54G/3.90G [00:30<00:01, 183MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 91% 3.57G/3.90G [00:31<00:05, 55.5MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 92% 3.59G/3.90G [00:32<00:08, 38.3MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 93% 3.61G/3.90G [00:32<00:05, 50.7MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 93% 3.63G/3.90G [00:33<00:04, 65.0MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 94% 3.65G/3.90G [00:33<00:03, 80.3MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 94% 3.67G/3.90G [00:33<00:02, 97.3MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 95% 3.69G/3.90G [00:33<00:01, 113MB/s] \u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 95% 3.71G/3.90G [00:33<00:01, 128MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 96% 3.73G/3.90G [00:33<00:01, 139MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 96% 3.75G/3.90G [00:33<00:00, 153MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 97% 3.77G/3.90G [00:33<00:00, 158MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 97% 3.80G/3.90G [00:34<00:00, 165MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 98% 3.82G/3.90G [00:34<00:00, 167MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 98% 3.84G/3.90G [00:34<00:00, 169MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 99% 3.86G/3.90G [00:34<00:00, 174MB/s]\u001b[A\u001b[A\n",
"\n",
"Downloading model.safetensors: 100% 3.90G/3.90G [00:34<00:00, 113MB/s]\n",
"Fetching 15 files: 100% 15/15 [00:36<00:00, 2.41s/it]\n",
"/content/llama2-webui\n",
"Running on GPU with backend torch transformers.\n",
"2023-08-26 07:14:25.222792: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
"skip module injection for FusedLlamaMLPForQuantizedModel not support integrate without triton yet.\n",
"Caching examples at: '/content/llama2-webui/gradio_cached_examples/19'\n",
"Caching example 1/5\n",
"Caching example 2/5\n",
"Caching example 3/5\n",
"Caching example 4/5\n",
"Caching example 5/5\n",
"Caching complete\n",
"\n",
"Running on local URL: http://127.0.0.1:7860\n",
"Running on public URL: https://71c3606942c440e7dd.gradio.live\n",
"\n",
"This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n",
"Keyboard interruption in main thread... closing server.\n",
"Traceback (most recent call last):\n",
" File \"/usr/local/lib/python3.10/dist-packages/gradio/blocks.py\", line 2130, in block_thread\n",
" time.sleep(0.1)\n",
"KeyboardInterrupt\n",
"\n",
"During handling of the above exception, another exception occurred:\n",
"\n",
"Traceback (most recent call last):\n",
" File \"/content/llama2-webui/app.py\", line 322, in \n",
" main()\n",
" File \"/content/llama2-webui/app.py\", line 318, in main\n",
" demo.queue(max_size=20).launch(share=args.share)\n",
" File \"/usr/local/lib/python3.10/dist-packages/gradio/blocks.py\", line 2046, in launch\n",
" self.block_thread()\n",
" File \"/usr/local/lib/python3.10/dist-packages/gradio/blocks.py\", line 2132, in block_thread\n",
" print(\"Keyboard interruption in main thread... closing server.\")\n",
"KeyboardInterrupt\n",
"Killing tunnel 127.0.0.1:7860 <> https://71c3606942c440e7dd.gradio.live\n",
"terminate called without an active exception\n"
]
}
]
}
]
}