Upload 6 files
Browse files
- benchmark_jglue/.DS_Store +0 -0
- benchmark_jglue/JGLUE_Llama-2-13b-hf-qlora-dolly-ja-2ep.ipynb +1 -0
- benchmark_jglue/result/result_jcommonsenseqa.json +24 -0
- benchmark_jglue/result/result_jnli.json +24 -0
- benchmark_jglue/result/result_jsquad.json +22 -0
- benchmark_jglue/result/result_marc_ja.json +24 -0
benchmark_jglue/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
benchmark_jglue/JGLUE_Llama-2-13b-hf-qlora-dolly-ja-2ep.ipynb
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"machine_shape":"hm","gpuType":"A100","authorship_tag":"ABX9TyOcWa/R2MQZHg1iTqbsixCh"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"markdown","source":["# JGLUE"],"metadata":{"id":"vW5pSUvHPrgi"}},{"cell_type":"markdown","source":["https://github.com/Stability-AI/lm-evaluation-harness/tree/jp-stable"],"metadata":{"id":"8jqvft-4Pt3Q"}},{"cell_type":"markdown","source":["## HuggingFace ログイン(事前学習モデルのllama-2-13b-hfのダウンロードに必要)"],"metadata":{"id":"xNAr_zTpS2n1"}},{"cell_type":"code","source":["!pip install huggingface_hub"],"metadata":{"id":"85vaoDDrS708"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["!huggingface-cli login"],"metadata":{"id":"qdMlCfQqS9yO"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## JGLUEスクリプトの実行"],"metadata":{"id":"xzgw7KFSS30w"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"8dNUdnbwPRpm"},"outputs":[],"source":["!git clone -b jp-stable https://github.com/Stability-AI/lm-evaluation-harness.git\n","%cd lm-evaluation-harness\n","!pip install -e \".[ja]\""]},{"cell_type":"code","source":["!python main.py \\\n"," --model hf-causal-experimental \\\n"," --model_args \"pretrained=meta-llama/Llama-2-13b-hf,peft=HachiML/Llama-2-13b-hf-qlora-dolly-ja-2ep\" \\\n"," --tasks \"jsquad-1.1-0.3\" \\\n"," --num_fewshot \"2\" \\\n"," --batch_size 1 \\\n"," --device \"cuda\" \\\n"," --output_path \"/content/lm-evaluation-harness/result/result_jsquad.json\""],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"JVyyviF9ZiQU","executionInfo":{"status":"ok","timestamp":1691319117294,"user_tz":-540,"elapsed":512615,"user":{"displayName":"八木原統","userId":"03559086887314454384"}},"outputId":"0319169d-c5f4-42a7-da8b-275f53c085de"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["2023-08-06 10:43:29.368525: 
W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","Selected Tasks: ['jsquad-1.1-0.3']\n","Loading checkpoint shards: 100% 3/3 [00:02<00:00, 1.37it/s]\n","/content/lm-evaluation-harness/lm_eval/tasks/ja/jsquad.py:75: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate\n"," self.jasquad_metric = datasets.load_metric(jasquad.__file__)\n","Running greedy_until requests\n","0it [00:00, ?it/s]\n","{\n"," \"results\": {\n"," \"jsquad-1.1-0.3\": {\n"," \"exact_match\": 62.83205763169743,\n"," \"f1\": 77.09913819742155\n"," }\n"," },\n"," \"versions\": {\n"," \"jsquad-1.1-0.3\": 1.1\n"," },\n"," \"config\": {\n"," \"model\": \"hf-causal-experimental\",\n"," \"model_args\": \"pretrained=meta-llama/Llama-2-13b-hf,peft=HachiML/Llama-2-13b-hf-qlora-dolly-ja-2ep\",\n"," \"num_fewshot\": 2,\n"," \"batch_size\": 1,\n"," \"device\": \"cuda\",\n"," \"no_cache\": false,\n"," \"limit\": null,\n"," \"bootstrap_iters\": 100000,\n"," \"description_dict\": {}\n"," }\n","}\n","hf-causal-experimental (pretrained=meta-llama/Llama-2-13b-hf,peft=HachiML/Llama-2-13b-hf-qlora-dolly-ja-2ep), limit: None, provide_description: False, num_fewshot: 2, batch_size: 1\n","| Task |Version| Metric | Value | |Stderr|\n","|--------------|------:|-----------|------:|---|------|\n","|jsquad-1.1-0.3| 1.1|exact_match|62.8321| | |\n","| | |f1 |77.0991| | |\n","\n"]}]},{"cell_type":"code","source":["!python main.py \\\n"," --model hf-causal-experimental \\\n"," --model_args \"pretrained=meta-llama/Llama-2-13b-hf,peft=HachiML/Llama-2-13b-hf-qlora-dolly-ja-2ep\" \\\n"," --tasks \"jcommonsenseqa-1.1-0.3\" \\\n"," --num_fewshot \"3\" \\\n"," --batch_size 1 \\\n"," --device \"cuda\" \\\n"," --output_path 
\"/content/lm-evaluation-harness/result/result_jcommonsenseqa.json\""],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"i1gV8hDmmRRh","executionInfo":{"status":"ok","timestamp":1691322994804,"user_tz":-540,"elapsed":1773681,"user":{"displayName":"八木原統","userId":"03559086887314454384"}},"outputId":"262839c7-b9c1-4d75-dbff-06450456f42a"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["2023-08-06 11:27:05.800305: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","Selected Tasks: ['jcommonsenseqa-1.1-0.3']\n","Loading checkpoint shards: 100% 3/3 [00:02<00:00, 1.39it/s]\n","Running loglikelihood requests\n","100% 5595/5595 [21:13<00:00, 4.39it/s]\n","{\n"," \"results\": {\n"," \"jcommonsenseqa-1.1-0.3\": {\n"," \"acc\": 0.7578194816800715,\n"," \"acc_stderr\": 0.012812432289317893,\n"," \"acc_norm\": 0.4280607685433423,\n"," \"acc_norm_stderr\": 0.014798127177394432\n"," }\n"," },\n"," \"versions\": {\n"," \"jcommonsenseqa-1.1-0.3\": 1.1\n"," },\n"," \"config\": {\n"," \"model\": \"hf-causal-experimental\",\n"," \"model_args\": \"pretrained=meta-llama/Llama-2-13b-hf,peft=HachiML/Llama-2-13b-hf-qlora-dolly-ja-2ep\",\n"," \"num_fewshot\": 3,\n"," \"batch_size\": 1,\n"," \"device\": \"cuda\",\n"," \"no_cache\": false,\n"," \"limit\": null,\n"," \"bootstrap_iters\": 100000,\n"," \"description_dict\": {}\n"," }\n","}\n","hf-causal-experimental (pretrained=meta-llama/Llama-2-13b-hf,peft=HachiML/Llama-2-13b-hf-qlora-dolly-ja-2ep), limit: None, provide_description: False, num_fewshot: 3, batch_size: 1\n","| Task |Version| Metric |Value | |Stderr|\n","|----------------------|------:|--------|-----:|---|-----:|\n","|jcommonsenseqa-1.1-0.3| 1.1|acc |0.7578|± |0.0128|\n","| | |acc_norm|0.4281|± |0.0148|\n","\n"]}]},{"cell_type":"code","source":["!python main.py \\\n"," --model hf-causal-experimental \\\n"," --model_args 
\"pretrained=meta-llama/Llama-2-13b-hf,peft=HachiML/Llama-2-13b-hf-qlora-dolly-ja-2ep\" \\\n"," --tasks \"jnli-1.1-0.3\" \\\n"," --num_fewshot \"3\" \\\n"," --batch_size 8 \\\n"," --device \"cuda\" \\\n"," --output_path \"/content/lm-evaluation-harness/result/result_jnli.json\""],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"VM1EUhLTsfi_","executionInfo":{"status":"ok","timestamp":1691326111411,"user_tz":-540,"elapsed":2017998,"user":{"displayName":"八木原統","userId":"03559086887314454384"}},"outputId":"5963f031-6056-4389-a891-cc3ae1cf6d25"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["2023-08-06 12:14:57.989717: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","Selected Tasks: ['jnli-1.1-0.3']\n","Loading checkpoint shards: 100% 3/3 [00:02<00:00, 1.38it/s]\n","Running loglikelihood requests\n","100% 7006/7006 [25:13<00:00, 4.63it/s]\n","{\n"," \"results\": {\n"," \"jnli-1.1-0.3\": {\n"," \"acc\": 0.5069843878389483,\n"," \"acc_stderr\": 0.010135765974065071,\n"," \"acc_norm\": 0.3056696795398521,\n"," \"acc_norm_stderr\": 0.009339813231542836\n"," }\n"," },\n"," \"versions\": {\n"," \"jnli-1.1-0.3\": 1.1\n"," },\n"," \"config\": {\n"," \"model\": \"hf-causal-experimental\",\n"," \"model_args\": \"pretrained=meta-llama/Llama-2-13b-hf,peft=HachiML/Llama-2-13b-hf-qlora-dolly-ja-2ep\",\n"," \"num_fewshot\": 3,\n"," \"batch_size\": 8,\n"," \"device\": \"cuda\",\n"," \"no_cache\": false,\n"," \"limit\": null,\n"," \"bootstrap_iters\": 100000,\n"," \"description_dict\": {}\n"," }\n","}\n","hf-causal-experimental (pretrained=meta-llama/Llama-2-13b-hf,peft=HachiML/Llama-2-13b-hf-qlora-dolly-ja-2ep), limit: None, provide_description: False, num_fewshot: 3, batch_size: 8\n","| Task |Version| Metric |Value | |Stderr|\n","|------------|------:|--------|-----:|---|-----:|\n","|jnli-1.1-0.3| 1.1|acc |0.5070|± |0.0101|\n","| | |acc_norm|0.3057|± 
|0.0093|\n","\n"]}]},{"cell_type":"code","source":["!python main.py \\\n"," --model hf-causal-experimental \\\n"," --model_args \"pretrained=meta-llama/Llama-2-13b-hf,peft=HachiML/Llama-2-13b-hf-qlora-dolly-ja-2ep\" \\\n"," --tasks \"marc_ja-1.1-0.3\" \\\n"," --num_fewshot \"3\" \\\n"," --batch_size 2 \\\n"," --device \"cuda\" \\\n"," --output_path \"/content/lm-evaluation-harness/result/result_marc_ja.json\""],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Z1h0sHgoz3hb","executionInfo":{"status":"ok","timestamp":1691330876590,"user_tz":-540,"elapsed":4750329,"user":{"displayName":"八木原統","userId":"03559086887314454384"}},"outputId":"0811254a-fa5c-457a-da8e-7137e3065d63"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["2023-08-06 12:48:50.938519: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","Selected Tasks: ['marc_ja-1.1-0.3']\n","Loading checkpoint shards: 100% 3/3 [00:02<00:00, 1.39it/s]\n","Running loglikelihood requests\n","100% 10006/10006 [1:10:23<00:00, 2.37it/s]\n","{\n"," \"results\": {\n"," \"marc_ja-1.1-0.3\": {\n"," \"acc\": 0.7964273081004598,\n"," \"acc_stderr\": 0.005355417561710155,\n"," \"acc_norm\": 0.7964273081004598,\n"," \"acc_norm_stderr\": 0.005355417561710155\n"," }\n"," },\n"," \"versions\": {\n"," \"marc_ja-1.1-0.3\": 1.1\n"," },\n"," \"config\": {\n"," \"model\": \"hf-causal-experimental\",\n"," \"model_args\": \"pretrained=meta-llama/Llama-2-13b-hf,peft=HachiML/Llama-2-13b-hf-qlora-dolly-ja-2ep\",\n"," \"num_fewshot\": 3,\n"," \"batch_size\": 2,\n"," \"device\": \"cuda\",\n"," \"no_cache\": false,\n"," \"limit\": null,\n"," \"bootstrap_iters\": 100000,\n"," \"description_dict\": {}\n"," }\n","}\n","hf-causal-experimental (pretrained=meta-llama/Llama-2-13b-hf,peft=HachiML/Llama-2-13b-hf-qlora-dolly-ja-2ep), limit: None, provide_description: False, num_fewshot: 3, batch_size: 2\n","| Task |Version| Metric |Value | 
|Stderr|\n","|---------------|------:|--------|-----:|---|-----:|\n","|marc_ja-1.1-0.3| 1.1|acc |0.7964|± |0.0054|\n","| | |acc_norm|0.7964|± |0.0054|\n","\n"]}]},{"cell_type":"markdown","source":["JGLUE実行後、結果ファイルはローカルに取得したが、Google driveをマウントしていればそこに保存しても良いと思う。なぜか、Colabを数時間実行しているとdriveのマウントが外れるエラーがたまに出るためローカルに落とす様に今回はした。"],"metadata":{"id":"_QTxh2qDVuwI"}},{"cell_type":"code","source":["from google.colab import files\n","\n","files.download('/content/lm-evaluation-harness/result/result_jsquad.json')\n","files.download('/content/lm-evaluation-harness/result/result_jcommonsenseqa.json')\n","files.download('/content/lm-evaluation-harness/result/result_jnli.json')\n","files.download('/content/lm-evaluation-harness/result/result_marc_ja.json')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":34},"id":"q2IV2w8423zg","executionInfo":{"status":"ok","timestamp":1691330876591,"user_tz":-540,"elapsed":9,"user":{"displayName":"八木原統","userId":"03559086887314454384"}},"outputId":"fe4907d0-2ccf-4c2f-d6ee-cc5f89b355ba"},"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.Javascript object>"],"application/javascript":["\n"," async function download(id, filename, size) {\n"," if (!google.colab.kernel.accessAllowed) {\n"," return;\n"," }\n"," const div = document.createElement('div');\n"," const label = document.createElement('label');\n"," label.textContent = `Downloading \"${filename}\": `;\n"," div.appendChild(label);\n"," const progress = document.createElement('progress');\n"," progress.max = size;\n"," div.appendChild(progress);\n"," document.body.appendChild(div);\n","\n"," const buffers = [];\n"," let downloaded = 0;\n","\n"," const channel = await google.colab.kernel.comms.open(id);\n"," // Send a message to notify the kernel that we're ready.\n"," channel.send({})\n","\n"," for await (const message of channel.messages) {\n"," // Send a message to notify the kernel that we're ready.\n"," 
channel.send({})\n"," if (message.buffers) {\n"," for (const buffer of message.buffers) {\n"," buffers.push(buffer);\n"," downloaded += buffer.byteLength;\n"," progress.value = downloaded;\n"," }\n"," }\n"," }\n"," const blob = new Blob(buffers, {type: 'application/binary'});\n"," const a = document.createElement('a');\n"," a.href = window.URL.createObjectURL(blob);\n"," a.download = filename;\n"," div.appendChild(a);\n"," a.click();\n"," div.remove();\n"," }\n"," "]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.Javascript object>"],"application/javascript":["download(\"download_500cc0c5-8473-4de0-ac89-39543dc96377\", \"result_marc_ja.json\", 588)"]},"metadata":{}}]},{"cell_type":"markdown","source":["ちなみに、ベンチマークタスクは以下の様に\",\"区切りで指定すれば複数一気に行うことができる。なぜか単発毎よりも実行時時間がかかりそうだったため今回は個別に行った。"],"metadata":{"id":"ce4HppNNVC7B"}},{"cell_type":"code","source":["!python main.py \\\n"," --model hf-causal-experimental \\\n"," --model_args \"pretrained=meta-llama/Llama-2-13b-hf,peft=HachiML/Llama-2-13b-hf-qlora-dolly-ja-2ep\" \\\n"," --tasks \"jsquad-1.1-0.3,jcommonsenseqa-1.1-0.3,jnli-1.1-0.3,marc_ja-1.1-0.3\" \\\n"," --num_fewshot \"2,3,3,3\" \\\n"," --batch_size 1 \\\n"," --device \"cuda\" \\\n"," --output_path \"result.json\""],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Rho49R3iPulh","executionInfo":{"status":"ok","timestamp":1691316334804,"user_tz":-540,"elapsed":9624749,"user":{"displayName":"八木原統","userId":"03559086887314454384"}},"outputId":"9dc02616-0d85-4ac8-da43-80cc0b7ba760"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["2023-08-06 07:25:14.257072: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","Selected Tasks: ['jsquad-1.1-0.3', 'jcommonsenseqa-1.1-0.3', 'jnli-1.1-0.3', 'marc_ja-1.1-0.3']\n","Loading checkpoint shards: 100% 3/3 [00:02<00:00, 
1.38it/s]\n","/content/lm-evaluation-harness/lm_eval/tasks/ja/jsquad.py:75: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate\n"," self.jasquad_metric = datasets.load_metric(jasquad.__file__)\n","Running greedy_until requests\n","100% 4442/4442 [2:16:25<00:00, 1.84s/it]\n","Running loglikelihood requests\n"," 5% 1302/24205 [14:46<4:19:50, 1.47it/s]\n","Traceback (most recent call last):\n"," File \"/content/lm-evaluation-harness/main.py\", line 122, in <module>\n"," main()\n"," File \"/content/lm-evaluation-harness/main.py\", line 91, in main\n"," results = evaluator.simple_evaluate(\n"," File \"/content/lm-evaluation-harness/lm_eval/utils.py\", line 185, in _wrapper\n"," return fn(*args, **kwargs)\n"," File \"/content/lm-evaluation-harness/lm_eval/evaluator.py\", line 87, in simple_evaluate\n"," results = evaluate(\n"," File \"/content/lm-evaluation-harness/lm_eval/utils.py\", line 185, in _wrapper\n"," return fn(*args, **kwargs)\n"," File \"/content/lm-evaluation-harness/lm_eval/evaluator.py\", line 273, in evaluate\n"," resps = getattr(lm, reqtype)([req.args for req in reqs])\n"," File \"/content/lm-evaluation-harness/lm_eval/base.py\", line 852, in fn\n"," rem_res = getattr(self.lm, attr)(remaining_reqs)\n"," File \"/content/lm-evaluation-harness/lm_eval/base.py\", line 191, in loglikelihood\n"," return self._loglikelihood_tokens(new_reqs)\n"," File \"/content/lm-evaluation-harness/lm_eval/base.py\", line 302, in _loglikelihood_tokens\n"," ).cpu() # [batch, padding_length, vocab]\n","KeyboardInterrupt\n","^C\n"]}]}]}
|
benchmark_jglue/result/result_jcommonsenseqa.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"jcommonsenseqa-1.1-0.3": {
|
4 |
+
"acc": 0.7578194816800715,
|
5 |
+
"acc_stderr": 0.012812432289317893,
|
6 |
+
"acc_norm": 0.4280607685433423,
|
7 |
+
"acc_norm_stderr": 0.014798127177394432
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"jcommonsenseqa-1.1-0.3": 1.1
|
12 |
+
},
|
13 |
+
"config": {
|
14 |
+
"model": "hf-causal-experimental",
|
15 |
+
"model_args": "pretrained=meta-llama/Llama-2-13b-hf,peft=HachiML/Llama-2-13b-hf-qlora-dolly-ja-2ep",
|
16 |
+
"num_fewshot": 3,
|
17 |
+
"batch_size": 1,
|
18 |
+
"device": "cuda",
|
19 |
+
"no_cache": false,
|
20 |
+
"limit": null,
|
21 |
+
"bootstrap_iters": 100000,
|
22 |
+
"description_dict": {}
|
23 |
+
}
|
24 |
+
}
|
benchmark_jglue/result/result_jnli.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"jnli-1.1-0.3": {
|
4 |
+
"acc": 0.5069843878389483,
|
5 |
+
"acc_stderr": 0.010135765974065071,
|
6 |
+
"acc_norm": 0.3056696795398521,
|
7 |
+
"acc_norm_stderr": 0.009339813231542836
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"jnli-1.1-0.3": 1.1
|
12 |
+
},
|
13 |
+
"config": {
|
14 |
+
"model": "hf-causal-experimental",
|
15 |
+
"model_args": "pretrained=meta-llama/Llama-2-13b-hf,peft=HachiML/Llama-2-13b-hf-qlora-dolly-ja-2ep",
|
16 |
+
"num_fewshot": 3,
|
17 |
+
"batch_size": 8,
|
18 |
+
"device": "cuda",
|
19 |
+
"no_cache": false,
|
20 |
+
"limit": null,
|
21 |
+
"bootstrap_iters": 100000,
|
22 |
+
"description_dict": {}
|
23 |
+
}
|
24 |
+
}
|
benchmark_jglue/result/result_jsquad.json
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"jsquad-1.1-0.3": {
|
4 |
+
"exact_match": 62.83205763169743,
|
5 |
+
"f1": 77.09913819742155
|
6 |
+
}
|
7 |
+
},
|
8 |
+
"versions": {
|
9 |
+
"jsquad-1.1-0.3": 1.1
|
10 |
+
},
|
11 |
+
"config": {
|
12 |
+
"model": "hf-causal-experimental",
|
13 |
+
"model_args": "pretrained=meta-llama/Llama-2-13b-hf,peft=HachiML/Llama-2-13b-hf-qlora-dolly-ja-2ep",
|
14 |
+
"num_fewshot": 2,
|
15 |
+
"batch_size": 1,
|
16 |
+
"device": "cuda",
|
17 |
+
"no_cache": false,
|
18 |
+
"limit": null,
|
19 |
+
"bootstrap_iters": 100000,
|
20 |
+
"description_dict": {}
|
21 |
+
}
|
22 |
+
}
|
benchmark_jglue/result/result_marc_ja.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"marc_ja-1.1-0.3": {
|
4 |
+
"acc": 0.7964273081004598,
|
5 |
+
"acc_stderr": 0.005355417561710155,
|
6 |
+
"acc_norm": 0.7964273081004598,
|
7 |
+
"acc_norm_stderr": 0.005355417561710155
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"versions": {
|
11 |
+
"marc_ja-1.1-0.3": 1.1
|
12 |
+
},
|
13 |
+
"config": {
|
14 |
+
"model": "hf-causal-experimental",
|
15 |
+
"model_args": "pretrained=meta-llama/Llama-2-13b-hf,peft=HachiML/Llama-2-13b-hf-qlora-dolly-ja-2ep",
|
16 |
+
"num_fewshot": 3,
|
17 |
+
"batch_size": 2,
|
18 |
+
"device": "cuda",
|
19 |
+
"no_cache": false,
|
20 |
+
"limit": null,
|
21 |
+
"bootstrap_iters": 100000,
|
22 |
+
"description_dict": {}
|
23 |
+
}
|
24 |
+
}
|