{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "awq-intro-md",
   "metadata": {},
   "source": [
    "# AWQ quantization of `mesolitica/malaysian-tinyllama-1.1b-16k-instructions-v3`\n",
    "\n",
    "This notebook loads the base model in bfloat16, re-saves it locally as a PyTorch `.bin` checkpoint, quantizes that checkpoint with AutoAWQ (4-bit weights, group size 128, zero point, `GEMM` version) using `mesolitica/malaysian-calibration` as calibration data, and then saves and uploads the result.\n",
    "\n",
    "Run the cells top to bottom on a fresh kernel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "19fe0df6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prerequisite (run once, then restart the kernel): the AutoAWQ 0.1.8 wheel\n",
    "# built for CUDA 11.8 / CPython 3.10 / Linux x86_64.\n",
    "# %pip install https://github.com/casper-hansen/AutoAWQ/releases/download/v0.1.8/autoawq-0.1.8+cu118-cp310-cp310-linux_x86_64.whl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "20861f3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import shutil\n",
    "\n",
    "from awq import AutoAWQForCausalLM\n",
    "from transformers import AutoConfig, AwqConfig, AutoTokenizer, AutoModelForCausalLM\n",
    "import torch\n",
    "\n",
    "# Hub id of the model to quantize.\n",
    "model_path = 'mesolitica/malaysian-tinyllama-1.1b-16k-instructions-v3'\n",
    "# Scratch directory that holds the locally re-saved base checkpoint.\n",
    "base_checkpoint_dir = './test'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "awq-resave-md",
   "metadata": {},
   "source": [
    "## Re-save the base checkpoint\n",
    "\n",
    "The base model is loaded in bfloat16 and written to a clean scratch directory, which is what AutoAWQ loads from in the next section."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "9939ad4e",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Kept under its own name so it is never confused with the AutoAWQ wrapper bound to `model` below.\n",
    "base_model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "fdb86f50",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start from an empty directory so files left by a previous run cannot be picked up.\n",
    "# ignore_errors=True mirrors `rm -rf`: a missing directory is not an error.\n",
    "shutil.rmtree(base_checkpoint_dir, ignore_errors=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "72e76288",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write PyTorch .bin weights rather than safetensors.\n",
    "# NOTE(review): presumably because this AutoAWQ build loads .bin checkpoints by default - confirm.\n",
    "base_model.save_pretrained(base_checkpoint_dir, safe_serialization=False)\n",
    "\n",
    "# The bf16 copy is on disk now; release it so it does not stay in memory next to the AWQ model.\n",
    "# Re-run the loading cell above before re-running this one.\n",
    "del base_model"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "awq-quantize-md",
   "metadata": {},
   "source": [
    "## Quantize with AutoAWQ"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "aa245150",
   "metadata": {},
   "outputs": [],
   "source": [
    "# From here on `model` is the AutoAWQ wrapper; later cells quantize and save through it.\n",
    "model = AutoAWQForCausalLM.from_pretrained(base_checkpoint_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "d3949cf4",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "AWQ: 100%|██████████| 22/22 [02:25<00:00, 6.59s/it]\n"
     ]
    }
   ],
   "source": [
    "# Local output directory for the quantized model and tokenizer.\n",
    "quant_path = 'malaysian-tinyllama-1.1b-16k-instructions-v3-awq'\n",
    "# These values are reused later to build the AwqConfig that is pushed with the model config.\n",
    "quant_config = { \"zero_point\": True, \"q_group_size\": 128, \"w_bit\": 4, \"version\": \"GEMM\" }\n",
    "\n",
    "# NOTE(review): trust_remote_code=True allows code shipped in the hub repo to run locally;\n",
    "# confirm it is actually required for this tokenizer.\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)\n",
    "model.quantize(tokenizer, quant_config=quant_config, calib_data = 'mesolitica/malaysian-calibration')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "ee290c1e",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type":
"stream", "text": [ "WARNING:root:`quant_config.json` is being deprecated in the future in favor of quantization_config in config.json.\n" ] }, { "data": { "text/plain": [ "('malaysian-tinyllama-1.1b-16k-instructions-v3-awq/tokenizer_config.json',\n", " 'malaysian-tinyllama-1.1b-16k-instructions-v3-awq/special_tokens_map.json',\n", " 'malaysian-tinyllama-1.1b-16k-instructions-v3-awq/tokenizer.json')" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.save_quantized(quant_path, safetensors = False)\n", "tokenizer.save_pretrained(quant_path)" ] }, { "cell_type": "code", "execution_count": 9, "id": "737f2403", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "CommitInfo(commit_url='https://huggingface.co/mesolitica/malaysian-tinyllama-1.1b-16k-instructions-v3-AWQ/commit/909193caec2d94a495535d9033f5dcd975686356', commit_message='Upload tokenizer', commit_description='', oid='909193caec2d94a495535d9033f5dcd975686356', pr_url=None, pr_revision=None, pr_num=None)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokenizer.push_to_hub('mesolitica/malaysian-tinyllama-1.1b-16k-instructions-v3-AWQ')" ] }, { "cell_type": "code", "execution_count": 10, "id": "ed92c8ee", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "CommitInfo(commit_url='https://huggingface.co/mesolitica/malaysian-tinyllama-1.1b-16k-instructions-v3-AWQ/commit/a259a50f290eb1e648396698d9a0dcac7d33d5a2', commit_message='Upload config', commit_description='', oid='a259a50f290eb1e648396698d9a0dcac7d33d5a2', pr_url=None, pr_revision=None, pr_num=None)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "quantization_config = AwqConfig(\n", " bits=quant_config['w_bit'],\n", " group_size=quant_config['q_group_size'],\n", " zero_point=quant_config['zero_point'],\n", " backend='autoawq',\n", " version=quant_config['version'].lower(),\n", ")\n", "\n", "config = 
AutoConfig.from_pretrained(model_path)\n", "config.quantization_config = quantization_config\n", "\n", "config.push_to_hub('mesolitica/malaysian-tinyllama-1.1b-16k-instructions-v3-AWQ')" ] }, { "cell_type": "code", "execution_count": 11, "id": "c74b2f45", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "config.json\t\tquant_config.json\t tokenizer_config.json\r\n", "generation_config.json\tspecial_tokens_map.json\r\n", "pytorch_model.bin\ttokenizer.json\r\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" ] } ], "source": [ "!ls malaysian-tinyllama-1.1b-16k-instructions-v3-awq" ] }, { "cell_type": "code", "execution_count": 12, "id": "2e0fb591", "metadata": {}, "outputs": [], "source": [ "from huggingface_hub import HfApi\n", "\n", "api = HfApi()" ] }, { "cell_type": "code", "execution_count": 13, "id": "dd06cfa2", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2c8f617cd86c41e9ae97e6ed0e9cca0a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "pytorch_model.bin: 0%| | 0.00/766M [00:00 [INST] KWSP tu apa [/INST]KWSP, singkatan untuk Kumpulan Wang Simpanan Pekerja, ialah Kumpulan Wang Simpanan Pekerja di Malaysia, yang merupakan dana simpanan pekerja. KWSP bertujuan untuk menyediakan simpanan persaraan dan faedah keselamatan sosial untuk ahli KWSP (dana caruman dan majikan) dengan menyediakan pelaburan boleh beli. Dana ini diuruskan oleh KWSP, sebuah syarikat milik kerajaan di Malaysia. 
KWSP terdiri daripada simpanan caruman pekerja dan simpanan majikan, dengan peruntukan yang berkaitan dengan skim simpanan yang berbeza di Malaysia. Dana ini menggunakan pelaburan yang dibuat oleh pencarum dan pembayar caruman untuk menjana dividen dan faedah, yang seterusnya menyokong matlamat kerajaan untuk menyediakan simpanan persaraan yang selamat dan mencukupi untuk pekerja.'" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "generate_kwargs = dict(\n", " inputs,\n", " max_new_tokens=1024,\n", " top_p=0.95,\n", " top_k=50,\n", " temperature=0.9,\n", " do_sample=True,\n", " num_beams=1,\n", ")\n", "r = quantized_model.generate(**generate_kwargs)\n", "tokenizer.decode(r[0])" ] }, { "cell_type": "code", "execution_count": null, "id": "a9a93555", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 5 }