gguf-my-repo-plus-tiktoken

Runtime error

App Files Files Community

Vaibhav Srivastav committed on Mar 26

Commit

2bede7c

•

1 Parent(s): eecc2cb

up.

Browse files

Files changed (2) hide show

Dockerfile +61 -0
app.py +59 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,61 @@

+# CUDA "devel" base image: llama.cpp is compiled with LLAMA_CUDA=1 further down.
+FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
+ENV DEBIAN_FRONTEND=noninteractive
+# System packages: VCS/download tools, the libraries pyenv needs to build
+# CPython from source, and ffmpeg for gradio. The apt package lists are
+# removed in the same layer so they do not end up in the image.
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y --no-install-recommends \
+    git \
+    git-lfs \
+    wget \
+    curl \
+    # python build dependencies \
+    build-essential \
+    libssl-dev \
+    zlib1g-dev \
+    libbz2-dev \
+    libreadline-dev \
+    libsqlite3-dev \
+    libncursesw5-dev \
+    xz-utils \
+    tk-dev \
+    libxml2-dev \
+    libxmlsec1-dev \
+    libffi-dev \
+    liblzma-dev \
+    # gradio dependencies \
+    ffmpeg && \
+    rm -rf /var/lib/apt/lists/*
+# Everything below runs as an unprivileged user with uid 1000.
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:${PATH}
+WORKDIR ${HOME}/app
+# Install pyenv and put its shims first on PATH so "python" and "pip" resolve
+# to the interpreter built below.
+RUN curl https://pyenv.run | bash
+ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
+ARG PYTHON_VERSION=3.10.13
+# app.py imports gradio; it is installed here explicitly because no other
+# step in this Dockerfile names it.
+RUN pyenv install ${PYTHON_VERSION} && \
+    pyenv global ${PYTHON_VERSION} && \
+    pyenv rehash && \
+    pip install --no-cache-dir -U pip setuptools wheel && \
+    pip install --no-cache-dir "huggingface-hub" "hf-transfer" "gradio"
+COPY --chown=1000 . ${HOME}/app
+# Build llama.cpp with CUDA enabled; app.py calls llama.cpp/convert.py and
+# llama.cpp/quantize relative to this working directory.
+RUN git clone https://github.com/ggerganov/llama.cpp && \
+    cd llama.cpp && \
+    make clean && \
+    LLAMA_CUDA=1 make
+RUN pip install --no-cache-dir -r llama.cpp/requirements.txt
+ENV PYTHONPATH=${HOME}/app \
+    PYTHONUNBUFFERED=1 \
+    HF_HUB_ENABLE_HF_TRANSFER=1 \
+    GRADIO_ALLOW_FLAGGING=never \
+    GRADIO_NUM_PORTS=1 \
+    GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_THEME=huggingface \
+    TQDM_POSITION=-1 \
+    TQDM_MININTERVAL=1 \
+    SYSTEM=spaces
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import gradio as gr
+import subprocess
+from huggingface_hub import create_repo, HfApi
+from huggingface_hub import snapshot_download
+# Module-level Hub client; process_model uses it for upload_folder.
+api = HfApi()
+def process_model(model_id, q_method, username, hf_token):
+    MODEL_NAME = model_id.split('/')[-1]
+    fp16 = f"{MODEL_NAME}/{MODEL_NAME.lower()}.fp16.bin"
+    snapshot_download(repo_id=model_id, local_dir = f"{MODEL_NAME}", local_dir_use_symlinks=False)
+    print("Model downloaded successully!")
+    fp16_conversion = f"python llama.cpp/convert.py {MODEL_NAME} --outtype f16 --outfile {fp16}"
+    subprocess.run(fp16_conversion, shell=True)
+    print("Model converted to fp16 successully!")
+    qtype = f"{MODEL_NAME}/{MODEL_NAME.lower()}.{q_method.upper()}.gguf"
+    quantise_ggml = f"./llama.cpp/quantize {fp16} {qtype} {q_method}"
+    subprocess.run(quantise_ggml, shell=True)
+    print("Quantised successfully!")
+    # Create empty repo
+    create_repo(
+        repo_id = f"{username}/{MODEL_NAME}-{q_method}-GGUF",
+        repo_type="model",
+        exist_ok=True,
+        token=hf_token
+    )
+    print("Empty repo created successfully!")
+    # Upload gguf files
+    api.upload_folder(
+        folder_path=MODEL_NAME,
+        repo_id=f"{username}/{MODEL_NAME}-{q_method}-GGUF",
+        allow_patterns=["*.gguf","$.md"],
+        token=hf_token
+    )
+    print("Uploaded successfully!")
+    return "Processing complete."
+# Create Gradio interface
+iface = gr.Interface(
+    fn=process_model,
+    inputs=[
+        gr.Textbox(lines=1, label="Model ID"),
+        gr.Textbox(lines=1, label="Quantization Methods"),
+        gr.Textbox(lines=1, label="Username"),
+        gr.Textbox(lines=1, label="Token")
+    ],
+    outputs="text"
+)
+# Launch the interface
+iface.launch(debug=True)