Spaces:

ztime
/

qwen

Sleeping

File size: 721 Bytes

55204ef
c230d0d
0f1a0f1
c230d0d
0f1a0f1
 
 
22ea270
8f2ecaf
 
c230d0d
bc1c0b6
c230d0d

# Base image for both the build and the runtime of this single-stage image.
# NOTE(review): nothing visible in this file invokes Python; the image appears
# to be used only as a slim apt-based base — confirm before swapping it out.
FROM python:3.11-slim

# Install the tools the later steps call: git (clone), cmake + build-essential
# (compile), wget (model download). ninja-build is kept from the original
# package list. ca-certificates is listed explicitly because recommended
# packages are skipped and both the clone and the download use HTTPS.
# apt-get is used instead of apt (stable CLI for scripts), and the package
# index is removed in the same layer so it does not end up in the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      build-essential \
      ca-certificates \
      cmake \
      git \
      ninja-build \
      wget \
 && rm -rf /var/lib/apt/lists/*
WORKDIR /app

# Branch or tag of llama.cpp to build. Override with
#   docker build --build-arg LLAMA_CPP_REF=<tag> .
# to get a reproducible build instead of whatever upstream HEAD is today.
# NOTE(review): the default assumes `master` is the upstream default branch,
# which the previous ref-less clone followed — confirm.
ARG LLAMA_CPP_REF=master

# Shallow-clone llama.cpp into /app/repo, then rewrite every occurrence of
# "v1/chat" in the server source to "api/v1/chat" (presumably so the chat
# routes are served under an /api prefix — confirm against the client).
# NOTE(review): the patched path is tied to the upstream source layout; with an
# unpinned ref this step fails if upstream moves examples/server/server.cpp.
RUN git clone --depth 1 --branch "${LLAMA_CPP_REF}" https://github.com/ggerganov/llama.cpp repo \
 && sed -i 's|v1/chat|api/v1/chat|g' repo/examples/server/server.cpp

# Configure and compile only the `server` target, then copy the binary to
# /app/server where the entrypoint expects it.
# - CMAKE_BUILD_TYPE is set explicitly: `--config Release` is only honoured by
#   multi-config generators and is ignored by single-config ones.
# - `-j "$(nproc)"` builds in parallel; without it the build may run serially.
# WORKDIR replaces the former `cd repo` and is restored to /app afterwards so
# later steps and the entrypoint still resolve relative paths against /app.
WORKDIR /app/repo
RUN cmake -B build -DCMAKE_BUILD_TYPE=Release \
 && cmake --build build --config Release --target server -j "$(nproc)" \
 && cp build/bin/server /app/server
WORKDIR /app
# Download the GGUF model into the working directory as model.gguf, the
# filename the entrypoint loads.
# --progress=dot:giga keeps the build log from filling with progress-bar
# output; `-c` (resume) was dropped because a fresh layer never contains a
# partial file to continue.
# Alternative model, kept for reference:
# RUN wget -c -O model.gguf "https://huggingface.co/zhangtao103239/Qwen-1.8B-GGUF/resolve/main/qwen-1.8b-q5_k_m.gguf"
RUN wget --progress=dot:giga -O model.gguf "https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat-GGUF/resolve/main/qwen2-beta-0_5b-chat-q8_0.gguf"
# Run the server as an unprivileged user instead of root. /app itself is
# handed to that user so the process can still write into its working
# directory; the binary and model stay root-owned but world-readable.
RUN useradd --create-home --uid 1000 user \
 && chown user:user /app
USER user

# Port the server listens on (documentation only; it does not publish the port).
EXPOSE 7860

# Exec form without a shell wrapper: the server runs as PID 1 and receives
# SIGTERM from `docker stop` directly, and extra `docker run` arguments are
# appended to the server command line instead of being swallowed by `sh -c`.
ENTRYPOINT ["/app/server", "-m", "/app/model.gguf", "--host", "0.0.0.0", "--port", "7860"]