Spaces:
Runtime error
Runtime error
limcheekin
committed on
Commit
•
2516e02
0
Parent(s):
Duplicate from limcheekin/orca_mini_v3_13B-GGML
Browse files- .gitattributes +35 -0
- Dockerfile +35 -0
- README.md +20 -0
- index.html +37 -0
- main.py +28 -0
- start_server.sh +6 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Base image: slim Python 3.10.
FROM python:3.10-slim

# Install build and runtime dependencies:
#   libopenblas-dev                           - BLAS backend selected via CMAKE_ARGS below
#   ninja-build, build-essential, pkg-config  - toolchain for the native build
#   curl                                      - used to download the model file
# The apt package lists are removed in the same layer so they do not bloat the image.
RUN apt-get update && \
    apt-get install -y \
    libopenblas-dev \
    ninja-build \
    build-essential \
    pkg-config \
    curl && \
    rm -rf /var/lib/apt/lists/*

# Build llama-cpp-python (with the server extra) from source against OpenBLAS.
# NOTE(review): pinned to 0.1.78, believed to be the last release that loads
# GGML v3 model files (later releases expect GGUF) - confirm against the
# llama-cpp-python changelog before changing the pin or the model format.
# The requirement is quoted so the shell never treats "[server]" as a glob.
RUN pip install -U pip setuptools wheel && \
    CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 pip install --verbose "llama-cpp-python[server]==0.1.78"

# Download model.
# -f makes curl exit non-zero on an HTTP error, so a failed download stops the
# build instead of saving an error page as the model file.
RUN mkdir model && \
    curl -fL https://huggingface.co/TheBloke/orca_mini_v3_13B-GGML/resolve/main/orca_mini_v3_13b.ggmlv3.q5_K_S.bin -o model/ggmlv3-model.bin

# Application files; main.py reads index.html from the working directory.
COPY ./start_server.sh ./
COPY ./main.py ./
COPY ./index.html ./

# Make the server start script executable
RUN chmod +x ./start_server.sh

# Host and port that main.py reads from the environment when starting uvicorn
ENV HOST=0.0.0.0
ENV PORT=7860

# Expose a port for the server
EXPOSE ${PORT}

# Run the server start script
CMD ["/bin/sh", "./start_server.sh"]
|
README.md
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: orca_mini_v3_13B-GGML (q5_K_S)
|
3 |
+
colorFrom: purple
|
4 |
+
colorTo: blue
|
5 |
+
sdk: docker
|
6 |
+
models:
|
7 |
+
- TheBloke/orca_mini_v3_13B-GGML
|
8 |
+
tags:
|
9 |
+
- inference api
|
10 |
+
- openai-api compatible
|
11 |
+
- llama-cpp-python
|
12 |
+
- orca_mini_v3_13B
|
13 |
+
- ggml
|
14 |
+
pinned: false
|
15 |
+
duplicated_from: limcheekin/orca_mini_v3_13B-GGML
|
16 |
+
---
|
17 |
+
|
18 |
+
# orca_mini_v3_13B-GGML (q5_K_S)
|
19 |
+
|
20 |
+
Please refer to [index.html](index.html) for more information.
|
index.html
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
<!-- Landing page served at "/" by main.py; links to the API endpoint and its docs. -->
<html lang="en">
  <head>
    <!-- Declare the encoding explicitly so browsers do not have to guess it. -->
    <meta charset="utf-8" />
    <title>orca_mini_v3_13B-GGML (q5_K_S)</title>
  </head>
  <body>
    <h1>orca_mini_v3_13B-GGML (q5_K_S)</h1>
    <p>
      With the utilization of the
      <a href="https://github.com/abetlen/llama-cpp-python">llama-cpp-python</a>
      package, we are excited to introduce the GGML model hosted in the Hugging
      Face Docker Spaces, made accessible through an OpenAI-compatible API. This
      space includes comprehensive API documentation to facilitate seamless
      integration.
    </p>
    <ul>
      <li>
        The API endpoint:
        <a href="https://limcheekin-orca-mini-v3-13b-ggml.hf.space/v1">https://limcheekin-orca-mini-v3-13b-ggml.hf.space/v1</a>
      </li>
      <li>
        The API doc:
        <a href="https://limcheekin-orca-mini-v3-13b-ggml.hf.space/docs">https://limcheekin-orca-mini-v3-13b-ggml.hf.space/docs</a>
      </li>
    </ul>
    <p>
      If you find this resource valuable, your support in the form of starring
      the space would be greatly appreciated. Your engagement plays a vital role
      in furthering the application for a community GPU grant, ultimately
      enhancing the capabilities and accessibility of this space.
    </p>
  </body>
</html>
|
main.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Entry point for the llama-cpp-python server with an HTML landing page.

Creates the app via ``llama_cpp.server.app.create_app`` for the model at
``model/ggmlv3-model.bin``, registers a ``/`` route that returns the contents
of ``index.html``, and, when executed as a script, serves the app with uvicorn
on the host/port taken from the ``HOST`` and ``PORT`` environment variables.
"""
import os

from fastapi.responses import HTMLResponse
from llama_cpp.server.app import create_app, Settings

# os.cpu_count() may return None when the count cannot be determined;
# fall back to a single thread rather than passing None to the settings.
cpu_count = os.cpu_count()
print("os.cpu_count()", cpu_count)
app = create_app(
    Settings(
        n_threads=cpu_count or 1,
        model="model/ggmlv3-model.bin",
        embedding=False,
    )
)

# Read the content of index.html once and store it in memory.
# The encoding is explicit so the result does not depend on the locale.
with open("index.html", "r", encoding="utf-8") as f:
    content = f.read()


@app.get("/", response_class=HTMLResponse)
async def read_items() -> str:
    """Return the cached landing page HTML."""
    return content


if __name__ == "__main__":
    import uvicorn

    # Defaults match the values set in the Dockerfile, so the script also
    # starts when HOST/PORT are not present in the environment.
    uvicorn.run(
        app,
        host=os.environ.get("HOST", "0.0.0.0"),
        port=int(os.environ.get("PORT", "7860")),
    )
|
start_server.sh
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/sh
# Start script for the container: raise the locked-memory limit, then run the server.

# For mlock support.
# Raising the limit is best-effort: if the container is not allowed to change it,
# print a warning and continue with the current limit.
ulimit -l unlimited || echo "warning: could not raise the locked-memory limit" >&2

# exec replaces this shell with the Python process, so signals sent to the
# container's main process are delivered to the server directly.
exec python3 -B main.py
|