matthoffner committed on
Commit
0d67dc2
•
0 Parent(s):

Duplicate from matthoffner/ggml-ctransformers-fastapi

Browse files
Files changed (5) hide show
  1. .gitattributes +34 -0
  2. Dockerfile +29 -0
  3. README.md +19 -0
  4. main.py +43 -0
  5. requirements.txt +11 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:latest
2
+
3
+ ENV PYTHONUNBUFFERED 1
4
+
5
+ EXPOSE 8000
6
+
7
+ WORKDIR /app
8
+
9
+ RUN wget -qO- "https://cmake.org/files/v3.17/cmake-3.17.0-Linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local
10
+
11
+ COPY requirements.txt ./
12
+ RUN pip install --upgrade pip && \
13
+ pip install -r requirements.txt
14
+
15
+ RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
16
+ RUN apt-get install git-lfs
17
+
18
+ RUN git clone https://github.com/ggerganov/ggml && cd ggml && mkdir build && cd build && cmake ..
19
+ RUN git clone https://huggingface.co/bigcode/gpt_bigcode-santacoder
20
+ RUN python ggml/examples/starcoder/convert-hf-to-ggml.py ./gpt_bigcode-santacoder/
21
+ RUN cd ggml/build && make -j4 starcoder starcoder-quantize
22
+
23
+ RUN ggml/build/bin/starcoder-quantize models/./gpt_bigcode-santacoder/-ggml.bin ggml-model-q4_1.bin 3
24
+
25
+ COPY . .
26
+
27
+ RUN ls -al
28
+
29
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
README.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: ggml-ctransformers-fastapi
3
+ emoji: 🐳🤗⚡️
4
+ sdk: docker
5
+ app_port: 8000
6
+ duplicated_from: matthoffner/ggml-ctransformers-fastapi
7
+ ---
8
+
9
+ # ggml-ctransformers-fastapi
10
+
11
+ ## <a href="https://github.com/ggerganov/ggml" target="_blank">ggml</a>
12
+ ## <a href="https://github.com/marella/ctransformers" target="_blank">ctransformers</a>
13
+ ## [FastAPI Docs](https://matthoffner-ggml-ctransformers-fastapi.hf.space/docs)
14
+
15
+ ### Updates
16
+
17
+ * Added /v1/chat/completions
18
+ * [Start using ctransformers](https://github.com/marella/ctransformers)
19
+ * [Added starcoder example](https://github.com/ggerganov/ggml/tree/master/examples/starcoder)
main.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fastapi
2
+ import json
3
+ import markdown
4
+ import uvicorn
5
+ from fastapi.responses import HTMLResponse
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from sse_starlette.sse import EventSourceResponse
8
+ from ctransformers.langchain import CTransformers
9
+ from pydantic import BaseModel
10
+
11
+ llm = CTransformers(model='ggml-model-q4_1.bin', model_type='starcoder')  # Built once at import time; the /v1/chat/completions handler reuses this instance.
12
+ app = fastapi.FastAPI()  # ASGI application object; the Dockerfile CMD serves it as "main:app".
13
+ app.add_middleware(  # NOTE(review): wildcard origins combined with allow_credentials=True is very permissive; confirm this is intended.
14
+ CORSMiddleware,
15
+ allow_origins=["*"],
16
+ allow_credentials=True,
17
+ allow_methods=["*"],
18
+ allow_headers=["*"],
19
+ )
20
+
21
+ @app.get("/")
22
+ async def index():  # Landing page: returns README.md converted from Markdown to HTML.
23
+ with open("README.md", "r", encoding="utf-8") as readme_file:  # Relative path, resolved against the working directory; re-read on every request.
24
+ md_template_string = readme_file.read()
25
+ html_content = markdown.markdown(md_template_string)  # Converts the whole file, including the leading "---" metadata block README.md starts with.
26
+ return HTMLResponse(content=html_content, status_code=200)
27
+
28
+ class ChatCompletionRequest(BaseModel):  # Request body schema for POST /v1/chat/completions.
29
+ prompt: str  # Passed unchanged to the model by the chat handler.
30
+
31
+ @app.post("/v1/chat/completions")
32
+ async def chat(request: ChatCompletionRequest, response_mode=None):  # Runs the prompt through llm and streams the result as server-sent events; response_mode is accepted but never read.
33
+ completion = llm(request.prompt)  # NOTE(review): synchronous call inside an async handler; presumably blocks the event loop until generation finishes. Confirm.
34
+ async def server_sent_events(chat_chunks):  # Async generator yielding one dict per SSE event.
35
+ for chat_chunk in chat_chunks:  # NOTE(review): if llm(...) returns a plain str, this iterates one character at a time; verify the intended chunking.
36
+ yield dict(data=json.dumps(chat_chunk))  # Each chunk is JSON-encoded into the event's data field.
37
+ yield dict(data="[DONE]")  # Literal "[DONE]" marker event; presumably sent once after the loop (indentation is not visible in this rendering).
38
+
39
+ return EventSourceResponse(server_sent_events(completion))  # Wraps the generator so the response is delivered as an event stream.
40
+
41
+ if __name__ == "__main__":  # Runs only when this module is executed directly as a script.
42
+ uvicorn.run(app, host="0.0.0.0", port=8000)  # Same host and port as the uvicorn command in the Dockerfile CMD.
43
+
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ uvicorn
2
+ markdown
3
+ fastapi
4
+ loguru
5
+ torch
6
+ numpy
7
+ transformers
8
+ ctransformers
9
+ accelerate
10
+ langchain
11
+ sse_starlette