yolo12138 committed on
Commit
59f9119
·
1 Parent(s): 657d2f2

docker 环境部署

Browse files
Files changed (5) hide show
  1. .gitignore +5 -0
  2. Dockerfile +30 -0
  3. app.py +89 -0
  4. table.jpg +0 -0
  5. word_1.jpg +0 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ __pycache__
2
+ .vscode
3
+ .DS_Store
4
+ .coverage
5
+ output
Dockerfile ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Adapted from:
# https://huggingface.co/spaces/gaunernst/layoutlm-docvqa-paddleocr/blob/main/Dockerfile
FROM python:3.10

# No CUDA device is exposed to the frameworks; the pip steps below install CPU builds.
ENV CUDA_VISIBLE_DEVICES=-1
# Build-time setting so pip does not keep a download cache during the RUN steps.
ARG PIP_NO_CACHE_DIR=1

# libssl1.1 for PaddlePaddle
RUN wget http://security.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb \
    && dpkg -i libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb \
    && rm libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb

# Python dependencies, one layer per index/package group.
RUN pip install torch==2.1.1 -i https://download.pytorch.org/whl/cpu
RUN pip install paddlepaddle==2.5.1 -i https://mirror.baidu.com/pypi/simple
RUN pip install transformers gradio Pillow fastapi
# Swap the GUI OpenCV wheels pulled in with paddleocr for the headless build.
RUN pip install paddleocr==2.7.0.3 \
    && pip uninstall -y opencv-python opencv-contrib-python \
    && pip install opencv-python-headless

# Run as an unprivileged user (uid 1000) from here on.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user
WORKDIR $HOME/app
# app.py writes its table-recognition results into ./output.
RUN mkdir output
COPY --chown=user app.py table.jpg word_1.jpg $HOME/app/

# Run both pipelines once on the sample images at build time
# (presumably so the model weights are downloaded into the image -- confirm).
RUN paddleocr --image_dir ./word_1.jpg --use_angle_cls true --use_gpu false --lang ch
RUN paddleocr --image_dir ./table.jpg --type=structure --table=true --lang ch

CMD ["python", "-u", "app.py"]
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uvicorn
2
+ from fastapi.staticfiles import StaticFiles
3
+ import hashlib
4
+ from enum import Enum
5
+ from fastapi import FastAPI, UploadFile, File
6
+ from paddleocr import PaddleOCR, PPStructure, save_structure_res
7
+ from PIL import Image
8
+ import io
9
+ import numpy as np
10
+
# The FastAPI application; the route decorators below register on it.
app = FastAPI()

# Device flag forwarded to get_ocr() by the /ocr endpoint.
use_gpu = False

# Directory that save_structure_res() writes the table-recognition output to.
output_dir = "output"
14
+
class LangEnum(str, Enum):
    """Language codes accepted by the ``lang`` query parameter.

    The value is handed on unchanged to the PaddleOCR / PPStructure
    constructors. Thanks to the ``str`` mixin each member compares equal
    to its plain string value.
    """

    ch = "ch"
    en = "en"
18
+
# Process-wide cache of PaddleOCR engines, keyed by (lang, use_gpu) so that an
# engine built for one device setting is never handed out for the other.
ocr_cache = {}


def get_ocr(lang, use_gpu=False):
    """Return the PaddleOCR engine for ``lang``, building it on first use.

    Args:
        lang: Language code forwarded to ``PaddleOCR(lang=...)``.
        use_gpu: Forwarded to ``PaddleOCR(use_gpu=...)``; part of the cache key.

    Returns:
        The cached ``PaddleOCR`` instance for this ``(lang, use_gpu)`` pair.
    """
    key = (lang, bool(use_gpu))
    engine = ocr_cache.get(key)
    if engine is None:
        engine = PaddleOCR(use_angle_cls=True, lang=lang, use_gpu=use_gpu)
        ocr_cache[key] = engine
    return engine
28
+
29
+
@app.post("/ocr")
async def create_upload_file(
    file: UploadFile = File(...),
    lang: LangEnum = LangEnum.ch,
):
    """Run text recognition on an uploaded image.

    Args:
        file: The uploaded image; its bytes are decoded with Pillow.
        lang: Recognition language, defaults to ``LangEnum.ch``.

    Returns:
        A list with one dict per recognised text line, holding ``boxes``
        (the line's bounding polygon), ``txt`` and ``score``. The list is
        empty when nothing was recognised.
    """
    contents = await file.read()
    image = Image.open(io.BytesIO(contents))
    # GPU usage is governed by the module-level ``use_gpu`` flag.
    ocr = get_ocr(lang=lang, use_gpu=use_gpu)

    pages = ocr.ocr(np.array(image), cls=True)
    # One entry per input image; PaddleOCR puts None there when it detects no
    # text, which previously made the comprehensions raise TypeError.
    lines = (pages[0] if pages else None) or []

    # Recognition result: each line is [box, (text, score)].
    return [
        {"boxes": line[0], "txt": line[1][0], "score": line[1][1]}
        for line in lines
    ]
49
+
50
+
# PPStructure engines keyed by language, so the models are loaded once per
# process instead of on every request.
_table_engine_cache = {}


def _get_table_engine(lang):
    """Return the cached PPStructure engine for ``lang``, building it on first use."""
    engine = _table_engine_cache.get(lang)
    if engine is None:
        engine = PPStructure(show_log=True, table=True, lang=lang)
        _table_engine_cache[lang] = engine
    return engine


@app.post("/ocr_table")
async def create_upload_file(
    file: UploadFile = File(...),
    lang: LangEnum = LangEnum.ch,
):
    """Run layout/table recognition on an uploaded image.

    The raw result is also written below ``output_dir`` in a folder named
    after the SHA-256 of the uploaded bytes.

    Args:
        file: The uploaded image; its bytes are decoded with Pillow.
        lang: Recognition language, defaults to ``LangEnum.ch``.

    Returns:
        A dict with parallel lists ``htmls``, ``bboxes`` and ``types`` (one
        entry per detected region) plus ``hash``, the SHA-256 hex digest of
        the upload. ``htmls`` holds ``''`` for regions without table HTML.
    """
    table_engine = _get_table_engine(lang)

    contents = await file.read()
    # Hash of the file content; used as the output folder name.
    file_hash = hashlib.sha256(contents).hexdigest()

    image = Image.open(io.BytesIO(contents))
    result = table_engine(np.array(image))

    save_structure_res(result, output_dir, file_hash)

    htmls = []
    types = []
    bboxes = []

    for region in result:
        region_res = region.get('res')
        # Only table regions carry a dict with 'html'; other region types
        # hold a list of text lines, on which .get() would raise.
        html = region_res.get('html', '') if isinstance(region_res, dict) else ''
        htmls.append(html)
        types.append(region.get('type', ''))
        bboxes.append(region.get('bbox', ''))

    return {
        'htmls': htmls,
        'hash': file_hash,
        'bboxes': bboxes,
        'types': types,
    }
85
+
86
+
if __name__ == '__main__':
    import os

    # StaticFiles raises at construction time when the directory is missing,
    # so make sure it exists when the app is started outside the image.
    os.makedirs(output_dir, exist_ok=True)
    app.mount(
        "/output",
        StaticFiles(directory=output_dir, follow_symlink=True, html=True),
        name="output",
    )
    # uvicorn binds to 127.0.0.1 by default, which cannot be reached from
    # outside a container; listen on all interfaces instead.
    uvicorn.run(app=app, host="0.0.0.0", port=7860)
table.jpg ADDED
word_1.jpg ADDED