File size: 2,676 Bytes
c9019cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f34ff16
c9019cd
 
 
 
 
 
 
 
 
f34ff16
 
 
 
 
 
 
 
 
c9019cd
f34ff16
c9019cd
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import os
import torch

# Show the installed torch build (e.g. "1.10.0+cu111") for debugging.
print(torch.__version__)
# torch.__version__ has the form "<torch_ver>+<cuda_ver>"; mmcv-full wheels
# are indexed per (CUDA, torch) pair, so both parts pick the wheel URL.
torch_ver, cuda_ver = torch.__version__.split('+')
os.system(f'pip install mmcv-full==1.4.0 -f https://download.openmmlab.com/mmcv/dist/{cuda_ver}/torch{torch_ver}/index.html --no-cache-dir')
# Build and install the vendored mmdetection fork used by the layout model.
os.system('cd src/ndl_layout/mmdetection && python setup.py bdist_wheel && pip install dist/*.whl')
# Download pretrained assets: character list + recognition weights,
# layout config + weights, and the page-separation model.
os.system('wget https://lab.ndl.go.jp/dataset/ndlocr/text_recognition/mojilist_NDL.txt -P ./src/text_recognition/models')
os.system('wget https://lab.ndl.go.jp/dataset/ndlocr/text_recognition/ndlenfixed64-mj0-synth1.pth -P ./src/text_recognition/models')
os.system('wget https://lab.ndl.go.jp/dataset/ndlocr/ndl_layout/ndl_layout_config.py -P ./src/ndl_layout/models')
os.system('wget https://lab.ndl.go.jp/dataset/ndlocr/ndl_layout/epoch_140_all_eql_bt.pth -P ./src/ndl_layout/models')
os.system('wget https://lab.ndl.go.jp/dataset/ndlocr/separate_pages_ssd/weights.hdf5 -P ./src/separate_pages_ssd/ssd_tools')
# Sample image used as the Gradio example below.
os.system("wget https://i.imgur.com/fSL1CGG.jpg")
# Fix: the original indexed os.environ["PYTHONPATH"] directly, which raises
# KeyError when the variable is unset (typical in a clean environment).
# Append only when a previous value exists to avoid a leading empty entry.
_extra_path = f"{os.getcwd()}/src/text_recognition/deep-text-recognition-benchmark"
_existing = os.environ.get("PYTHONPATH")
os.environ["PYTHONPATH"] = f"{_existing}:{_extra_path}" if _existing else _extra_path

import gradio as gr
from PIL import Image
from uuid import uuid4
from pathlib import Path


def inference(im):
    """Run the NDLOCR pipeline on a PIL image.

    Saves the image into a unique working directory, invokes main.py in
    infer mode, and returns a tuple of (path to the prediction image,
    recognized text). When the page splitter produced both a left and a
    right half, the two halves are stitched side by side first.
    """
    run_id = uuid4()
    input_dir = f'{run_id}/img'
    Path(input_dir).mkdir(parents=True)
    im.save(f'{input_dir}/image.jpg')
    # -s f: single-file input, -i: emit prediction images alongside text.
    os.system(f'python main.py infer {run_id}/img/image.jpg {run_id}_output -s f -i')
    pred_dir = f'{run_id}_output/image/pred_img'
    image_path = f'{pred_dir}/image_L.jpg'
    right_path = f'{pred_dir}/image_R.jpg'
    if Path(right_path).exists():
        # Two-page spread: paste left and right halves onto one canvas.
        left_img = Image.open(f'{pred_dir}/image_L.jpg')
        right_img = Image.open(right_path)
        combined = Image.new('RGB', (left_img.width + right_img.width, left_img.height))
        combined.paste(left_img, (0, 0))
        combined.paste(right_img, (left_img.width, 0))
        combined.save(f'{pred_dir}/image_LR.jpg')
        image_path = f'{pred_dir}/image_LR.jpg'
    with open(f'{run_id}_output/image/txt/image_main.txt') as result_file:
        return image_path, result_file.read()

# Wire the inference function into a Gradio UI: one image input,
# one annotated-image output plus the recognized text.
demo = gr.Interface(
    fn=inference,
    inputs=gr.inputs.Image(label='image', type='pil'),
    outputs=['image', 'text'],
    title="NDLOCR",
    description="Gradio demo for NDLOCR. NDLOCR is a text recognition (OCR) Program.",
    article="<p style='text-align: center'><a href='https://github.com/ndl-lab' target='_blank'>NDL Lab</a> | <a href='https://github.com/ndl-lab/ndlocr_cli' target='_blank'>NDLOCR Repo</a></p>",
    examples=['fSL1CGG.jpg'],
)
demo.launch(enable_queue=True, cache_examples=True)