File size: 1,803 Bytes
9ee83a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d902f4
 
9ee83a7
 
 
 
2d902f4
 
9ee83a7
 
 
 
21a2300
 
 
 
2d902f4
 
 
9ee83a7
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Base image, pinned to an explicit release tag.
FROM grobid/grobid:0.7.3

# OS packages used by the later build steps (git for the clones, curl and
# gnupg for the sbt repository key, texlive-full for LaTeX, plus general
# tooling). The apt package lists are removed in the same layer so they do not
# end up in the image; any later install runs its own `apt-get update`.
RUN apt-get update && \
    apt-get install -yqq \
        apt-transport-https \
        curl \
        git \
        gnupg \
        nano \
        texlive-full \
        unzip \
        wget && \
    rm -rf /var/lib/apt/lists/*
# -- installing grobid, python (torch and tensorflow), java and latex finished -- #

# Install the GROBID Python client from source.
# WORKDIR is used instead of `cd` inside RUN so the directory change is explicit.
WORKDIR /opt
RUN git clone https://github.com/kermitt2/grobid_client_python
WORKDIR /opt/grobid_client_python
RUN python3 setup.py install
# Return to /opt: subsequent steps that clone into the current directory rely on it.
WORKDIR /opt

# Install sbt from the scala-sbt apt repository, then fetch pdffigures2.
#
# The repository signing key is downloaded to a temporary file (with -f so an
# HTTP error fails the command) and imported from there, rather than piping
# curl into gpg: the default /bin/sh runs without pipefail, so a pipeline would
# only report gpg's exit status and a failed download would not be detected at
# that step.
# The apt package lists are removed in the same layer to keep the image small.
RUN echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" > /etc/apt/sources.list.d/sbt.list && \
    echo "deb https://repo.scala-sbt.org/scalasbt/debian /" > /etc/apt/sources.list.d/sbt_old.list && \
    curl -fsSL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" -o /tmp/scalasbt-release.asc && \
    gpg --no-default-keyring --keyring gnupg-ring:/etc/apt/trusted.gpg.d/scalasbt-release.gpg --import /tmp/scalasbt-release.asc && \
    rm -f /tmp/scalasbt-release.asc && \
    chmod 644 /etc/apt/trusted.gpg.d/scalasbt-release.gpg && \
    apt-get update && \
    apt-get install -yqq sbt && \
    rm -rf /var/lib/apt/lists/*
# Clone pdffigures2 into the current working directory.
RUN git clone https://github.com/allenai/pdffigures2.git

# Install the Python dependencies.
# requirements.txt is copied on its own so this layer is only rebuilt when the
# requirements change. COPY is used instead of ADD because these are plain
# local files and none of ADD's extra behaviour is needed.
WORKDIR /project
COPY ./requirements.txt /project/requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt
# Copy the NLTK bootstrap script and run it at build time
# (intended to download the NLTK tokenizer data).
COPY ./core/init_nltk.py /project/core/init_nltk.py
RUN python core/init_nltk.py
# Sample PDF placed under /project/example.
# COPY is used instead of ADD: these are plain local files.
COPY ./example.pdf /project/example/example.pdf
# Copy the sbt initialisation script and run it at build time.
# The relative path resolves against the current WORKDIR.
COPY ./core/init_sbt.py /project/core/init_sbt.py
RUN python core/init_sbt.py
# Create the log directory for GROBID; -p keeps the build from failing if the
# directory already exists.
RUN mkdir -p /opt/grobid/logs
# Download the spaCy en_core_web_sm model.
RUN python -m spacy download en_core_web_sm
# Add the application module after the slow steps above, so editing app.py
# does not invalidate their cached layers. COPY is used instead of ADD because
# this is a plain local file.
COPY ./app.py /project/app.py
# Port the service is expected to listen on (documentation only; it does not
# publish the port).
EXPOSE 7860

# Add the rest of the application code. It is copied late because it changes
# most often. COPY is used instead of ADD: plain local files and directories.
COPY ./core/ /project/core/
# Add the service start script.
COPY ./start_service.sh /project/start_service.sh

# Run the start script from the project directory (exec form, so no extra
# `sh -c` wrapper is involved).
WORKDIR /project
CMD ["bash", "start_service.sh"]