quantaji commited on
Commit
9ee83a7
1 Parent(s): 2d17079

add all other all

Browse files
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ example.pdf filter=lfs diff=lfs merge=lfs -text
.gitattributes copy ADDED
@@ -0,0 +1 @@
 
 
1
+ example.pdf filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ *.ipynb_checkpoints
7
+ axcell_data/papers/
8
+
9
+ # experiments output
10
+ experiments/
11
+
12
+ # output file
13
+ *.out
14
+ temp/
15
+ *.ipynb
Dockerfile ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM grobid/grobid:0.7.3
2
+ RUN apt-get update && \
3
+ apt-get install wget unzip texlive-full nano git apt-transport-https curl gnupg -yqq
4
+ # -- installing grobid, python (torch and tensorflow), java and latex finished -- #
5
+
6
+ # now install grobid python client
7
+ WORKDIR /opt
8
+ RUN git clone https://github.com/kermitt2/grobid_client_python && \
9
+ cd grobid_client_python && \
10
+ python3 setup.py install
11
+
12
+ # install sbt and pdf2figures
13
+ RUN echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | tee /etc/apt/sources.list.d/sbt.list && \
14
+ echo "deb https://repo.scala-sbt.org/scalasbt/debian /" | tee /etc/apt/sources.list.d/sbt_old.list && \
15
+ curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" | gpg --no-default-keyring --keyring gnupg-ring:/etc/apt/trusted.gpg.d/scalasbt-release.gpg --import && \
16
+ chmod 644 /etc/apt/trusted.gpg.d/scalasbt-release.gpg && \
17
+ apt-get update && \
18
+ apt-get install sbt -yqq
19
+ RUN git clone https://github.com/allenai/pdffigures2.git
20
+
21
+ # install python dependency
22
+ WORKDIR /project
23
+ ADD ./requirements.txt /project/requirements.txt
24
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
25
+ # add code
26
+ ADD ./core/ /project/core/
27
+ # download tokernizer for nltk
28
+ RUN python core/init_nltk.py
29
+ ADD ./example.pdf /project/example/example.pdf
30
+ # init sbt
31
+ RUN python core/init_sbt.py
32
+ # add app
33
+ ADD ./app.py /project/app.py
34
+ EXPOSE 7860
35
+ EXPOSE 8070
36
+ EXPOSE 8071
37
+ # add service starting
38
+ ADD ./start_service.sh /project/start_service.sh
39
+
40
+ WORKDIR /project
41
+ CMD ["bash", "start_service.sh"]
README copy.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # docker integration of existing code
2
+ build docker
3
+ ```sh
4
+ docker build --tag doc2slide -f Dockerfile .
5
+ ```
6
+ run in interactive mode and use gpu
7
+ ```sh
8
+ docker run --name test --gpus all -p 7080:8070 -p 7081:8071 -p 7860:7860 --rm -it doc2slide bash
9
+ ```
10
+ current version of grobid 0.7.3 have python 3.8.10 and openjdk 17. To run as a service
11
+ ```sh
12
+ docker run -d --name test --gpus all -p 7080:8070 -p 7081:8071 -p 7860:7860 --rm doc2slide
13
+ ```
14
+ https://shinylive.io/py/examples gives a lot of examples
app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import hashlib
3
+ from subprocess import call
4
+
5
+ from shiny import App, reactive, render, ui
6
+
7
+ from core.read_pdf import process_pdf, temp_dir
8
+
# MD5 hex digest of the last successfully preprocessed upload; compared against
# new uploads so the same PDF is not preprocessed twice.
last_pdf_md5_preprocess_stage = None
10
+
11
+
12
def compute_hash(file_pth):
    """Return the hex MD5 digest of the file at *file_pth*."""
    with open(file_pth, 'rb') as fh:
        digest = hashlib.md5(fh.read()).hexdigest()
    return digest
19
+
20
+
21
def ui_card(title, *args):
    """Wrap *args in a Bootstrap card with *title* as its header.

    Returns a one-element tuple so the card can be splatted directly into a
    page layout call.
    """
    header = ui.div(title, class_="card-header")
    body = ui.div({"class": "card-body"}, *args)
    return (ui.div({"class": "card mb-4"}, header, body), )
27
+
28
+
29
# Page layout: an upload card, a preprocess card, then three download cards.
# Output/input IDs here ("input_pdf", "preprocess_action", ...) must match the
# handler names defined in server() below.
app_ui = ui.page_fluid(
    ui.h1("Document2Slide Demo"),
    ui_card(
        "Upload PDF",
        ui.input_file("input_pdf", "Choose a .pdf file to upload:", multiple=True),
        ui.output_text("upload_file_status", ),
    ),
    ui_card(
        "Preprocess",
        ui.p(
            ui.input_action_button("preprocess_action", "Preprocess file", class_="btn-primary"),
            ui.output_text("preprocess_result", ),
        ),
        ui.output_text("preprocess_status", ),
        ui.download_button("download_preprocessed", "Download preprocessed file"),
    ),
    # NOTE(review): the three download buttons below have no matching handlers
    # in server() yet — only download_preprocessed is implemented. Confirm
    # whether these are placeholders for upcoming work.
    ui_card(
        "Download the bullet points in Markdown format.",
        ui.download_button("download_bullet_point", "Download bullet point"),
    ),
    ui_card(
        "Download the beamer source code `.tex` of the slide",
        ui.download_button("download_beamer", "Download beamer source code"),
    ),
    ui_card(
        "Download the PDF of slide.",
        ui.download_button("download_slide", "Download slide generated"),
    ),
)
58
+
59
+
60
def server(input, output, session):
    """Shiny server function: wires upload status, preprocessing and download.

    Args:
        input/output/session: the standard Shiny server arguments.
    """

    @output
    @render.text
    def upload_file_status():
        """Report whether a usable PDF has been uploaded."""
        # file_infos is None or a list of dicts like
        # {'name': ..., 'size': ..., 'type': ..., 'datapath': ...}
        file_infos = input.input_pdf()
        if not file_infos:
            return "There is no file provided currently."
        elif file_infos[0]['type'] != 'application/pdf':
            return "the file you provide is not in PDF format, upload another one!"
        else:
            return "PDF file successfully uploaded!"

    @output
    @render.text
    def preprocess_status():
        """Tell the user whether the current upload still needs preprocessing."""
        global last_pdf_md5_preprocess_stage
        file_infos = input.input_pdf()

        file_md5 = compute_hash(file_infos[0]['datapath']) if file_infos else None

        # BUG FIX: use truthiness instead of `is not None` so an empty upload
        # list cannot raise IndexError, and require a real digest before
        # claiming "already preprocessed" — previously, with no upload at all,
        # None == None (last digest) reported the file as preprocessed.
        if file_infos and file_infos[0]['type'] == 'application/pdf' and file_md5 != last_pdf_md5_preprocess_stage:
            return "Ready to preprocess the PDF!"
        elif file_md5 is not None and file_md5 == last_pdf_md5_preprocess_stage:
            return "PDF already preprocessed! You can continue!"
        else:
            return "No PDF ready currently, please upload a PDF!"

    @output
    @render.text
    @reactive.event(input.preprocess_action)  # only runs when the button is clicked
    async def preprocess_result():
        """Run the preprocessing pipeline once per distinct uploaded PDF."""
        global last_pdf_md5_preprocess_stage

        file_infos = input.input_pdf()
        if file_infos and file_infos[0]['type'] == 'application/pdf':
            # Rename Shiny's generic upload (e.g. 0.pdf) back to the original
            # file name so downstream tools produce sensibly named outputs.
            file_name = file_infos[0]['name']
            original_pdf_pth = file_infos[0]['datapath']
            new_pdf_pth = os.path.join(os.path.dirname(original_pdf_pth), file_name)
            os.rename(original_pdf_pth, new_pdf_pth)
            file_infos[0]['datapath'] = new_pdf_pth

            file_md5 = compute_hash(new_pdf_pth)

            try:
                if file_md5 != last_pdf_md5_preprocess_stage:
                    process_pdf(pdf_pth=new_pdf_pth, file_name=file_name)
                    last_pdf_md5_preprocess_stage = file_md5
                    return "Process successfully!"
                else:
                    return "Already processed!!!"
            # BUG FIX: was a bare `except:`, which also swallowed SystemExit
            # and KeyboardInterrupt; also fixed the grammar of the message.
            except Exception:
                return "Something went wrong, please switch to another file!"
        else:
            return "No PDF provided!"

    @session.download()
    def download_preprocessed():
        """Zip the preprocessed output directory and return the archive path."""
        file_infos = input.input_pdf()
        file_name = file_infos[0]['name'][:-4]  # strip the '.pdf' extension
        preprocessed_file_dir = os.path.join(temp_dir, file_name)
        if os.path.exists(preprocessed_file_dir):
            # list form (shell=False); run inside temp_dir so the archive
            # contains relative paths
            call(['zip', '-r', file_name + '.zip', './' + file_name], cwd=temp_dir)
            return str(os.path.join(temp_dir, file_name + '.zip'))
131
+
132
+
133
# The Shiny application object picked up by `shiny run app.py`.
app = App(app_ui, server)
core/init_nltk.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
import nltk

# Download the Punkt sentence tokenizer at image-build time; core/tei.py uses
# nltk.tokenize.sent_tokenize for sentence splitting.
nltk.download('punkt')
core/init_sbt.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import shutil
import sys
from subprocess import call

if __name__ == '__main__':
    # Warm up sbt at image-build time: compile and run pdffigures2 once on the
    # bundled example PDF so later requests do not pay the sbt/scalac startup
    # and compilation cost. (Removed the unused `example_pdf_pth` constant.)
    pdffigures2_home = '/opt/pdffigures2'
    args = [
        'sbt',
        '-J-Xmx4G',
        'runMain org.allenai.pdffigures2.FigureExtractorBatchCli -e -q /project/example/ -m ./example/image/ -d ./example/json/ -s ./example/stat.json',
    ]
    call(args, cwd=pdffigures2_home)
core/read_pdf.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # add module
2
+ import os
3
+ import shutil
4
+ import sys
5
+ from subprocess import call
6
+
7
+ from grobid_client.grobid_client import GrobidClient
8
+
9
+ module_path = os.path.abspath(os.path.join('/project'))
10
+ if module_path not in sys.path:
11
+ sys.path.append(module_path)
12
+
13
+ from core.tei import single_entry
14
+
15
# Runtime locations for the processing pipeline. The image built by
# ./Dockerfile places everything under these container paths, and
# start_service.sh runs app.py inside that container — so the container paths
# must be the active ones. The author's local development paths are kept below
# as comments. (BUG FIX: the commit shipped with the /home/quanta dev paths
# active, which breaks the Docker deployment; also closed an unterminated
# quote in the config-path comment.)
temp_dir = '/project/temp'
pdffigures2_home = '/opt/pdffigures2'
grobid_home = '/opt/grobid'
grobid_python_config_pth = '/opt/grobid_client_python/config.json'
# temp_dir = '/home/quanta/Projects/doc2slide-summarizer/temp'
# pdffigures2_home = '/home/quanta/Library/pdffigures2'
# grobid_home = '/home/quanta/Library/grobid/grobid-0.6.2'
# grobid_python_config_pth = '/home/quanta/Library/grobid_client_python/config.json'
23
+
24
+
25
def remove_temp_directory():
    """Delete the whole temp working tree so each run starts clean.

    No-op when ``temp_dir`` does not exist.
    """
    if not os.path.exists(temp_dir):
        return
    shutil.rmtree(temp_dir)
28
+
29
+
30
def grobid_clident():
    # Build a GROBID client from the configured config.json.
    # NOTE(review): the name contains a typo ("clident" -> "client"); kept
    # as-is because process_pdf() calls it by this exact name.
    return GrobidClient(config_path=grobid_python_config_pth)
32
+
33
+
34
def process_pdf(pdf_pth: str, file_name: str):
    """Preprocess one PDF end to end.

    Runs GROBID to produce a TEI XML, runs pdffigures2 to extract figures,
    merges everything into JSON under ``temp_dir/<name>/``, and finally writes
    ``<name>.preprocessed_text.json`` containing one plain-text string per
    detected section.

    Args:
        pdf_pth: path to the uploaded PDF file.
        file_name: original file name including the ``.pdf`` extension.
    """
    import json  # hoisted from mid-function; local import kept to match file style

    client = grobid_clident()
    remove_temp_directory()  # start from a clean slate for every upload

    name = file_name[:-4]  # strip the trailing '.pdf'

    # Output layout: temp_dir/<name>/{pdf,xml,json,figure}.
    # exist_ok=True replaces the repeated exists()/makedirs() pairs.
    temp_pdf_dir = os.path.join(temp_dir, name, 'pdf')
    temp_xml_dir = os.path.join(temp_dir, name, 'xml')
    os.makedirs(temp_pdf_dir, exist_ok=True)
    os.makedirs(temp_xml_dir, exist_ok=True)

    # copy pdf to temp dir
    shutil.copy(pdf_pth, temp_pdf_dir)

    # PDF -> TEI XML via the GROBID service
    client.process(
        'processFulltextDocument',
        temp_pdf_dir,
        tei_coordinates=True,
        force=True,
        verbose=True,
        output=temp_xml_dir,
    )

    xml_pth = os.path.join(temp_xml_dir, name + '.tei.xml')

    # Figure extraction: pdffigures2 must be run from its own sbt project
    # root, writing into directories relative to pdffigures2_home.
    fig_dir_profix = 'figure'
    img_dir_profix = 'figure/image'
    json_dir_profix = 'figure/json'
    tmp_fig_dir = os.path.join(pdffigures2_home, fig_dir_profix)
    for sub in (fig_dir_profix, img_dir_profix, json_dir_profix):
        os.makedirs(os.path.join(pdffigures2_home, sub), exist_ok=True)

    args = [
        'sbt',
        '-J-Xmx4G',
        'runMain org.allenai.pdffigures2.FigureExtractorBatchCli -e -q '
        + os.path.abspath(temp_pdf_dir) + '/'
        + ' -m ./' + img_dir_profix + '/'
        + ' -d ./' + json_dir_profix + '/'
        + ' -s ./' + fig_dir_profix + '/stat.json',
    ]
    call(args, cwd=pdffigures2_home)

    # move the extraction results next to the xml/json outputs
    shutil.move(tmp_fig_dir, os.path.join(temp_dir, name))

    figure_json_pth = os.path.join(temp_dir, name, 'figure/json', name + '.json')

    # merge TEI text + figure metadata into a single json document
    _, title, abstract, text, headers, figures = single_entry('', xml_pth=xml_pth, fig_json_pth=figure_json_pth)

    temp_json_dir = os.path.join(temp_dir, name, 'json')
    os.makedirs(temp_json_dir, exist_ok=True)

    json_data = {
        'title': title,
        'abstract': abstract,
        'text': text,
        'headers': headers,
        'figures': figures,
    }
    json_pth = os.path.join(temp_json_dir, name + '.json')
    with open(json_pth, 'w') as f:
        json.dump(json_data, f, indent=4)

    # Re-read the merged json and flatten each section's sentence range into
    # one plain-text string per section.
    with open(json_pth, 'r') as f:
        data = json.load(f)
    paper_length = len(data['text'])
    sections = [{
        'idx': i,
        'title': head['section'],
        'n': head['n'],
        'text': ' '.join(data['text'][idx]['string'] for idx in range(head['start'], min(head['end'] + 1, paper_length))),
        'matched_slides': [],
    } for i, head in enumerate(data['headers'])]

    with open(os.path.join(temp_dir, name, name + '.preprocessed_text.json'), 'w') as f:
        json.dump([sec['text'] for sec in sections], f, indent=4)
127
+
128
+
129
if __name__ == '__main__':
    # Smoke-test entry point: process the example document baked into the image.
    # BUG FIX: process_pdf requires two arguments; the original call omitted
    # file_name and raised TypeError.
    process_pdf('/project/example/example.pdf', file_name='example.pdf')
core/tei.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # code modified from https://github.com/IBM/document2slides/blob/main/sciduet-build/extract_papers.py
2
+ import os
3
+ from os import path
4
+ import json
5
+ from dataclasses import dataclass
6
+ from multiprocessing.pool import Pool
7
+ import pandas as pd
8
+ from pathlib import Path
9
+ from bs4 import BeautifulSoup
10
+ import nltk
11
+ from dataclasses import dataclass
12
+
13
+
14
@dataclass
class Person:
    """One author as extracted from a TEI header <persName> element."""
    firstname: str
    middlename: str
    surname: str
19
+
20
+
21
def read_tei(tei_file):
    """Open a GROBID TEI XML file and parse it with BeautifulSoup's 'xml' parser."""
    with open(tei_file, 'r', encoding='utf-8') as tei:
        soup = BeautifulSoup(tei, 'xml')
        return soup
25
+
26
+
27
def elem_to_text(elem, default=''):
    """Return *elem*'s text content, or *default* when *elem* is falsy (e.g. None)."""
    return elem.getText() if elem else default
32
+
33
+
34
class TEIFile(object):
    """Accessor for a GROBID TEI XML file plus an optional pdffigures2 JSON.

    All expensive extractions (text, headers, figures) are computed lazily and
    cached on first access.
    """

    def __init__(self, xml_pth: str = None, fig_json_pth: str = ''):
        self.xml_pth = os.path.abspath(xml_pth)
        self.fig_json_pth = os.path.abspath(fig_json_pth)
        self.soup = read_tei(self.xml_pth)
        # caches for the lazy properties below
        self._text = None
        self._title = ''
        self._abstract = ''
        self._headers = None
        self._figures = None

    @property
    def doi(self):
        """The paper's DOI, or '' when the TEI has no <idno type="DOI">."""
        idno_elem = self.soup.find('idno', type='DOI')
        if not idno_elem:
            return ''
        else:
            return idno_elem.getText()

    @property
    def title(self):
        """Paper title (text of the first <title> element)."""
        if not self._title:
            self._title = self.soup.title.getText()
        return self._title

    @property
    def abstract(self):
        """Abstract text with whitespace collapsed to single spaces."""
        if not self._abstract:
            self._abstract = self.soup.abstract.getText(separator=' ', strip=True)
        return self._abstract

    @property
    def authors(self):
        """List of Person entries parsed from the TEI header's <author> elements."""
        authors_in_header = self.soup.analytic.find_all('author')

        result = []
        for author in authors_in_header:
            # GROBID output may use either <persName> or <persname>
            persname = author.persName or author.persname
            if not persname:
                continue
            firstname = elem_to_text(persname.find("forename", type="first"))
            middlename = elem_to_text(persname.find("forename", type="middle"))
            surname = elem_to_text(persname.surname)
            result.append(Person(firstname, middlename, surname))
        return result

    @property
    def text(self):
        """Body text as a list of {'id', 'string'} sentence records.

        Side effect: populates self._headers with one entry per numbered
        section, recording its sentence index range [start, end].
        """
        if not self._text:
            self._headers = []
            headerlist = self.soup.body.find_all("head")
            sections = []
            for head in headerlist:
                if head.parent.name != 'div':
                    continue
                txt = head.parent.get_text(separator=' ', strip=True)
                # the numbering check below is only valid for arabic numerals
                if head.get("n"):
                    sections.append([head.text, head.get('n'), txt])
                elif sections:
                    # un-numbered heading: fold its text into the previous section
                    sections[-1][2] += txt
                else:
                    # BUG FIX: the original printed this message and then
                    # indexed sections[-1] on the empty list (IndexError);
                    # skip the heading instead of crashing.
                    print("Grobid processing error.")
            start = 0
            for sec in sections:
                sent = nltk.tokenize.sent_tokenize(sec[2])
                self._headers.append({'section': sec[0], 'n': sec[1], 'start': start, 'end': start + len(sent) - 1})
                start += len(sent)
            plain_text = " ".join(sec[2] for sec in sections)
            self._text = [{'id': i, 'string': s} for i, s in enumerate(nltk.tokenize.sent_tokenize(plain_text))]
        return self._text

    @property
    def headers(self):
        """Per-section metadata; computed as a side effect of the text property."""
        if not self._headers:
            # BUG FIX: the original ran `self.text()`, which called the list
            # returned by the property and raised TypeError; merely accessing
            # the property populates self._headers.
            _ = self.text
        return self._headers

    @property
    def figures(self):
        """Figure metadata from the pdffigures2 JSON, or [] when it is missing."""
        if not self._figures:
            self._figures = []
            fn = self.fig_json_pth
            if not path.isfile(fn):
                return []
            with open(fn) as f:
                data = json.load(f)
            for entry in data:
                self._figures.append({
                    'filename': entry['renderURL'],
                    'caption': entry['caption'],
                    'page': entry['page'],
                    'bbox': entry['regionBoundary'],
                })
        return self._figures
141
+
142
+
143
def single_entry(uuid: str, xml_pth: str, fig_json_pth: str):
    """Parse one paper and return (uuid, title, abstract, text, headers, figures)."""
    paper = TEIFile(xml_pth=xml_pth, fig_json_pth=fig_json_pth)
    return uuid, paper.title, paper.abstract, paper.text, paper.headers, paper.figures
requirements.txt ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # for processing pdf
2
+ nltk
3
+ pandas
4
+ tqdm
5
+ bs4
6
+ lxml
7
+ # for interaction with openai
8
+ openai
9
+ # for shiny
10
+ anyio==3.6.2
11
+ appdirs==1.4.4
12
+ asgiref==3.6.0
13
+ click==8.1.3
14
+ contextvars==2.4
15
+ contourpy==1.0.7
16
+ cycler==0.11.0
17
+ fonttools==4.39.3
18
+ h11==0.14.0
19
+ htmltools==0.2.1
20
+ idna==3.4
21
+ immutables==0.19
22
+ kiwisolver==1.4.4
23
+ linkify-it-py==2.0.0
24
+ markdown-it-py==2.2.0
25
+ matplotlib==3.7.1
26
+ mdit-py-plugins==0.3.5
27
+ mdurl==0.1.2
28
+ numpy==1.24.2
29
+ packaging==23.1
30
+ Pillow==9.5.0
31
+ pyparsing==3.0.9
32
+ python-dateutil==2.8.2
33
+ python-multipart==0.0.6
34
+ pytz==2023.3
35
+ seaborn==0.12.2
36
+ shiny==0.3.3
37
+ shinyswatch==0.2.3
38
+ six==1.16.0
39
+ sniffio==1.3.0
40
+ starlette==0.26.1
41
+ typing_extensions==4.5.0
42
+ tzdata==2023.3
43
+ uc-micro-py==1.0.1
44
+ uvicorn==0.21.1
45
+ websockets==11.0.2
46
+ XStatic-bootswatch==3.3.7.0
shiny_example_dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Minimal reference Dockerfile for a plain Shiny app; not used by the main
# build (see ./Dockerfile), kept for comparison.
FROM python:3.9

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

EXPOSE 7860

CMD ["shiny", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
start_service.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ cd /opt/grobid && ./grobid-service/bin/grobid-service &
2
+ cd /project && shiny run /project/app.py --host 0.0.0.0 --port 7860