Bram Vanroy
committed on
Commit
•
c2302bf
1
Parent(s):
5ddc459
push dummy
Browse files- .dockerignore +157 -0
- .gitignore +237 -0
- Dockerfile +21 -0
- README.md +8 -6
- app.py +43 -0
- requirements.txt +8 -0
- utils.py +58 -0
.dockerignore
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
**/.git
|
2 |
+
**/.venv
|
3 |
+
**/.mypy_cache
|
4 |
+
**/.idea
|
5 |
+
|
6 |
+
# Byte-compiled / optimized / DLL files
|
7 |
+
__pycache__/
|
8 |
+
*.py[cod]
|
9 |
+
*$py.class
|
10 |
+
|
11 |
+
# C extensions
|
12 |
+
*.so
|
13 |
+
|
14 |
+
# Distribution / packaging
|
15 |
+
.Python
|
16 |
+
build/
|
17 |
+
develop-eggs/
|
18 |
+
dist/
|
19 |
+
downloads/
|
20 |
+
eggs/
|
21 |
+
.eggs/
|
22 |
+
lib/
|
23 |
+
lib64/
|
24 |
+
parts/
|
25 |
+
sdist/
|
26 |
+
var/
|
27 |
+
wheels/
|
28 |
+
share/python-wheels/
|
29 |
+
*.egg-info/
|
30 |
+
.installed.cfg
|
31 |
+
*.egg
|
32 |
+
MANIFEST
|
33 |
+
|
34 |
+
# PyInstaller
|
35 |
+
# Usually these files are written by a python script from a template
|
36 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
37 |
+
*.manifest
|
38 |
+
*.spec
|
39 |
+
|
40 |
+
# Installer logs
|
41 |
+
pip-log.txt
|
42 |
+
pip-delete-this-directory.txt
|
43 |
+
|
44 |
+
# Unit test / coverage reports
|
45 |
+
htmlcov/
|
46 |
+
.tox/
|
47 |
+
.nox/
|
48 |
+
.coverage
|
49 |
+
.coverage.*
|
50 |
+
.cache
|
51 |
+
nosetests.xml
|
52 |
+
coverage.xml
|
53 |
+
*.cover
|
54 |
+
*.py,cover
|
55 |
+
.hypothesis/
|
56 |
+
.pytest_cache/
|
57 |
+
cover/
|
58 |
+
|
59 |
+
# Translations
|
60 |
+
*.mo
|
61 |
+
*.pot
|
62 |
+
|
63 |
+
# Django stuff:
|
64 |
+
*.log
|
65 |
+
local_settings.py
|
66 |
+
db.sqlite3
|
67 |
+
db.sqlite3-journal
|
68 |
+
|
69 |
+
# Flask stuff:
|
70 |
+
instance/
|
71 |
+
.webassets-cache
|
72 |
+
|
73 |
+
# Scrapy stuff:
|
74 |
+
.scrapy
|
75 |
+
|
76 |
+
# Sphinx documentation
|
77 |
+
docs/_build/
|
78 |
+
|
79 |
+
# PyBuilder
|
80 |
+
.pybuilder/
|
81 |
+
target/
|
82 |
+
|
83 |
+
# Jupyter Notebook
|
84 |
+
.ipynb_checkpoints
|
85 |
+
|
86 |
+
# IPython
|
87 |
+
profile_default/
|
88 |
+
ipython_config.py
|
89 |
+
|
90 |
+
# pyenv
|
91 |
+
# For a library or package, you might want to ignore these files since the code is
|
92 |
+
# intended to run in multiple environments; otherwise, check them in:
|
93 |
+
# .python-version
|
94 |
+
|
95 |
+
# pipenv
|
96 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
97 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
98 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
99 |
+
# install all needed dependencies.
|
100 |
+
#Pipfile.lock
|
101 |
+
|
102 |
+
# poetry
|
103 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
104 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
105 |
+
# commonly ignored for libraries.
|
106 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
107 |
+
#poetry.lock
|
108 |
+
|
109 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
110 |
+
__pypackages__/
|
111 |
+
|
112 |
+
# Celery stuff
|
113 |
+
celerybeat-schedule
|
114 |
+
celerybeat.pid
|
115 |
+
|
116 |
+
# SageMath parsed files
|
117 |
+
*.sage.py
|
118 |
+
|
119 |
+
# Environments
|
120 |
+
.env
|
121 |
+
.venv
|
122 |
+
env/
|
123 |
+
venv/
|
124 |
+
ENV/
|
125 |
+
env.bak/
|
126 |
+
venv.bak/
|
127 |
+
|
128 |
+
# Spyder project settings
|
129 |
+
.spyderproject
|
130 |
+
.spyproject
|
131 |
+
|
132 |
+
# Rope project settings
|
133 |
+
.ropeproject
|
134 |
+
|
135 |
+
# mkdocs documentation
|
136 |
+
/site
|
137 |
+
|
138 |
+
# mypy
|
139 |
+
.mypy_cache/
|
140 |
+
.dmypy.json
|
141 |
+
dmypy.json
|
142 |
+
|
143 |
+
# Pyre type checker
|
144 |
+
.pyre/
|
145 |
+
|
146 |
+
# pytype static type analyzer
|
147 |
+
.pytype/
|
148 |
+
|
149 |
+
# Cython debug symbols
|
150 |
+
cython_debug/
|
151 |
+
|
152 |
+
# PyCharm
|
153 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
154 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
155 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
156 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
157 |
+
#.idea/
|
.gitignore
ADDED
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Pipfile*
|
2 |
+
data/*
|
3 |
+
*config.json
|
4 |
+
|
5 |
+
|
6 |
+
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
7 |
+
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
8 |
+
|
9 |
+
.idea/
|
10 |
+
# User-specific stuff
|
11 |
+
.idea/**/workspace.xml
|
12 |
+
.idea/**/tasks.xml
|
13 |
+
.idea/**/usage.statistics.xml
|
14 |
+
.idea/**/dictionaries
|
15 |
+
.idea/**/shelf
|
16 |
+
|
17 |
+
# AWS User-specific
|
18 |
+
.idea/**/aws.xml
|
19 |
+
|
20 |
+
# Generated files
|
21 |
+
.idea/**/contentModel.xml
|
22 |
+
|
23 |
+
# Sensitive or high-churn files
|
24 |
+
.idea/**/dataSources/
|
25 |
+
.idea/**/dataSources.ids
|
26 |
+
.idea/**/dataSources.local.xml
|
27 |
+
.idea/**/sqlDataSources.xml
|
28 |
+
.idea/**/dynamic.xml
|
29 |
+
.idea/**/uiDesigner.xml
|
30 |
+
.idea/**/dbnavigator.xml
|
31 |
+
|
32 |
+
# Gradle
|
33 |
+
.idea/**/gradle.xml
|
34 |
+
.idea/**/libraries
|
35 |
+
|
36 |
+
# Gradle and Maven with auto-import
|
37 |
+
# When using Gradle or Maven with auto-import, you should exclude module files,
|
38 |
+
# since they will be recreated, and may cause churn. Uncomment if using
|
39 |
+
# auto-import.
|
40 |
+
# .idea/artifacts
|
41 |
+
# .idea/compiler.xml
|
42 |
+
# .idea/jarRepositories.xml
|
43 |
+
# .idea/modules.xml
|
44 |
+
# .idea/*.iml
|
45 |
+
# .idea/modules
|
46 |
+
# *.iml
|
47 |
+
# *.ipr
|
48 |
+
|
49 |
+
# CMake
|
50 |
+
cmake-build-*/
|
51 |
+
|
52 |
+
# Mongo Explorer plugin
|
53 |
+
.idea/**/mongoSettings.xml
|
54 |
+
|
55 |
+
# File-based project format
|
56 |
+
*.iws
|
57 |
+
|
58 |
+
# IntelliJ
|
59 |
+
out/
|
60 |
+
|
61 |
+
# mpeltonen/sbt-idea plugin
|
62 |
+
.idea_modules/
|
63 |
+
|
64 |
+
# JIRA plugin
|
65 |
+
atlassian-ide-plugin.xml
|
66 |
+
|
67 |
+
# Cursive Clojure plugin
|
68 |
+
.idea/replstate.xml
|
69 |
+
|
70 |
+
# SonarLint plugin
|
71 |
+
.idea/sonarlint/
|
72 |
+
|
73 |
+
# Crashlytics plugin (for Android Studio and IntelliJ)
|
74 |
+
com_crashlytics_export_strings.xml
|
75 |
+
crashlytics.properties
|
76 |
+
crashlytics-build.properties
|
77 |
+
fabric.properties
|
78 |
+
|
79 |
+
# Editor-based Rest Client
|
80 |
+
.idea/httpRequests
|
81 |
+
|
82 |
+
# Android studio 3.1+ serialized cache file
|
83 |
+
.idea/caches/build_file_checksums.ser
|
84 |
+
|
85 |
+
|
86 |
+
# Byte-compiled / optimized / DLL files
|
87 |
+
__pycache__/
|
88 |
+
*.py[cod]
|
89 |
+
*$py.class
|
90 |
+
|
91 |
+
# C extensions
|
92 |
+
*.so
|
93 |
+
|
94 |
+
# Distribution / packaging
|
95 |
+
.Python
|
96 |
+
build/
|
97 |
+
develop-eggs/
|
98 |
+
dist/
|
99 |
+
downloads/
|
100 |
+
eggs/
|
101 |
+
.eggs/
|
102 |
+
lib/
|
103 |
+
lib64/
|
104 |
+
parts/
|
105 |
+
sdist/
|
106 |
+
var/
|
107 |
+
wheels/
|
108 |
+
share/python-wheels/
|
109 |
+
*.egg-info/
|
110 |
+
.installed.cfg
|
111 |
+
*.egg
|
112 |
+
MANIFEST
|
113 |
+
|
114 |
+
# PyInstaller
|
115 |
+
# Usually these files are written by a python script from a template
|
116 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
117 |
+
*.manifest
|
118 |
+
*.spec
|
119 |
+
|
120 |
+
# Installer logs
|
121 |
+
pip-log.txt
|
122 |
+
pip-delete-this-directory.txt
|
123 |
+
|
124 |
+
# Unit test / coverage reports
|
125 |
+
htmlcov/
|
126 |
+
.tox/
|
127 |
+
.nox/
|
128 |
+
.coverage
|
129 |
+
.coverage.*
|
130 |
+
.cache
|
131 |
+
nosetests.xml
|
132 |
+
coverage.xml
|
133 |
+
*.cover
|
134 |
+
*.py,cover
|
135 |
+
.hypothesis/
|
136 |
+
.pytest_cache/
|
137 |
+
cover/
|
138 |
+
|
139 |
+
# Translations
|
140 |
+
*.mo
|
141 |
+
*.pot
|
142 |
+
|
143 |
+
# Django stuff:
|
144 |
+
*.log
|
145 |
+
local_settings.py
|
146 |
+
db.sqlite3
|
147 |
+
db.sqlite3-journal
|
148 |
+
|
149 |
+
# Flask stuff:
|
150 |
+
instance/
|
151 |
+
.webassets-cache
|
152 |
+
|
153 |
+
# Scrapy stuff:
|
154 |
+
.scrapy
|
155 |
+
|
156 |
+
# Sphinx documentation
|
157 |
+
docs/_build/
|
158 |
+
|
159 |
+
# PyBuilder
|
160 |
+
.pybuilder/
|
161 |
+
target/
|
162 |
+
|
163 |
+
# Jupyter Notebook
|
164 |
+
.ipynb_checkpoints
|
165 |
+
|
166 |
+
# IPython
|
167 |
+
profile_default/
|
168 |
+
ipython_config.py
|
169 |
+
|
170 |
+
# pyenv
|
171 |
+
# For a library or package, you might want to ignore these files since the code is
|
172 |
+
# intended to run in multiple environments; otherwise, check them in:
|
173 |
+
# .python-version
|
174 |
+
|
175 |
+
# pipenv
|
176 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
177 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
178 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
179 |
+
# install all needed dependencies.
|
180 |
+
#Pipfile.lock
|
181 |
+
|
182 |
+
# poetry
|
183 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
184 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
185 |
+
# commonly ignored for libraries.
|
186 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
187 |
+
#poetry.lock
|
188 |
+
|
189 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
190 |
+
__pypackages__/
|
191 |
+
|
192 |
+
# Celery stuff
|
193 |
+
celerybeat-schedule
|
194 |
+
celerybeat.pid
|
195 |
+
|
196 |
+
# SageMath parsed files
|
197 |
+
*.sage.py
|
198 |
+
|
199 |
+
# Environments
|
200 |
+
.env
|
201 |
+
.venv
|
202 |
+
env/
|
203 |
+
venv/
|
204 |
+
ENV/
|
205 |
+
env.bak/
|
206 |
+
venv.bak/
|
207 |
+
|
208 |
+
# Spyder project settings
|
209 |
+
.spyderproject
|
210 |
+
.spyproject
|
211 |
+
|
212 |
+
# Rope project settings
|
213 |
+
.ropeproject
|
214 |
+
|
215 |
+
# mkdocs documentation
|
216 |
+
/site
|
217 |
+
|
218 |
+
# mypy
|
219 |
+
.mypy_cache/
|
220 |
+
.dmypy.json
|
221 |
+
dmypy.json
|
222 |
+
|
223 |
+
# Pyre type checker
|
224 |
+
.pyre/
|
225 |
+
|
226 |
+
# pytype static type analyzer
|
227 |
+
.pytype/
|
228 |
+
|
229 |
+
# Cython debug symbols
|
230 |
+
cython_debug/
|
231 |
+
|
232 |
+
# PyCharm
|
233 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
234 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
235 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
236 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
237 |
+
#.idea/
|
Dockerfile
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Image for the Streamlit demo: installs the pinned Python dependencies as
# root, then switches to an unprivileged user (uid 1000) to hold and run the app.
FROM python:3.10.10

WORKDIR /app

# Copy only the requirements first so the dependency layer stays cached
# when application code changes.
COPY ./requirements.txt /app/requirements.txt

RUN pip3 install --no-cache-dir -r /app/requirements.txt

# User
RUN useradd -m -u 1000 user
USER user
# Use the key=value form; the whitespace-separated ENV form is legacy syntax.
ENV HOME=/home/user
ENV PATH=$HOME/.local/bin:$PATH

WORKDIR $HOME
RUN mkdir app
WORKDIR $HOME/app
# COPY ignores the active USER and writes files as root unless --chown is
# given; hand ownership to the runtime user so the app can write next to its code.
COPY --chown=user . $HOME/app

EXPOSE 8501
# Exec form: streamlit runs as PID 1 and receives stop signals directly
# instead of being wrapped in `/bin/sh -c`.
CMD ["streamlit", "run", "app.py"]
|
README.md
CHANGED
@@ -1,13 +1,15 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
emoji: 🏃
|
4 |
colorFrom: indigo
|
5 |
colorTo: yellow
|
6 |
-
sdk:
|
7 |
-
|
8 |
app_file: app.py
|
9 |
-
pinned:
|
10 |
license: cc-by-nc-sa-4.0
|
|
|
|
|
|
|
|
|
11 |
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: Dutch Simplification
|
3 |
emoji: 🏃
|
4 |
colorFrom: indigo
|
5 |
colorTo: yellow
|
6 |
+
sdk: docker
|
7 |
+
app_port: 8501
|
8 |
app_file: app.py
|
9 |
+
pinned: true
|
10 |
license: cc-by-nc-sa-4.0
|
11 |
+
tags:
|
12 |
+
- natural language processing
|
13 |
+
- simplification
|
14 |
+
- dutch
|
15 |
---
|
|
|
|
app.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from utils import get_resources, simplify
|
2 |
+
|
3 |
+
import streamlit as st
|
4 |
+
|
5 |
+
# Streamlit entry point: a single form takes Dutch input text and the
# simplified version is streamed into the page as it is generated.
st.set_page_config(
    page_title="Text Simplification in Dutch",
    page_icon="🏃"
)

st.title("🏃 Text Simplification in Dutch")

with st.form("input data"):
    text = st.text_area(label="Input text", value="Met het naderen van de zonovergoten middaghemel op deze betoverende dag, waarbij de atmosferische omstandigheden een onbelemmerde convergentie van cumulusbewolking en uitgestrekte stratosferische azuurblauwe wijdheid faciliteren, lijken de geaggregeerde weersverschijnselen van vandaag, die variëren van sporadische plensbuien tot kalme zuchtjes wind en zeldzame opvlammingen van bliksem, de delicate balans tussen meteorologische complexiteit en eenvoud te weerspiegelen, waardoor de gepassioneerde observator met een gevoel van ontzag en verwondering wordt vervuld.")
    submitted = st.form_submit_button("Submit")

# Single placeholder reused for the validation error and the transient
# "working" notice, so only one of them is ever visible.
error_ct = st.empty()
if submitted:
    text = text.strip()
    if not text:
        error_ct.error("Text cannot be empty!", icon="⚠️")
    else:
        # Shown while the (cached) model resources are being loaded.
        error_ct.info("Loading the model and generating the simplification...", icon="💻")

        model, tokenizer, streamer = get_resources()
        error_ct.empty()

        # simplify() yields the text accumulated so far on every step, so
        # each partial result must overwrite the previous one. Writing into
        # one placeholder avoids stacking a new element per partial.
        output_ct = st.empty()
        for stream_simplification in simplify(text, model, tokenizer, streamer):
            output_ct.write(stream_simplification)


########################
# Information, socials #
########################
st.header("Project background")

# TODO: the project background text has not been written yet.
st.markdown("""""")


st.header("Contact ✒️")

st.markdown("Would you like additional functionality in the demo, do you have questions, or just want to get in touch?"
            " Give me a shout on [Twitter](https://twitter.com/BramVanroy)"
            " or add me on [LinkedIn](https://www.linkedin.com/in/bramvanroy/)!")
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
numpy==1.24.3
|
2 |
+
optimum==1.8.6
|
3 |
+
torch==2.0.1
|
4 |
+
sacremoses==0.0.53
|
5 |
+
sentencepiece==0.1.99
|
6 |
+
streamlit==1.22.0
|
7 |
+
transformers==4.29.2
|
8 |
+
tornado==6.3.2
|
utils.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from threading import Thread
|
2 |
+
from typing import Tuple, Generator
|
3 |
+
|
4 |
+
from optimum.bettertransformer import BetterTransformer
|
5 |
+
import streamlit as st
|
6 |
+
import torch
|
7 |
+
from torch.quantization import quantize_dynamic
|
8 |
+
from torch import nn, qint8
|
9 |
+
from transformers import T5ForConditionalGeneration, T5Tokenizer, TextStreamer, TextIteratorStreamer
|
10 |
+
|
11 |
+
|
12 |
+
@st.cache_resource(show_spinner=False)
def get_resources(quantize: bool = True, no_cuda: bool = False) -> Tuple[T5ForConditionalGeneration, T5Tokenizer, TextIteratorStreamer]:
    """Load the simplification model, its tokenizer and a text streamer.

    The result is cached by Streamlit (``st.cache_resource``), so the
    checkpoint is only loaded the first time this is called with a given
    combination of arguments.

    :param quantize: dynamically quantize the model to int8. Only applied
        when the model is not moved to CUDA.
    :param no_cuda: keep the model off the GPU even when CUDA is available
    :return: a tuple of (model in eval mode, tokenizer, streamer)
    """
    tokenizer = T5Tokenizer.from_pretrained("BramVanroy/ul2-base-dutch-simplification-mai-2023", use_fast=False)
    model = T5ForConditionalGeneration.from_pretrained("BramVanroy/ul2-base-dutch-simplification-mai-2023")

    model = BetterTransformer.transform(model, keep_original_model=False)
    model.resize_token_embeddings(len(tokenizer))

    if torch.cuda.is_available() and not no_cuda:
        model = model.to("cuda")
    elif quantize:  # Quantization not supported on CUDA
        model = quantize_dynamic(model, {nn.Linear, nn.Dropout, nn.LayerNorm}, dtype=qint8)

    model.eval()
    # The streamer forwards any extra keyword arguments to tokenizer.decode().
    # They must be passed as plain keywords: wrapping them in a
    # `decode_kwargs={...}` dict hands decode() one unknown `decode_kwargs`
    # argument, and the options inside it are never applied.
    # NOTE(review): this streamer is part of the cached return value, so it is
    # presumably shared by every session that calls this function -- confirm
    # that concurrent requests cannot interleave their output.
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True, clean_up_tokenization_spaces=True)

    return model, tokenizer, streamer
|
31 |
+
|
32 |
+
|
33 |
+
def simplify(
        text: str,
        model: T5ForConditionalGeneration,
        tokenizer: T5Tokenizer,
        streamer: TextIteratorStreamer
) -> Generator[str, None, None]:
    """Generate a simplification of ``text`` and stream it incrementally.

    Generation runs in a background thread and pushes decoded chunks into
    ``streamer``; this generator consumes them as they arrive.

    :param text: the input text to simplify
    :param model: the model used for generation
    :param tokenizer: the tokenizer used to encode ``text``
    :param streamer: the streamer that ``model.generate`` writes its output to
    :return: a generator that yields, after every received chunk, the
        complete text generated so far (not just the newest chunk)
    """
    # Presumably the task prefix the checkpoint was trained with -- TODO confirm.
    text = "[NLG] " + text

    encoded = tokenizer(text, return_tensors="pt")
    encoded = {k: v.to(model.device) for k, v in encoded.items()}
    gen_kwargs = {
        **encoded,
        "max_new_tokens": 128,
        "streamer": streamer,
    }

    def _generate() -> None:
        # Grad mode is thread-local: entering no_grad() in the calling thread
        # would not affect the worker, so it is entered inside the worker.
        with torch.no_grad():
            model.generate(**gen_kwargs)

    thread = Thread(target=_generate)
    thread.start()

    generated_text = ""
    for new_text in streamer:
        generated_text += new_text
        yield generated_text

    # Reached only once the streamer is exhausted; joining here keeps finished
    # worker threads from piling up across requests.
    thread.join()
|