Spaces: Runtime error
quantuan125 committed • Commit 19b69b8
Parent(s): 7c3bdae
add
Browse files
- .devcontainer/devcontainer.json +33 -0
- .gitignore +180 -0
- .streamlit/config.toml +193 -0
- BR18_DB/1_BR18_Executive_order_on_building_regulations_2018.md +0 -0
- BR18_DB/2_BR18_Executive_order_on_building_regulations_2018_Appendix.md +265 -0
- CustomClass/custom.py +25 -0
- FileReader/file.py +0 -0
- README copy.md +1 -0
- faiss_index/index.faiss +0 -0
- faiss_index/index.pkl +3 -0
- inmemorystore/br18_parent_store.pkl +3 -0
- mrkl.py +1060 -0
- requirements.txt +14 -0
- savesummary/br18_summaries.json +0 -0
.devcontainer/devcontainer.json
ADDED
@@ -0,0 +1,33 @@
{
    "name": "Python 3",
    // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
    "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
    "customizations": {
        "codespaces": {
            "openFiles": [
                "README.md",
                "mrkl.py"
            ]
        },
        "vscode": {
            "settings": {},
            "extensions": [
                "ms-python.python",
                "ms-python.vscode-pylance"
            ]
        }
    },
    "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
    "postAttachCommand": {
        "server": "streamlit run mrkl.py --server.enableCORS false --server.enableXsrfProtection false"
    },
    "portsAttributes": {
        "8501": {
            "label": "Application",
            "onAutoForward": "openPreview"
        }
    },
    "forwardPorts": [
        8501
    ]
}
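Aside (not part of the commit): a minimal Python sketch, assuming it is run from the repository root, that sanity-checks the devcontainer config above. Because devcontainer.json allows // comments, the sketch strips them before parsing with the standard json module.

import json
import re
from pathlib import Path

# devcontainer.json is JSON-with-comments; drop full-line // comments before parsing.
raw = Path(".devcontainer/devcontainer.json").read_text(encoding="utf-8")
config = json.loads(re.sub(r"^\s*//.*$", "", raw, flags=re.MULTILINE))

print(config["image"])              # mcr.microsoft.com/devcontainers/python:1-3.11-bullseye
print(config["forwardPorts"])       # [8501], the port Streamlit serves on by default
print(config["postAttachCommand"])  # the streamlit run command executed on attach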
.gitignore
ADDED
@@ -0,0 +1,180 @@
# Created by https://www.toptal.com/developers/gitignore/api/python
# Edit at https://www.toptal.com/developers/gitignore?templates=python

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml

# ruff
.ruff_cache/

# LSP config files
pyrightconfig.json

# End of https://www.toptal.com/developers/gitignore/api/python

secrets.toml

storage/
.streamlit/config.toml
ADDED
@@ -0,0 +1,193 @@
[global]

# By default, Streamlit checks if the Python watchdog module is available and, if not, prints a warning asking for you to install it. The watchdog module is not required, but highly recommended. It improves Streamlit's ability to detect changes to files in your filesystem.
# If you'd like to turn off this warning, set this to True.
# Default: false
# disableWatchdogWarning = false

# If True, will show a warning when you run a Streamlit-enabled script via "python my_script.py".
# Default: true
# showWarningOnDirectExecution = true

# DataFrame serialization.
# Acceptable values: - 'legacy': Serialize DataFrames using Streamlit's custom format. Slow but battle-tested. - 'arrow': Serialize DataFrames using Apache Arrow. Much faster and versatile.
# Default: "arrow"
# dataFrameSerialization = "arrow"


[logger]

# Level of logging: 'error', 'warning', 'info', or 'debug'.
# Default: 'info'
# level = "info"

# String format for logging messages. If logger.datetimeFormat is set, logger messages will default to `%(asctime)s.%(msecs)03d %(message)s`. See [Python's documentation](https://docs.python.org/2.6/library/logging.html#formatter-objects) for available attributes.
# Default: "%(asctime)s %(message)s"
# messageFormat = "%(asctime)s %(message)s"


[client]

# Whether to enable st.cache.
# Default: true
# caching = true

# If false, makes your Streamlit script not draw to a Streamlit app.
# Default: true
# displayEnabled = true

# Controls whether uncaught app exceptions and deprecation warnings are displayed in the browser. By default, this is set to True and Streamlit displays app exceptions and associated tracebacks, and deprecation warnings, in the browser.
# If set to False, an exception or deprecation warning will result in a generic message being shown in the browser, and exceptions, tracebacks, and deprecation warnings will be printed to the console only.
# Default: true
# showErrorDetails = true


[runner]

# Allows you to type a variable or string by itself in a single line of Python code to write it to the app.
# Default: true
# magicEnabled = true

# Install a Python tracer to allow you to stop or pause your script at any point and introspect it. As a side-effect, this slows down your script's execution.
# Default: false
# installTracer = false

# Sets the MPLBACKEND environment variable to Agg inside Streamlit to prevent Python crashing.
# Default: true
# fixMatplotlib = true

# Run the Python Garbage Collector after each script execution. This can help avoid excess memory use in Streamlit apps, but could introduce delay in rerunning the app script for high-memory-use applications.
# Default: true
# postScriptGC = true

# Handle script rerun requests immediately, rather than waiting for script execution to reach a yield point. This makes Streamlit much more responsive to user interaction, but it can lead to race conditions in apps that mutate session_state data outside of explicit session_state assignment statements.
# Default: true
# fastReruns = true


[server]

# List of folders that should not be watched for changes. This impacts both "Run on Save" and @st.cache.
# Relative paths will be taken as relative to the current working directory.
# Example: ['/home/user1/env', 'relative/path/to/folder']
# Default: []
# folderWatchBlacklist = []

# Change the type of file watcher used by Streamlit, or turn it off completely.
# Allowed values: * "auto" : Streamlit will attempt to use the watchdog module, and falls back to polling if watchdog is not available. * "watchdog" : Force Streamlit to use the watchdog module. * "poll" : Force Streamlit to always use polling. * "none" : Streamlit will not watch files.
# Default: "auto"
# fileWatcherType = "auto"

# Symmetric key used to produce signed cookies. If deploying on multiple replicas, this should be set to the same value across all replicas to ensure they all share the same secret.
# Default: randomly generated secret key.
# cookieSecret = "9652e181a88de73c43d3d6cc721ed5ff76590974fa081b5fe449f72fd1d3321a"

# If false, will attempt to open a browser window on start.
# Default: false unless (1) we are on a Linux box where DISPLAY is unset, or (2) we are running in the Streamlit Atom plugin.
# headless = false

# Automatically rerun script when the file is modified on disk.
# Default: false
# runOnSave = false

# The address where the server will listen for client and browser connections. Use this if you want to bind the server to a specific address. If set, the server will only be accessible from this address, and not from any aliases (like localhost).
# Default: (unset)
# address =

# The port where the server will listen for browser connections.
# Default: 5998
port = 5998

# The base path for the URL where Streamlit should be served from.
# Default: ""
# baseUrlPath = ""

# Enables support for Cross-Origin Resource Sharing (CORS) protection, for added security.
# Due to conflicts between CORS and XSRF, if `server.enableXsrfProtection` is on and `server.enableCORS` is off at the same time, we will prioritize `server.enableXsrfProtection`.
# Default: true
# enableCORS = true

# Enables support for Cross-Site Request Forgery (XSRF) protection, for added security.
# Due to conflicts between CORS and XSRF, if `server.enableXsrfProtection` is on and `server.enableCORS` is off at the same time, we will prioritize `server.enableXsrfProtection`.
# Default: true
# enableXsrfProtection = true

# Max size, in megabytes, for files uploaded with the file_uploader.
# Default: 200
maxUploadSize = 300

# Max size, in megabytes, of messages that can be sent via the WebSocket connection.
# Default: 200
maxMessageSize = 300

# Enables support for websocket compression.
# Default: false
# enableWebsocketCompression = false

# Enable serving files from a `static` directory in the running app's directory.
# Default: false
# enableStaticServing = false

# Server certificate file for connecting via HTTPS. Must be set at the same time as "server.sslKeyFile".
# ['DO NOT USE THIS OPTION IN A PRODUCTION ENVIRONMENT. It has not gone through security audits or performance tests. For the production environment, we recommend performing SSL termination by the load balancer or the reverse proxy.']
# sslCertFile =

# Cryptographic key file for connecting via HTTPS. Must be set at the same time as "server.sslCertFile".
# ['DO NOT USE THIS OPTION IN A PRODUCTION ENVIRONMENT. It has not gone through security audits or performance tests. For the production environment, we recommend performing SSL termination by the load balancer or the reverse proxy.']
# sslKeyFile =


[browser]

# Internet address where users should point their browsers in order to connect to the app. Can be IP address or DNS name and path.
# This is used to: - Set the correct URL for CORS and XSRF protection purposes. - Show the URL on the terminal - Open the browser
# Default: "localhost"
# serverAddress = "localhost"

# Whether to send usage statistics to Streamlit.
# Default: true
# gatherUsageStats = true

# Port where users should point their browsers in order to connect to the app.
# This is used to: - Set the correct URL for CORS and XSRF protection purposes. - Show the URL on the terminal - Open the browser
# Default: whatever value is set in server.port.
# serverPort = 5998


[mapbox]

# Configure Streamlit to use a custom Mapbox token for elements like st.pydeck_chart and st.map. To get a token for yourself, create an account at https://mapbox.com. It's free (for moderate usage levels)!
# Default: ""
# token = ""


[deprecation]

# Set to false to disable the deprecation warning for the file uploader encoding.
# Default: true
# showfileUploaderEncoding = true

# Set to false to disable the deprecation warning for using the global pyplot instance.
# Default: true
# showPyplotGlobalUse = true


[theme]

# The preset Streamlit theme that your custom theme inherits from. One of "light" or "dark".
# base =

# Primary accent color for interactive elements.
# primaryColor =

# Background color for the main content area.
# backgroundColor =

# Background color used for the sidebar and most interactive widgets.
# secondaryBackgroundColor =

# Color used for almost all text.
# textColor =

# Font family for all text in the app, except code blocks. One of "sans serif", "serif", or "monospace".
# font =
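Aside (not part of the commit): only three keys above are uncommented, so only they override Streamlit's defaults. A minimal sketch confirming that, using tomllib from the Python 3.11 standard library (the same interpreter version as the devcontainer image); commented-out options simply do not appear in the parsed result.

import tomllib

with open(".streamlit/config.toml", "rb") as f:
    cfg = tomllib.load(f)

server = cfg.get("server", {})
print(server.get("port"))            # 5998
print(server.get("maxUploadSize"))   # 300 (megabytes, raised from the 200 default)
print(server.get("maxMessageSize"))  # 300 (megabytes, raised from the 200 default)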
BR18_DB/1_BR18_Executive_order_on_building_regulations_2018.md
ADDED
The diff for this file is too large to render.
See raw diff
BR18_DB/2_BR18_Executive_order_on_building_regulations_2018_Appendix.md
ADDED
@@ -0,0 +1,265 @@
### Appendix 1 - Tables for Chapter 5 – Fire

#### Table 1 - Categories of use

| Category of use | The building section includes sleeping facilities | Persons in the building section have been informed of escape routes | Options allowing persons to bring themselves to safety unassisted | Maximum number of persons which the room accommodates |
| --------------- | ------------------------------------------------- | -------------------------------------------------------------------- | ------------------------------------------------------------------ | ------------------------------------------------------ |
| 1 | No | Yes | Yes | No limitations |
| 2 | No | No | Yes | 50 max. |
| 3 | No | No | Yes | No limitations |
| 4 | Yes | Yes | Yes | No limitations |
| 5 | Yes | No | Yes | No limitations |
| 6 | No | No | No | No limitations |

#### Table 2 - Risk classes

| Risk class | Category of use of the building | The layout of the building, the total number of persons in the building section with shared escape routes and fire load |
| ---------- | ------------------------------- | ------------------------------------------------------------------------------------------------------------------------ |
| 1 | 1 | General: Buildings with maximum 1 storey above ground level and maximum 1 storey below ground level and the fire load of the building section may not exceed 1,600 MJ/sq. metre floor area |
| 1 | 4 | General: Buildings with maximum 1 storey above ground level and maximum 1 storey below ground level or detached, semi-detached and terraced single-family houses and holiday homes with maximum 2 storeys above ground level and maximum 1 storey below ground level |
| 2 | 1 | General: Buildings with top storey floors maximum 9.6 metres above ground level and maximum 1 storey below ground level or Buildings with maximum 1 storey above ground level and maximum 1 storey below ground level with a possible fire load in the building section exceeding 1,600 MJ/sq. metre floor area |
| 2 | 4 | General: Buildings with top storey floors maximum 9.6 metres above ground level and maximum 1 storey below ground level |
| 2 | 2, 5, 6 | General: Buildings with maximum 1 storey above ground level and maximum 1 storey below ground level |
| 2 | 3 | General: Buildings with maximum 1 storey above ground level and maximum 1 storey below ground level and the number of persons in a building section with shared escape routes may not exceed 1000 |
| 3 | 1 and 4 | General: Buildings with top storey floors between 9.6 metres and 45 metres above ground level and maximum 1 storey below ground level |
| 3 | 2 and 5 | General: Buildings with top storey floors maximum 22 metres above ground level and maximum 1 storey below ground level |
| 3 | 3 | General: Buildings with top storey floors maximum 22 metres above ground level, maximum 1 storey below ground level and maximum 150 persons in the building section or Buildings with maximum 2 storeys above ground level and maximum 1000 persons in the building section |
| 3 | 6 | General: Buildings with maximum 2 storeys above ground level and maximum 1 storey below ground level |
| 4 | 1 to 6 | General: Buildings not covered by risk classes 1 - 3 |


### Appendix 2 - Tables for Chapter 11 – Energy Consumption

#### Table 1 - General Minimum Requirements for Building Envelopes

| Building part | U value [W/sq. metre K] |
| ------------- | ----------------------- |
| Outer walls and basement walls adjacent to soil | 0.30 |
| Storey partitions and partition walls adjacent to rooms, where the temperature difference between the rooms is 5 °C or more | 0.40 |
| Ground slab, basement floors adjacent to soil and storey partitions to open air or a ventilated crawl space | 0.20 |
| Storey partitions under floors with underfloor heating adjacent to heated rooms | 0.50 |
| Ceiling and roof structures, including cupboards under roof slopes, flat roofs and sloping walls adjacent to roofs | 0.20 |
| Outer doors without glass panes. Reference dimensions 1.23 metres x 2.18 metres | 1.40 |
| Outer doors with glass panes. Reference dimensions 1.23 metres x 2.18 metres | 1.50 |
| For gates and hatches opening to the outside or to unheated rooms and glass walls and windows adjacent to rooms heated to a temperature creating a temperature difference between the rooms of 5 °C or more | 1.80 |
| Dome lights | 1.40 |
| Insulated sections of glass outer walls. The requirement is for the centre U value | 0.60 |
| Storey partitions and walls adjacent to freezing rooms | 0.15 |
| Storey partitions and walls adjacent to refrigeration rooms | 0.25 |

| Building part | Linear thermal transmittance [W/metre K] |
| ------------- | ---------------------------------------- |
| Foundations around rooms which are heated to minimum 5 °C | 0.40 |
| Joint between outer wall and windows or outer doors, gates and hatches | 0.06 |
| Junction between roof structure and skylights or dome lights | 0.20 |


#### Table 2 - Minimum requirements for building envelope in case of changes in use

| Building part | U value [W/sq. metre K] for room heated to T > 15 °C | U value [W/sq. metre K] for room heated to 5 °C < T < 15 °C |
| ------------- | ---------------------------------------------------- | ------------------------------------------------------------ |
| Outer walls and basement walls adjacent to soil | 0.15 | 0.25 |
| Storey partitions and partition walls adjacent to rooms with a room temperature difference of 5 °C or more | 0.40 | 0.40 |
| Ground slab, basement floors adjacent to soil and storey partitions over open air or a ventilated crawl space | 0.10 | 0.15 |
| Ceiling and roof structures, including cupboards under roof slopes, flat roofs and sloping walls adjacent to roof | 0.12 | 0.15 |
| Gates | 1.80 | 1.80 |
| Hatches opening to open air to rooms with a temperature difference of 5 °C or more between the rooms (does not include vents of less than 500 sq. centimetres) | 1.40 | 1.50 |
| Dome lights | 1.40 | 1.80 |

| Building part | Linear thermal transmittance [W/metre K] for room heated to T > 15 °C | Linear thermal transmittance [W/metre K] for room heated to 5 °C < T < 15 °C |
| ------------- | ---------------------------------------------------------------------- | ------------------------------------------------------------------------------ |
| Foundations | 0.12 | 0.20 |
| Joint between outer wall and windows or outer doors, gates and hatches | 0.03 | 0.03 |
| Junction between roof structure and skylights or dome lights | 0.10 | 0.10 |


#### Table 3 - Minimum requirements for building envelope in connection with conversions and other changes to the building

| Building part | U value [W/sq. metre K] |
| ------------- | ----------------------- |
| Outer walls and basement walls adjacent to soil | 0.18 |
| Storey partitions and partition walls adjacent to rooms with a temperature difference between the rooms of 5 °C or more | 0.40 |
| Ground slab, basement floors adjacent to soil and storey partitions to open air or a ventilated crawl space | 0.10 |
| Ceiling and roof structures, including cupboards under roof slopes, flat roofs and sloping walls adjacent to roofs | 0.12 |
| Gates | 1.80 |
| Hatches, storm windows and dome lights | 1.40 |
| Renovated storm windows | 1.65 |

| Building part | Linear thermal transmittance [W/metre K] |
| ------------- | ---------------------------------------- |
| Foundations | 0.12 |
| Joint between outer wall, windows or outer doors, gates and hatches | 0.03 |
| Junction between roof structure and skylights or dome lights | 0.10 |

#### Table 4 - Minimum requirements for building envelope for holiday homes, camping cabins and similar holiday accommodation

| Building part | U value [W/sq. metre K] |
| ------------- | ----------------------- |
| Outer walls and basement walls adjacent to soil | 0.25 |
| Partition walls and storey partitions adjacent to unheated rooms | 0.40 |
| Ground slab, basement floors adjacent to soil and storey partitions to open air or a ventilated crawl space | 0.15 |
| Ceiling and roof structures, including cupboards under roof slopes, and flat roofs | 0.15 |
| Windows, outer doors, skylights, glass outer walls, glass roofs and dome lights to open air or unheated rooms | 1.80 |

| Building part | Linear thermal transmittance [W/metre K] |
| ------------- | ---------------------------------------- |
| Foundations | 0.15 |
| Joint between outer wall and windows or outer doors, gates and hatches | 0.03 |
| Junction between roof structure and skylights or dome lights | 0.10 |

#### Table 5 - Minimum requirements for building envelope for temporary portable pavilions

| Building part | U value [W/sq. metre K] |
| ------------- | ----------------------- |
| Outer walls | 0.20 |
| Storey partitions and partition walls adjacent to rooms, where the temperature difference between the rooms is 5 °C or more | 0.40 |
| Floor and storey partitions above open air or a ventilated crawl space | 0.12 |
| Ceiling and roof structures, including cupboards under roof slopes, flat roofs and sloping walls adjacent to roof | 0.15 |
| For gates and hatches opening to the outside or to unheated rooms and glass walls and windows adjacent to rooms heated to a temperature creating a temperature difference between the rooms of 5 °C or more | 1.80 |
| Dome lights | 1.80 |

| Building part | Linear thermal transmittance [W/metre K] |
| ------------- | ---------------------------------------- |
| Foundations | 0.20 |
| Joint between outer wall, windows or outer doors, gates and hatches | 0.03 |
| Junction between roof structure and skylights or dome lights | 0.10 |


### Appendix 3 – Tables for Chapter 30 – Inspection of documentation of load-bearing structures and fire safety

#### Table 1 - Minimum requirements for types of inspection depending on fire and structure class

| Fire or structure class | Internal inspection | Independent inspection | Third-party inspection |
|-------------------------|---------------------|------------------------|------------------------|
| 2 | X | X* | |
| 3 | X | X | |
| 4 | X | X | X |

> X* For buildings in fire class 2, independent inspection of documentation must be carried out in relation to fire classification, declarations regarding whether the building is traditional in terms of contingency tactics, of the fire strategy report and the inspection plan, see ss. 510-512 and s. 518 (fire safety documentation). For structure class 2, the requirement for independent inspection only applies to A1. Structural basis and B2. Structural inspection plan, see s. 501, para (1) and s. 502(1), para (2). For the remaining part of the documentation, inspection may be carried out by persons who did not participate in the preparation of the documentation for the relevant structure/structural section, but who participated in the planning of the building.

> X stands for required.


### Appendix 4 - Tables for Chapter 33 – The Work of the Certified Structural Engineer

#### Table 1 - Minimum Requirements for the Work of the Certified Structural Engineer as an Active Planner

| ID | Subject | KK2 | KK3 | KK4 |
|------|-------------------------------------------------------------------|-----------|-----------|-----------|
| A1 | Structural basis | Kmax + G | U + G | U + G |
| A2 | Structural calculations | | | |
| | A2. 1 Structural calculations – building | Kext + G | Kmax + G | UA + G |
| | A2. 2 Structural calculations – structural section, own organisation | Kext + G | Kext + G | Kmax + G |
| | A2. 2 Structural calculations – structural section, other organisation | Kext | Kext | Kmax |
| A3 | Structural drawings and models | | | |
| | A3. 1 Structural drawings and models – building | Kext | Kmax | Kmax + G |
| | A3. 2 Structural drawings and models – structural section, own organisation | Kext | Kmax | Kmax |
| | A3. 2 Structural drawings and models – structural section, other organisation | Kext | Kext | Kmax |
| A4 | Structural changes | | | |
| | A4. 1 Structural changes, own organisation | Kext | Kmax | Kmax + G |
| | A4. 2 Structural changes, other organisation | Kext | Kext | Kmax |
| B1 | Structural project report | Kext + G | Kext + G | U + G |
| B2 | Structural inspection plan | Kmax + G | U + G | U + G |
| B3 | Structural inspection report | | | |
| | B3. 1 Structural inspection report – planning | Kext | Kmax | Kmax |
| | B3. 2 Structural inspection report – implementation | Kext | Kext | Kmax |

> A): The requirement for U only includes vertical and horizontal lowering of loads and robustness. For other parts, "Kmax + G" is required.
> **Note**: KK represents structure class which can be found in clause 489 - Chapter 26 - Structure classes

#### Table 2 - Minimum requirements for the work of the certified structural engineer as an active inspector

| ID | Subject | KK2 | KK3 | KK4 |
|------|-------------------------------------------------------------------|-----------|-----------|-----------|
| A1 | Structural basis | Max. | Max. | Max. |
| A2 | Structural calculations | | | |
| | A2. 1 Structural calculations – building | Ext. | Max. | Max. |
| | A2. 2 Structural calculations – structural section | Ext. | Ext. | Max. |
| A3 | Structural drawings and models | | | |
| | A3. 1 Structural drawings and models – building | Ext. | Max. | Max. |
| | A3. 2 Structural drawings and models – structural section | Ext. | Ext. | Max. |
| A4 | Structural changes | | | |
| | A4. 1 Structural changes – building | Ext. | Max. | Max. |
| | A4. 2 Structural changes – structural section | Ext. | Ext. | Max. |
| B1 | Structural project report | Ext. | Ext. | Max. |
| B2 | Structural inspection plan | Max. | Max. | Max. |
| B3 | Structural inspection report | | | |
| | B3. 1 Structural inspection report – planning | Ext. | Max. | Max. |
| | B3. 2 Structural inspection report – implementation | Ext. | Ext. | Max. |

> **Note**: **Ext.** refers to Extended inspection, and **Max.** refers to Maximum inspection.
> **Note**: KK represents structure class which can be found in clause 489 - Chapter 26 - Structure classes

#### Table 3 - Minimum requirements for the work of the certified structural engineer as a third-party inspector

| ID | Subject | Third-party inspection |
|------|-------------------------------------------------------------------|------------------------|
| A1 | Structural basis | Max. |
| A2 | Structural calculations | |
| | A2. 1 Structural calculations – building | Max. |
| | A2. 2 Structural calculations – structural section | Ext. |
| A3 | Structural drawings and models | |
| | A3. 1 Structural drawings and models – building | Max. |
| | A3. 2 Structural drawings and models – structural section | Ext. |
| A4 | Structural changes | |
| | A4. 1 Structural changes – building | Max. |
| | A4. 2 Structural changes – structural section | Ext. |
| B1 | Structural project report | Ext. |
| B2 | Structural inspection plan | Max. |
| B3 | Structural inspection report | |
| | B3. 1 Structural inspection report for planning | Ext. |
| | B3. 2 Structural inspection report for implementation | Ext. |

> **Note**: **Ext.** refers to Extended inspection, and **Max.** refers to Maximum inspection.

### Appendix 5 – Tables for Chapter 34 – The work of the certified fire consultant

#### Table 1 - Minimum requirement for the work of the certified fire consultant as preparer of the fire safety documentation

| Subject | BK2 | BK3 | BK4 |
|-----------------------------------------------------------------|----------|----------|----------|
| Documentation of fire classification(s) | U + G | U + G | U + G |
| Statement indicating if the building is traditional in terms of contingency tactics | U + G | U + G | U + G |
| Fire strategy report | Kmax + G | Kmax + G | U + G |
| Fire plans | Kext | Kmax | Kmax |
| Area distribution plans and paving plans | Kext | Kext | Kext |
| Fire dimensioning | - | Kext + G | Kmax + G |
| Description of functions | Kext | Kext | Kmax |
| Fire management plan | Kmax + G | U + G | U + G |
| Fire management reports | Kext | Kmax | Kmax |
| Operation, inspection and maintenance plan | Kext | Kext | Kmax |

> **Note**: BK represents fire class which can be found in clause 493 - Chapter 27 - Fire classes

#### Table 2 - Minimum requirement for the work of the certified fire consultant as inspector of the fire safety documentation

| Subject | BK2 | BK3 | BK4 |
|-----------------------------------------------------------------|----------|----------|----------|
| Documentation of fire classification(s) | Max. | Max. | Max. |
| Statement indicating if the building is traditional in terms of contingency tactics | Max. | Max. | Max. |
| Fire strategy report | Max. | Max. | Max. |
| Fire plans | Ext. | Max. | Max. |
| Area distribution plans and paving plans | Ext. | Ext. | Ext. |
| Fire dimensioning | - | Ext. | Max. |
| Description of functions | Ext. | Max. | Max. |
| Fire management plan | Max. | Max. | Max. |
| Fire management reports | Ext. | Max. | Max. |
| Operation, inspection and maintenance plan | Ext. | Ext. | Max. |

> **Note**: **Ext.** refers to Extended inspection, and **Max.** refers to Maximum inspection.
> **Note**: BK represents fire class which can be found in clause 493 - Chapter 27 - Fire classes

#### Table 3 - Minimum requirement for certified third-party checks of the fire safety documentation

| Subject | Third party |
|-----------------------------------------------------------------|-------------|
| Documentation of fire classification(s) | Max. |
| Statement indicating if the building is traditional in terms of contingency tactics | Max. |
| Fire strategy report | Max. |
| Fire plans | Max. |
| Area distribution plans and paving plans | Ext. |
| Fire dimensioning | Ext. |
| Description of functions | Ext. |
| Fire management plan | Max. |
| Fire management reports | Ext. |
| Operation, inspection and maintenance plan | Ext. |

> **Note**: **Ext.** refers to Extended inspection, and **Max.** refers to Maximum inspection.
> **Note**: BK represents fire class which can be found in clause 493 - Chapter 27 - Fire classes
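Aside (not part of the commit): these appendix tables are flat key/value data, so downstream code could encode them directly instead of retrieving them as free text. A hand-copied sketch of a few rows from Appendix 2, Table 1; the dictionary keys and the helper name are illustrative only.

# Hypothetical lookup over a few rows of Appendix 2, Table 1 (general minimum
# requirements for building envelopes); values are U values in W/sq. metre K.
U_VALUE_LIMITS = {
    "outer walls and basement walls adjacent to soil": 0.30,
    "ground slab and basement floors adjacent to soil": 0.20,
    "ceiling and roof structures": 0.20,
    "outer doors without glass panes": 1.40,
}

def meets_requirement(building_part: str, measured_u_value: float) -> bool:
    # A lower U value insulates better, so the measured value must not exceed the limit.
    return measured_u_value <= U_VALUE_LIMITS[building_part]

print(meets_requirement("outer walls and basement walls adjacent to soil", 0.25))  # True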
CustomClass/custom.py
ADDED
@@ -0,0 +1,25 @@
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from typing import List, Tuple
from langchain.schema.retriever import BaseRetriever, Document

class CustomMultiVectorRetriever(MultiVectorRetriever):
    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        # Override this method to use similarity_search_with_relevance_scores
        sub_docs_and_scores = self.vectorstore.similarity_search_with_relevance_scores(
            query, **self.search_kwargs
        )

        # Filter by score if needed (you can add additional logic here)
        sub_docs = [doc for doc, score in sub_docs_and_scores if score >= 0.8]

        # ... (rest of the method stays the same)
        ids = []
        for d in sub_docs:
            if d.metadata[self.id_key] not in ids:
                ids.append(d.metadata[self.id_key])
        docs = self.docstore.mget(ids)

        return [d for d in docs if d is not None]
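Aside (not part of the commit): CustomMultiVectorRetriever only changes how child chunks are scored (similarity_search_with_relevance_scores with a hard 0.8 cut-off) before the usual parent-document lookup. A minimal wiring sketch under assumed data; the sample document and query are placeholders, and an OpenAI API key is assumed to be set in the environment.

import uuid

from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document
from langchain.storage import InMemoryStore
from langchain.vectorstores import FAISS

from CustomClass.custom import CustomMultiVectorRetriever

# Parent documents, each linked to its child chunks via a doc_id.
parents = [Document(page_content="Placeholder parent text about BR18 fire classes.")]
ids = [str(uuid.uuid4()) for _ in parents]
children = [
    Document(page_content=p.page_content, metadata={"doc_id": i})
    for p, i in zip(parents, ids)
]

vectorstore = FAISS.from_documents(children, OpenAIEmbeddings())
docstore = InMemoryStore()
docstore.mset(list(zip(ids, parents)))

retriever = CustomMultiVectorRetriever(
    vectorstore=vectorstore,
    docstore=docstore,
    id_key="doc_id",
    search_kwargs={"k": 5},
)
# Children scoring below 0.8 relevance are dropped before parents are fetched.
docs = retriever.get_relevant_documents("What does BR18 say about fire classes?")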
FileReader/file.py
ADDED
File without changes
README copy.md
ADDED
@@ -0,0 +1 @@
# MRKL
faiss_index/index.faiss
ADDED
Binary file (516 kB). View file
faiss_index/index.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ed48d171261be490671e637263a54b65eca8fe1158feb10cce37d044a1e7cc44
size 71675
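Aside (not part of the commit): the two .pkl entries in this commit are Git LFS pointer files rather than the pickles themselves. A small sketch parsing the spec-v1 key/value layout shown above:

from pathlib import Path

# Each pointer line is "key value"; split on the first space only.
pointer = dict(
    line.split(" ", 1)
    for line in Path("faiss_index/index.pkl").read_text().splitlines()
    if line.strip()
)

print(pointer["version"])    # https://git-lfs.github.com/spec/v1
print(pointer["oid"])        # sha256:ed48d1... identifies the real object
print(int(pointer["size"]))  # 71675 bytes for the actual file, not the pointer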
inmemorystore/br18_parent_store.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:282c2f1deee715b0c1fb39aca4404f36f53bd1f905a800000d0a294e372e0d3b
size 665146
mrkl.py
ADDED
@@ -0,0 +1,1060 @@
1 |
+
import os
|
2 |
+
import re
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
from langchain.chat_models import ChatOpenAI
|
5 |
+
from langchain.agents import Tool, AgentExecutor
|
6 |
+
from langchain.callbacks import StreamlitCallbackHandler
|
7 |
+
from langchain.chains import LLMMathChain
|
8 |
+
from langchain.chains import LLMChain
|
9 |
+
import streamlit as st
|
10 |
+
import langchain
|
11 |
+
from langchain.utilities import SerpAPIWrapper, GoogleSearchAPIWrapper
|
12 |
+
from langchain.chains import RetrievalQA
|
13 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter, MarkdownHeaderTextSplitter, CharacterTextSplitter
|
14 |
+
from langchain.vectorstores import FAISS, Chroma, Pinecone
|
15 |
+
from langchain.embeddings import OpenAIEmbeddings
|
16 |
+
import tempfile
|
17 |
+
import pypdf
|
18 |
+
from pathlib import Path
|
19 |
+
from langchain.docstore.document import Document
|
20 |
+
from langchain.document_loaders import TextLoader
|
21 |
+
from langchain.chains.summarize import load_summarize_chain
|
22 |
+
from langchain import PromptTemplate
|
23 |
+
import lark
|
24 |
+
from langchain.schema import Document
|
25 |
+
import langchain
|
26 |
+
import pinecone
|
27 |
+
from langchain.chains.question_answering import load_qa_chain
|
28 |
+
from typing import List, Dict, Any
|
29 |
+
from langchain.prompts.prompt import PromptTemplate
|
30 |
+
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import AgentTokenBufferMemory
|
31 |
+
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
|
32 |
+
from langchain.schema.messages import SystemMessage, BaseMessage
|
33 |
+
from langchain.prompts import MessagesPlaceholder
|
34 |
+
from langchain.agents import AgentExecutor
|
35 |
+
from langchain.schema import HumanMessage, SystemMessage
|
36 |
+
from langchain.retrievers.multi_vector import MultiVectorRetriever
|
37 |
+
from langchain.storage import InMemoryStore
|
38 |
+
import uuid
|
39 |
+
from langchain.retrievers import ContextualCompressionRetriever
|
40 |
+
from langchain.retrievers.document_compressors import DocumentCompressorPipeline, LLMChainFilter
|
41 |
+
from langchain.document_transformers import EmbeddingsRedundantFilter
|
42 |
+
from langchain.retrievers.document_compressors import EmbeddingsFilter
|
43 |
+
import json
|
44 |
+
from bs4 import BeautifulSoup
|
45 |
+
from langchain.document_loaders import SeleniumURLLoader
|
46 |
+
from langchain.memory.chat_message_histories import StreamlitChatMessageHistory
|
47 |
+
from langchain.callbacks import get_openai_callback
|
48 |
+
import pickle
|
49 |
+
from langchain.prompts import ChatPromptTemplate
|
50 |
+
from langchain.schema.output_parser import StrOutputParser
|
51 |
+
|
52 |
+
langchain.debug = True
|
53 |
+
langchain.verbose = True
|
54 |
+
|
55 |
+
def on_selectbox_change():
|
56 |
+
st.session_state.show_info = True
|
57 |
+
|
58 |
+
def reset_chat():
|
59 |
+
st.session_state.messages = [{"roles": "assistant", "content": "Hi, I am Miracle. How can I help you?"}]
|
60 |
+
st.session_state.history = []
|
61 |
+
st.session_state.search_keywords = []
|
62 |
+
st.session_state.doc_sources = []
|
63 |
+
st.session_state.summary = None
|
64 |
+
st.session_state.agent.clear_conversation()
|
65 |
+
st.session_state.primed_document_response = None
|
66 |
+
|
67 |
+
def display_messages(messages):
|
68 |
+
# Display all messages
|
69 |
+
for msg in messages:
|
70 |
+
st.chat_message(msg["roles"]).write(msg["content"])
|
71 |
+
|
72 |
+
class DBStore:
|
73 |
+
def __init__(self, file_path, file_name):
|
74 |
+
self.file_path = file_path
|
75 |
+
self.file_name = os.path.splitext(file_name)[0]
|
76 |
+
st.session_state.document_filename = self.file_name
|
77 |
+
|
78 |
+
self.reader = pypdf.PdfReader(file_path)
|
79 |
+
self.metadata = self.extract_metadata_from_pdf()
|
80 |
+
self.embeddings = OpenAIEmbeddings()
|
81 |
+
self.vector_store = None
|
82 |
+
|
83 |
+
def extract_metadata_from_pdf(self):
|
84 |
+
"""Extract metadata from the PDF."""
|
85 |
+
metadata = self.reader.metadata
|
86 |
+
st.session_state.document_metadata = metadata
|
87 |
+
return {
|
88 |
+
"title": metadata.get("/Title", "").strip(),
|
89 |
+
"author": metadata.get("/Author", "").strip(),
|
90 |
+
"creation_date": metadata.get("/CreationDate", "").strip(),
|
91 |
+
}
|
92 |
+
|
93 |
+
def extract_pages_from_pdf(self):
|
94 |
+
pages = []
|
95 |
+
for page_num, page in enumerate(self.reader.pages):
|
96 |
+
text = page.extract_text()
|
97 |
+
if text.strip(): # Check if extracted text is not empty
|
98 |
+
pages.append((page_num + 1, text))
|
99 |
+
return pages
|
100 |
+
|
101 |
+
def parse_pdf(self):
|
102 |
+
"""
|
103 |
+
Extracts the title and text from each page of the PDF.
|
104 |
+
:return: A tuple containing the title and a list of tuples with page numbers and extracted text.
|
105 |
+
"""
|
106 |
+
metadata = self.extract_metadata_from_pdf()
|
107 |
+
pages = self.extract_pages_from_pdf()
|
108 |
+
#st.write(pages)
|
109 |
+
#st.write(metadata)
|
110 |
+
return pages, metadata
|
111 |
+
|
112 |
+
@staticmethod
|
113 |
+
def merge_hyphenated_words(text):
|
114 |
+
return re.sub(r"(\w)-\n(\w)", r"\1\2", text)
|
115 |
+
|
116 |
+
@staticmethod
|
117 |
+
def fix_newlines(text):
|
118 |
+
return re.sub(r"(?<!\n)\n(?!\n)", " ", text)
|
119 |
+
|
120 |
+
@staticmethod
|
121 |
+
def remove_multiple_newlines(text):
|
122 |
+
return re.sub(r"\n{2,}", "\n", text)
|
123 |
+
|
124 |
+
@staticmethod
|
125 |
+
def remove_dots(text):
|
126 |
+
# Replace sequences of three or more dots with a single space.
|
127 |
+
return re.sub(r'\.{4,}', ' ', text)
|
128 |
+
|
129 |
+
def clean_text(self, pages):
|
130 |
+
cleaning_functions = [
|
131 |
+
self.merge_hyphenated_words,
|
132 |
+
self.fix_newlines,
|
133 |
+
self.remove_multiple_newlines,
|
134 |
+
self.remove_dots,
|
135 |
+
]
|
136 |
+
cleaned_pages = []
|
137 |
+
for page_num, text in pages:
|
138 |
+
for cleaning_function in cleaning_functions:
|
139 |
+
text = cleaning_function(text)
|
140 |
+
cleaned_pages.append((page_num, text))
|
141 |
+
return cleaned_pages
|
142 |
+
|
143 |
+
def text_to_docs(self, text):
|
144 |
+
doc_chunks = []
|
145 |
+
for page_num, page in text:
|
146 |
+
text_splitter = RecursiveCharacterTextSplitter(
|
147 |
+
chunk_size=2000,
|
148 |
+
separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
|
149 |
+
chunk_overlap=200,
|
150 |
+
)
|
151 |
+
chunks = text_splitter.split_text(page)
|
152 |
+
            for i, chunk in enumerate(chunks):
                doc = Document(
                    page_content=chunk,
                    metadata={
                        "page_number": page_num,
                        "chunk": i,
                        "source": f"p{page_num}-{i}",
                        "file_name": self.file_name,
                        **self.metadata,
                    },
                )
                doc_chunks.append(doc)
        st.write(doc_chunks)
        return doc_chunks

    def get_pdf_text(self):
        pages, metadata = self.parse_pdf()  # We only need the pages from the tuple
        cleaned_text_pdf = self.clean_text(pages)
        document_chunks = self.text_to_docs(cleaned_text_pdf)
        return document_chunks

    def get_vectorstore(self):
        document_chunks = self.get_pdf_text()
        #st.write(document_chunks)
        vector_store = FAISS.from_documents(documents=document_chunks, embedding=self.embeddings)
        #st.write(vector_store)
        return vector_store

    def get_document_info(self):
        """
        Generate a short document information snippet by taking the beginning of the first few chunks of the document.

        Returns:
            str: A short information snippet of the document.
        """
        # Get the first chunks of the document
        pdf_text = self.get_pdf_text()

        if pdf_text:
            first_chunk = pdf_text[0].page_content if len(pdf_text) > 0 else ""
            second_chunk = pdf_text[1].page_content if len(pdf_text) > 1 else ""
            third_chunk = pdf_text[2].page_content if len(pdf_text) > 2 else ""

            # Extract the first 300 characters from each chunk to form an information snippet
            info_document = first_chunk[:300] + second_chunk[:300] + third_chunk[:300]
        else:
            info_document = ""
        #st.write(info_document)

        return info_document

    def get_info_response(self):
        llm = ChatOpenAI(
            temperature=0,
            streaming=True,
            model_name="gpt-3.5-turbo"
        )
        document_filename = self.file_name
        document_title = self.metadata.get("title", None)
        document_snippet = self.get_document_info()

        document_info = {
            "document_filename": document_filename,
            "document_title": document_title,
            "document_snippet": document_snippet,
        }

        if document_title:
            info_response_prompt = """The user has uploaded a document titled '{document_title}' to the Document Database """
        else:
            info_response_prompt = """The user has uploaded a document named '{document_filename}' to the Document Database """

        info_response_prompt += """
        with the following information: {document_snippet}.

        In one sentence, inform the user about the document, prioritizing its name or title.
        Also, prompt the user to ask a general question about the document in an assistive manner.
        Begin your response with 'It appears you've uploaded a document that contains information on...'.

        Example:
        It appears you've uploaded a document that contains information on "COWI Policies and Guideline".

        Please feel free to ask any question about this document such as "What are the COWI Policies and Guideline?"
        """

        #st.write(info_response_prompt)

        # Create the LLMChain
        llm_chain = LLMChain(
            llm=llm,
            prompt=PromptTemplate.from_template(info_response_prompt)
        )

        # Generate the primed document message
        llm_response = llm_chain(document_info)

        info_response = llm_response.get('text', '')
        #st.write(info_response)
        return info_response

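# --- Illustrative usage sketch (added for documentation; not called by the app) ---
# A minimal example of how the DBStore pipeline above fits together. The PDF
# path and display name below are hypothetical placeholders.
def _example_dbstore_usage():
    db_store = DBStore("example.pdf", "example.pdf")  # hypothetical local file
    document_chunks = db_store.get_pdf_text()      # parse -> clean -> chunked Documents
    vector_store = db_store.get_vectorstore()      # embed the chunks into a FAISS index
    info_response = db_store.get_info_response()   # one-sentence primer for the chat
    return document_chunks, vector_store, info_response
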
class DatabaseTool:
    def __init__(self, llm, vector_store, metadata=None, filename=None):
        self.llm = llm
        self.vector_store = vector_store
        self.metadata = metadata
        self.filename = filename
        self.embedding = OpenAIEmbeddings()

    def get_description(self):
        base_description = "Always useful for finding the exactly written answer to the question by looking into a collection of documents."
        filename = self.filename
        title = self.metadata.get("/Title") if self.metadata else None
        author = self.metadata.get("/Author") if self.metadata else None
        subject = self.metadata.get("/Subject") if self.metadata else None

        footer_description = "Input should be a query, not referencing any obscure pronouns from the conversation before, that will pull out relevant information from the database. Use this more than the normal search tool"

        if title:
            main_description = f"This tool is currently loaded with '{title}'"

            if author:
                main_description += f" by '{author}'"

            if subject:
                main_description += f", and has a topic of '{subject}'"

            return f"{base_description} {main_description}. {footer_description}"
        else:
            no_title_description = f"This tool is currently loaded with the document '{filename}'"
            return f"{base_description} {no_title_description}. {footer_description}"

    def get_base_retriever(self):
        base_retriever = self.vector_store.as_retriever(search_kwargs={'k': 5})
        return base_retriever

    def get_contextual_retriever(self):
        # Initialize embeddings (assuming embeddings is already defined elsewhere)
        embeddings = self.embedding

        # Initialize Redundant Filter
        redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)

        # Initialize Relevant Filter
        relevant_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76, k=25)
        #st.write(relevant_filter)

        # Initialize Text Splitter
        splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=0, separator=". ")

        # Create Compressor Pipeline
        pipeline_compressor = DocumentCompressorPipeline(
            transformers=[splitter, redundant_filter, relevant_filter]
        )

        # Initialize Contextual Compression Retriever
        contextual_retriever = ContextualCompressionRetriever(
            base_compressor=pipeline_compressor,
            base_retriever=self.get_base_retriever()
        )

        return contextual_retriever

    def run(self, query: str):
        contextual_retriever = self.get_contextual_retriever()
        # DEBUGGING & EVALUATING ANSWERS:
        compressed_docs = contextual_retriever.get_relevant_documents(query)
        compressed_docs_list = []
        for doc in compressed_docs:
            doc_info = {
                "Page Content": doc.page_content,
            }
            compressed_docs_list.append(doc_info)
        #st.write(compressed_docs_list)

        base_retriever = self.get_base_retriever()
        initial_retrieved = base_retriever.get_relevant_documents(query)

        retrieval = RetrievalQA.from_chain_type(
            llm=self.llm, chain_type="stuff",
            retriever=contextual_retriever,
            return_source_documents=True,
        )

        output = retrieval(query)
        st.session_state.doc_sources = initial_retrieved

        return output['result']

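# --- Illustrative usage sketch (added for documentation; not called by the app) ---
# How DatabaseTool is meant to be driven, assuming a vector store has already
# been built by DBStore above. The query string is a placeholder. run() first
# applies the contextual compression pipeline (split -> de-duplicate -> relevance
# filter) and then answers with a "stuff" RetrievalQA chain.
def _example_databasetool_usage(llm, vector_store):
    tool = DatabaseTool(llm=llm, vector_store=vector_store, filename="example.pdf")
    return tool.run("What does the document say about safety procedures?")
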
class BR18_DB:
    def __init__(self, llm, folder_path: str):
        self.llm = llm
        self.folder_path = folder_path
        self.md_paths = self.load_documents()  # Renamed from pdf_paths to md_paths
        self.embeddings = OpenAIEmbeddings()
        self.pinecone_index_name = "br18"
        self.id_key = "doc_id"

        self.br18_parent_store = InMemoryStore()
        current_directory = os.getcwd()
        store_path = os.path.join(current_directory, "inmemorystore", "br18_parent_store.pkl")

        if self.pinecone_index_name not in pinecone.list_indexes():
            pinecone.create_index(self.pinecone_index_name, dimension=1536)
            self.vectorstore = self.create_vectorstore()
            self.serialize_inmemorystore(store_path)
        else:
            self.vectorstore = Pinecone.from_existing_index(self.pinecone_index_name, self.embeddings)
            with open(store_path, "rb") as f:
                self.br18_parent_store = pickle.load(f)

        self.retriever = None

    def serialize_inmemorystore(self, store_path):
        with open(store_path, "wb") as f:
            pickle.dump(self.br18_parent_store, f)

    def load_documents(self):
        md_paths = list(Path(self.folder_path).rglob("*.md"))
        documents = []
        for path in md_paths:
            loader = TextLoader(str(path))
            #st.write(loader)
            data = loader.load()
            documents.extend(data)  # Assuming data is a list of Document objects
        #st.text(documents)
        return documents

    def split_and_chunk_text(self, markdown_document: Document):
        markdown_text = markdown_document.page_content

        # Define headers to split on
        headers_to_split_on = [
            ("#", "Header 1"),
            ("##", "Header 2"),
            ("###", "Header 3"),
            ("####", "Header 4"),
        ]

        markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
        #st.write(markdown_splitter)

        md_header_splits = markdown_splitter.split_text(markdown_text)
        #st.write(md_header_splits)
        #st.write(type(md_header_splits[0]))

        parent_chunk_size = 5000
        parent_chunk_overlap = 0

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=parent_chunk_size, chunk_overlap=parent_chunk_overlap
        )

        # Split the header-split documents into chunks
        all_parent_splits = text_splitter.split_documents(md_header_splits)

        for split in all_parent_splits:
            header_3 = split.metadata.get('Header 3', '')
            header_4 = split.metadata.get('Header 4', '')

            # Prepend "Section:" to Header 4 if it exists
            if header_4:
                header_4 = f"Section: {header_4}"

            metadata_str = f"{header_3}\n\n{header_4}"
            split.page_content = f"{metadata_str}\n\n{split.page_content}"
            split.metadata['type'] = 'parents'

        return all_parent_splits

    def save_summaries(self, summaries: List[str]):
        """Save the generated summaries to a JSON file."""
        current_directory = os.getcwd()
        save_path = os.path.join(current_directory, 'savesummary', 'br18_summaries.json')
        with open(save_path, 'w') as f:
            json.dump(summaries, f)

    def load_summaries(self) -> List[str]:
        """Load summaries from a JSON file if it exists."""
        current_directory = os.getcwd()
        load_path = os.path.join(current_directory, 'savesummary', 'br18_summaries.json')
        if os.path.exists(load_path):
            with open(load_path, 'r') as f:
                summaries = json.load(f)
            return summaries
        else:
            return None  # or raise an exception, or generate new summaries

    def generate_summaries(self, parent_splits: List[Document]) -> List[str]:
        loaded_summaries = self.load_summaries()
        if loaded_summaries is not None:
            return loaded_summaries

        chain = (
            {"doc": lambda x: x.page_content}
            | ChatPromptTemplate.from_template("Summarize the following document:\n\n{doc}")
            | ChatOpenAI(max_retries=3)
            | StrOutputParser()
        )
        summaries = chain.batch(parent_splits, {"max_concurrency": 4})

        self.save_summaries(summaries)

        return summaries

    def generate_child_splits(self, parent_splits: List[Document], summaries: List[str]) -> List[Document]:
        child_chunk_size = 300

        child_text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=child_chunk_size, chunk_overlap=0
        )

        all_child_splits = []
        for i, parent_split in enumerate(parent_splits):
            child_splits = child_text_splitter.split_text(parent_split.page_content)

            new_metadata = dict(parent_split.metadata)
            new_metadata['type'] = 'children'

            summary_with_prefix = f"Summary: {summaries[i]}"

            first_child_content = f"{child_splits[0]}\n\n{summary_with_prefix}"

            first_child_split = Document(
                page_content=first_child_content,
                metadata=new_metadata
            )

            all_child_splits.append(first_child_split)  # Append only the first child split (assuming it contains the metadata)

        return all_child_splits

    def process_all_documents(self):
        all_parent_splits = []  # Local variable to store all parent splits
        all_child_splits = []   # Local variable to store all child splits

        for markdown_document in self.md_paths:
            parent_splits = self.split_and_chunk_text(markdown_document)
            all_parent_splits.extend(parent_splits)

        summaries = self.generate_summaries(all_parent_splits)
        all_child_splits = self.generate_child_splits(all_parent_splits, summaries)

        st.write(all_parent_splits)
        st.write(all_child_splits)

        return all_parent_splits, all_child_splits  # Return both lists

    def create_vectorstore(self):
        all_parent_splits, all_child_splits = self.process_all_documents()

        parent_doc_ids = [str(uuid.uuid4()) for _ in all_parent_splits]
        self.br18_parent_store.mset(list(zip(parent_doc_ids, all_parent_splits)))

        for parent_id, child_split in zip(parent_doc_ids, all_child_splits):
            child_split.metadata[self.id_key] = parent_id

        # Create and save the vector store to disk
        br18_vectorstore = Pinecone.from_documents(documents=all_child_splits, embedding=self.embeddings, index_name=self.pinecone_index_name)
        #st.write(br18_appendix_child_vectorstore)

        for i, doc in enumerate(all_parent_splits):
            doc.metadata[self.id_key] = parent_doc_ids[i]

        # Store the vector store in the session state
        st.session_state.br18_vectorstore = br18_vectorstore

        return br18_vectorstore

    def create_retriever(self, query: str):
        search_type = st.session_state.search_type

        if search_type == "By Context":
            # Initialize retriever for By Context, filtering by the presence of the "text" metadata
            general_retriever = MultiVectorRetriever(
                vectorstore=self.vectorstore,
                docstore=self.br18_parent_store,
                id_key=self.id_key,
                search_kwargs={"k": 5}
            )

            parent_docs = general_retriever.vectorstore.similarity_search(query, k=5)
            #st.write(parent_docs)

            st.session_state.doc_sources = parent_docs

            embeddings = self.embeddings

            # Initialize Redundant Filter
            redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)

            # Initialize Relevant Filter
            relevant_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.75, k=15)
            #st.write(relevant_filter)

            # Initialize Text Splitter
            splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50, separator=". ")

            # Create Compressor Pipeline
            pipeline_compressor = DocumentCompressorPipeline(
                transformers=[splitter, redundant_filter, relevant_filter]
            )

            # Initialize Contextual Compression Retriever
            contextual_general_retriever = ContextualCompressionRetriever(
                base_compressor=pipeline_compressor,
                base_retriever=general_retriever
            )

            # Retrieve parent documents that match the query
            retrieved_parent_docs = contextual_general_retriever.get_relevant_documents(query)

            # Display retrieved parent documents
            display_list = []
            for doc in retrieved_parent_docs:
                display_dict = {
                    "Page Content": doc.page_content,
                    "Doc ID": doc.metadata.get('doc_id', 'N/A'),
                    "Header 3": doc.metadata.get('Header 3', 'N/A'),
                    "Header 4": doc.metadata.get('Header 4', 'N/A'),
                }
                display_list.append(display_dict)
            #st.write(display_list)

            return retrieved_parent_docs

        elif search_type == "By Headers":
            # Initialize retriever for By Headers, filtering by the absence of the "text" metadata
            specific_retriever = MultiVectorRetriever(
                vectorstore=self.vectorstore,
                docstore=self.br18_parent_store,
                id_key=self.id_key,
                search_kwargs={"k": 3}
            )

            child_docs = specific_retriever.vectorstore.similarity_search(query, k=3)
            #st.write(child_docs)

            # Retrieve child documents that match the query
            embeddings = self.embeddings
            embedding_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.75)
            #llm_filter = LLMChainFilter.from_llm(self.llm)

            compression_retriever = ContextualCompressionRetriever(base_compressor=embedding_filter, base_retriever=specific_retriever)

            retrieved_child_docs = compression_retriever.get_relevant_documents(query)

            st.session_state.doc_sources = retrieved_child_docs

            # Display retrieved child documents
            display_list = []
            for doc in retrieved_child_docs:
                display_dict = {
                    "Page Content": doc.page_content,
                    "Doc ID": doc.metadata.get('doc_id', 'N/A'),
                    "Header 3": doc.metadata.get('Header 3', 'N/A'),
                    "Header 4": doc.metadata.get('Header 4', 'N/A'),
                }
                display_list.append(display_dict)
            #st.write(display_list)

            return retrieved_child_docs

    def run(self, query: str):
        prompt_template = """Use the following pieces of context to answer the question at the end.
        The answer should be as specific as possible to a chapter and section where clause numbers and their respective subclauses are referenced.
        Make sure to mention requirement numbers and specific integer values where relevant.
        If you don't know the answer, just say that you don't know, don't try to make up an answer.

        {context}

        Question: {question}

        EXAMPLE:
        The building regulation regarding stairs is outlined in Chapter 2 - Access, specifically in Section - Stairs:

        Width: Stairs in shared access routes must have a minimum free width of 1.0 meter. (clause 57.1)

        Headroom: Stairs must have a minimum free headroom of 2.10 meters. (clause 57.1)

        Gradient: The gradient of the stairs must not exceed 0.18 meters. (clause 57.2)
        """

        PROMPT = PromptTemplate(
            template=prompt_template, input_variables=["context", "question"]
        )

        # Retrieve the filtered documents
        retrieved_docs = self.create_retriever(query)
        #st.write(type(filtered_docs[0]))
        #st.write(filtered_docs)

        qa_chain = load_qa_chain(self.llm, chain_type="stuff", verbose=True, prompt=PROMPT)
        output = qa_chain({"input_documents": retrieved_docs, "question": query}, return_only_outputs=True)

        return output

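# --- Illustrative usage sketch (added for documentation; not called by the app) ---
# BR18_DB uses a parent/child (multi-vector) layout: short child chunks plus a
# "Summary:" line are embedded in Pinecone, while the 5000-character parent
# chunks live in the pickled InMemoryStore and are looked up via the shared
# "doc_id" key. A minimal query, assuming pinecone.init() has already run:
def _example_br18_usage(llm):
    br18 = BR18_DB(llm=llm, folder_path="BR18_DB")  # same folder MRKL.load_tools uses
    st.session_state.search_type = "By Headers"     # create_retriever reads this key
    return br18.run("stairs width requirement")     # placeholder query
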
class SummarizationTool:
    def __init__(self, document_chunks):
        self.llm = ChatOpenAI(
            temperature=0,
            streaming=True,
            model_name="gpt-3.5-turbo"
        )
        self.document_chunks = document_chunks
        self.map_prompt_template, self.combine_prompt_template = self.load_prompts()
        self.chain = self.load_summarize_chain()

    def load_prompts(self):
        map_prompt = '''
        Summarize the following text in a clear and concise way:
        TEXT:`{text}`
        Brief Summary:
        '''
        combine_prompt = '''
        Generate a summary of the following text that includes the following elements:

        * A title that accurately reflects the content of the text.
        * An introduction paragraph that provides an overview of the topic.
        * Bullet points that list the key points of the text.

        Text:`{text}`
        '''
        map_prompt_template = PromptTemplate(template=map_prompt, input_variables=["text"])
        combine_prompt_template = PromptTemplate(template=combine_prompt, input_variables=["text"])
        return map_prompt_template, combine_prompt_template

    def load_summarize_chain(self):
        return load_summarize_chain(
            llm=self.llm,
            chain_type='map_reduce',
            map_prompt=self.map_prompt_template,
            combine_prompt=self.combine_prompt_template,
            verbose=True
        )

    def run(self, query=None):
        return self.run_chain()

    def run_chain(self):
        return self.chain.run(self.document_chunks)

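# --- Illustrative usage sketch (added for documentation; not called by the app) ---
# SummarizationTool wraps LangChain's map-reduce summarization: map_prompt
# summarizes each chunk, combine_prompt merges the partial summaries into a
# titled overview with bullet points. Assuming chunks from DBStore.get_pdf_text():
def _example_summarization(document_chunks):
    return SummarizationTool(document_chunks=document_chunks).run()
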
class CustomGoogleSearchAPIWrapper(GoogleSearchAPIWrapper):

    def clean_text(self, text: str) -> str:
        # Remove extra whitespace and line breaks
        text = ' '.join(text.split())
        return text

    def scrape_content(self, url: str) -> dict:
        loader = SeleniumURLLoader(urls=[url])
        data = loader.load()

        if data is not None and len(data) > 0:
            soup = BeautifulSoup(data[0].page_content, "html.parser")
            text = soup.get_text()
            cleaned_text = self.clean_text(text)
            return {'url': url, 'content': cleaned_text[:1000]}  # Return the first 1000 characters of the cleaned text
        return {'url': url, 'content': ''}

    def fetch_and_scrape(self, query: str, num_results: int = 3) -> str:
        # Step 1: Fetch search results metadata
        metadata_results = self.results(query, num_results)

        if len(metadata_results) == 0:
            return '[URL: None, Content: No good Google Search Result was found]'

        # Step 2: Extract URLs
        urls = [result.get("link", "") for result in metadata_results if "link" in result]

        # Step 3: Scrape content from URLs
        texts = []
        for url in urls:
            scraped_content = self.scrape_content(url)
            formatted_text = f"[URL: {scraped_content['url']}, Content: {scraped_content['content']}]"
            texts.append(formatted_text)

        return " ".join(texts)[:3000]

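# --- Illustrative usage sketch (added for documentation; not called by the app) ---
# fetch_and_scrape first queries the Google Custom Search API for result
# metadata, then loads each hit with Selenium and returns "[URL: ..., Content: ...]"
# snippets capped at 3000 characters. It assumes the GOOGLE_API_KEY and
# GOOGLE_CSE_ID environment variables required by GoogleSearchAPIWrapper are set.
def _example_google_search():
    search = CustomGoogleSearchAPIWrapper()
    return search.fetch_and_scrape("Danish building regulation BR18", num_results=3)
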
class MRKL:
    def __init__(self):
        self.tools = self.load_tools()
        self.agent_executor, self.memory = self.load_agent()

    def load_tools(self):
        # Load tools
        llm = ChatOpenAI(
            temperature=0,
            streaming=True,
            model_name="gpt-3.5-turbo"
        )
        llm_math = LLMMathChain(llm=llm)
        llm_search = CustomGoogleSearchAPIWrapper()

        current_directory = os.getcwd()

        tools = [
            Tool(
                name="Google_Search",
                func=llm_search.fetch_and_scrape,
                description="Useful when you cannot find a clear answer after looking up the database and you need to search the internet for information. Input should be a fully formed question based on the context of what you couldn't find, not referencing any obscure pronouns from the conversation before"
            ),
            Tool(
                name='Calculator',
                func=llm_math.run,
                description='Useful for when you need to answer questions about math.'
            ),
        ]

        if st.session_state.vector_store is not None:
            metadata = st.session_state.document_metadata
            file_name = st.session_state.document_filename
            llm_database = DatabaseTool(llm=llm, vector_store=st.session_state.vector_store, metadata=metadata, filename=file_name)

            #st.write(llm_database.get_description())

            tools.append(
                Tool(
                    name='Document_Database',
                    func=llm_database.run,
                    description=llm_database.get_description(),
                ),
            )

        if st.session_state.br18_exp is True:
            br18_folder_path = os.path.join(current_directory, "BR18_DB")
            llm_br18 = BR18_DB(llm=llm, folder_path=br18_folder_path)

            tools.extend([
                Tool(
                    name='BR18_Database',
                    func=llm_br18.run,
                    description="""
                    Always useful for when you need to answer questions about the Danish Building Regulation 18 (BR18).
                    Input should be the specific keywords from the user query. Exclude the following common terms and their variations or synonyms, especially words such as "building" and "regulation".
                    Use this tool more often than the normal search tool.
                    """
                ),
            ])
        return tools

    def load_agent(self):
        llm = ChatOpenAI(
            temperature=0,
            streaming=True,
            model_name="gpt-3.5-turbo",
        )

        # Memory
        chat_msg = StreamlitChatMessageHistory(key="mrkl_chat_history")
        memory_key = "history"
        memory = AgentTokenBufferMemory(memory_key=memory_key, llm=llm, input_key='input', output_key="output", max_token_limit=8000, chat_memory=chat_msg)
        st.session_state.history = memory

        system_message_content = """
        You are MRKL, an expert in construction, legal frameworks, and regulatory matters.

        You are designed to be an AI Chatbot for the engineering firm COWI, and you have the following tools to answer user queries, but only use them if necessary.

        Unless otherwise explicitly stated, the user queries are about the context given.

        Your primary objective is to provide responses that:
        1. Offer an overview of the topic, referencing the chapter and the section if relevant
        2. List key points in bullet-point or numbered-list format, referencing the clauses and their respective subclauses if relevant.
        3. Always match or exceed the detail of the tool's output text in your answers.
        4. Reflect back on the user's question and give a concise conclusion.

        You must maintain a professional and helpful demeanor in all interactions.
        """

        # System Message
        system_message = SystemMessage(content=system_message_content)

        reflection_message_content = """
        Reminder:
        Always try all your tools to find the right answer, with the search tool as your last resort.
        Always self-reflect on your answer based on the user's query and follow the list of response objectives.
        """

        reflection_message = SystemMessage(content=reflection_message_content)

        # Prompt
        prompt = OpenAIFunctionsAgent.create_prompt(
            system_message=system_message,
            extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key), reflection_message]
        )

        # Agent
        agent = OpenAIFunctionsAgent(llm=llm, tools=self.tools, prompt=prompt)

        # Agent Executor
        agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=self.tools, memory=memory, verbose=True, return_intermediate_steps=True)

        return agent_executor, memory

    def clear_conversation(self):
        self.memory.clear()

    def run_agent(self, input, callbacks=[]):
        with get_openai_callback() as cb:
            result = self.agent_executor({"input": input}, callbacks=callbacks)
            st.session_state.token_count = cb
            print(cb)
            return result

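# --- Illustrative usage sketch (added for documentation; not called by the app) ---
# The agent can also be exercised without the Streamlit callback handler by
# passing an empty callbacks list; note that load_tools still reads several
# session-state keys (vector_store, br18_exp), so this only works inside a
# configured Streamlit session.
def _example_agent_run():
    agent = MRKL()
    result = agent.run_agent(input="What is BR18?", callbacks=[])
    return result.get("output", "")
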
def main():
    load_dotenv()
    pinecone.init(
        api_key=os.environ["PINECONE_API_KEY"], environment=os.environ["PINECONE_ENV"]
    )

    st.set_page_config(page_title="MRKL AGENT", page_icon="🦜️", layout="wide")
    st.title("🦜️ MRKL AGENT")

    if 'openai' not in st.session_state:
        st.session_state.openai = None
    if "messages" not in st.session_state:
        st.session_state.messages = [{"roles": "assistant", "content": "Hi, I am Miracle. How can I help you?"}]
    if "user_input" not in st.session_state:
        st.session_state.user_input = None
    if "vector_store" not in st.session_state:
        st.session_state.vector_store = None
    if "summary" not in st.session_state:
        st.session_state.summary = None
    if "doc_sources" not in st.session_state:
        st.session_state.doc_sources = []
    if "br18_vectorstore" not in st.session_state:
        st.session_state.br18_vectorstore = None
    if "history" not in st.session_state:
        st.session_state.history = None
    if 'br18_exp' not in st.session_state:
        st.session_state.br18_exp = False
    if "token_count" not in st.session_state:
        st.session_state.token_count = 0

    if "agent" not in st.session_state:
        st.session_state.agent = MRKL()
    if 'show_info' not in st.session_state:
        st.session_state.show_info = False

    with st.expander("Configuration", expanded=False):
        openai_api_key = st.text_input("Enter OpenAI API Key", value="", placeholder="Enter the OpenAI API key which begins with sk-", type="password")
        if openai_api_key:
            st.session_state.openai = openai_api_key
            os.environ["OPENAI_API_KEY"] = openai_api_key
            st.write("API key has been entered")

    with st.sidebar:
        br18_experiment = st.checkbox("Experimental Feature: Enable BR18", value=False)
        if br18_experiment != st.session_state.br18_exp:
            st.session_state.br18_exp = br18_experiment
            st.session_state.agent = MRKL()

        if br18_experiment:  # If BR18 is enabled
            search_type = st.radio(
                "Select Search Type:",
                options=["By Headers", "By Context"],
                index=0, horizontal=True  # Default to "By Headers"
            )
            st.session_state.search_type = search_type

    st.sidebar.title("Upload Document to Database")
    uploaded_files = st.sidebar.file_uploader("Choose a file", accept_multiple_files=True)  # You can specify the types of files you want to accept
    if uploaded_files:
        file_details = {"FileName": [], "FileType": [], "FileSize": []}

        # Populate file_details using traditional loops
        for file in uploaded_files:
            file_details["FileName"].append(file.name)
            file_details["FileType"].append(file.type)
            file_details["FileSize"].append(file.size)

        # Use a selectbox to choose a file
        selected_file_name = st.sidebar.selectbox('Choose a file:', file_details["FileName"], on_change=on_selectbox_change)

        # Get the index of the selected file
        file_index = file_details["FileName"].index(selected_file_name)

        # Display details of the selected file
        st.sidebar.write("You selected:")
        st.sidebar.write("FileName : ", file_details["FileName"][file_index])
        st.sidebar.write("FileType : ", file_details["FileType"][file_index])
        st.sidebar.write("FileSize : ", file_details["FileSize"][file_index])

        # Add a note to remind the user to press the "Process" button
        if st.session_state.show_info:
            st.sidebar.info("**Note:** Remember to press the 'Process' button for the current selection.")
            st.session_state.show_info = False

        with st.sidebar:
            if st.sidebar.button("Process"):
                with st.spinner("Processing"):
                    selected_file = uploaded_files[file_index]
                    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
                        tmpfile.write(selected_file.getvalue())
                        temp_path = tmpfile.name
                    db_store = DBStore(temp_path, selected_file.name)

                    document_chunks = db_store.get_pdf_text()
                    st.session_state.document_chunks = document_chunks
                    #st.write(document_chunks)

                    vector_store = db_store.get_vectorstore()
                    st.session_state.vector_store = vector_store

                    st.session_state.agent = MRKL()

                    primed_info_response = db_store.get_info_response()
                    #st.write(primed_info_response)
                    st.session_state.history.chat_memory.add_ai_message(primed_info_response)

                    st.session_state.messages.append({"roles": "assistant", "content": primed_info_response})

                    st.success("PDF uploaded successfully!")

            if "document_chunks" in st.session_state:
                if st.sidebar.button("Create Detailed Summary"):
                    with st.spinner("Summarizing"):
                        summarization_tool = SummarizationTool(document_chunks=st.session_state.document_chunks)
                        st.session_state.summary = summarization_tool.run()
                        # Append the summary to the chat messages
                        st.session_state.messages.append({"roles": "assistant", "content": st.session_state.summary})
    else:
        st.session_state.vector_store = None

    display_messages(st.session_state.messages)

    if user_input := st.chat_input("Type something here..."):
        st.session_state.user_input = user_input
        st.session_state.messages.append({"roles": "user", "content": st.session_state.user_input})
        st.chat_message("user").write(st.session_state.user_input)

        current_user_message = {"input": st.session_state.user_input}

        with st.chat_message("assistant"):
            st_callback = StreamlitCallbackHandler(st.container(), expand_new_thoughts=True)
            result = st.session_state.agent.run_agent(input=st.session_state.user_input, callbacks=[st_callback])
            st.session_state.result = result
            response = result.get('output', '')
            st.session_state.messages.append({"roles": "assistant", "content": response})
            st.write(response)

        current_assistant_response = {"output": response}

        current_messages = [current_user_message, current_assistant_response]

    with st.expander("View Document Sources"):
        if len(st.session_state.doc_sources) != 0:
            for document in st.session_state.doc_sources:
                st.divider()
                st.subheader("Source Content:")
                st.write(document.page_content)
                st.subheader("Metadata:")

                # Display only relevant metadata keys
                relevant_keys = ["Header ", "Header 3", "Header 4", "page_number", "source", "file_name", "title", "author"]
                for key in relevant_keys:
                    value = document.metadata.get(key, 'N/A')
                    if value != 'N/A':
                        st.write(f"{key}: {value}")
        else:
            st.write("No document sources found")

    if st.session_state.summary is not None:
        with st.expander("Show Summary"):
            st.subheader("Summarization")
            result_summary = st.session_state.summary
            st.write(result_summary)

    #with st.expander("Cost Tracking", expanded=True):
        #total_token = st.session_state.token_count
        #st.write(total_token)

    buttons_placeholder = st.container()
    with buttons_placeholder:
        #st.button("Regenerate Response", key="regenerate", on_click=st.session_state.agent.regenerate_response)
        st.button("Clear Chat", key="clear", on_click=reset_chat)

    #st.write(st.session_state.history)
    #st.write(st.session_state.messages)
    #st.write(st.session_state.br18_vectorstore)
    #st.write(st.session_state.br18_appendix_child_vectorstore)
    #st.write(st.session_state.usc_vectorstore)
    #st.write(st.session_state.agent)
    #st.write(st.session_state.result)


if __name__ == '__main__':
    main()
requirements.txt
ADDED
@@ -0,0 +1,14 @@
beautifulsoup4==4.12.2
langchain==0.0.288
lark==1.1.7
openai==0.27.8
pinecone_client==2.2.2
pydantic==1.10.12
pypdf==3.16.0
PyPDF2==3.0.1
python-dotenv==1.0.0
Requests==2.31.0
streamlit==1.25.0
savesummary/br18_summaries.json
ADDED
The diff for this file is too large to render. See raw diff.