quantuan125 committed on
Commit
19b69b8
1 Parent(s): 7c3bdae
.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "name": "Python 3",
+   // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
+   "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
+   "customizations": {
+     "codespaces": {
+       "openFiles": [
+         "README.md",
+         "mrkl.py"
+       ]
+     },
+     "vscode": {
+       "settings": {},
+       "extensions": [
+         "ms-python.python",
+         "ms-python.vscode-pylance"
+       ]
+     }
+   },
+   "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
+   "postAttachCommand": {
+     "server": "streamlit run mrkl.py --server.enableCORS false --server.enableXsrfProtection false"
+   },
+   "portsAttributes": {
+     "8501": {
+       "label": "Application",
+       "onAutoForward": "openPreview"
+     }
+   },
+   "forwardPorts": [
+     8501
+   ]
+ }
.gitignore ADDED
@@ -0,0 +1,180 @@
+ # Created by https://www.toptal.com/developers/gitignore/api/python
+ # Edit at https://www.toptal.com/developers/gitignore?templates=python
+ 
+ ### Python ###
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ 
+ # C extensions
+ *.so
+ 
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+ 
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+ 
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+ 
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+ 
+ # Translations
+ *.mo
+ *.pot
+ 
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+ 
+ # Flask stuff:
+ instance/
+ .webassets-cache
+ 
+ # Scrapy stuff:
+ .scrapy
+ 
+ # Sphinx documentation
+ docs/_build/
+ 
+ # PyBuilder
+ .pybuilder/
+ target/
+ 
+ # Jupyter Notebook
+ .ipynb_checkpoints
+ 
+ # IPython
+ profile_default/
+ ipython_config.py
+ 
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+ 
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+ 
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+ 
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+ 
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+ 
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+ 
+ # SageMath parsed files
+ *.sage.py
+ 
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+ 
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+ 
+ # Rope project settings
+ .ropeproject
+ 
+ # mkdocs documentation
+ /site
+ 
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+ 
+ # Pyre type checker
+ .pyre/
+ 
+ # pytype static type analyzer
+ .pytype/
+ 
+ # Cython debug symbols
+ cython_debug/
+ 
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+ 
+ ### Python Patch ###
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+ poetry.toml
+ 
+ # ruff
+ .ruff_cache/
+ 
+ # LSP config files
+ pyrightconfig.json
+ 
+ # End of https://www.toptal.com/developers/gitignore/api/python
+ 
+ secrets.toml
+ 
+ storage/
.streamlit/config.toml ADDED
@@ -0,0 +1,193 @@
+ [global]
+ 
+ # By default, Streamlit checks if the Python watchdog module is available and, if not, prints a warning asking for you to install it. The watchdog module is not required, but highly recommended. It improves Streamlit's ability to detect changes to files in your filesystem.
+ # If you'd like to turn off this warning, set this to True.
+ # Default: false
+ # disableWatchdogWarning = false
+ 
+ # If True, will show a warning when you run a Streamlit-enabled script via "python my_script.py".
+ # Default: true
+ # showWarningOnDirectExecution = true
+ 
+ # DataFrame serialization.
+ # Acceptable values: - 'legacy': Serialize DataFrames using Streamlit's custom format. Slow but battle-tested. - 'arrow': Serialize DataFrames using Apache Arrow. Much faster and versatile.
+ # Default: "arrow"
+ # dataFrameSerialization = "arrow"
+ 
+ 
+ [logger]
+ 
+ # Level of logging: 'error', 'warning', 'info', or 'debug'.
+ # Default: 'info'
+ # level = "info"
+ 
+ # String format for logging messages. If logger.datetimeFormat is set, logger messages will default to `%(asctime)s.%(msecs)03d %(message)s`. See [Python's documentation](https://docs.python.org/2.6/library/logging.html#formatter-objects) for available attributes.
+ # Default: "%(asctime)s %(message)s"
+ # messageFormat = "%(asctime)s %(message)s"
+ 
+ 
+ [client]
+ 
+ # Whether to enable st.cache.
+ # Default: true
+ # caching = true
+ 
+ # If false, makes your Streamlit script not draw to a Streamlit app.
+ # Default: true
+ # displayEnabled = true
+ 
+ # Controls whether uncaught app exceptions and deprecation warnings are displayed in the browser. By default, this is set to True and Streamlit displays app exceptions and associated tracebacks, and deprecation warnings, in the browser.
+ # If set to False, an exception or deprecation warning will result in a generic message being shown in the browser, and exceptions, tracebacks, and deprecation warnings will be printed to the console only.
+ # Default: true
+ # showErrorDetails = true
+ 
+ 
+ [runner]
+ 
+ # Allows you to type a variable or string by itself in a single line of Python code to write it to the app.
+ # Default: true
+ # magicEnabled = true
+ 
+ # Install a Python tracer to allow you to stop or pause your script at any point and introspect it. As a side-effect, this slows down your script's execution.
+ # Default: false
+ # installTracer = false
+ 
+ # Sets the MPLBACKEND environment variable to Agg inside Streamlit to prevent Python crashing.
+ # Default: true
+ # fixMatplotlib = true
+ 
+ # Run the Python Garbage Collector after each script execution. This can help avoid excess memory use in Streamlit apps, but could introduce delay in rerunning the app script for high-memory-use applications.
+ # Default: true
+ # postScriptGC = true
+ 
+ # Handle script rerun requests immediately, rather than waiting for script execution to reach a yield point. This makes Streamlit much more responsive to user interaction, but it can lead to race conditions in apps that mutate session_state data outside of explicit session_state assignment statements.
+ # Default: true
+ # fastReruns = true
+ 
+ 
+ [server]
+ 
+ # List of folders that should not be watched for changes. This impacts both "Run on Save" and @st.cache.
+ # Relative paths will be taken as relative to the current working directory.
+ # Example: ['/home/user1/env', 'relative/path/to/folder']
+ # Default: []
+ # folderWatchBlacklist = []
+ 
+ # Change the type of file watcher used by Streamlit, or turn it off completely.
+ # Allowed values: * "auto" : Streamlit will attempt to use the watchdog module, and falls back to polling if watchdog is not available. * "watchdog" : Force Streamlit to use the watchdog module. * "poll" : Force Streamlit to always use polling. * "none" : Streamlit will not watch files.
+ # Default: "auto"
+ # fileWatcherType = "auto"
+ 
+ # Symmetric key used to produce signed cookies. If deploying on multiple replicas, this should be set to the same value across all replicas to ensure they all share the same secret.
+ # Default: randomly generated secret key.
+ # cookieSecret = "9652e181a88de73c43d3d6cc721ed5ff76590974fa081b5fe449f72fd1d3321a"
+ 
+ # If false, will attempt to open a browser window on start.
+ # Default: false unless (1) we are on a Linux box where DISPLAY is unset, or (2) we are running in the Streamlit Atom plugin.
+ # headless = false
+ 
+ # Automatically rerun script when the file is modified on disk.
+ # Default: false
+ # runOnSave = false
+ 
+ # The address where the server will listen for client and browser connections. Use this if you want to bind the server to a specific address. If set, the server will only be accessible from this address, and not from any aliases (like localhost).
+ # Default: (unset)
+ # address =
+ 
+ # The port where the server will listen for browser connections.
+ # Default: 5998
+ port = 5998
+ 
+ # The base path for the URL where Streamlit should be served from.
+ # Default: ""
+ # baseUrlPath = ""
+ 
+ # Enables support for Cross-Origin Resource Sharing (CORS) protection, for added security.
+ # Due to conflicts between CORS and XSRF, if `server.enableXsrfProtection` is on and `server.enableCORS` is off at the same time, we will prioritize `server.enableXsrfProtection`.
+ # Default: true
+ # enableCORS = true
+ 
+ # Enables support for Cross-Site Request Forgery (XSRF) protection, for added security.
+ # Due to conflicts between CORS and XSRF, if `server.enableXsrfProtection` is on and `server.enableCORS` is off at the same time, we will prioritize `server.enableXsrfProtection`.
+ # Default: true
+ # enableXsrfProtection = true
+ 
+ # Max size, in megabytes, for files uploaded with the file_uploader.
+ # Default: 200
+ maxUploadSize = 300
+ 
+ # Max size, in megabytes, of messages that can be sent via the WebSocket connection.
+ # Default: 200
+ maxMessageSize = 300
+ 
+ # Enables support for websocket compression.
+ # Default: false
+ # enableWebsocketCompression = false
+ 
+ # Enable serving files from a `static` directory in the running app's directory.
+ # Default: false
+ # enableStaticServing = false
+ 
+ # Server certificate file for connecting via HTTPS. Must be set at the same time as "server.sslKeyFile".
+ # ['DO NOT USE THIS OPTION IN A PRODUCTION ENVIRONMENT. It has not gone through security audits or performance tests. For the production environment, we recommend performing SSL termination by the load balancer or the reverse proxy.']
+ # sslCertFile =
+ 
+ # Cryptographic key file for connecting via HTTPS. Must be set at the same time as "server.sslCertFile".
+ # ['DO NOT USE THIS OPTION IN A PRODUCTION ENVIRONMENT. It has not gone through security audits or performance tests. For the production environment, we recommend performing SSL termination by the load balancer or the reverse proxy.']
+ # sslKeyFile =
+ 
+ 
+ [browser]
+ 
+ # Internet address where users should point their browsers in order to connect to the app. Can be IP address or DNS name and path.
+ # This is used to: - Set the correct URL for CORS and XSRF protection purposes. - Show the URL on the terminal - Open the browser
+ # Default: "localhost"
+ # serverAddress = "localhost"
+ 
+ # Whether to send usage statistics to Streamlit.
+ # Default: true
+ # gatherUsageStats = true
+ 
+ # Port where users should point their browsers in order to connect to the app.
+ # This is used to: - Set the correct URL for CORS and XSRF protection purposes. - Show the URL on the terminal - Open the browser
+ # Default: whatever value is set in server.port.
+ # serverPort = 5998
+ 
+ 
+ [mapbox]
+ 
+ # Configure Streamlit to use a custom Mapbox token for elements like st.pydeck_chart and st.map. To get a token for yourself, create an account at https://mapbox.com. It's free (for moderate usage levels)!
+ # Default: ""
+ # token = ""
+ 
+ 
+ [deprecation]
+ 
+ # Set to false to disable the deprecation warning for the file uploader encoding.
+ # Default: true
+ # showfileUploaderEncoding = true
+ 
+ # Set to false to disable the deprecation warning for using the global pyplot instance.
+ # Default: true
+ # showPyplotGlobalUse = true
+ 
+ 
+ [theme]
+ 
+ # The preset Streamlit theme that your custom theme inherits from. One of "light" or "dark".
+ # base =
+ 
+ # Primary accent color for interactive elements.
+ # primaryColor =
+ 
+ # Background color for the main content area.
+ # backgroundColor =
+ 
+ # Background color used for the sidebar and most interactive widgets.
+ # secondaryBackgroundColor =
+ 
+ # Color used for almost all text.
+ # textColor =
+ 
+ # Font family for all text in the app, except code blocks. One of "sans serif", "serif", or "monospace".
+ # font =
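
Note: option keys in this file follow the `section.name` scheme, and command-line flags (such as the `--server.enableCORS false` passed in the devcontainer's `postAttachCommand`) take precedence over values set here. A minimal sketch for checking the effective values from inside the app, assuming Streamlit's public `st.get_option` accessor:

```python
import streamlit as st

# Option keys mirror the [section] / name structure of .streamlit/config.toml.
st.write("server.port:", st.get_option("server.port"))                    # 5998 per this file
st.write("server.maxUploadSize:", st.get_option("server.maxUploadSize"))  # 300 MB

# CLI flags outrank config.toml, so the devcontainer's --server.enableCORS false
# wins over any enableCORS value written here.
st.write("server.enableCORS:", st.get_option("server.enableCORS"))
```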
BR18_DB/1_BR18_Executive_order_on_building_regulations_2018.md ADDED
The diff for this file is too large to render. See raw diff
 
BR18_DB/2_BR18_Executive_order_on_building_regulations_2018_Appendix.md ADDED
@@ -0,0 +1,265 @@
+ 
+ ### Appendix 1 - Tables for Chapter 5 – Fire
+ 
+ #### Table 1 - Categories of use
+ 
+ | Category of use | The building section includes sleeping facilities | Persons in the building section have been informed of escape routes | Options allowing persons to bring themselves to safety unassisted | Maximum number of persons which the room accommodates |
+ | --------------- | ------------------------------------------------- | ------------------------------------------------------------------- | ----------------------------------------------------------------- | ----------------------------------------------------- |
+ | 1 | No | Yes | Yes | No limitations |
+ | 2 | No | No | Yes | 50 max. |
+ | 3 | No | No | Yes | No limitations |
+ | 4 | Yes | Yes | Yes | No limitations |
+ | 5 | Yes | No | Yes | No limitations |
+ | 6 | No | No | No | No limitations |
+ 
+ #### Table 2 - Risk classes
+ 
+ | Risk class | Category of use of the building | The layout of the building, the total number of persons in the building section with shared escape routes and fire load |
+ | --------------- | ------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------- |
+ | 1 | 1 | General: Buildings with maximum 1 storey above ground level and maximum 1 storey below ground level, and the fire load of the building section may not exceed 1,600 MJ/sq. metre floor area |
+ | 1 | 4 | General: Buildings with maximum 1 storey above ground level and maximum 1 storey below ground level, or detached, semi-detached and terraced single-family houses and holiday homes with maximum 2 storeys above ground level and maximum 1 storey below ground level |
+ | 2 | 1 | General: Buildings with top storey floors maximum 9.6 metres above ground level and maximum 1 storey below ground level, or buildings with maximum 1 storey above ground level and maximum 1 storey below ground level with a possible fire load in the building section exceeding 1,600 MJ/sq. metre floor area |
+ | 2 | 4 | General: Buildings with top storey floors maximum 9.6 metres above ground level and maximum 1 storey below ground level |
+ | 2 | 2, 5, 6 | General: Buildings with maximum 1 storey above ground level and maximum 1 storey below ground level |
+ | 2 | 3 | General: Buildings with maximum 1 storey above ground level and maximum 1 storey below ground level, and the number of persons in a building section with shared escape routes may not exceed 1,000 |
+ | 3 | 1 and 4 | General: Buildings with top storey floors between 9.6 metres and 45 metres above ground level and maximum 1 storey below ground level |
+ | 3 | 2 and 5 | General: Buildings with top storey floors maximum 22 metres above ground level and maximum 1 storey below ground level |
+ | 3 | 3 | General: Buildings with top storey floors maximum 22 metres above ground level, maximum 1 storey below ground level and maximum 150 persons in the building section, or buildings with maximum 2 storeys above ground level and maximum 1,000 persons in the building section |
+ | 3 | 6 | General: Buildings with maximum 2 storeys above ground level and maximum 1 storey below ground level |
+ | 4 | 1 to 6 | General: Buildings not covered by risk classes 1 - 3 |
+ 
+ 
+ ### Appendix 2 - Tables for Chapter 11 – Energy Consumption
+ 
+ #### Table 1 - General Minimum Requirements for Building Envelopes
+ 
+ | Building part | U value [W/sq. metre K] |
+ | ------------- | ----------------------- |
+ | Outer walls and basement walls adjacent to soil | 0.30 |
+ | Storey partitions and partition walls adjacent to rooms, where the temperature difference between the rooms is 5 °C or more | 0.40 |
+ | Ground slab, basement floors adjacent to soil and storey partitions to open air or a ventilated crawl space | 0.20 |
+ | Storey partitions under floors with underfloor heating adjacent to heated rooms | 0.50 |
+ | Ceiling and roof structures, including cupboards under roof slopes, flat roofs and sloping walls adjacent to roofs | 0.20 |
+ | Outer doors without glass panes. Reference dimensions 1.23 metres x 2.18 metres | 1.40 |
+ | Outer doors with glass panes. Reference dimensions 1.23 metres x 2.18 metres | 1.50 |
+ | Gates and hatches opening to the outside or to unheated rooms, and glass walls and windows adjacent to rooms heated to a temperature creating a temperature difference between the rooms of 5 °C or more | 1.80 |
+ | Dome lights | 1.40 |
+ | Insulated sections of glass outer walls. The requirement is for the centre U value | 0.60 |
+ | Storey partitions and walls adjacent to freezing rooms | 0.15 |
+ | Storey partitions and walls adjacent to refrigeration rooms | 0.25 |
+ 
+ | Building part | Linear thermal transmittance [W/metre K] |
+ | ------------- | ---------------------------------------- |
+ | Foundations around rooms which are heated to minimum 5 °C | 0.40 |
+ | Joint between outer wall and windows or outer doors, gates and hatches | 0.06 |
+ | Junction between roof structure and skylights or dome lights | 0.20 |
+ 
+ 
+ #### Table 2 - Minimum requirements for building envelope in case of changes in use
+ 
+ | Building part | U value [W/sq. metre K] for room heated to T > 15 °C | U value [W/sq. metre K] for room heated to 5 °C < T < 15 °C |
+ | ------------- | ---------------------------------------------------- | ----------------------------------------------------------- |
+ | Outer walls and basement walls adjacent to soil | 0.15 | 0.25 |
+ | Storey partitions and partition walls adjacent to rooms with a room temperature difference of 5 °C or more | 0.40 | 0.40 |
+ | Ground slab, basement floors adjacent to soil and storey partitions over open air or a ventilated crawl space | 0.10 | 0.15 |
+ | Ceiling and roof structures, including cupboards under roof slopes, flat roofs and sloping walls adjacent to roof | 0.12 | 0.15 |
+ | Gates | 1.80 | 1.80 |
+ | Hatches opening to open air or to rooms with a temperature difference of 5 °C or more between the rooms (does not include vents of less than 500 sq. centimetres) | 1.40 | 1.50 |
+ | Dome lights | 1.40 | 1.80 |
+ 
+ | Building part | Linear thermal transmittance [W/metre K] for room heated to T > 15 °C | Linear thermal transmittance [W/metre K] for room heated to 5 °C < T < 15 °C |
+ | ------------- | ---------------------------------------------------- | ----------------------------------------------------------- |
+ | Foundations | 0.12 | 0.20 |
+ | Joint between outer wall and windows or outer doors, gates and hatches | 0.03 | 0.03 |
+ | Junction between roof structure and skylights or dome lights | 0.10 | 0.10 |
+ 
+ 
+ #### Table 3 - Minimum requirements for building envelope in connection with conversions and other changes to the building
+ 
+ | Building part | U value [W/sq. metre K] |
+ | ------------- | ----------------------- |
+ | Outer walls and basement walls adjacent to soil | 0.18 |
+ | Storey partitions and partition walls adjacent to rooms with a room temperature difference between the rooms of 5 °C or more | 0.40 |
+ | Ground slab, basement floors adjacent to soil and storey partitions to open air or a ventilated crawl space | 0.10 |
+ | Ceiling and roof structures, including cupboards under roof slopes, flat roofs and sloping walls adjacent to roofs | 0.12 |
+ | Gates | 1.80 |
+ | Hatches, storm windows and dome lights | 1.40 |
+ | Renovated storm windows | 1.65 |
+ 
+ | Building part | Linear thermal transmittance [W/metre K] |
+ | ------------- | ---------------------------------------- |
+ | Foundations | 0.12 |
+ | Joint between outer wall, windows or outer doors, gates and hatches | 0.03 |
+ | Junction between roof structure and skylights or dome lights | 0.10 |
+ 
+ #### Table 4 - Minimum requirements for building envelope for holiday homes, camping cabins and similar holiday accommodation
+ 
+ | Building part | U value [W/sq. metre K] |
+ | ------------- | ----------------------- |
+ | Outer walls and basement walls adjacent to soil | 0.25 |
+ | Partition walls and storey partitions adjacent to unheated rooms | 0.40 |
+ | Ground slab, basement floors adjacent to soil and storey partitions to open air or a ventilated crawl space | 0.15 |
+ | Ceiling and roof structures, including cupboards under roof slopes, and flat roofs | 0.15 |
+ | Windows, outer doors, skylights, glass outer walls, glass roofs and dome lights to open air or unheated rooms | 1.80 |
+ 
+ | Building part | Linear thermal transmittance [W/metre K] |
+ | ------------- | ---------------------------------------- |
+ | Foundations | 0.15 |
+ | Joint between outer wall and windows or outer doors, gates and hatches | 0.03 |
+ | Junction between roof structure and skylights or dome lights | 0.10 |
+ 
+ #### Table 5 - Minimum requirements for building envelope for temporary portable pavilions
+ 
+ | Building part | U value [W/sq. metre K] |
+ | ------------- | ----------------------- |
+ | Outer walls | 0.20 |
+ | Storey partitions and partition walls adjacent to rooms, where the temperature difference between the rooms is 5 °C or more | 0.40 |
+ | Floor and storey partitions above open air or a ventilated crawl space | 0.12 |
+ | Ceiling and roof structures, including cupboards under roof slopes, flat roofs and sloping walls adjacent to roof | 0.15 |
+ | Gates and hatches opening to the outside or to unheated rooms, and glass walls and windows adjacent to rooms heated to a temperature creating a temperature difference between the rooms of 5 °C or more | 1.80 |
+ | Dome lights | 1.80 |
+ 
+ | Building part | Linear thermal transmittance [W/metre K] |
+ | ------------- | ---------------------------------------- |
+ | Foundations | 0.20 |
+ | Joint between outer wall, windows or outer doors, gates and hatches | 0.03 |
+ | Junction between roof structure and skylights or dome lights | 0.10 |
+ 
+ 
+ ### Appendix 3 – Tables for Chapter 30 – Inspection of documentation of load-bearing structures and fire safety
+ 
+ #### Table 1 - Minimum requirements for types of inspection depending on fire and structure class
+ 
+ | Fire or structure class | Internal inspection | Independent inspection | Third-party inspection |
+ |------------------------|---------------------|------------------------|------------------------|
+ | 2 | X | X* | |
+ | 3 | X | X | |
+ | 4 | X | X | X |
+ 
+ > X* For buildings in fire class 2, independent inspection of documentation must be carried out in relation to fire classification, declarations regarding whether the building is traditional in terms of contingency tactics, the fire strategy report and the inspection plan, see ss. 510-512 and s. 518 (fire safety documentation). For structure class 2, the requirement for independent inspection only applies to A1. Structural basis and B2. Structural inspection plan, see s. 501, para (1) and s. 502(1), para (2). For the remaining part of the documentation, inspection may be carried out by persons who did not participate in the preparation of the documentation for the relevant structure/structural section, but who participated in the planning of the building.
+ 
+ > X stands for required.
+ 
+ 
+ ### Appendix 4 - Tables for Chapter 33 – The Work of the Certified Structural Engineer
+ 
+ #### Table 1 - Minimum Requirements for the Work of the Certified Structural Engineer as an Active Planner
+ 
+ | ID | Subject | KK2 | KK3 | KK4 |
+ |------|-------------------------------------------------------------------|-----------|-----------|-----------|
+ | A1 | Structural basis | Kmax + G | U + G | U + G |
+ | A2 | Structural calculations | | | |
+ | | A2. 1 Structural calculations – building | Kext + G | Kmax + G | UA + G |
+ | | A2. 2 Structural calculations – structural section, own organisation | Kext + G | Kext + G | Kmax + G |
+ | | A2. 2 Structural calculations – structural section, other organisation | Kext | Kext | Kmax |
+ | A3 | Structural drawings and models | | | |
+ | | A3. 1 Structural drawings and models – building | Kext | Kmax | Kmax + G |
+ | | A3. 2 Structural drawings and models – structural section, own organisation | Kext | Kmax | Kmax |
+ | | A3. 2 Structural drawings and models – structural section, other organisation | Kext | Kext | Kmax |
+ | A4 | Structural changes | | | |
+ | | A4. 1 Structural changes, own organisation | Kext | Kmax | Kmax + G |
+ | | A4. 2 Structural changes, other organisation | Kext | Kext | Kmax |
+ | B1 | Structural project report | Kext + G | Kext + G | U + G |
+ | B2 | Structural inspection plan | Kmax + G | U + G | U + G |
+ | B3 | Structural inspection report | | | |
+ | | B3. 1 Structural inspection report – planning | Kext | Kmax | Kmax |
+ | | B3. 2 Structural inspection report – implementation | Kext | Kext | Kmax |
+ 
+ > A): The requirement for U only includes vertical and horizontal lowering of loads and robustness. For other parts, "Kmax + G" is required.
+ > **Note**: KK represents structure class, which can be found in clause 489 - Chapter 26 - Structure classes
+ 
+ #### Table 2 - Minimum requirements for the work of the certified structural engineer as an active inspector
+ 
+ | ID | Subject | KK2 | KK3 | KK4 |
+ |------|-------------------------------------------------------------------|-----------|-----------|-----------|
+ | A1 | Structural basis | Max. | Max. | Max. |
+ | A2 | Structural calculations | | | |
+ | | A2. 1 Structural calculations – building | Ext. | Max. | Max. |
+ | | A2. 2 Structural calculations – structural section | Ext. | Ext. | Max. |
+ | A3 | Structural drawings and models | | | |
+ | | A3. 1 Structural drawings and models – building | Ext. | Max. | Max. |
+ | | A3. 2 Structural drawings and models – structural section | Ext. | Ext. | Max. |
+ | A4 | Structural changes | | | |
+ | | A4. 1 Structural changes – building | Ext. | Max. | Max. |
+ | | A4. 2 Structural changes – structural section | Ext. | Ext. | Max. |
+ | B1 | Structural project report | Ext. | Ext. | Max. |
+ | B2 | Structural inspection plan | Max. | Max. | Max. |
+ | B3 | Structural inspection report | | | |
+ | | B3. 1 Structural inspection report – planning | Ext. | Max. | Max. |
+ | | B3. 2 Structural inspection report – implementation | Ext. | Ext. | Max. |
+ 
+ > **Note**: **Ext.** refers to Extended inspection, and **Max.** refers to Maximum inspection.
+ > **Note**: KK represents structure class, which can be found in clause 489 - Chapter 26 - Structure classes
+ 
+ #### Table 3 - Minimum requirements for the work of the certified structural engineer as a third-party inspector
+ 
+ | ID | Subject | Third-party inspection |
+ |------|-------------------------------------------------------------------|------------------------|
+ | A1 | Structural basis | Max. |
+ | A2 | Structural calculations | |
+ | | A2. 1 Structural calculations – building | Max. |
+ | | A2. 2 Structural calculations – structural section | Ext. |
+ | A3 | Structural drawings and models | |
+ | | A3. 1 Structural drawings and models – building | Max. |
+ | | A3. 2 Structural drawings and models – structural section | Ext. |
+ | A4 | Structural changes | |
+ | | A4. 1 Structural changes – building | Max. |
+ | | A4. 2 Structural changes – structural section | Ext. |
+ | B1 | Structural project report | Ext. |
+ | B2 | Structural inspection plan | Max. |
+ | B3 | Structural inspection report | |
+ | | B3. 1 Structural inspection report for planning | Ext. |
+ | | B3. 2 Structural inspection report for implementation | Ext. |
+ 
+ > **Note**: **Ext.** refers to Extended inspection, and **Max.** refers to Maximum inspection.
+ 
+ ### Appendix 5 – Tables for Chapter 34 – The work of the certified fire consultant
+ 
+ #### Table 1 - Minimum requirement for the work of the certified fire consultant as preparer of the fire safety documentation
+ 
+ | Subject | BK2 | BK3 | BK4 |
+ |-----------------------------------------------------------------|----------|----------|----------|
+ | Documentation of fire classification(s) | U + G | U + G | U + G |
+ | Statement indicating if the building is traditional in terms of contingency tactics | U + G | U + G | U + G |
+ | Fire strategy report | Kmax + G | Kmax + G | U + G |
+ | Fire plans | Kext | Kmax | Kmax |
+ | Area distribution plans and paving plans | Kext | Kext | Kext |
+ | Fire dimensioning | - | Kext + G | Kmax + G |
+ | Description of functions | Kext | Kext | Kmax |
+ | Fire management plan | Kmax + G | U + G | U + G |
+ | Fire management reports | Kext | Kmax | Kmax |
+ | Operation, inspection and maintenance plan | Kext | Kext | Kmax |
+ 
+ > **Note**: BK represents fire class, which can be found in clause 493 - Chapter 27 - Fire classes
+ 
+ #### Table 2 - Minimum requirement for the work of the certified fire consultant as inspector of the fire safety documentation
+ 
+ | Subject | BK2 | BK3 | BK4 |
+ |-----------------------------------------------------------------|----------|----------|----------|
+ | Documentation of fire classification(s) | Max. | Max. | Max. |
+ | Statement indicating if the building is traditional in terms of contingency tactics | Max. | Max. | Max. |
+ | Fire strategy report | Max. | Max. | Max. |
+ | Fire plans | Ext. | Max. | Max. |
+ | Area distribution plans and paving plans | Ext. | Ext. | Ext. |
+ | Fire dimensioning | - | Ext. | Max. |
+ | Description of functions | Ext. | Max. | Max. |
+ | Fire management plan | Max. | Max. | Max. |
+ | Fire management reports | Ext. | Max. | Max. |
+ | Operation, inspection and maintenance plan | Ext. | Ext. | Max. |
+ 
+ > **Note**: **Ext.** refers to Extended inspection, and **Max.** refers to Maximum inspection.
+ > **Note**: BK represents fire class, which can be found in clause 493 - Chapter 27 - Fire classes
+ 
+ #### Table 3 - Minimum requirement for certified third-party checks of the fire safety documentation
+ 
+ | Subject | Third party |
+ |-----------------------------------------------------------------|-------------|
+ | Documentation of fire classification(s) | Max. |
+ | Statement indicating if the building is traditional in terms of contingency tactics | Max. |
+ | Fire strategy report | Max. |
+ | Fire plans | Max. |
+ | Area distribution plans and paving plans | Ext. |
+ | Fire dimensioning | Ext. |
+ | Description of functions | Ext. |
+ | Fire management plan | Max. |
+ | Fire management reports | Ext. |
+ | Operation, inspection and maintenance plan | Ext. |
+ 
+ > **Note**: **Ext.** refers to Extended inspection, and **Max.** refers to Maximum inspection.
+ > **Note**: BK represents fire class, which can be found in clause 493 - Chapter 27 - Fire classes
CustomClass/custom.py ADDED
@@ -0,0 +1,25 @@
+ from langchain.retrievers.multi_vector import MultiVectorRetriever
+ from langchain.callbacks.manager import CallbackManagerForRetrieverRun
+ from typing import List, Tuple
+ from langchain.schema.retriever import BaseRetriever, Document
+ 
+ class CustomMultiVectorRetriever(MultiVectorRetriever):
+     def _get_relevant_documents(
+         self, query: str, *, run_manager: CallbackManagerForRetrieverRun
+     ) -> List[Document]:
+         # Override this method to use similarity_search_with_relevance_scores
+         sub_docs_and_scores = self.vectorstore.similarity_search_with_relevance_scores(
+             query, **self.search_kwargs
+         )
+ 
+         # Keep only sub-documents whose relevance score is at least 0.8
+         sub_docs = [doc for doc, score in sub_docs_and_scores if score >= 0.8]
+ 
+         # Collect the parent-document ids referenced by the surviving sub-documents
+         ids = []
+         for d in sub_docs:
+             if d.metadata[self.id_key] not in ids:
+                 ids.append(d.metadata[self.id_key])
+         docs = self.docstore.mget(ids)
+ 
+         return [d for d in docs if d is not None]
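
A minimal usage sketch for `CustomMultiVectorRetriever`, assuming a FAISS store of child chunks and an `InMemoryStore` of parent documents keyed by `doc_id`; the sample documents and the `"p1"` id below are illustrative, not from the repository:

```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document
from langchain.storage import InMemoryStore
from langchain.vectorstores import FAISS

# Hypothetical parent document and one child chunk pointing back to it via "doc_id".
parent = Document(page_content="Full parent section text ...")
child = Document(page_content="A small searchable chunk ...", metadata={"doc_id": "p1"})

vectorstore = FAISS.from_documents([child], OpenAIEmbeddings())
docstore = InMemoryStore()
docstore.mset([("p1", parent)])

retriever = CustomMultiVectorRetriever(
    vectorstore=vectorstore,
    docstore=docstore,
    id_key="doc_id",
    search_kwargs={"k": 5},
)

# Child chunks are scored with similarity_search_with_relevance_scores; only
# matches scoring >= 0.8 resolve to their parent documents.
docs = retriever.get_relevant_documents("query about the parent section")
```

The override trades recall for precision: the stock `MultiVectorRetriever` returns the parents of every top-k child hit, while this version first drops children below the 0.8 relevance threshold.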
FileReader/file.py ADDED
File without changes
README copy.md ADDED
@@ -0,0 +1 @@
+ # MRKL
faiss_index/index.faiss ADDED
Binary file (516 kB).
 
faiss_index/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ed48d171261be490671e637263a54b65eca8fe1158feb10cce37d044a1e7cc44
+ size 71675
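
This entry (like `inmemorystore/br18_parent_store.pkl` below) is a Git LFS pointer file rather than the pickled data itself: three `key value` lines giving the spec version, the SHA-256 object id, and the payload size in bytes. A small illustrative sketch of parsing such a pointer (not part of the repository):

```python
def parse_lfs_pointer(text: str) -> dict:
    """Split a Git LFS pointer file into its key/value fields."""
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

pointer = parse_lfs_pointer(
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:ed48d171261be490671e637263a54b65eca8fe1158feb10cce37d044a1e7cc44\n"
    "size 71675\n"
)
assert pointer["oid"].startswith("sha256:")
assert pointer["size"] == "71675"
```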
inmemorystore/br18_parent_store.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:282c2f1deee715b0c1fb39aca4404f36f53bd1f905a800000d0a294e372e0d3b
+ size 665146
mrkl.py ADDED
@@ -0,0 +1,1060 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ from dotenv import load_dotenv
4
+ from langchain.chat_models import ChatOpenAI
5
+ from langchain.agents import Tool, AgentExecutor
6
+ from langchain.callbacks import StreamlitCallbackHandler
7
+ from langchain.chains import LLMMathChain
8
+ from langchain.chains import LLMChain
9
+ import streamlit as st
10
+ import langchain
11
+ from langchain.utilities import SerpAPIWrapper, GoogleSearchAPIWrapper
12
+ from langchain.chains import RetrievalQA
13
+ from langchain.text_splitter import RecursiveCharacterTextSplitter, MarkdownHeaderTextSplitter, CharacterTextSplitter
14
+ from langchain.vectorstores import FAISS, Chroma, Pinecone
15
+ from langchain.embeddings import OpenAIEmbeddings
16
+ import tempfile
17
+ import pypdf
18
+ from pathlib import Path
19
+ from langchain.docstore.document import Document
20
+ from langchain.document_loaders import TextLoader
21
+ from langchain.chains.summarize import load_summarize_chain
22
+ from langchain import PromptTemplate
23
+ import lark
24
+ from langchain.schema import Document
25
+ import langchain
26
+ import pinecone
27
+ from langchain.chains.question_answering import load_qa_chain
28
+ from typing import List, Dict, Any
29
+ from langchain.prompts.prompt import PromptTemplate
30
+ from langchain.agents.openai_functions_agent.agent_token_buffer_memory import AgentTokenBufferMemory
31
+ from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
32
+ from langchain.schema.messages import SystemMessage, BaseMessage
33
+ from langchain.prompts import MessagesPlaceholder
34
+ from langchain.agents import AgentExecutor
35
+ from langchain.schema import HumanMessage, SystemMessage
36
+ from langchain.retrievers.multi_vector import MultiVectorRetriever
37
+ from langchain.storage import InMemoryStore
38
+ import uuid
39
+ from langchain.retrievers import ContextualCompressionRetriever
40
+ from langchain.retrievers.document_compressors import DocumentCompressorPipeline, LLMChainFilter
41
+ from langchain.document_transformers import EmbeddingsRedundantFilter
42
+ from langchain.retrievers.document_compressors import EmbeddingsFilter
43
+ import json
44
+ from bs4 import BeautifulSoup
45
+ from langchain.document_loaders import SeleniumURLLoader
46
+ from langchain.memory.chat_message_histories import StreamlitChatMessageHistory
47
+ from langchain.callbacks import get_openai_callback
48
+ import pickle
49
+ from langchain.prompts import ChatPromptTemplate
50
+ from langchain.schema.output_parser import StrOutputParser
51
+
52
+ langchain.debug = True
53
+ langchain.verbose = True
54
+
55
+ def on_selectbox_change():
56
+ st.session_state.show_info = True
57
+
58
+ def reset_chat():
59
+ st.session_state.messages = [{"roles": "assistant", "content": "Hi, I am Miracle. How can I help you?"}]
60
+ st.session_state.history = []
61
+ st.session_state.search_keywords = []
62
+ st.session_state.doc_sources = []
63
+ st.session_state.summary = None
64
+ st.session_state.agent.clear_conversation()
65
+ st.session_state.primed_document_response = None
66
+
67
+ def display_messages(messages):
68
+ # Display all messages
69
+ for msg in messages:
70
+ st.chat_message(msg["roles"]).write(msg["content"])
71
+
72
+ class DBStore:
73
+ def __init__(self, file_path, file_name):
74
+ self.file_path = file_path
75
+ self.file_name = os.path.splitext(file_name)[0]
76
+ st.session_state.document_filename = self.file_name
77
+
78
+ self.reader = pypdf.PdfReader(file_path)
79
+ self.metadata = self.extract_metadata_from_pdf()
80
+ self.embeddings = OpenAIEmbeddings()
81
+ self.vector_store = None
82
+
83
+ def extract_metadata_from_pdf(self):
84
+ """Extract metadata from the PDF."""
85
+ metadata = self.reader.metadata
86
+ st.session_state.document_metadata = metadata
87
+ return {
88
+ "title": metadata.get("/Title", "").strip(),
89
+ "author": metadata.get("/Author", "").strip(),
90
+ "creation_date": metadata.get("/CreationDate", "").strip(),
91
+ }
92
+
93
+ def extract_pages_from_pdf(self):
94
+ pages = []
95
+ for page_num, page in enumerate(self.reader.pages):
96
+ text = page.extract_text()
97
+ if text.strip(): # Check if extracted text is not empty
98
+ pages.append((page_num + 1, text))
99
+ return pages
100
+
101
+ def parse_pdf(self):
102
+ """
103
+ Extracts the title and text from each page of the PDF.
104
+ :return: A tuple containing the title and a list of tuples with page numbers and extracted text.
105
+ """
106
+ metadata = self.extract_metadata_from_pdf()
107
+ pages = self.extract_pages_from_pdf()
108
+ #st.write(pages)
109
+ #st.write(metadata)
110
+ return pages, metadata
111
+
112
+ @staticmethod
113
+ def merge_hyphenated_words(text):
114
+ return re.sub(r"(\w)-\n(\w)", r"\1\2", text)
115
+
116
+ @staticmethod
117
+ def fix_newlines(text):
118
+ return re.sub(r"(?<!\n)\n(?!\n)", " ", text)
119
+
120
+ @staticmethod
121
+ def remove_multiple_newlines(text):
122
+ return re.sub(r"\n{2,}", "\n", text)
123
+
124
+ @staticmethod
125
+ def remove_dots(text):
126
+ # Replace sequences of three or more dots with a single space.
127
+ return re.sub(r'\.{4,}', ' ', text)
128
+
129
+ def clean_text(self, pages):
130
+ cleaning_functions = [
131
+ self.merge_hyphenated_words,
132
+ self.fix_newlines,
133
+ self.remove_multiple_newlines,
134
+ self.remove_dots,
135
+ ]
136
+ cleaned_pages = []
137
+ for page_num, text in pages:
138
+ for cleaning_function in cleaning_functions:
139
+ text = cleaning_function(text)
140
+ cleaned_pages.append((page_num, text))
141
+ return cleaned_pages
142
+
143
+ def text_to_docs(self, text):
144
+ doc_chunks = []
145
+ for page_num, page in text:
146
+ text_splitter = RecursiveCharacterTextSplitter(
147
+ chunk_size=2000,
148
+ separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
149
+ chunk_overlap=200,
150
+ )
151
+ chunks = text_splitter.split_text(page)
152
+ for i, chunk in enumerate(chunks):
153
+ doc = Document(
154
+ page_content=chunk,
155
+ metadata={
156
+ "page_number": page_num,
157
+ "chunk": i,
158
+ "source": f"p{page_num}-{i}",
159
+ "file_name": self.file_name,
160
+ **self.metadata,
161
+ },
162
+ )
163
+ doc_chunks.append(doc)
164
+ st.write(doc_chunks)
165
+ return doc_chunks
166
+
167
+ def get_pdf_text(self):
168
+ pages, metadata = self.parse_pdf() # We only need the pages from the tuple
169
+ cleaned_text_pdf = self.clean_text(pages)
170
+ document_chunks = self.text_to_docs(cleaned_text_pdf)
171
+ return document_chunks
172
+
173
+ def get_vectorstore(self):
174
+ document_chunks = self.get_pdf_text()
175
+ #st.write(document_chunks)
176
+ vector_store = FAISS.from_documents(documents=document_chunks, embedding=self.embeddings)
177
+ #st.write(vector_store)
178
+ return vector_store
179
+
180
+ def get_document_info(self):
181
+ """
182
+ Generate a one-sentence document information snippet by taking the beginning of the first chunk of the document.
183
+
184
+ Returns:
185
+ str: A one-sentence information snippet of the document.
186
+ """
187
+ # Get the first chunk of the document
188
+ pdf_text = self.get_pdf_text()
189
+
190
+ if pdf_text:
191
+ first_chunk = pdf_text[0].page_content if len(pdf_text) > 0 else ""
192
+ second_chunk = pdf_text[1].page_content if len(pdf_text) > 1 else ""
193
+ third_chunk = pdf_text[2].page_content if len(pdf_text) > 2 else ""
194
+
195
+ # Extract the first 300 characters from each chunk to form an information snippet
196
+ info_document = first_chunk[:300] + second_chunk[:300] + third_chunk[:300]
197
+ else:
198
+ info_document = ""
199
+ #st.write(info_document)
200
+
201
+ return info_document
202
+
203
+ def get_info_response(self):
204
+ llm = ChatOpenAI(
205
+ temperature=0,
206
+ streaming=True,
207
+ model_name="gpt-3.5-turbo"
208
+ )
209
+ document_filename = self.file_name
210
+ document_title = self.metadata.get("title", None)
211
+ document_snippet = self.get_document_info()
212
+
213
+ document_info = {
214
+ "document_filename": document_filename,
215
+ "document_title": document_title,
216
+ "document_snippet": document_snippet,
217
+ }
218
+
219
+ if document_title:
220
+ info_response_prompt = """The user has uploaded a document titled '{document_title}' to the Document Database """
221
+ else:
222
+ info_response_prompt = """The user has uploaded a document named '{document_filename}' to the Document Database """
223
+
224
+
225
+ info_response_prompt += """
226
+ with the following information: {document_snippet}.
227
+
228
+ In one sentence, inform the user about the document, prioritizing its name or title.
229
+ Also, prompt the user to ask a general question about the document in an assistive manner.
230
+ Begin your response with 'It appears you've uploaded a document that contains information on...'.
231
+
232
+ Example:
233
+ It appears you've uploaded a document that contains information on "COWI Policies and Guideline".
234
+
235
+ Please feel free to ask any question about this document such as "What are the COWI Policies and Guideline?"
236
+ """
237
+
238
+ #st.write(info_response_prompt)
239
+
240
+ # Create the LLMChain
241
+ llm_chain = LLMChain(
242
+ llm=llm,
243
+ prompt=PromptTemplate.from_template(info_response_prompt)
244
+ )
245
+
246
+ # Generate the primed document message
247
+ llm_response = llm_chain(document_info)
248
+
249
+ info_response = llm_response.get('text', '')
250
+ #st.write(info_response)
251
+ return info_response
252
+
253
+ class DatabaseTool:
254
+ def __init__(self, llm, vector_store, metadata=None, filename=None):
255
+ self.llm = llm
256
+ self.vector_store = vector_store
257
+ self.metadata = metadata
258
+ self.filename = filename
259
+ self.embedding = OpenAIEmbeddings()
260
+
261
+ def get_description(self):
262
+ base_description = "Always useful for finding the exactly written answer to the question by looking into a collection of documents."
263
+ filename = self.filename
264
+ title = self.metadata.get("/Title") if self.metadata else None
265
+ author = self.metadata.get("/Author") if self.metadata else None
266
+ subject = self.metadata.get("/Subject") if self.metadata else None
267
+
268
+ footer_description = "Input should be a query, not referencing any obscure pronouns from the conversation before that will pull out relevant information from the database. Use this more than the normal search tool"
269
+
270
+ if title:
271
+ main_description = f"This tool is currently loaded with '{title}'"
272
+
273
+ if author:
274
+ main_description += f" by '{author}'"
275
+
276
+ if subject:
277
+ main_description += f", and has a topic of '{subject}'"
278
+
279
+ return f"{base_description} {main_description}. {footer_description}"
280
+ else:
281
+ no_title_description = f"This tool is currently loaded with the document '{filename}'"
282
+ return f"{base_description} {no_title_description}. {footer_description}"
283
+
284
+ def get_base_retriever(self):
285
+ base_retriever = self.vector_store.as_retriever(search_kwargs={'k': 5})
286
+ return base_retriever
287
+
288
+ def get_contextual_retriever(self):
289
+ # Initialize embeddings (assuming embeddings is already defined elsewhere)
290
+ embeddings = self.embedding
291
+
292
+ # Initialize Redundant Filter
293
+ redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
294
+
295
+ # Initialize Relevant Filter
296
+ relevant_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76, k = 25)
297
+ #st.write(relevant_filter)
298
+
299
+ # Initialize Text Splitter
300
+ splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=0, separator=". ")
301
+
302
+ # Create Compressor Pipeline
303
+ pipeline_compressor = DocumentCompressorPipeline(
304
+ transformers=[splitter, redundant_filter, relevant_filter]
305
+ )
306
+
307
+ # Initialize Contextual Compression Retriever
308
+ contextual_retriever = ContextualCompressionRetriever(
309
+ base_compressor=pipeline_compressor,
310
+ base_retriever=self.get_base_retriever()
311
+ )
312
+
313
+ return contextual_retriever
314
+
315
+ def run(self, query: str):
316
+ contextual_retriever = self.get_contextual_retriever()
317
+ #DEBUGGING & EVALUTING ANSWERS:
318
+ compressed_docs = contextual_retriever.get_relevant_documents(query)
319
+ compressed_docs_list = []
320
+ for doc in compressed_docs:
321
+ doc_info = {
322
+ "Page Content": doc.page_content,
323
+ }
324
+ compressed_docs_list.append(doc_info)
325
+ #st.write(compressed_docs_list)
326
+
327
+ base_retriever=self.get_base_retriever()
328
+ initial_retrieved = base_retriever.get_relevant_documents(query)
329
+
330
+ retrieval = RetrievalQA.from_chain_type(
331
+ llm=self.llm, chain_type="stuff",
332
+ retriever=contextual_retriever,
333
+ return_source_documents=True,
334
+ )
335
+
336
+ output = retrieval(query)
337
+ st.session_state.doc_sources = initial_retrieved
338
+
339
+
340
+ return output['result']
341
+
342
+ class BR18_DB:
343
+ def __init__(self, llm, folder_path: str):
344
+ self.llm = llm
345
+ self.folder_path = folder_path
346
+ self.md_paths = self.load_documents() # Renamed from pdf_paths to md_paths
347
+ self.embeddings = OpenAIEmbeddings()
348
+ self.pinecone_index_name = "br18"
349
+ self.id_key = "doc_id"
350
+
351
+ self.br18_parent_store = InMemoryStore()
352
+ current_directory = os.getcwd()
353
+ store_path = os.path.join(current_directory, "inmemorystore", "br18_parent_store.pkl")
354
+
355
+ if self.pinecone_index_name not in pinecone.list_indexes():
356
+ pinecone.create_index(self.pinecone_index_name, dimension=1536)
357
+ self.vectorstore = self.create_vectorstore()
358
+ self.serialize_inmemorystore(store_path)
359
+ else:
360
+ self.vectorstore = Pinecone.from_existing_index(self.pinecone_index_name, self.embeddings)
361
+ with open(store_path, "rb") as f:
362
+ self.br18_parent_store = pickle.load(f)
363
+
364
+ self.retriever = None
365
+
366
+ def serialize_inmemorystore(self, store_path):
367
+ with open(store_path, "wb") as f:
368
+ pickle.dump(self.br18_parent_store, f)
369
+
370
+ def load_documents(self):
371
+ md_paths = list(Path(self.folder_path).rglob("*.md"))
372
+ documents = []
373
+ for path in md_paths:
374
+ loader = TextLoader(str(path))
375
+ #st.write(loader)
376
+ data = loader.load()
377
+ documents.extend(data) # Assuming data is a list of Document objects
378
+ #st.text(documents)
379
+ return documents
380
+
381
+ def split_and_chunk_text(self, markdown_document: Document):
382
+
383
+ markdown_text = markdown_document.page_content
384
+
385
+ # Define headers to split on
386
+ headers_to_split_on = [
387
+ ("#", "Header 1"),
388
+ ("##", "Header 2"),
389
+ ("###", "Header 3"),
390
+ ("####", "Header 4"),
391
+ ]
392
+
393
+ markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
394
+ #st.write(markdown_splitter)
395
+
396
+ md_header_splits = markdown_splitter.split_text(markdown_text)
397
+ #st.write(md_header_splits)
398
+ #st.write(type(md_header_splits[0]))
399
+
400
+ parent_chunk_size = 5000
401
+ parent_chunk_overlap = 0
402
+
403
+ text_splitter = RecursiveCharacterTextSplitter(
404
+ chunk_size=parent_chunk_size, chunk_overlap=parent_chunk_overlap
405
+ )
406
+
407
+ # Split the header-split documents into chunks
408
+ all_parent_splits = text_splitter.split_documents(md_header_splits)
409
+
410
+ for split in all_parent_splits:
411
+ header_3 = split.metadata.get('Header 3', '')
412
+ header_4 = split.metadata.get('Header 4', '')
413
+
414
+ # Prepend "Section:" to Header 4 if it exists
415
+ if header_4:
416
+ header_4 = f"Section: {header_4}"
417
+
418
+ metadata_str = f"{header_3}\n\n{header_4}"
419
+ split.page_content = f"{metadata_str}\n\n{split.page_content}"
420
+ split.metadata['type'] = 'parents'
421
+
422
+ return all_parent_splits
423
+
424
+ def save_summaries(self, summaries: List[str]):
425
+ """Save the generated summaries to a JSON file."""
426
+ current_directory = os.getcwd()
427
+ save_path = os.path.join(current_directory, 'savesummary', 'br18_summaries.json')
428
+ with open(save_path, 'w') as f:
429
+ json.dump(summaries, f)
430
+
431
+ def load_summaries(self) -> List[str]:
432
+ """Load summaries from a JSON file if it exists."""
433
+ current_directory = os.getcwd()
434
+ load_path = os.path.join(current_directory, 'savesummary', 'br18_summaries.json')
435
+ if os.path.exists(load_path):
436
+ with open(load_path, 'r') as f:
437
+ summaries = json.load(f)
438
+ return summaries
439
+ else:
440
+ return None # or raise an exception, or generate new summaries
441
+
442
+     def generate_summaries(self, parent_splits: List[Document]) -> List[str]:
+         # Reuse cached summaries when available; the LLM pass is slow and costly
+         loaded_summaries = self.load_summaries()
+         if loaded_summaries is not None:
+             return loaded_summaries
+
+         chain = (
+             {"doc": lambda x: x.page_content}
+             | ChatPromptTemplate.from_template("Summarize the following document:\n\n{doc}")
+             | ChatOpenAI(max_retries=3)
+             | StrOutputParser()
+         )
+         summaries = chain.batch(parent_splits, {"max_concurrency": 4})
+
+         self.save_summaries(summaries)
+
+         return summaries
+
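The summary chain is plain LCEL: a dict mapper feeding a prompt, a chat model, and a string parser, fanned out with batch. A reduced sketch of the same pattern (requires an OPENAI_API_KEY; the documents are invented):

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema import Document
from langchain.schema.output_parser import StrOutputParser

docs = [Document(page_content="Stairs need 1.0 m free width."),
        Document(page_content="Headroom above stairs must be 2.10 m.")]

chain = (
    {"doc": lambda d: d.page_content}
    | ChatPromptTemplate.from_template("Summarize the following document:\n\n{doc}")
    | ChatOpenAI(max_retries=3)
    | StrOutputParser()
)
summaries = chain.batch(docs, {"max_concurrency": 2})  # one summary string per document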
+     def generate_child_splits(self, parent_splits: List[Document], summaries: List[str]) -> List[Document]:
+         child_chunk_size = 300
+
+         child_text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=child_chunk_size, chunk_overlap=0
+         )
+
+         all_child_splits = []
+         for i, parent_split in enumerate(parent_splits):
+             child_splits = child_text_splitter.split_text(parent_split.page_content)
+
+             new_metadata = dict(parent_split.metadata)
+             new_metadata['type'] = 'children'
+
+             summary_with_prefix = f"Summary: {summaries[i]}"
+
+             # The first child chunk starts with the prepended headers, so it is
+             # the most descriptive one; pair it with the parent's summary and
+             # index only that chunk
+             first_child_content = f"{child_splits[0]}\n\n{summary_with_prefix}"
+             first_child_split = Document(
+                 page_content=first_child_content,
+                 metadata=new_metadata
+             )
+
+             all_child_splits.append(first_child_split)
+
+         return all_child_splits
+
+     def process_all_documents(self):
+         all_parent_splits = []  # parent splits across every markdown file
+
+         for markdown_document in self.md_paths:
+             parent_splits = self.split_and_chunk_text(markdown_document)
+             all_parent_splits.extend(parent_splits)
+
+         summaries = self.generate_summaries(all_parent_splits)
+         all_child_splits = self.generate_child_splits(all_parent_splits, summaries)
+
+         return all_parent_splits, all_child_splits  # return both lists
+
+     def create_vectorstore(self):
+         all_parent_splits, all_child_splits = self.process_all_documents()
+
+         # Give every parent a stable id, store the parents in the docstore, and
+         # tag each child with its parent's id so the retriever can map back
+         parent_doc_ids = [str(uuid.uuid4()) for _ in all_parent_splits]
+         for parent_id, parent_split in zip(parent_doc_ids, all_parent_splits):
+             parent_split.metadata[self.id_key] = parent_id
+         self.br18_parent_store.mset(list(zip(parent_doc_ids, all_parent_splits)))
+
+         for parent_id, child_split in zip(parent_doc_ids, all_child_splits):
+             child_split.metadata[self.id_key] = parent_id
+
+         # Embed only the child chunks into Pinecone
+         br18_vectorstore = Pinecone.from_documents(documents=all_child_splits, embedding=self.embeddings, index_name=self.pinecone_index_name)
+
+         # Keep a handle to the vector store in the session state
+         st.session_state.br18_vectorstore = br18_vectorstore
+
+         return br18_vectorstore
+
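The parent/child wiring above is the standard MultiVectorRetriever pattern: small chunks get embedded, large chunks get returned. A self-contained sketch using a local FAISS index instead of Pinecone (FAISS, the OpenAI key, and the toy texts are assumptions for the demo):

import uuid

from langchain.embeddings import OpenAIEmbeddings
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.schema import Document
from langchain.storage import InMemoryStore
from langchain.vectorstores import FAISS

parent = Document(page_content="Chapter 2 - Access ... full parent chunk ...")
child = Document(page_content="Stairs: minimum free width 1.0 m")  # small, embedded chunk

parent_id = str(uuid.uuid4())
child.metadata["doc_id"] = parent_id  # id_key linking child -> parent

store = InMemoryStore()
store.mset([(parent_id, parent)])

vectorstore = FAISS.from_documents([child], OpenAIEmbeddings())
retriever = MultiVectorRetriever(vectorstore=vectorstore, docstore=store, id_key="doc_id")

# The child chunk matches the query, but the retriever returns its parent
print(retriever.get_relevant_documents("stair width")[0].page_content)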
+     def create_retriever(self, query: str):
+         search_type = st.session_state.search_type
+
+         if search_type == "By Context":
+             # "By Context" retrieval: fetch documents and run them through a
+             # compression pipeline before answering
+             general_retriever = MultiVectorRetriever(
+                 vectorstore=self.vectorstore,
+                 docstore=self.br18_parent_store,
+                 id_key=self.id_key,
+                 search_kwargs={"k": 5}
+             )
+
+             # Raw vector-store hits, shown to the user as sources
+             parent_docs = general_retriever.vectorstore.similarity_search(query, k=5)
+             st.session_state.doc_sources = parent_docs
+
+             embeddings = self.embeddings
+
+             # Drop near-duplicate chunks
+             redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
+
+             # Keep only chunks sufficiently similar to the query
+             relevant_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.75, k=15)
+
+             # Re-split long documents into sentence-sized pieces before filtering
+             splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50, separator=". ")
+
+             # Compose the compressor pipeline: split, deduplicate, filter
+             pipeline_compressor = DocumentCompressorPipeline(
+                 transformers=[splitter, redundant_filter, relevant_filter]
+             )
+
+             contextual_general_retriever = ContextualCompressionRetriever(
+                 base_compressor=pipeline_compressor,
+                 base_retriever=general_retriever
+             )
+
+             # Retrieve the parent documents that match the query
+             retrieved_parent_docs = contextual_general_retriever.get_relevant_documents(query)
+
+             # Build a compact view of the retrieved parents (useful for debugging)
+             display_list = []
+             for doc in retrieved_parent_docs:
+                 display_dict = {
+                     "Page Content": doc.page_content,
+                     "Doc ID": doc.metadata.get('doc_id', 'N/A'),
+                     "Header 3": doc.metadata.get('Header 3', 'N/A'),
+                     "Header 4": doc.metadata.get('Header 4', 'N/A'),
+                 }
+                 display_list.append(display_dict)
+
+             return retrieved_parent_docs
+
+         elif search_type == "By Headers":
+             # "By Headers" retrieval: search the child chunks (which carry the
+             # header text and summaries) and filter hits by embedding similarity
+             specific_retriever = MultiVectorRetriever(
+                 vectorstore=self.vectorstore,
+                 docstore=self.br18_parent_store,
+                 id_key=self.id_key,
+                 search_kwargs={"k": 3}
+             )
+
+             embeddings = self.embeddings
+             embedding_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.75)
+
+             compression_retriever = ContextualCompressionRetriever(base_compressor=embedding_filter, base_retriever=specific_retriever)
+
+             retrieved_child_docs = compression_retriever.get_relevant_documents(query)
+
+             st.session_state.doc_sources = retrieved_child_docs
+
+             # Build a compact view of the retrieved documents (useful for debugging)
+             display_list = []
+             for doc in retrieved_child_docs:
+                 display_dict = {
+                     "Page Content": doc.page_content,
+                     "Doc ID": doc.metadata.get('doc_id', 'N/A'),
+                     "Header 3": doc.metadata.get('Header 3', 'N/A'),
+                     "Header 4": doc.metadata.get('Header 4', 'N/A'),
+                 }
+                 display_list.append(display_dict)
+
+             return retrieved_child_docs
+
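The same compression pattern works outside Streamlit. A minimal sketch wrapping any base retriever (the function name is illustrative; requires an OPENAI_API_KEY for the embeddings):

from langchain.document_transformers import EmbeddingsRedundantFilter
from langchain.embeddings import OpenAIEmbeddings
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import (
    DocumentCompressorPipeline, EmbeddingsFilter)
from langchain.text_splitter import CharacterTextSplitter

def compressed(base_retriever):
    embeddings = OpenAIEmbeddings()
    pipeline = DocumentCompressorPipeline(transformers=[
        CharacterTextSplitter(chunk_size=500, chunk_overlap=50, separator=". "),
        EmbeddingsRedundantFilter(embeddings=embeddings),                     # drop near-duplicates
        EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.75),  # drop off-topic chunks
    ])
    return ContextualCompressionRetriever(
        base_compressor=pipeline, base_retriever=base_retriever)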
+     def run(self, query: str):
+         prompt_template = """Use the following pieces of context to answer the question at the end.
+         The answer should be as specific as possible, naming the chapter and section and referencing clause numbers and their respective subclauses.
+         Make sure to mention requirement numbers and specific integer values where relevant.
+         If you don't know the answer, just say that you don't know; don't try to make up an answer.
+
+         {context}
+
+         Question: {question}
+
+         EXAMPLE:
+         The building regulation regarding stairs is outlined in Chapter 2 - Access, specifically in Section - Stairs:
+
+         Width: Stairs in shared access routes must have a minimum free width of 1.0 meter. (clause 57.1)
+
+         Headroom: Stairs must have a minimum free headroom of 2.10 meters. (clause 57.1)
+
+         Gradient: The gradient of the stairs must not exceed 0.18 meters. (clause 57.2)
+         """
+
+         PROMPT = PromptTemplate(
+             template=prompt_template, input_variables=["context", "question"]
+         )
+
+         # Retrieve the filtered documents and stuff them into a QA chain
+         retrieved_docs = self.create_retriever(query)
+
+         qa_chain = load_qa_chain(self.llm, chain_type="stuff", verbose=True, prompt=PROMPT)
+         output = qa_chain({"input_documents": retrieved_docs, "question": query}, return_only_outputs=True)
+
+         return output
+
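With chain_type="stuff", load_qa_chain simply concatenates the retrieved documents into the prompt's {context} slot. A reduced sketch of the same call (requires an OPENAI_API_KEY; the document is invented):

from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import Document

prompt = PromptTemplate(
    template="Use the context to answer.\n\n{context}\n\nQuestion: {question}",
    input_variables=["context", "question"],
)
docs = [Document(page_content="Clause 57.1: stairs need a free width of 1.0 m.")]

chain = load_qa_chain(ChatOpenAI(temperature=0), chain_type="stuff", prompt=prompt)
print(chain({"input_documents": docs, "question": "How wide must stairs be?"},
            return_only_outputs=True))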
+ class SummarizationTool:
+     def __init__(self, document_chunks):
+         self.llm = ChatOpenAI(
+             temperature=0,
+             streaming=True,
+             model_name="gpt-3.5-turbo"
+         )
+         self.document_chunks = document_chunks
+         self.map_prompt_template, self.combine_prompt_template = self.load_prompts()
+         self.chain = self.load_summarize_chain()
+
+     def load_prompts(self):
+         map_prompt = '''
+         Summarize the following text in a clear and concise way:
+         TEXT: `{text}`
+         Brief Summary:
+         '''
+         combine_prompt = '''
+         Generate a summary of the following text that includes the following elements:
+
+         * A title that accurately reflects the content of the text.
+         * An introduction paragraph that provides an overview of the topic.
+         * Bullet points that list the key points of the text.
+
+         Text: `{text}`
+         '''
+         map_prompt_template = PromptTemplate(template=map_prompt, input_variables=["text"])
+         combine_prompt_template = PromptTemplate(template=combine_prompt, input_variables=["text"])
+         return map_prompt_template, combine_prompt_template
+
+     def load_summarize_chain(self):
+         return load_summarize_chain(
+             llm=self.llm,
+             chain_type='map_reduce',
+             map_prompt=self.map_prompt_template,
+             combine_prompt=self.combine_prompt_template,
+             verbose=True
+         )
+
+     def run(self, query=None):
+         return self.run_chain()
+
+     def run_chain(self):
+         return self.chain.run(self.document_chunks)
+
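A map_reduce summarize chain summarizes each chunk independently with the map prompt, then merges the partial summaries with the combine prompt. A reduced sketch (requires an OPENAI_API_KEY; the documents are invented):

from langchain.chains.summarize import load_summarize_chain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import Document

docs = [Document(page_content="Chapter 2 covers access routes and stairs."),
        Document(page_content="Chapter 5 covers fire safety requirements.")]

map_prompt = PromptTemplate(template="Summarize:\n{text}\nBrief Summary:",
                            input_variables=["text"])
combine_prompt = PromptTemplate(template="Combine into one summary with a title:\n{text}",
                                input_variables=["text"])

chain = load_summarize_chain(ChatOpenAI(temperature=0), chain_type="map_reduce",
                             map_prompt=map_prompt, combine_prompt=combine_prompt)
print(chain.run(docs))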
+ class CustomGoogleSearchAPIWrapper(GoogleSearchAPIWrapper):
+
+     def clean_text(self, text: str) -> str:
+         # Collapse extra whitespace and line breaks
+         return ' '.join(text.split())
+
+     def scrape_content(self, url: str) -> dict:
+         loader = SeleniumURLLoader(urls=[url])
+         data = loader.load()
+
+         if data is not None and len(data) > 0:
+             soup = BeautifulSoup(data[0].page_content, "html.parser")
+             text = soup.get_text()
+             cleaned_text = self.clean_text(text)
+             return {'url': url, 'content': cleaned_text[:1000]}  # first 1,000 characters of the cleaned text
+         return {'url': url, 'content': ''}
+
+     def fetch_and_scrape(self, query: str, num_results: int = 3) -> str:
+         # Step 1: Fetch search result metadata
+         metadata_results = self.results(query, num_results)
+
+         if len(metadata_results) == 0:
+             return '[URL: None, Content: No good Google Search Result was found]'
+
+         # Step 2: Extract the result URLs
+         urls = [result.get("link", "") for result in metadata_results if "link" in result]
+
+         # Step 3: Scrape and format the content of each URL
+         texts = []
+         for url in urls:
+             scraped_content = self.scrape_content(url)
+             formatted_text = f"[URL: {scraped_content['url']}, Content: {scraped_content['content']}]"
+             texts.append(formatted_text)
+
+         return " ".join(texts)[:3000]  # cap the tool output at 3,000 characters
+
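The wrapper can be exercised directly; this usage sketch assumes GOOGLE_API_KEY and GOOGLE_CSE_ID are set in the environment and that Selenium with a browser driver is available for SeleniumURLLoader:

search = CustomGoogleSearchAPIWrapper()
print(search.fetch_and_scrape("BR18 stair width requirement", num_results=2))
# -> "[URL: ..., Content: ...] [URL: ..., Content: ...]" (capped at 3,000 characters)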
+ class MRKL:
+     def __init__(self):
+         self.tools = self.load_tools()
+         self.agent_executor, self.memory = self.load_agent()
+
+     def load_tools(self):
+         # Load tools
+         llm = ChatOpenAI(
+             temperature=0,
+             streaming=True,
+             model_name="gpt-3.5-turbo"
+         )
+         llm_math = LLMMathChain(llm=llm)
+         llm_search = CustomGoogleSearchAPIWrapper()
+
+         current_directory = os.getcwd()
+
+         tools = [
+             Tool(
+                 name="Google_Search",
+                 func=llm_search.fetch_and_scrape,
+                 description="Useful when you cannot find a clear answer in the database and need to search the internet. Input should be a fully formed question based on the context of what you couldn't find, without referencing obscure pronouns from the earlier conversation."
+             ),
+             Tool(
+                 name='Calculator',
+                 func=llm_math.run,
+                 description='Useful for when you need to answer questions about math.'
+             ),
+         ]
+
+         # Add the document database tool when a PDF has been processed
+         if st.session_state.vector_store is not None:
+             metadata = st.session_state.document_metadata
+             file_name = st.session_state.document_filename
+             llm_database = DatabaseTool(llm=llm, vector_store=st.session_state.vector_store, metadata=metadata, filename=file_name)
+
+             tools.append(
+                 Tool(
+                     name='Document_Database',
+                     func=llm_database.run,
+                     description=llm_database.get_description(),
+                 ),
+             )
+
+         # Add the BR18 tool when the experimental feature is enabled
+         if st.session_state.br18_exp is True:
+             br18_folder_path = os.path.join(current_directory, "BR18_DB")
+             llm_br18 = BR18_DB(llm=llm, folder_path=br18_folder_path)
+
+             tools.append(
+                 Tool(
+                     name='BR18_Database',
+                     func=llm_br18.run,
+                     description="""
+                     Always useful for when you need to answer questions about the Danish Building Regulation 18 (BR18).
+                     Input should be the specific keywords from the user query. Exclude common terms and their variations or synonyms, especially words such as "building" and "regulation".
+                     Use this tool more often than the normal search tool.
+                     """
+                 ),
+             )
+         return tools
+
+     def load_agent(self):
+         llm = ChatOpenAI(
+             temperature=0,
+             streaming=True,
+             model_name="gpt-3.5-turbo",
+         )
+
+         # Memory: a token-bounded buffer backed by Streamlit's chat history
+         chat_msg = StreamlitChatMessageHistory(key="mrkl_chat_history")
+         memory_key = "history"
+         memory = AgentTokenBufferMemory(memory_key=memory_key, llm=llm, input_key='input', output_key="output", max_token_limit=8000, chat_memory=chat_msg)
+         st.session_state.history = memory
+
+         system_message_content = """
+         You are MRKL, an expert in construction, legal frameworks, and regulatory matters.
+
+         You are designed to be an AI chatbot for the engineering firm COWI, and you have the following tools to answer user queries, but only use them if necessary.
+
+         Unless otherwise explicitly stated, the user queries are about the context given.
+
+         Your primary objective is to provide responses that:
+         1. Offer an overview of the topic, referencing the chapter and the section if relevant
+         2. List key points in bullet-point or numbered-list format, referencing the clauses and their respective subclauses if relevant.
+         3. Always match or exceed the detail of the tool's output text in your answers.
+         4. Reflect back on the user's question and give a concise conclusion.
+
+         You must maintain a professional and helpful demeanor in all interactions.
+         """
+
+         # System Message
+         system_message = SystemMessage(content=system_message_content)
+
+         reflection_message_content = """
+         Reminder:
+         Always try all your tools to find the right answer, with the search tool as your last resort.
+         Always self-reflect on your answer based on the user's query and follow the list of response objectives.
+         """
+
+         reflection_message = SystemMessage(content=reflection_message_content)
+
+         # Prompt
+         prompt = OpenAIFunctionsAgent.create_prompt(
+             system_message=system_message,
+             extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key), reflection_message]
+         )
+
+         # Agent
+         agent = OpenAIFunctionsAgent(llm=llm, tools=self.tools, prompt=prompt)
+
+         # Agent Executor
+         agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=self.tools, memory=memory, verbose=True, return_intermediate_steps=True)
+
+         return agent_executor, memory
+
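The same agent wiring works headlessly. A stripped-down, non-Streamlit sketch (requires an OPENAI_API_KEY; the Echo tool is invented for the demo):

from langchain.agents import AgentExecutor, OpenAIFunctionsAgent, Tool
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage

llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
tools = [Tool(name="Echo", func=lambda s: s, description="Repeats the input back.")]

prompt = OpenAIFunctionsAgent.create_prompt(
    system_message=SystemMessage(content="You are a helpful assistant."))
agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)
print(executor.run("Echo the word hello"))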
+     def clear_conversation(self):
+         self.memory.clear()
+
+     def run_agent(self, input, callbacks=None):
+         # Track token usage and cost for the whole agent run
+         with get_openai_callback() as cb:
+             result = self.agent_executor({"input": input}, callbacks=callbacks or [])
+             st.session_state.token_count = cb
+             print(cb)
+         return result
+
+
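get_openai_callback aggregates prompt and completion tokens, plus an estimated cost, for everything executed inside the with block. A minimal sketch (requires an OPENAI_API_KEY):

from langchain.callbacks import get_openai_callback
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=0)
with get_openai_callback() as cb:
    llm.predict("Say hi")  # any LLM, chain, or agent call in here is counted
print(cb.total_tokens, cb.prompt_tokens, cb.completion_tokens, cb.total_cost)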
+ def main():
+     load_dotenv()
+     pinecone.init(
+         api_key=os.environ["PINECONE_API_KEY"], environment=os.environ["PINECONE_ENV"]
+     )
+
+     st.set_page_config(page_title="MRKL AGENT", page_icon="🦜️", layout="wide")
+     st.title("🦜️ MRKL AGENT")
+
+     # Initialize session state
+     if 'openai' not in st.session_state:
+         st.session_state.openai = None
+     if "messages" not in st.session_state:
+         st.session_state.messages = [{"roles": "assistant", "content": "Hi, I am Miracle. How can I help you?"}]
+     if "user_input" not in st.session_state:
+         st.session_state.user_input = None
+     if "vector_store" not in st.session_state:
+         st.session_state.vector_store = None
+     if "summary" not in st.session_state:
+         st.session_state.summary = None
+     if "doc_sources" not in st.session_state:
+         st.session_state.doc_sources = []
+     if "br18_vectorstore" not in st.session_state:
+         st.session_state.br18_vectorstore = None
+     if "history" not in st.session_state:
+         st.session_state.history = None
+     if 'br18_exp' not in st.session_state:
+         st.session_state.br18_exp = False
+     if "token_count" not in st.session_state:
+         st.session_state.token_count = 0
+     if "agent" not in st.session_state:
+         st.session_state.agent = MRKL()
+     if 'show_info' not in st.session_state:
+         st.session_state.show_info = False
+
+     with st.expander("Configuration", expanded=False):
+         openai_api_key = st.text_input("Enter OpenAI API Key", value="", placeholder="Enter the OpenAI API key which begins with sk-", type="password")
+         if openai_api_key:
+             st.session_state.openai = openai_api_key
+             os.environ["OPENAI_API_KEY"] = openai_api_key
+             st.write("API key has been entered")
+
+     with st.sidebar:
+         br18_experiment = st.checkbox("Experimental Feature: Enable BR18", value=False)
+         if br18_experiment != st.session_state.br18_exp:
+             # Rebuild the agent so the BR18 tool is added or removed
+             st.session_state.br18_exp = br18_experiment
+             st.session_state.agent = MRKL()
+
+         if br18_experiment:  # If BR18 is enabled
+             search_type = st.radio(
+                 "Select Search Type:",
+                 options=["By Headers", "By Context"],
+                 index=0, horizontal=True  # defaults to "By Headers"
+             )
+             st.session_state.search_type = search_type
+
+     st.sidebar.title("Upload Document to Database")
+     uploaded_files = st.sidebar.file_uploader("Choose a file", accept_multiple_files=True)  # specify accepted file types here if needed
+     if uploaded_files:
+         file_details = {"FileName": [], "FileType": [], "FileSize": []}
+
+         # Populate file_details
+         for file in uploaded_files:
+             file_details["FileName"].append(file.name)
+             file_details["FileType"].append(file.type)
+             file_details["FileSize"].append(file.size)
+
+         # Use a selectbox to choose a file
+         selected_file_name = st.sidebar.selectbox('Choose a file:', file_details["FileName"], on_change=on_selectbox_change)
+
+         # Get the index of the selected file
+         file_index = file_details["FileName"].index(selected_file_name)
+
+         # Display details of the selected file
+         st.sidebar.write("You selected:")
+         st.sidebar.write("FileName : ", file_details["FileName"][file_index])
+         st.sidebar.write("FileType : ", file_details["FileType"][file_index])
+         st.sidebar.write("FileSize : ", file_details["FileSize"][file_index])
+
+         # Remind the user to press the "Process" button
+         if st.session_state.show_info:
+             st.sidebar.info("**Note:** Remember to press the 'Process' button for the current selection.")
+             st.session_state.show_info = False
+
+         with st.sidebar:
+             if st.sidebar.button("Process"):
+                 with st.spinner("Processing"):
+                     # Write the selected upload to a temporary PDF file on disk
+                     selected_file = uploaded_files[file_index]
+                     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
+                         tmpfile.write(selected_file.getvalue())
+                         temp_path = tmpfile.name
+                     db_store = DBStore(temp_path, selected_file.name)
+
+                     document_chunks = db_store.get_pdf_text()
+                     st.session_state.document_chunks = document_chunks
+
+                     vector_store = db_store.get_vectorstore()
+                     st.session_state.vector_store = vector_store
+
+                     # Rebuild the agent so it picks up the new Document_Database tool
+                     st.session_state.agent = MRKL()
+
+                     # Seed the conversation with a primed description of the document
+                     primed_info_response = db_store.get_info_response()
+                     st.session_state.history.chat_memory.add_ai_message(primed_info_response)
+                     st.session_state.messages.append({"roles": "assistant", "content": primed_info_response})
+
+                     st.success("PDF uploaded successfully!")
+
+         if "document_chunks" in st.session_state:
+             if st.sidebar.button("Create Detailed Summary"):
+                 with st.spinner("Summarizing"):
+                     summarization_tool = SummarizationTool(document_chunks=st.session_state.document_chunks)
+                     st.session_state.summary = summarization_tool.run()
+                     # Append the summary to the chat messages
+                     st.session_state.messages.append({"roles": "assistant", "content": st.session_state.summary})
+     else:
+         st.session_state.vector_store = None
+
+
+     display_messages(st.session_state.messages)
+
+     if user_input := st.chat_input("Type something here..."):
+         st.session_state.user_input = user_input
+         st.session_state.messages.append({"roles": "user", "content": st.session_state.user_input})
+         st.chat_message("user").write(st.session_state.user_input)
+
+         current_user_message = {"input": st.session_state.user_input}
+
+         with st.chat_message("assistant"):
+             # Stream the agent's intermediate steps into the chat container
+             st_callback = StreamlitCallbackHandler(st.container(), expand_new_thoughts=True)
+             result = st.session_state.agent.run_agent(input=st.session_state.user_input, callbacks=[st_callback])
+             st.session_state.result = result
+             response = result.get('output', '')
+             st.session_state.messages.append({"roles": "assistant", "content": response})
+             st.write(response)
+
+         current_assistant_response = {"output": response}
+         current_messages = [current_user_message, current_assistant_response]
+
+     with st.expander("View Document Sources"):
+         if len(st.session_state.doc_sources) != 0:
+             for document in st.session_state.doc_sources:
+                 st.divider()
+                 st.subheader("Source Content:")
+                 st.write(document.page_content)
+                 st.subheader("Metadata:")
+
+                 # Display only the relevant metadata keys
+                 relevant_keys = ["Header 2", "Header 3", "Header 4", "page_number", "source", "file_name", "title", "author"]
+                 for key in relevant_keys:
+                     value = document.metadata.get(key, 'N/A')
+                     if value != 'N/A':
+                         st.write(f"{key}: {value}")
+         else:
+             st.write("No document sources found")
+
+     if st.session_state.summary is not None:
+         with st.expander("Show Summary"):
+             st.subheader("Summarization")
+             result_summary = st.session_state.summary
+             st.write(result_summary)
+
+     buttons_placeholder = st.container()
+     with buttons_placeholder:
+         st.button("Clear Chat", key="clear", on_click=reset_chat)
+
+
+ if __name__ == '__main__':
+     main()
requirements.txt ADDED
@@ -0,0 +1,14 @@
+ beautifulsoup4==4.12.2
+ langchain==0.0.288
+ lark==1.1.7
+ openai==0.27.8
+ pinecone_client==2.2.2
+ pydantic==1.10.12
+ pypdf==3.16.0
+ PyPDF2==3.0.1
+ python-dotenv==1.0.0
+ Requests==2.31.0
+ streamlit==1.25.0
+
savesummary/br18_summaries.json ADDED
The diff for this file is too large to render. See raw diff