heikowagner committed on
Commit
7009660
1 Parent(s): bcd81f1
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ./docker/zeppelin/logs/*
2
+ *.log
3
+ *.log.*
4
+ *__pycache__*
5
+ root
6
+ *.ipynb_checkpoints*
7
+ .vscode
8
+ /app/mymodels
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM tensorflow/tensorflow:latest-gpu
WORKDIR /app

# Refresh the package index and install the build toolchain in ONE layer.
# With separate RUN steps Docker can reuse a stale cached `apt-get update`
# layer together with a changed install list, which makes installs fail.
# git is listed once here (it used to be installed by two separate steps).
RUN apt-get update -y \
    && apt-get install -y \
        make wget git gcc g++ lhasa libgmp-dev libmpfr-dev libmpc-dev \
        flex bison gettext texinfo ncurses-dev autoconf rsync \
    && rm -rf /var/lib/apt/lists/*

COPY ./app .
RUN pip install -r requirements.txt

# Prints the contents of /app into the build log (debugging aid).
RUN ls -la

# NOTE(review): run.py is not visible in this change set; it is executed at
# build time exactly as before. Confirm that it exists in ./app and that it is
# meant to run during the image build rather than at container start.
RUN python run.py

# Declare the port before the start command; Streamlit is told to listen on it.
EXPOSE 8080
CMD ["streamlit", "run", "app.py", "--server.port=8080"]
README.md CHANGED
@@ -1,10 +1,13 @@
1
  ---
2
- title: GPT Docker
3
- emoji: 👀
4
- colorFrom: indigo
5
- colorTo: blue
6
- sdk: docker
 
 
7
  pinned: false
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: MyGPT
3
+ emoji: 🏢
4
+ colorFrom: gray
5
+ colorTo: yellow
6
+ sdk: streamlit
7
+ sdk_version: 1.17.0
8
+ app_file: app.py
9
  pinned: false
10
+ license: apache-2.0
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app/7fd9b7dc7c8bd6c62fec03923bb04b6ba28e6c736017b43e67f3e808c92438c7.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09bac093b25dfef86ce79a7325d893cf826fb0de7bcdf122b8ab0ec5692425c4
3
+ size 3500346
app/VectorStore/chroma-collections.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26740390ba936629dcb106c9948b55752ac6c763915bf0e7ad4a1273ac9ba084
3
+ size 745
app/VectorStore/chroma-embeddings.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b79fe220db8ba8a7a77617dd295bf51f5438257e676b00ddd28a1fcf62c757fb
3
+ size 240218512
app/VectorStore/index/id_to_uuid_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f64ceca9dc08a1fa345c23f4012132ef11f0c472dd64c6f80e445a65f29f536e
3
+ size 104759
app/VectorStore/index/id_to_uuid_c7afa6d4-2cfe-493a-927d-a30b6f6ed996.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a848348a950ba8fcd605e4b04a2fd81f938587e0234abbb3b4db7ad693bf8d0d
3
+ size 1445149
app/VectorStore/index/index_52984ff2-d9c3-459b-acc0-0b0aa559d50f.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:518d2424133c568190196628f29226cca2e9c198616b03990347f2ce0c11ea7e
3
+ size 10402668
app/VectorStore/index/index_c7afa6d4-2cfe-493a-927d-a30b6f6ed996.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6278607ce39747eb13ed7e117bc114cc231f4b8f4638269a2f3536e4729e338
3
+ size 143197708
app/VectorStore/index/index_metadata_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8e41a80750fa05ef9dd656d26239c0d9f06c1d278825090d6fd4f9645756d35
3
+ size 74
app/VectorStore/index/index_metadata_c7afa6d4-2cfe-493a-927d-a30b6f6ed996.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff10f5fcfc8d53e24171409bb560e5a7addbac73f57a25d3c8c71a3a01645d80
3
+ size 74
app/VectorStore/index/uuid_to_id_52984ff2-d9c3-459b-acc0-0b0aa559d50f.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90ccb948caa40144e512f5ab70fb9d8fe4b08dcf18b3d7e4c368bee1f0283a47
3
+ size 122516
app/VectorStore/index/uuid_to_id_c7afa6d4-2cfe-493a-927d-a30b6f6ed996.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91cdf0f3a7086cd4c5c96b8e54b8dd930a0aa4f29ab587809eb061a1511c5cb2
3
+ size 1689782
app/app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import langchain
3
+ import load_model
4
+ import utils as ut
5
+ import chromadb
6
+ from chromadb.config import Settings
7
+ import os
8
+
9
# Streamlit entry script: lets the user pick a language model and a document
# collection from the persisted Chroma store, then answers one question with a
# retrieval QA chain. Streamlit re-runs this whole script on every interaction.
persist_directory = load_model.persist_directory

st.title('myGPT')
st.header('An GPT example brought to you by Heiko Wagner')

st.markdown('*\"Parametrised models are simply functions that depend on inputs and trainable parameters. There is no fundamental difference between the two, except that trainable parameters are shared across training samples whereas the input varies from sample to sample.\"* [(Yann LeCun, Deep learning course)](https://atcold.github.io/pytorch-Deep-Learning/en/week02/02-1/#Parametrised-models)')

st.latex(r'''h(\boldsymbol x, \boldsymbol w)= \sum_{k=1}^{K}\boldsymbol w_{k} \phi_{k}(\boldsymbol x)''')

import torch
torch.cuda.empty_cache()

# This selector chooses the MODEL. It previously reused the label of the
# document selector further down, so the page showed two identically labelled
# drop-downs with different meanings.
model_type = st.selectbox(
    'Select the language model to be used to answer your question',
    ('OpenAI', 'local_model'))

if model_type == 'OpenAI':
    openai_key = st.text_area('OpenAI Key:', '')
    if not openai_key.strip():
        # On the first render the field is empty. Exporting an empty key and
        # constructing the OpenAI model would only fail, so end this script
        # run here and wait for the user to enter a key.
        st.info('Please enter your OpenAI key to continue.')
        st.stop()
    os.environ["OPENAI_API_KEY"] = openai_key.strip()
    llm = load_model.load_openai_model()
else:
    llm = load_model.load_gpu_model("decapoda-research/llama-7b-hf")

# Open the persisted Chroma store only to list the available collections.
client = chromadb.Client(Settings(chroma_db_impl="duckdb+parquet",
                                  persist_directory=persist_directory))

collections = tuple(collection.name for collection in client.list_collections())
print(collections)
option = st.selectbox(
    'Select the Documents to be used to answer your question',
    collections)

st.write('You selected:', option)

chain = load_model.create_chain(llm, collection=option)
try:
    query = st.text_area('Ask a question:', 'Hallo how are you today?')
    result = chain({"query": query})
    ut.format_result_set(result)
finally:
    # Drop the chain and release cached CUDA memory after every run, even when
    # answering the question raised.
    del chain
    torch.cuda.empty_cache()
app/fce3ad7ed281744c55c5838aa5677e15468ae6bae3aa5fe76e6aac20d2c19f2f.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a98a5f02d12b69d1b62de62ffef9fc98fbe229f2369e002e3f47ca78bdefeb3e
3
+ size 29884059
app/load_docs.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%
2
+ from load_vectors import load_from_web, load_and_split, create_and_add
3
+
4
+ docs = [
5
+ "https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/85ec0278-bf2f-4392-94b9-c086717fa8f6_axa_urd2022_accessible_va.pdf"
6
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/d97a94ff-a848-474b-b802-c22afc8311cd_axa_half_year_2022_financial_report.pdf"
7
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/51954d53-c0cf-4f90-84f7-53ee27dbe4e6_axa_ri2021_va_accessible.pdf"
8
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/e3f52b5e-d4aa-4fc8-8bcd-f432df86e804_axa_urd_2021_en_accessible.pdf"
9
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/4f303cec-a12d-480b-accb-7b56f706f60e_axa-ri2020-en-accessible.pdf"
10
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/d6aef906-e41f-40c7-ac9c-29044e98939d_AXA_URD_2020_EN_accessible_b.pdf"
11
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F47b47783-ddd1-47c3-912f-bc6e318ebbb3_axa_half_year_2020_financial_report.pdf"
12
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Ffd5a8bd8-9ef1-40eb-b953-c268c0ab4bf9_axa-ri2019-en-accessible.pdf"
13
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F90abd6c7-80c4-48ef-84bf-1d038670d9b7_axa-urd2019-en.pdf"
14
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F3ef6a9cc-6215-4e58-83b5-756774ef5b73_axa_half_year_2019_financial_report2.pdf"
15
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F0a5e0bd9-78f2-4ef8-b32c-1d3d35ddce80_axa-ri2018-en-accessible.pdf"
16
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F913d1869-3d11-4eb2-b013-4caedb747fab_axa-ddr2018b-en.pdf"
17
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F476f79c9-c0c7-4ce3-88ed-4f99b3d22259_axa_half_year_2018_financial_report.pdf"
18
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F01f6966b-c26c-4935-91dc-1b296511ba8c_axa_ri2017_gb_planche.pdf"
19
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fec440dc9-69df-41b5-a3af-5b5f4fc29670_axa_reference_document_2017c.pdf"
20
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F72c59a61-8124-4066-a86d-bece5f41ce53_axa_us_statutory_statements_fy17.pdf"
21
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F9237d78f-c1ac-43ca-9623-d0382a5aaaec_axa_us_statutory_statements_3q17.pdf"
22
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Ffdd639e0-2ea6-4c3f-8a42-8bca4359e858_axa_us_statutory_statements_2q17.pdf"
23
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F877e30a9-df72-480f-ac25-edcfcd4049c2_axa_us_statutory_statements_1q17.pdf"
24
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F6f3108fd-fabc-4dc6-a984-23eb0dca7a19_axa-ri2016-en_01.pdf"
25
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F268bab7a-2e78-4843-844a-fd3ad2d340bc_axa_reference_document_2016.pdf"
26
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fd2f66d05-e6ad-47a2-ab72-9bc727bd49c2_axa_half_year_2016_financial_report.pdf"
27
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F7a5f0af2-03c3-4a82-a077-46fdc52e5685_axa_us_statutory_statements_fy16.pdf"
28
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fdd643342-e975-473d-af54-c64491252a19_axa_us_statutory_statements_3q16.pdf"
29
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F53e10a7a-9348-40dc-935e-01fb0a1d0441_axa_us_statutory_statements_2q16.pdf"
30
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F8906bad6-14cb-4594-b7c0-029f8fc2172d_axa_us_statutory_statements_1q16.pdf"
31
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F2d8e525a-1161-453a-a14f-817f0f070f79_axa_activity_cr_report_2015_accessible.pdf"
32
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F73719a96-c3b1-456b-abaf-63b80c06968c_axa_reference_document_2015.pdf"
33
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fe2936c1a-65f0-40db-b34b-bef9c27e91c0_axa_2015_half_year_financial_report.pdf"
34
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fdaac2a30-a3b8-4839-9331-041805836a6f_axa_us_statutory_statements_fy15.pdf"
35
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F61a6c98a-08fb-4cb1-b6c0-4d1ef0f72aa9_axa_us_statutory_statements_3q15.pdf"
36
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fe0689ffc-5aec-4388-a10e-26d1d1a7eb9a_axa_us_statutory_statements_2q15.pdf"
37
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fbfa8ef5b-6533-4773-8502-5170a51735c9_axa_us_statutory_statements_1q15.pdf"
38
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fbbb94857-f5d4-4afd-81d0-e85666883936_axa_annual+financial+report_2014.pdf"
39
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fb826839c-76c9-48c7-b8c1-9eda7fe3b032_axa_activity_csr_report_2014_va_b.pdf"
40
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fac63e0f9-60ba-47c2-9e23-f1d25731c7ee_axa_2014_half_year_financial_report.pdf"
41
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fb7db2a55-8eb6-4131-bc03-698e4bc756d6_axa_us_statutory_statements_fy2014.pdf"
42
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F417b48df-c585-4cb6-9d10-719d81228756_axa_us_statutory_statements_3q14.pdf"
43
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F4586d978-6fb8-4c44-b934-e15c14143b6d_axa_us_statutory_statements_2q14.pdf"
44
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F29cc016e-aff9-49c5-bb04-d55598aab844_axa_us_statutory_statements_1q14.pdf"
45
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F25fee379-c187-40e7-bf3a-5fe1423cec0f_axa_annual+financial+report_2013.pdf"
46
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F37614ed4-1fe0-483e-a0eb-0acefdedd065_axa_2013_half_year_financial_report.pdf"
47
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Febb51afc-af0e-4aff-9494-5b852b3233e5_axa_us_statutory_statements_fy2013.pdf"
48
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fdd4cd68e-710e-4e00-ba96-c7560d738a43_axa_us_statutory_statements_3q13.pdf"
49
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Feab93a81-859a-487c-941c-11e4ce08d5f0_axa_us_statutory_statements_2q13.pdf"
50
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F879d09d7-8ff7-4c43-9a24-7ee44ee55404_axa_us_statutory_statements_1q13.pdf"
51
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F9224097f-d703-4efd-8050-6553ef4336f8_axa_annual+financial+report_2012b.pdf"
52
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fcb9d1279-948a-4238-ab8f-754e9e10f2a5_axa_activity_csr_report_2012b_va.pdf"
53
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fbed64ef2-5078-425a-a616-ffb1947e0b65_axa_2012_half_year_financial_report.pdf"
54
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fa148165a-b818-4ea1-b7ee-7949cc86ff9a_axa_us_statutory_statements_fy2012.pdf"
55
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F601ed5e8-189d-4e59-b0d4-d1c1eedb2ffe_axa_us_statutory_statements_3q12.pdf"
56
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F9637b674-c740-4115-9c90-3a8827516cc0_axa_us_statutory_statements_2q12.pdf"
57
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F319a5964-ea51-4d51-96c8-cf6838047b72_axa_us_statutory_statements_1q12.pdf"
58
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F0b75d1fe-4b11-4462-9883-4e3bc7532bf4_axa_annual+financial+report_2011.pdf"
59
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F17e098ab-3335-4ee1-ade7-058517a952c4_axa_activity_csr_report_2011_vab.pdf"
60
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F03996908-7e75-465e-8082-b44f02da326a_axa_us_statutory_statements_fy2011.pdf"
61
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fea813e84-7d08-4cf2-bea1-3a01fd4bdf62_axa_us_statutory_statements_3q11.pdf"
62
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fda28b496-275b-451d-bffd-108714eb2c39_axa_us_statutory_statements_2q11.pdf"
63
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fd6aa2b39-896e-47cf-9882-9985c8d44276_axa_us_statutory_statements_1q11.pdf"
64
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fc76f47f4-0917-4fb1-b1ae-78e2a4fbcef5_axa_annual+financial+report_2010c+%281%29.pdf"
65
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F68c2771e-5ed8-41d9-bb59-f37f6403b4bf_axa_activity_csr_report_2010_vac.pdf"
66
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F9b7812a1-a1a2-4e17-9bf2-88c11aac4e08_axa_2010_half_year_financial_report.pdf.pdf"
67
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F3f4cc3bd-6823-4ccf-a918-f0c9d9063c2a_axa_us_statutory_statements_fy2010.pdf"
68
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F5a8a399f-9a0a-4475-8fbd-5bc0ca1dffe6_axa_us_statutory_statements_3q10.pdf"
69
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F67ba6c6d-7063-41d4-ad4e-75d86b15da43_axa_us_statutory_statements_1q10.pdf"
70
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fa151a532-da4f-4d12-8b3b-9867df4f9724_axa_annual+financial+report_2009.pdf"
71
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F5f89c4dd-d935-47fe-ac69-23fada9bfc96_axa_2009_half_year_financial_report.pdf"
72
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Ff322c77a-e2a2-4cd7-88a0-edd8ad4cd021_axa_annual+financial+report_2008.pdf"
73
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fb7f88f05-053a-460b-aa4d-6163d3644cfc_axa_activity_csr_report_2008_vad.pdf"
74
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Ff657a419-e066-485a-a58e-1d2870a6a035_axa_2008_half_year_financial_report.pdf"
75
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F820b669d-b3b5-4c14-986d-2223e2bcbcfb_axa_annual+financial+report_2007.pdf"
76
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F2741b55e-9349-47ef-9704-3cbca0853b76_axa_activity_csr_report_2007.pdf"
77
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F42159571-a3f1-4d36-b4b9-a5493fcc95e3_axa_2007_half_year_financial_report.pdf"
78
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F40f9da2a-1bcb-4e5e-9380-18f64b3ce86e_axa_annual+financial+report_2006b.pdf"
79
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fa6a14e0c-62cd-4812-a2d0-3a0aae8c862d_axa_activity_csr_report_2006b.pdf"
80
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Faf242b66-1308-4331-829f-fa91bd0db43e_axa_annual+financial+report_2005.pdf"
81
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F6b3313d1-3b72-4f28-bc7b-f445b9b3190c_axa_activity_csr_report_2005.pdf"
82
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F048b0d90-b28f-4fc3-bc30-b02cf8e0d6fc_axa_annual+financial+report_2004_ci.pdf"
83
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F02acbd05-712f-4b73-93f0-dffa37e2faa2_axa_annual+financial+report_2004_ci.pdf"
84
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fcf0b84a5-6da9-499d-985f-530559940494_axa_activity_csr_report_2004.pdf"
85
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fafa397b5-d613-40f3-a28f-81bde0d461e2_axa_annual+financial+report_2003.pdf"
86
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F2a31ebb9-ba04-4998-982e-9dd336abca1f_axa_annual+financial+report_2002.pdf"
87
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F51e5f017-954b-4f81-84f9-15a086bf1e33_axa_annual+financial+report_2002_ci01.pdf"
88
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F05fea38c-c626-4aaf-9ead-10e9c8f849c1_axa_annual+financial+report_2002_ci02.pdf"
89
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F3e41d00d-42b3-4bfd-babc-8b9f76b73d95_axa_activity_csr_report_2002.pdf"
90
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F4f2676f4-d36c-4d2e-b088-ef26878ff28b_axa_annual+financial+report_2001.pdf"
91
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Ffdfa0941-6fb5-4ce8-9f42-3b0152e72ce2_axa_activity_csr_report_2001.pdf"
92
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F98922150-f1c5-4df4-9006-a8ef17a514cd_axa_annual+financial+report_2000.pdf"
93
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F1a645a94-1c56-43be-9a5a-94495e902a23_axa_activity_csr_report_2000.pdf"
94
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F51c109ca-2bba-45b3-a03b-78fdd16faeca_axa_annual+financial+report_1999.pdf"
95
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F21cdedc6-c082-4ae6-abb3-4c57f0cf9dd8_axa_annual+financial+report_1998.pdf"
96
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fd3132d9d-b656-470d-ba4f-fe8d51586e4b_axa_activity_csr_report_1998.pdf"
97
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F746d88d3-a4f7-4126-b539-a5da353f53d7_axa_annual+financial+report_1997.pdf"
98
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F97097956-6cd5-4fb4-a6ea-9aeb32fd9023_axa_activity_csr_report_1997.pdf"
99
+ ]
100
+
101
+
102
+ docs_tarifs= [
103
+ "https://www.axa.de/site/axa-de/get/documents_E1805589786/axade/medien/privatkunden/fahrzeugversicherungen/kfz-versicherung/start-and-drive/start-and-drive-versicherungsbedingungen.pdf",
104
+ "https://www.axa.de/site/axa-de/get/documents_E-298610932/axade/medien/privatkunden/haftpflicht-und-recht/rechtsschutz/versicherungsbedingungen-roland-rechtsschutz.pdf",
105
+ "https://www.axa.de/site/axa-de/get/documents_E101690225/axade/medien/privatkunden/haftpflicht-und-recht/private%20haftpflichtversicherung/privathaftpflicht-versicherungsbedingungen-leistungspaket-S-5-mio.pdf",
106
+ "https://www.axa.de/site/axa-de/get/documents_E-1067805129/axade/medien/privatkunden/haftpflicht-und-recht/private%20haftpflichtversicherung/privathaftpflicht-versicherungsbedingungen-leistungspaket-S-10-mio.pdf",
107
+ "https://www.axa.de/site/axa-de/get/documents_E1026401604/axade/medien/privatkunden/haftpflicht-und-recht/private%20haftpflichtversicherung/privathaftpflicht-versicherungsbedingungen-leistungspaket-M.pdf",
108
+ "https://www.axa.de/site/axa-de/get/documents_E1450059874/axade/medien/privatkunden/haftpflicht-und-recht/private%20haftpflichtversicherung/privathaftpflicht-versicherungsbedingungen-leistungspaket-L.pdf",
109
+ "https://www.axa.de/site/axa-de/get/documents_E1636759799/axade/medien/privatkunden/haus-und-wohnen/hausratversicherung/hausrat-versicherungsbedingungen-S.pdf",
110
+ "https://www.axa.de/site/axa-de/get/documents_E1147682774/axade/medien/privatkunden/haus-und-wohnen/hausratversicherung/hausrat-versicherungsbedingungen-M-20%25.pdf",
111
+ "https://www.axa.de/site/axa-de/get/documents_E1642308493/axade/medien/privatkunden/haus-und-wohnen/hausratversicherung/hausrat-versicherungsbedingungen-M-40%25.pdf",
112
+ "https://www.axa.de/site/axa-de/get/documents_E1883536226/axade/medien/privatkunden/haus-und-wohnen/hausratversicherung/hausrat-versicherungsbedingungen-L.pdf",
113
+ ]
114
+
115
+ docs_list = [
116
+ "https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/85ec0278-bf2f-4392-94b9-c086717fa8f6_axa_urd2022_accessible_va.pdf"
117
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/e3f52b5e-d4aa-4fc8-8bcd-f432df86e804_axa_urd_2021_en_accessible.pdf"
118
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com/d6aef906-e41f-40c7-ac9c-29044e98939d_AXA_URD_2020_EN_accessible_b.pdf"
119
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Ffd5a8bd8-9ef1-40eb-b953-c268c0ab4bf9_axa-ri2019-en-accessible.pdf"
120
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F0a5e0bd9-78f2-4ef8-b32c-1d3d35ddce80_axa-ri2018-en-accessible.pdf"
121
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F01f6966b-c26c-4935-91dc-1b296511ba8c_axa_ri2017_gb_planche.pdf"
122
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F6f3108fd-fabc-4dc6-a984-23eb0dca7a19_axa-ri2016-en_01.pdf"
123
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fe2936c1a-65f0-40db-b34b-bef9c27e91c0_axa_2015_half_year_financial_report.pdf"
124
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fbbb94857-f5d4-4afd-81d0-e85666883936_axa_annual+financial+report_2014.pdf"
125
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F25fee379-c187-40e7-bf3a-5fe1423cec0f_axa_annual+financial+report_2013.pdf"
126
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F9224097f-d703-4efd-8050-6553ef4336f8_axa_annual+financial+report_2012b.pdf"
127
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F0b75d1fe-4b11-4462-9883-4e3bc7532bf4_axa_annual+financial+report_2011.pdf"
128
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fc76f47f4-0917-4fb1-b1ae-78e2a4fbcef5_axa_annual+financial+report_2010c+%281%29.pdf"
129
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fa151a532-da4f-4d12-8b3b-9867df4f9724_axa_annual+financial+report_2009.pdf"
130
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Ff322c77a-e2a2-4cd7-88a0-edd8ad4cd021_axa_annual+financial+report_2008.pdf"
131
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F820b669d-b3b5-4c14-986d-2223e2bcbcfb_axa_annual+financial+report_2007.pdf"
132
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F40f9da2a-1bcb-4e5e-9380-18f64b3ce86e_axa_annual+financial+report_2006b.pdf"
133
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Faf242b66-1308-4331-829f-fa91bd0db43e_axa_annual+financial+report_2005.pdf"
134
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F02acbd05-712f-4b73-93f0-dffa37e2faa2_axa_annual+financial+report_2004_ci.pdf"
135
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2Fafa397b5-d613-40f3-a28f-81bde0d461e2_axa_annual+financial+report_2003.pdf"
136
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F2a31ebb9-ba04-4998-982e-9dd336abca1f_axa_annual+financial+report_2002.pdf"
137
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F4f2676f4-d36c-4d2e-b088-ef26878ff28b_axa_annual+financial+report_2001.pdf"
138
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F98922150-f1c5-4df4-9006-a8ef17a514cd_axa_annual+financial+report_2000.pdf"
139
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F51c109ca-2bba-45b3-a03b-78fdd16faeca_axa_annual+financial+report_1999.pdf"
140
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F21cdedc6-c082-4ae6-abb3-4c57f0cf9dd8_axa_annual+financial+report_1998.pdf"
141
+ ,"https://www-axa-com.cdn.axa-contento-118412.eu/www-axa-com%2F746d88d3-a4f7-4126-b539-a5da353f53d7_axa_annual+financial+report_1997.pdf"
142
+ ]
143
+
144
+
145
# Ingestion script: fetch two sets of PDF URLs and store each set in its own
# collection via create_and_add.
# NOTE: `docs` is rebound here, so the long URL list assigned to `docs` at the
# top of this file is never passed to load_from_web; only `docs_tarifs` and
# `docs_list` are ingested.
# NOTE(review): load_from_web / load_and_split are imported from load_vectors
# and are not visible here — presumably they download the PDFs and split them
# into text chunks; confirm against their definitions.
docs = load_from_web(docs_tarifs)
sub_docs = load_and_split(docs, chunk_size=700)

# %%
# Store the insurance-terms chunks under the "axa_terms" collection prefix.
create_and_add("axa_terms", sub_docs, "hkunlp/instructor-large")

# Second set: the URLs in docs_list, split with load_and_split's default
# chunk size.
docs = load_from_web(docs_list)
sub_docs = load_and_split(docs)

# %%
# Store the report chunks under the "axa_gpt" collection prefix.
create_and_add("axa_gpt", sub_docs, "hkunlp/instructor-large")
app/load_model.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%
2
+ # git clone https://huggingface.co/nyanko7/LLaMA-7B
3
+ # python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu117/torch2.00/index.html
4
+ # apt-get update && apt-get install ffmpeg libsm6 libxext6 -y
5
+ from transformers import LlamaForCausalLM, LlamaTokenizer
6
+ from langchain.embeddings import LlamaCppEmbeddings, HuggingFaceInstructEmbeddings, OpenAIEmbeddings
7
+ from langchain.llms import LlamaCpp, HuggingFacePipeline
8
+ from langchain.vectorstores import Chroma
9
+ from transformers import pipeline
10
+ import torch
11
+ torch.backends.cuda.matmul.allow_tf32 = True
12
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
13
+ import streamlit as st
14
+ import cloudpickle
15
+ import os
16
+ from langchain.chains import RetrievalQA
17
+ from langchain.indexes import VectorstoreIndexCreator
18
+ from langchain.llms import OpenAI
19
+
20
+ from chromadb.config import Settings
21
+ import chromadb
22
+
23
+ import pathlib
24
+
25
# Directory containing this file; model caches and the vector store live
# next to the code.
current_path = str(pathlib.Path(__file__).parent.resolve())
print(current_path)
persist_directory = current_path + "/VectorStore"

# %%
# SECURITY: an OpenAI API key used to be hard-coded on this line and was
# committed to the repository. Treat that key as leaked and revoke it.
# The key must now come from the OPENAI_API_KEY environment variable (app.py
# sets it from user input). The module-level `llm` is kept for interactive
# cell use, but it is only constructed when a key is already present, so
# importing this module no longer needs (or ships) a secret.
llm = OpenAI(temperature=0.9) if os.environ.get("OPENAI_API_KEY") else None
llm
33
+
34
@st.cache_resource
def load_cpu_model():
    """Does not work atm, bc cpu model is not persisted

    Build a llama.cpp LLM from the quantised LLaMA-7B file under
    ./llama.cpp/models/ and return it.

    The previous version also built an unused ``device_map`` and an unused
    ``LlamaCppEmbeddings`` instance from the same model file; neither was
    returned or referenced, so both were removed.

    Returns:
        The configured ``LlamaCpp`` instance (cached by Streamlit).
    """
    model_path = "./llama.cpp/models/LLaMA-7B/ggml-model-q4_0.bin"
    llm = LlamaCpp(
        model_path=model_path,
        n_ctx=6000,
        n_threads=16,
        temperature=0.6,
        top_p=0.95,
    )
    return llm
49
+
50
@st.cache_resource(max_entries=1)
def load_gpu_model(used_model = "chavinlo/gpt4-x-alpaca"):
    """Load a LLaMA-family causal LM and wrap it as a LangChain LLM.

    Args:
        used_model: Hugging Face model id (or local path) passed to both the
            tokenizer and the model loader.

    Returns:
        A ``HuggingFacePipeline`` wrapping a text-generation pipeline.

    With CUDA available the model is loaded in 8-bit with an automatic device
    map. Without CUDA it is loaded on the CPU in float32 and WITHOUT a
    quantization config. The previous CPU branch assigned ``torch_dtype`` and
    ``load_in_8bit = False`` but never passed them on, and still requested an
    8-bit bitsandbytes config, which needs a GPU.
    """
    torch.cuda.empty_cache()
    tokenizer = LlamaTokenizer.from_pretrained(used_model)

    if torch.cuda.is_available():
        placement = {
            "device_map": "auto",
            # atm no offload, bc device_map="auto"
            "quantization_config": BitsAndBytesConfig(
                load_in_8bit=True,
                llm_int8_enable_fp32_cpu_offload=True,
            ),
        }
    else:
        placement = {
            "device_map": {"": "cpu"},
            "torch_dtype": torch.float32,
        }

    base_model = LlamaForCausalLM.from_pretrained(
        used_model,
        offload_folder=current_path + "/models_gpt/",
        low_cpu_mem_usage=True,
        cache_dir=current_path + "/mymodels/",
        **placement,
    )
    # NOTE(review): max_length=8000 is kept from the original — confirm it is
    # compatible with the context window of the models loaded here.
    pipe = pipeline(
        "text-generation",
        model=base_model,
        tokenizer=tokenizer,
        max_length=8000,
        temperature=0.6,
        top_p=0.95,
        repetition_penalty=1.2,
    )
    llm = HuggingFacePipeline(pipeline=pipe)
    return llm
86
+
87
+ #@st.cache_resource
88
def load_openai_model():
    """Create and return a LangChain ``OpenAI`` LLM with temperature 0.9."""
    openai_llm = OpenAI(temperature=0.9)
    return openai_llm
90
+
91
@st.cache_resource
def load_openai_embedding():
    """Create and return an ``OpenAIEmbeddings`` instance (cached by Streamlit)."""
    embedder = OpenAIEmbeddings()
    return embedder
94
+
95
@st.cache_resource
def load_embedding(model_name):
    """Return Instructor embeddings for ``model_name`` (cached by Streamlit).

    Model files are cached in the ``mymodels/`` folder next to this file.
    """
    model_cache = current_path + "/mymodels/"
    return HuggingFaceInstructEmbeddings(
        query_instruction="Represent the query for retrieval: ",
        model_name=model_name,
        cache_folder=model_cache,
    )
103
+
104
def load_vectorstore(model_name, collection):
    """Open the persisted Chroma collection ``collection``.

    Args:
        model_name: embedding model name handed to ``load_embedding``.
        collection: name of the Chroma collection to open.

    Returns:
        A ``Chroma`` vector store bound to ``persist_directory``.
    """
    store_kwargs = {"embedding_function": load_embedding(model_name)}
    store_kwargs["client_settings"] = Settings(
        chroma_db_impl="duckdb+parquet",
        persist_directory=persist_directory,
        anonymized_telemetry=False,
    )
    return Chroma(
        collection_name=collection,
        persist_directory=persist_directory,
        **store_kwargs,
    )
119
+
120
def add_document_to_vectorstore(vectorstore, docs):
    """Placeholder: not implemented; does nothing and returns ``None``."""
    return None
122
+
123
def create_chain(_llm, collection, model_name = "hkunlp/instructor-large"):
    """Build a "stuff" RetrievalQA chain over one Chroma collection.

    Args:
        _llm: the language model the chain should use.
        collection: name of the Chroma collection to retrieve from.
        model_name: embedding model used to open the vector store.

    Returns:
        A ``RetrievalQA`` chain that also returns its source documents; the
        retriever is configured with ``k=4``.
    """
    store = load_vectorstore(model_name, collection)
    top_k_retriever = store.as_retriever(search_kwargs={"k": 4})
    return RetrievalQA.from_chain_type(
        llm=_llm,
        chain_type="stuff",
        retriever=top_k_retriever,
        return_source_documents=True,
    )
128
+ # %%
app/load_vectors.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%
2
+ import nltk
3
+ from langchain.indexes import VectorstoreIndexCreator
4
+ from langchain.text_splitter import CharacterTextSplitter, NLTKTextSplitter
5
+ from langchain.document_loaders import OnlinePDFLoader
6
+ from langchain.vectorstores import Chroma
7
+ from langchain.embeddings import LlamaCppEmbeddings, HuggingFaceInstructEmbeddings
8
+ from chromadb.config import Settings
9
+ import chromadb
10
+ from chromadb.utils import embedding_functions
11
+ from hashlib import sha256
12
+ import cloudpickle
13
+ import logging
14
+ import os
15
+ from load_model import load_embedding
16
+ import torch
17
+ import re
18
+ import pathlib
19
+
20
# Absolute path of the directory that contains this module.
current_path = str(pathlib.Path(__file__).parent.resolve())

# NOTE(review): presumably set to tolerate a duplicated OpenMP runtime being
# loaded by several native libraries -- confirm it is still required.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# Download the NLTK "punkt" data at import time (NLTKTextSplitter is used below).
nltk.download("punkt")

# Folder next to this module where the Chroma store is persisted.
persist_directory = f"{current_path}/VectorStore"

# Root logger, shared by the functions in this module.
logger = logging.getLogger()
27
+
28
+
29
+ # %%
30
+
31
def create_collection(collection_name, model_name, client):
    """Not used atm.

    Get or create ``collection_name`` on ``client`` with an instructor
    embedding function for ``model_name``.

    Args:
        collection_name: Name of the collection to get or create.
        model_name: Model name for ``InstructorEmbeddingFunction``.
        client: Object exposing ``get_or_create_collection``.

    Returns:
        Always ``True``.
    """
    # Run the embedding model on the GPU when CUDA is available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    embedding_fn = embedding_functions.InstructorEmbeddingFunction(
        model_name=model_name,
        device=device,
    )
    client.get_or_create_collection(collection_name, embedding_function=embedding_fn)
    return True
41
+
42
def create_and_add(collection_name, sub_docs, model_name):
    """Embed ``sub_docs`` into a persisted Chroma collection.

    The effective collection name is ``collection_name`` + ``"_"`` +
    ``model_name`` with every non-alphanumeric character removed, so each
    embedding model writes to its own collection.

    Args:
        collection_name: Base name of the target collection.
        sub_docs: Documents (already split into chunks) to add.
        model_name: Embedding model name, resolved via ``load_embedding``.

    Returns:
        The ``Chroma`` vector store the documents were added to.
    """
    client_settings = chromadb.config.Settings(
        chroma_db_impl="duckdb+parquet",
        persist_directory=persist_directory,
        anonymized_telemetry=False,
    )

    # No standalone ``chromadb.Client`` is created here: the previous one was
    # never used and only opened an extra handle on the same persist directory.
    # NOTE(review): with the duckdb+parquet backend a stale extra client may
    # write its own state back on teardown -- confirm against the pinned
    # chromadb version.
    collection_name = collection_name + "_" + re.sub('[^A-Za-z0-9]+', '', model_name)

    embeddings = load_embedding(model_name)
    # Use the module-level logger, consistent with the rest of this module.
    logger.info(f"Adding documents to {collection_name}")

    store_kwargs = dict(
        collection_name=collection_name,
        embedding_function=embeddings,
        client_settings=client_settings,
        persist_directory=persist_directory,
    )

    vectorstore = Chroma(**store_kwargs)
    vectorstore.add_documents(documents=sub_docs, embedding=embeddings)
    vectorstore.persist()

    # Test Vectorstore: reopen the collection from disk and run a sample
    # query to check that the persisted data can be read back.
    vectorstore2 = Chroma(**store_kwargs)
    print( vectorstore2.similarity_search_with_score(query="What are AXAs green Goals?", k=4) )

    return vectorstore
73
+
74
def load_from_web(urls, cache=True):
    """Load the PDFs behind ``urls`` as documents, using a pickle cache.

    The cache file lives in the current working directory and is named after
    the SHA-256 hash of ``str(urls)``. When the documents are downloaded, the
    cache file is (re)written regardless of ``cache``.

    Args:
        urls: Sequence of PDF URLs. Documents are matched to URLs by
            position when the metadata is assigned.
        cache: When true and the cache file exists, read the documents from
            it instead of downloading them.

    Returns:
        The list of loaded documents, each with its ``metadata`` replaced by
        ``source``, ``url`` and ``company`` entries.

    Raises:
        IndexError: If there are more documents than URLs.
    """
    docs_list = urls
    filename=f"./{sha256(str(urls).encode('utf-8')).hexdigest()}.pkl"

    isFile = os.path.isfile(filename)

    if cache and isFile:
        logger.info("Using Cache")
        # NOTE(security): unpickling can execute arbitrary code; this is only
        # acceptable because the cache file is produced by this function.
        # The context manager closes the handle (it used to be leaked).
        with open(filename, "rb") as pikd:
            docs = cloudpickle.load(pikd)
    else:
        loaders=[OnlinePDFLoader(pdf) for pdf in docs_list]
        docs = []
        for loader in loaders:
            docs.extend(loader.load())
        with open(filename, 'wb') as output:
            cloudpickle.dump(docs, output)

    #update metadata
    for i, doc in enumerate(docs):
        doc.metadata = {'source': docs_list[i], 'url': docs_list[i], 'company':'AXA'}
    return docs
98
+
99
def load_and_split(docs, chunk_size=700):
    """Split ``docs`` into smaller documents with ``NLTKTextSplitter``.

    Args:
        docs: Documents to split.
        chunk_size: Chunk size passed to the splitter (default 700).

    Returns:
        The documents produced by ``split_documents``; no chunk overlap is
        configured.
    """
    splitter = NLTKTextSplitter(chunk_overlap=0, chunk_size=chunk_size)
    return splitter.split_documents(docs)
app/requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ git+https://github.com/hwchase17/langchain.git
2
+ git+https://github.com/huggingface/transformers.git
3
+ git+https://github.com/chroma-core/chroma.git
4
+ accelerate
5
+ bitsandbytes
6
+ InstructorEmbedding
7
+ cloudpickle
8
+ streamlit
9
+ requests==2.20.1
10
+ latex2markdown
11
+ openai
12
+ unstructured
app/result.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5b61c0f601cb65f2779f18fdbe5bf47f88d61f23dfbe2afdafb64c951207da8
3
+ size 429
app/run.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%
2
+ import load_model
3
+ import cloudpickle
4
+
5
+ # %%
6
# Alternative: use the GPU model instead of OpenAI by uncommenting the next line.
# llm = load_model.load_gpu_model("decapoda-research/llama-7b-hf")
llm = load_model.load_openai_model()

# %%
# Build the retrieval chain over the AXA collection and run one sample query.
chain = load_model.create_chain(
    llm,
    collection="axa_terms_hkunlpinstructorlarge",
)
result = chain({"query": "What are AXAs green Goals?"})
print(result)
app/st_render_doc.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import utils as ut
3
+ import cloudpickle
4
+
5
# Pickled result of a previous chain run, read from the working directory.
filename="./result.pkl"

# NOTE(security): unpickling can execute arbitrary code; only load result
# files that were produced by this project.
# The context manager closes the file deterministically instead of relying
# on ``del`` to drop the handle.
with open(filename, "rb") as pikd:
    result = dict( cloudpickle.load(pikd) )

# Render the answer (and optionally the source documents) in Streamlit.
ut.format_result_set(result)
app/utils.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import latex2markdown
3
+ from langchain.docstore.document import Document
4
+
5
def format_document(document: Document):
    """Return ``document`` as a plain dict for display.

    TODO: Implement a nice style
    """
    as_dict = document.dict()
    return as_dict
8
+
9
def format_result_set(result):
    """Render a chain result in the Streamlit page.

    Args:
        result: Mapping with a ``"result"`` entry (the answer text, converted
            from LaTeX to Markdown before display) and a
            ``"source_documents"`` entry (iterable of documents).
    """
    answer_markdown = latex2markdown.LaTeX2Markdown(result["result"]).to_markdown()
    st.write(answer_markdown)

    show_sources = st.checkbox('Show source documents')
    # Looked up unconditionally, exactly as before, so a missing key still
    # raises even when the checkbox is not ticked.
    source_documents = result["source_documents"]
    if not show_sources:
        return

    st.write('Source Documents:')
    for source in source_documents:
        st.write(format_document(source))
18
+
19
+
docker-compose.yaml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: "3.9"
2
+ services:
3
+ streamlit_app:
4
+ build: .
5
+ tty: true
6
+ ports:
7
+ - 8080:8080
8
+ deploy:
9
+ resources:
10
+ reservations:
11
+ devices:
12
+ - capabilities: [gpu]
13
+ dev_app:
14
+ image: tensorflow/tensorflow:latest-gpu
15
+ tty: true
16
+ volumes:
17
+ - ./app:/app
18
+ - ./root:/root
19
+ deploy:
20
+ resources:
21
+ reservations:
22
+ devices:
23
+ - capabilities: [gpu]