Spaces:
Runtime error
Runtime error
Ashwini
committed on
Commit
•
42738ce
1
Parent(s):
9e27273
Add application file
Browse files
- Dockerfile +10 -0
- llm_inference/.env +1 -0
- llm_inference/.env.example +1 -0
- llm_inference/poetry.lock +325 -0
- llm_inference/pyproject.toml +17 -0
- llm_inference/src/llm_inference/__init__.py +3 -0
- llm_inference/src/llm_inference/configs/configs.json +4 -0
- llm_inference/src/llm_inference/configs/logging.conf +28 -0
- llm_inference/src/llm_inference/helper_functions/__init__.py +16 -0
- llm_inference/src/llm_inference/helper_functions/call_llm.py +96 -0
- llm_inference/src/llm_inference/helper_functions/load_and_extract.py +66 -0
- llm_inference/src/llm_inference/inference.py +69 -0
- llm_inference/src/llm_inference/prompts/comparision.txt +18 -0
- llm_inference/src/llm_inference/prompts/extract_from_html.txt +55 -0
- llm_inference/tests/__init__.py +0 -0
- main.py +28 -0
- requirements.txt +86 -0
Dockerfile
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Python 3.12 on Alpine as the runtime base image.
FROM python:3.12-alpine

# Work in a dedicated directory instead of the filesystem root, so the
# application files do not mix with the base image's system directories.
WORKDIR /app

# Install dependencies before copying the sources: this layer is then reused
# from the build cache whenever only application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the application sources.
# NOTE(review): this copies the whole build context, including
# llm_inference/.env, unless a .dockerignore excludes it - confirm that
# secrets are not baked into the image.
COPY . .

CMD ["python3", "main.py"]
|
10 |
+
|
llm_inference/.env
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
GROQ_API_KEY=<redacted: a live credential was committed here - revoke and rotate it, and keep .env out of version control>
|
llm_inference/.env.example
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
GROQ_API_KEY=
|
llm_inference/poetry.lock
ADDED
@@ -0,0 +1,325 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
|
2 |
+
|
3 |
+
[[package]]
|
4 |
+
name = "annotated-types"
|
5 |
+
version = "0.7.0"
|
6 |
+
description = "Reusable constraint types to use with typing.Annotated"
|
7 |
+
optional = false
|
8 |
+
python-versions = ">=3.8"
|
9 |
+
files = [
|
10 |
+
{file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"},
|
11 |
+
{file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"},
|
12 |
+
]
|
13 |
+
|
14 |
+
[[package]]
|
15 |
+
name = "anyio"
|
16 |
+
version = "4.6.2.post1"
|
17 |
+
description = "High level compatibility layer for multiple asynchronous event loop implementations"
|
18 |
+
optional = false
|
19 |
+
python-versions = ">=3.9"
|
20 |
+
files = [
|
21 |
+
{file = "anyio-4.6.2.post1-py3-none-any.whl", hash = "sha256:6d170c36fba3bdd840c73d3868c1e777e33676a69c3a72cf0a0d5d6d8009b61d"},
|
22 |
+
{file = "anyio-4.6.2.post1.tar.gz", hash = "sha256:4c8bc31ccdb51c7f7bd251f51c609e038d63e34219b44aa86e47576389880b4c"},
|
23 |
+
]
|
24 |
+
|
25 |
+
[package.dependencies]
|
26 |
+
exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
|
27 |
+
idna = ">=2.8"
|
28 |
+
sniffio = ">=1.1"
|
29 |
+
typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
|
30 |
+
|
31 |
+
[package.extras]
|
32 |
+
doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
|
33 |
+
test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"]
|
34 |
+
trio = ["trio (>=0.26.1)"]
|
35 |
+
|
36 |
+
[[package]]
|
37 |
+
name = "certifi"
|
38 |
+
version = "2024.8.30"
|
39 |
+
description = "Python package for providing Mozilla's CA Bundle."
|
40 |
+
optional = false
|
41 |
+
python-versions = ">=3.6"
|
42 |
+
files = [
|
43 |
+
{file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"},
|
44 |
+
{file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"},
|
45 |
+
]
|
46 |
+
|
47 |
+
[[package]]
|
48 |
+
name = "distro"
|
49 |
+
version = "1.9.0"
|
50 |
+
description = "Distro - an OS platform information API"
|
51 |
+
optional = false
|
52 |
+
python-versions = ">=3.6"
|
53 |
+
files = [
|
54 |
+
{file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
|
55 |
+
{file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
|
56 |
+
]
|
57 |
+
|
58 |
+
[[package]]
|
59 |
+
name = "exceptiongroup"
|
60 |
+
version = "1.2.2"
|
61 |
+
description = "Backport of PEP 654 (exception groups)"
|
62 |
+
optional = false
|
63 |
+
python-versions = ">=3.7"
|
64 |
+
files = [
|
65 |
+
{file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
|
66 |
+
{file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
|
67 |
+
]
|
68 |
+
|
69 |
+
[package.extras]
|
70 |
+
test = ["pytest (>=6)"]
|
71 |
+
|
72 |
+
[[package]]
|
73 |
+
name = "groq"
|
74 |
+
version = "0.11.0"
|
75 |
+
description = "The official Python library for the groq API"
|
76 |
+
optional = false
|
77 |
+
python-versions = ">=3.7"
|
78 |
+
files = [
|
79 |
+
{file = "groq-0.11.0-py3-none-any.whl", hash = "sha256:e328531c979542e563668c62260aec13b43a6ee0ca9e2fb22dff1d26f8c8ce54"},
|
80 |
+
{file = "groq-0.11.0.tar.gz", hash = "sha256:dbb9aefedf388ddd4801ec7bf3eba7f5edb67948fec0cd2829d97244059f42a7"},
|
81 |
+
]
|
82 |
+
|
83 |
+
[package.dependencies]
|
84 |
+
anyio = ">=3.5.0,<5"
|
85 |
+
distro = ">=1.7.0,<2"
|
86 |
+
httpx = ">=0.23.0,<1"
|
87 |
+
pydantic = ">=1.9.0,<3"
|
88 |
+
sniffio = "*"
|
89 |
+
typing-extensions = ">=4.7,<5"
|
90 |
+
|
91 |
+
[[package]]
|
92 |
+
name = "h11"
|
93 |
+
version = "0.14.0"
|
94 |
+
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
|
95 |
+
optional = false
|
96 |
+
python-versions = ">=3.7"
|
97 |
+
files = [
|
98 |
+
{file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
|
99 |
+
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
|
100 |
+
]
|
101 |
+
|
102 |
+
[[package]]
|
103 |
+
name = "httpcore"
|
104 |
+
version = "1.0.6"
|
105 |
+
description = "A minimal low-level HTTP client."
|
106 |
+
optional = false
|
107 |
+
python-versions = ">=3.8"
|
108 |
+
files = [
|
109 |
+
{file = "httpcore-1.0.6-py3-none-any.whl", hash = "sha256:27b59625743b85577a8c0e10e55b50b5368a4f2cfe8cc7bcfa9cf00829c2682f"},
|
110 |
+
{file = "httpcore-1.0.6.tar.gz", hash = "sha256:73f6dbd6eb8c21bbf7ef8efad555481853f5f6acdeaff1edb0694289269ee17f"},
|
111 |
+
]
|
112 |
+
|
113 |
+
[package.dependencies]
|
114 |
+
certifi = "*"
|
115 |
+
h11 = ">=0.13,<0.15"
|
116 |
+
|
117 |
+
[package.extras]
|
118 |
+
asyncio = ["anyio (>=4.0,<5.0)"]
|
119 |
+
http2 = ["h2 (>=3,<5)"]
|
120 |
+
socks = ["socksio (==1.*)"]
|
121 |
+
trio = ["trio (>=0.22.0,<1.0)"]
|
122 |
+
|
123 |
+
[[package]]
|
124 |
+
name = "httpx"
|
125 |
+
version = "0.27.2"
|
126 |
+
description = "The next generation HTTP client."
|
127 |
+
optional = false
|
128 |
+
python-versions = ">=3.8"
|
129 |
+
files = [
|
130 |
+
{file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"},
|
131 |
+
{file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"},
|
132 |
+
]
|
133 |
+
|
134 |
+
[package.dependencies]
|
135 |
+
anyio = "*"
|
136 |
+
certifi = "*"
|
137 |
+
httpcore = "==1.*"
|
138 |
+
idna = "*"
|
139 |
+
sniffio = "*"
|
140 |
+
|
141 |
+
[package.extras]
|
142 |
+
brotli = ["brotli", "brotlicffi"]
|
143 |
+
cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
|
144 |
+
http2 = ["h2 (>=3,<5)"]
|
145 |
+
socks = ["socksio (==1.*)"]
|
146 |
+
zstd = ["zstandard (>=0.18.0)"]
|
147 |
+
|
148 |
+
[[package]]
|
149 |
+
name = "idna"
|
150 |
+
version = "3.10"
|
151 |
+
description = "Internationalized Domain Names in Applications (IDNA)"
|
152 |
+
optional = false
|
153 |
+
python-versions = ">=3.6"
|
154 |
+
files = [
|
155 |
+
{file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"},
|
156 |
+
{file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"},
|
157 |
+
]
|
158 |
+
|
159 |
+
[package.extras]
|
160 |
+
all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]
|
161 |
+
|
162 |
+
[[package]]
|
163 |
+
name = "pydantic"
|
164 |
+
version = "2.9.2"
|
165 |
+
description = "Data validation using Python type hints"
|
166 |
+
optional = false
|
167 |
+
python-versions = ">=3.8"
|
168 |
+
files = [
|
169 |
+
{file = "pydantic-2.9.2-py3-none-any.whl", hash = "sha256:f048cec7b26778210e28a0459867920654d48e5e62db0958433636cde4254f12"},
|
170 |
+
{file = "pydantic-2.9.2.tar.gz", hash = "sha256:d155cef71265d1e9807ed1c32b4c8deec042a44a50a4188b25ac67ecd81a9c0f"},
|
171 |
+
]
|
172 |
+
|
173 |
+
[package.dependencies]
|
174 |
+
annotated-types = ">=0.6.0"
|
175 |
+
pydantic-core = "2.23.4"
|
176 |
+
typing-extensions = [
|
177 |
+
{version = ">=4.12.2", markers = "python_version >= \"3.13\""},
|
178 |
+
{version = ">=4.6.1", markers = "python_version < \"3.13\""},
|
179 |
+
]
|
180 |
+
|
181 |
+
[package.extras]
|
182 |
+
email = ["email-validator (>=2.0.0)"]
|
183 |
+
timezone = ["tzdata"]
|
184 |
+
|
185 |
+
[[package]]
|
186 |
+
name = "pydantic-core"
|
187 |
+
version = "2.23.4"
|
188 |
+
description = "Core functionality for Pydantic validation and serialization"
|
189 |
+
optional = false
|
190 |
+
python-versions = ">=3.8"
|
191 |
+
files = [
|
192 |
+
{file = "pydantic_core-2.23.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:b10bd51f823d891193d4717448fab065733958bdb6a6b351967bd349d48d5c9b"},
|
193 |
+
{file = "pydantic_core-2.23.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4fc714bdbfb534f94034efaa6eadd74e5b93c8fa6315565a222f7b6f42ca1166"},
|
194 |
+
{file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63e46b3169866bd62849936de036f901a9356e36376079b05efa83caeaa02ceb"},
|
195 |
+
{file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed1a53de42fbe34853ba90513cea21673481cd81ed1be739f7f2efb931b24916"},
|
196 |
+
{file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cfdd16ab5e59fc31b5e906d1a3f666571abc367598e3e02c83403acabc092e07"},
|
197 |
+
{file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:255a8ef062cbf6674450e668482456abac99a5583bbafb73f9ad469540a3a232"},
|
198 |
+
{file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a7cd62e831afe623fbb7aabbb4fe583212115b3ef38a9f6b71869ba644624a2"},
|
199 |
+
{file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f09e2ff1f17c2b51f2bc76d1cc33da96298f0a036a137f5440ab3ec5360b624f"},
|
200 |
+
{file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e38e63e6f3d1cec5a27e0afe90a085af8b6806ee208b33030e65b6516353f1a3"},
|
201 |
+
{file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0dbd8dbed2085ed23b5c04afa29d8fd2771674223135dc9bc937f3c09284d071"},
|
202 |
+
{file = "pydantic_core-2.23.4-cp310-none-win32.whl", hash = "sha256:6531b7ca5f951d663c339002e91aaebda765ec7d61b7d1e3991051906ddde119"},
|
203 |
+
{file = "pydantic_core-2.23.4-cp310-none-win_amd64.whl", hash = "sha256:7c9129eb40958b3d4500fa2467e6a83356b3b61bfff1b414c7361d9220f9ae8f"},
|
204 |
+
{file = "pydantic_core-2.23.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:77733e3892bb0a7fa797826361ce8a9184d25c8dffaec60b7ffe928153680ba8"},
|
205 |
+
{file = "pydantic_core-2.23.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b84d168f6c48fabd1f2027a3d1bdfe62f92cade1fb273a5d68e621da0e44e6d"},
|
206 |
+
{file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df49e7a0861a8c36d089c1ed57d308623d60416dab2647a4a17fe050ba85de0e"},
|
207 |
+
{file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ff02b6d461a6de369f07ec15e465a88895f3223eb75073ffea56b84d9331f607"},
|
208 |
+
{file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:996a38a83508c54c78a5f41456b0103c30508fed9abcad0a59b876d7398f25fd"},
|
209 |
+
{file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d97683ddee4723ae8c95d1eddac7c192e8c552da0c73a925a89fa8649bf13eea"},
|
210 |
+
{file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:216f9b2d7713eb98cb83c80b9c794de1f6b7e3145eef40400c62e86cee5f4e1e"},
|
211 |
+
{file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f783e0ec4803c787bcea93e13e9932edab72068f68ecffdf86a99fd5918878b"},
|
212 |
+
{file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d0776dea117cf5272382634bd2a5c1b6eb16767c223c6a5317cd3e2a757c61a0"},
|
213 |
+
{file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d5f7a395a8cf1621939692dba2a6b6a830efa6b3cee787d82c7de1ad2930de64"},
|
214 |
+
{file = "pydantic_core-2.23.4-cp311-none-win32.whl", hash = "sha256:74b9127ffea03643e998e0c5ad9bd3811d3dac8c676e47db17b0ee7c3c3bf35f"},
|
215 |
+
{file = "pydantic_core-2.23.4-cp311-none-win_amd64.whl", hash = "sha256:98d134c954828488b153d88ba1f34e14259284f256180ce659e8d83e9c05eaa3"},
|
216 |
+
{file = "pydantic_core-2.23.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f3e0da4ebaef65158d4dfd7d3678aad692f7666877df0002b8a522cdf088f231"},
|
217 |
+
{file = "pydantic_core-2.23.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f69a8e0b033b747bb3e36a44e7732f0c99f7edd5cea723d45bc0d6e95377ffee"},
|
218 |
+
{file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:723314c1d51722ab28bfcd5240d858512ffd3116449c557a1336cbe3919beb87"},
|
219 |
+
{file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb2802e667b7051a1bebbfe93684841cc9351004e2badbd6411bf357ab8d5ac8"},
|
220 |
+
{file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d18ca8148bebe1b0a382a27a8ee60350091a6ddaf475fa05ef50dc35b5df6327"},
|
221 |
+
{file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33e3d65a85a2a4a0dc3b092b938a4062b1a05f3a9abde65ea93b233bca0e03f2"},
|
222 |
+
{file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:128585782e5bfa515c590ccee4b727fb76925dd04a98864182b22e89a4e6ed36"},
|
223 |
+
{file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:68665f4c17edcceecc112dfed5dbe6f92261fb9d6054b47d01bf6371a6196126"},
|
224 |
+
{file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:20152074317d9bed6b7a95ade3b7d6054845d70584216160860425f4fbd5ee9e"},
|
225 |
+
{file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9261d3ce84fa1d38ed649c3638feefeae23d32ba9182963e465d58d62203bd24"},
|
226 |
+
{file = "pydantic_core-2.23.4-cp312-none-win32.whl", hash = "sha256:4ba762ed58e8d68657fc1281e9bb72e1c3e79cc5d464be146e260c541ec12d84"},
|
227 |
+
{file = "pydantic_core-2.23.4-cp312-none-win_amd64.whl", hash = "sha256:97df63000f4fea395b2824da80e169731088656d1818a11b95f3b173747b6cd9"},
|
228 |
+
{file = "pydantic_core-2.23.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7530e201d10d7d14abce4fb54cfe5b94a0aefc87da539d0346a484ead376c3cc"},
|
229 |
+
{file = "pydantic_core-2.23.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:df933278128ea1cd77772673c73954e53a1c95a4fdf41eef97c2b779271bd0bd"},
|
230 |
+
{file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cb3da3fd1b6a5d0279a01877713dbda118a2a4fc6f0d821a57da2e464793f05"},
|
231 |
+
{file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42c6dcb030aefb668a2b7009c85b27f90e51e6a3b4d5c9bc4c57631292015b0d"},
|
232 |
+
{file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:696dd8d674d6ce621ab9d45b205df149399e4bb9aa34102c970b721554828510"},
|
233 |
+
{file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2971bb5ffe72cc0f555c13e19b23c85b654dd2a8f7ab493c262071377bfce9f6"},
|
234 |
+
{file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8394d940e5d400d04cad4f75c0598665cbb81aecefaca82ca85bd28264af7f9b"},
|
235 |
+
{file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0dff76e0602ca7d4cdaacc1ac4c005e0ce0dcfe095d5b5259163a80d3a10d327"},
|
236 |
+
{file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7d32706badfe136888bdea71c0def994644e09fff0bfe47441deaed8e96fdbc6"},
|
237 |
+
{file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed541d70698978a20eb63d8c5d72f2cc6d7079d9d90f6b50bad07826f1320f5f"},
|
238 |
+
{file = "pydantic_core-2.23.4-cp313-none-win32.whl", hash = "sha256:3d5639516376dce1940ea36edf408c554475369f5da2abd45d44621cb616f769"},
|
239 |
+
{file = "pydantic_core-2.23.4-cp313-none-win_amd64.whl", hash = "sha256:5a1504ad17ba4210df3a045132a7baeeba5a200e930f57512ee02909fc5c4cb5"},
|
240 |
+
{file = "pydantic_core-2.23.4-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d4488a93b071c04dc20f5cecc3631fc78b9789dd72483ba15d423b5b3689b555"},
|
241 |
+
{file = "pydantic_core-2.23.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:81965a16b675b35e1d09dd14df53f190f9129c0202356ed44ab2728b1c905658"},
|
242 |
+
{file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ffa2ebd4c8530079140dd2d7f794a9d9a73cbb8e9d59ffe24c63436efa8f271"},
|
243 |
+
{file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:61817945f2fe7d166e75fbfb28004034b48e44878177fc54d81688e7b85a3665"},
|
244 |
+
{file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:29d2c342c4bc01b88402d60189f3df065fb0dda3654744d5a165a5288a657368"},
|
245 |
+
{file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5e11661ce0fd30a6790e8bcdf263b9ec5988e95e63cf901972107efc49218b13"},
|
246 |
+
{file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d18368b137c6295db49ce7218b1a9ba15c5bc254c96d7c9f9e924a9bc7825ad"},
|
247 |
+
{file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ec4e55f79b1c4ffb2eecd8a0cfba9955a2588497d96851f4c8f99aa4a1d39b12"},
|
248 |
+
{file = "pydantic_core-2.23.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:374a5e5049eda9e0a44c696c7ade3ff355f06b1fe0bb945ea3cac2bc336478a2"},
|
249 |
+
{file = "pydantic_core-2.23.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5c364564d17da23db1106787675fc7af45f2f7b58b4173bfdd105564e132e6fb"},
|
250 |
+
{file = "pydantic_core-2.23.4-cp38-none-win32.whl", hash = "sha256:d7a80d21d613eec45e3d41eb22f8f94ddc758a6c4720842dc74c0581f54993d6"},
|
251 |
+
{file = "pydantic_core-2.23.4-cp38-none-win_amd64.whl", hash = "sha256:5f5ff8d839f4566a474a969508fe1c5e59c31c80d9e140566f9a37bba7b8d556"},
|
252 |
+
{file = "pydantic_core-2.23.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a4fa4fc04dff799089689f4fd502ce7d59de529fc2f40a2c8836886c03e0175a"},
|
253 |
+
{file = "pydantic_core-2.23.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a7df63886be5e270da67e0966cf4afbae86069501d35c8c1b3b6c168f42cb36"},
|
254 |
+
{file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcedcd19a557e182628afa1d553c3895a9f825b936415d0dbd3cd0bbcfd29b4b"},
|
255 |
+
{file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f54b118ce5de9ac21c363d9b3caa6c800341e8c47a508787e5868c6b79c9323"},
|
256 |
+
{file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86d2f57d3e1379a9525c5ab067b27dbb8a0642fb5d454e17a9ac434f9ce523e3"},
|
257 |
+
{file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de6d1d1b9e5101508cb37ab0d972357cac5235f5c6533d1071964c47139257df"},
|
258 |
+
{file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1278e0d324f6908e872730c9102b0112477a7f7cf88b308e4fc36ce1bdb6d58c"},
|
259 |
+
{file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a6b5099eeec78827553827f4c6b8615978bb4b6a88e5d9b93eddf8bb6790f55"},
|
260 |
+
{file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e55541f756f9b3ee346b840103f32779c695a19826a4c442b7954550a0972040"},
|
261 |
+
{file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a5c7ba8ffb6d6f8f2ab08743be203654bb1aaa8c9dcb09f82ddd34eadb695605"},
|
262 |
+
{file = "pydantic_core-2.23.4-cp39-none-win32.whl", hash = "sha256:37b0fe330e4a58d3c58b24d91d1eb102aeec675a3db4c292ec3928ecd892a9a6"},
|
263 |
+
{file = "pydantic_core-2.23.4-cp39-none-win_amd64.whl", hash = "sha256:1498bec4c05c9c787bde9125cfdcc63a41004ff167f495063191b863399b1a29"},
|
264 |
+
{file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f455ee30a9d61d3e1a15abd5068827773d6e4dc513e795f380cdd59932c782d5"},
|
265 |
+
{file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1e90d2e3bd2c3863d48525d297cd143fe541be8bbf6f579504b9712cb6b643ec"},
|
266 |
+
{file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e203fdf807ac7e12ab59ca2bfcabb38c7cf0b33c41efeb00f8e5da1d86af480"},
|
267 |
+
{file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e08277a400de01bc72436a0ccd02bdf596631411f592ad985dcee21445bd0068"},
|
268 |
+
{file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f220b0eea5965dec25480b6333c788fb72ce5f9129e8759ef876a1d805d00801"},
|
269 |
+
{file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d06b0c8da4f16d1d1e352134427cb194a0a6e19ad5db9161bf32b2113409e728"},
|
270 |
+
{file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ba1a0996f6c2773bd83e63f18914c1de3c9dd26d55f4ac302a7efe93fb8e7433"},
|
271 |
+
{file = "pydantic_core-2.23.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:9a5bce9d23aac8f0cf0836ecfc033896aa8443b501c58d0602dbfd5bd5b37753"},
|
272 |
+
{file = "pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:78ddaaa81421a29574a682b3179d4cf9e6d405a09b99d93ddcf7e5239c742e21"},
|
273 |
+
{file = "pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:883a91b5dd7d26492ff2f04f40fbb652de40fcc0afe07e8129e8ae779c2110eb"},
|
274 |
+
{file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88ad334a15b32a791ea935af224b9de1bf99bcd62fabf745d5f3442199d86d59"},
|
275 |
+
{file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:233710f069d251feb12a56da21e14cca67994eab08362207785cf8c598e74577"},
|
276 |
+
{file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:19442362866a753485ba5e4be408964644dd6a09123d9416c54cd49171f50744"},
|
277 |
+
{file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:624e278a7d29b6445e4e813af92af37820fafb6dcc55c012c834f9e26f9aaaef"},
|
278 |
+
{file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f5ef8f42bec47f21d07668a043f077d507e5bf4e668d5c6dfe6aaba89de1a5b8"},
|
279 |
+
{file = "pydantic_core-2.23.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:aea443fffa9fbe3af1a9ba721a87f926fe548d32cab71d188a6ede77d0ff244e"},
|
280 |
+
{file = "pydantic_core-2.23.4.tar.gz", hash = "sha256:2584f7cf844ac4d970fba483a717dbe10c1c1c96a969bf65d61ffe94df1b2863"},
|
281 |
+
]
|
282 |
+
|
283 |
+
[package.dependencies]
|
284 |
+
typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
|
285 |
+
|
286 |
+
[[package]]
|
287 |
+
name = "python-dotenv"
|
288 |
+
version = "1.0.1"
|
289 |
+
description = "Read key-value pairs from a .env file and set them as environment variables"
|
290 |
+
optional = false
|
291 |
+
python-versions = ">=3.8"
|
292 |
+
files = [
|
293 |
+
{file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"},
|
294 |
+
{file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"},
|
295 |
+
]
|
296 |
+
|
297 |
+
[package.extras]
|
298 |
+
cli = ["click (>=5.0)"]
|
299 |
+
|
300 |
+
[[package]]
|
301 |
+
name = "sniffio"
|
302 |
+
version = "1.3.1"
|
303 |
+
description = "Sniff out which async library your code is running under"
|
304 |
+
optional = false
|
305 |
+
python-versions = ">=3.7"
|
306 |
+
files = [
|
307 |
+
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
|
308 |
+
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
|
309 |
+
]
|
310 |
+
|
311 |
+
[[package]]
|
312 |
+
name = "typing-extensions"
|
313 |
+
version = "4.12.2"
|
314 |
+
description = "Backported and Experimental Type Hints for Python 3.8+"
|
315 |
+
optional = false
|
316 |
+
python-versions = ">=3.8"
|
317 |
+
files = [
|
318 |
+
{file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
|
319 |
+
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
|
320 |
+
]
|
321 |
+
|
322 |
+
[metadata]
|
323 |
+
lock-version = "2.0"
|
324 |
+
python-versions = "^3.10"
|
325 |
+
content-hash = "3870166940d342433359f9ca17ab03c970e49921d2fdd8964ec735f3b4411ab7"
|
llm_inference/pyproject.toml
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
name = "hackathon-mlh"
version = "0.1.0"
description = ""
authors = ["Prashjeev Rai <prashjeevrai@gmail.com>"]
readme = "README.md"
# The importable package in this project is src/llm_inference; "include" must
# name that directory or Poetry cannot find the package it is asked to build.
packages = [{ include = "llm_inference", from = "src" }]

[tool.poetry.dependencies]
python = "^3.10"
groq = "^0.11.0"
python-dotenv = "^1.0.1"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
|
llm_inference/src/llm_inference/__init__.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
from .inference import html_parser, required_content_bool
|
2 |
+
|
3 |
+
__all__ = ["html_parser", "required_content_bool"]
|
llm_inference/src/llm_inference/configs/configs.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_name": "llama3-70b-8192",
|
3 |
+
"temperature": 0
|
4 |
+
}
|
llm_inference/src/llm_inference/configs/logging.conf
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[loggers]
|
2 |
+
keys=root,mainLogger
|
3 |
+
|
4 |
+
[handlers]
|
5 |
+
keys=fileHandler
|
6 |
+
|
7 |
+
[formatters]
|
8 |
+
keys=simpleFormatter
|
9 |
+
|
10 |
+
[logger_root]
|
11 |
+
level=INFO
|
12 |
+
handlers=fileHandler
|
13 |
+
|
14 |
+
[logger_mainLogger]
|
15 |
+
level=INFO
|
16 |
+
handlers=fileHandler
|
17 |
+
qualname=__main__
|
18 |
+
propagate=0
|
19 |
+
|
20 |
+
[handler_fileHandler]
|
21 |
+
class=FileHandler
|
22 |
+
level=INFO
|
23 |
+
formatter=simpleFormatter
|
24 |
+
args=(r"ai-backend\llm_inference\src\llm_inference\logging\program.log",'w')
|
25 |
+
|
26 |
+
[formatter_simpleFormatter]
|
27 |
+
format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
|
28 |
+
datefmt=%Y-%m-%d %H:%M:%S
|
llm_inference/src/llm_inference/helper_functions/__init__.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .load_and_extract import (
|
2 |
+
load_prompt,
|
3 |
+
save_responses_to_file,
|
4 |
+
load_config,
|
5 |
+
load_html_file
|
6 |
+
)
|
7 |
+
|
8 |
+
from .call_llm import PromptProcessor
|
9 |
+
|
10 |
+
__all__ = [
|
11 |
+
"load_prompt",
|
12 |
+
"save_responses_to_file",
|
13 |
+
"load_config",
|
14 |
+
"PromptProcessor",
|
15 |
+
"load_html_file"
|
16 |
+
]
|
llm_inference/src/llm_inference/helper_functions/call_llm.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dotenv import load_dotenv
|
2 |
+
import asyncio
|
3 |
+
import logging
|
4 |
+
from groq import Groq
|
5 |
+
|
6 |
+
from .load_and_extract import load_config, load_prompt
|
7 |
+
|
8 |
+
|
9 |
+
class PromptProcessor:
    """Send a system prompt plus user content to a Groq chat model.

    The model name and temperature are read from the configuration loaded in
    ``__init__``; the system prompt text is loaded from a file on every call.
    """

    def __init__(self, config_path: str):
        """Load environment variables and configuration, then create the client.

        Args:
            config_path: Path passed to ``load_config``.
        """
        # Load environment variables before the client is constructed
        # (presumably so Groq() can pick up GROQ_API_KEY - confirm).
        load_dotenv()
        # Load configuration
        self.config = load_config(config_path)
        # Initialize the LLM
        self.client = Groq()

    async def _async_prompting(
        self,
        content: str,
        prompt_file_path: str,
        logger: logging.Logger,
    ):
        """Run one chat completion and return the model's text.

        Args:
            content: Text sent as the ``user`` message.
            prompt_file_path: File whose content becomes the ``system`` message.
            logger: Logger to report to; the module logger is used when ``None``.

        Returns:
            The content of the first choice's message. On any failure the
            caught exception object is *returned* (not raised).
        """
        # Honour the caller's logger; fall back to the module logger only
        # when none was supplied.
        logger = logger or logging.getLogger(__name__)
        try:
            try:
                # Load the prompt
                prompt = load_prompt(file_path=prompt_file_path)
            except Exception as e:
                logger.fatal(f"Failed to load prompt: {e}")
                raise

            # The client call is synchronous. Running it in a worker thread
            # keeps the event loop free, which is what allows the
            # asyncio.wait_for() timeout in prompting_with_timeout to fire.
            chat_completion = await asyncio.to_thread(
                self.client.chat.completions.create,
                model=self.config.get("model_name"),
                temperature=self.config.get("temperature"),
                messages=[
                    {
                        "role": "system",
                        "content": prompt,
                    },
                    {
                        "role": "user",
                        "content": content,
                    },
                ],
            )

            response = chat_completion.choices[0].message.content
            logger.info("Result: Output received")
            return response
        except Exception as e:
            logger.error(f"Error in _async_prompting: {repr(e)}")
            # NOTE(review): the exception is handed back as the result rather
            # than re-raised; kept as-is because callers outside this file may
            # rely on it - confirm whether raising would be preferable.
            return e

    async def prompting_with_timeout(
        self,
        prompt_path: str,
        data_string: str,
        logger: logging.Logger,
        timeout: int = 500,
    ):
        """Call ``_async_prompting`` and give up after ``timeout`` seconds.

        Args:
            prompt_path: File containing the system prompt.
            data_string: Text sent as the user message.
            logger: Logger to report to; the module logger is used when ``None``.
            timeout: Maximum time to wait, in seconds.

        Returns:
            Whatever ``_async_prompting`` returns, or ``None`` when the call
            timed out or raised an unexpected error (both are only logged).
        """
        logger = logger or logging.getLogger(__name__)
        try:
            # Call the async prompt function with a timeout
            output = await asyncio.wait_for(
                self._async_prompting(
                    content=data_string,
                    prompt_file_path=prompt_path,
                    logger=logger,
                ),
                timeout,
            )
            return output
        except asyncio.TimeoutError:
            logger.warning("Result: Operation timed out")
        except Exception as e:
            logger.warning(f"Unexpected error: {e}")
|
79 |
+
|
80 |
+
|
81 |
+
# Usage Example
|
82 |
+
if __name__ == "__main__":
    # Placeholder values: point these at a real config and prompt file.
    # NOTE(review): this module uses a relative import, so it presumably has
    # to be started as a module (python -m ...) rather than as a file - confirm.
    config_path = "src/post_ocr_extraction/configs/config.json"
    processor = PromptProcessor(config_path)
    prompt_path = "path/to/prompt/file"
    data_string = "Your data string here"
    timeout = 10  # seconds

    # Run the asynchronous method
    async def run():
        # Keyword arguments: the third positional parameter is `logger`,
        # so passing `timeout` positionally would land in the wrong slot.
        result = await processor.prompting_with_timeout(
            prompt_path=prompt_path,
            data_string=data_string,
            logger=logging.getLogger(__name__),
            timeout=timeout,
        )
        print(result)

    asyncio.run(run())
|
llm_inference/src/llm_inference/helper_functions/load_and_extract.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
|
4 |
+
|
5 |
+
def load_prompt(file_path: str) -> str:
    """Return the entire text of the file at ``file_path``, decoded as UTF-8."""
    with open(file_path, mode="r", encoding="utf-8") as prompt_file:
        prompt_text = prompt_file.read()
    return prompt_text
|
8 |
+
|
9 |
+
|
10 |
+
def save_responses_to_file(
    responses,
    filename: str,
    directory: str,
):
    """Write numbered responses to ``directory/filename`` as UTF-8 text.

    Args:
        responses: An iterable of responses. A single ``str`` or ``dict`` is
            treated as one response rather than being iterated character by
            character (or key by key).
        filename: Name of the output file.
        directory: Directory to write into; created if it does not exist.

    Each entry is written as ``Response Number <i>:`` followed by the
    response (dicts are rendered as indented JSON, everything else with
    ``str()``) and a blank line. An existing file is overwritten.
    """
    if isinstance(responses, (str, dict)):
        responses = [responses]

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    if not os.path.isdir(directory):
        os.makedirs(directory, exist_ok=True)

    filepath = os.path.join(directory, filename)

    # Explicit UTF-8: the platform default encoding may be unable to encode
    # arbitrary model output.
    with open(filepath, "w", encoding="utf-8") as file:
        for i, response in enumerate(responses):
            file.write(f"Response Number {i}:\n")

            # Convert response to JSON string
            if isinstance(response, dict):
                response_str = json.dumps(response, indent=4)
            else:
                response_str = str(response)  # Handle other types (e.g., strings)

            file.write(response_str + "\n\n")
|
33 |
+
|
34 |
+
|
35 |
+
def load_config(config_path: str):
    """Load configuration from a JSON file.

    Args:
        config_path: Path of the JSON configuration file.

    Returns:
        The parsed JSON document.

    Raises:
        FileNotFoundError: If ``config_path`` does not exist.
        ValueError: If the file does not contain valid JSON.
    """
    try:
        # Read as UTF-8 explicitly instead of relying on the platform default.
        with open(config_path, "r", encoding="utf-8") as file:
            return json.load(file)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"Configuration file not found: {config_path}") from err
    except json.JSONDecodeError as err:
        raise ValueError(f"Error decoding JSON from the file: {config_path}") from err
|
45 |
+
|
46 |
+
|
47 |
+
def load_html_file(file_path:str):
    """
    Read an HTML file and hand back its text.

    Args:
        file_path (str): The path to the HTML file.

    Returns:
        str: The file's content decoded as UTF-8, or None when the file is
        missing or any other error occurs (a message is printed in both
        failure cases).
    """
    html_content = None
    try:
        with open(file_path, 'r', encoding='utf-8') as html_file:
            html_content = html_file.read()
    except FileNotFoundError:
        print(f"Error: The file at path '{file_path}' was not found.")
    except Exception as e:
        print(f"Error: An unexpected error occurred - {e}")
    return html_content
|
llm_inference/src/llm_inference/inference.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging.config
|
2 |
+
import asyncio
|
3 |
+
from .helper_functions import PromptProcessor, load_html_file
|
4 |
+
import os
|
5 |
+
|
6 |
+
# Directory containing this module; every resource path below hangs off it.
_PACKAGE_DIR = os.path.dirname(__file__)


def _package_path(*parts):
    """Absolute path of a resource located relative to this module's directory."""
    return os.path.abspath(os.path.join(_PACKAGE_DIR, *parts))


# Configuration file paths
config_path = _package_path("configs", "configs.json")
# html_file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "html_for_testing", "test.html"))
html_parser_prompt_path = _package_path("prompts", "extract_from_html.txt")
comparision_prompt_path = _package_path("prompts", "comparision.txt")

# Load HTML content
# html_file = load_html_file(file_path=html_file_path)

# Configure logging from the INI-style file shipped next to the JSON config
logging_config_path = _package_path("configs", "logging.conf")
logging.config.fileConfig(logging_config_path)

# Module-level logger handed to the LLM helper calls below
logger = logging.getLogger(__name__)

# Single shared LLM processor, created once at import time
llm = PromptProcessor(config_path=config_path)
|
24 |
+
|
25 |
+
# Define an asynchronous function to run the prompt
|
26 |
+
async def html_parser(content: str) -> str:
    """
    Ask the LLM to extract the relevant information from HTML.

    Parameters:
        content: This is the html content from which the LLM extracts relevant
        information.
    Output:
        The relevant information in str format.
    Raises:
        Whatever exception the LLM helper reported.
    """
    extracted = await llm._async_prompting(
        content=content,
        prompt_file_path=html_parser_prompt_path,
        logger=logger,
    )
    # Defensive: the helper may hand back an exception object instead of
    # raising it. Surface it here so it is never treated as page content.
    if isinstance(extracted, BaseException):
        raise extracted
    return extracted
|
36 |
+
|
37 |
+
async def required_content_bool(content: str, user_req: str) -> bool:
    """
    Ask the LLM whether the page content satisfies the user's request.

    Parameters:
        content: The relevant information extracted from the webpage.
        user_req: The product/information the user requested
    Output:
        True or False indicating whether the webpage contains the product/information requested by the user
    Raises:
        ValueError: if the LLM reply is not "yes" or "no" (ignoring case,
        surrounding whitespace, quotes and punctuation).
        Whatever exception the LLM helper reported.
    """
    # Format the input for the LLM
    prompt = f"""
    Website Content: {content}
    User Query: {user_req}

    Compare the website content with the user's query and determine whether the website contains the requested information.
    Your response should be either "yes" or "no".
    """
    comparision_output = await llm._async_prompting(content=prompt, prompt_file_path=comparision_prompt_path, logger=logger)

    # Defensive: the helper may hand back an exception object instead of
    # raising it; re-raise so the real cause is not masked.
    if isinstance(comparision_output, BaseException):
        raise comparision_output

    if not isinstance(comparision_output, str):
        logger.critical("Did not return a boolean, raising value error")
        raise ValueError("Unexpected response from LLM. Expected 'yes' or 'no'.")

    # Models often answer "Yes." or '"no"'; drop surrounding whitespace,
    # quotes and punctuation before comparing.
    answer = comparision_output.strip(" \t\r\n\"'`*.,!:;").lower()

    # Check the LLM response and map to boolean
    if answer == "yes":
        logger.info("Returned positive boolean")
        return True
    if answer == "no":
        logger.info("Returned negative boolean")
        return False

    logger.critical("Did not return a boolean, raising value error")
    raise ValueError("Unexpected response from LLM. Expected 'yes' or 'no'.")
|
65 |
+
|
66 |
+
|
67 |
+
# Run the asynchronous function
|
68 |
+
# if __name__ == "__main__":
|
69 |
+
# print(asyncio.run(required_content_bool(content="This website contains information about dogs", user_req="I want cats")))
|
llm_inference/src/llm_inference/prompts/comparision.txt
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
You will receive two pieces of information:
|
2 |
+
|
3 |
+
Website Content: A long block of text containing the content of a webpage. This
|
4 |
+
includes all the information available on the page (e.g., titles, paragraphs,
|
5 |
+
lists, links, etc.).
|
6 |
+
|
7 |
+
User Query: A short string describing what the user is looking for. It may be a
|
8 |
+
specific topic, product, or piece of information.
|
9 |
+
|
10 |
+
Your task is to:
|
11 |
+
|
12 |
+
Compare the website content with the user's query.
|
13 |
+
Determine whether the website content contains the information that the user is
|
14 |
+
asking for.
|
15 |
+
Your response should be a single word: either "yes" or "no".
|
16 |
+
|
17 |
+
"Yes": If the website contains the information the user is asking for.
|
18 |
+
"No": If the website does not contain the requested information.
|
llm_inference/src/llm_inference/prompts/extract_from_html.txt
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
You are tasked with extracting useful and relevant information from the given
|
2 |
+
HTML file. Your goal is to analyze the content of the HTML page and provide a
|
3 |
+
structured summary of the key data, focusing only on the textual content and
|
4 |
+
excluding any HTML code, scripts, or style information. The following steps
|
5 |
+
outline the information you should extract:
|
6 |
+
|
7 |
+
Page Title and Meta Information:
|
8 |
+
|
9 |
+
Extract the title of the page (if available) and provide a brief description.
|
10 |
+
Extract any meta description and keywords, if provided, and summarize their
|
11 |
+
content.
|
12 |
+
Identify and summarize any other useful meta information, such as author,
|
13 |
+
language, or character set.
|
14 |
+
Headings and Structure:
|
15 |
+
|
16 |
+
List the main headings on the page (e.g., titles, sections) in the order they
|
17 |
+
appear.
|
18 |
+
Provide a summary of the page's structure, highlighting the major sections
|
19 |
+
and sub-sections based on these headings.
|
20 |
+
Text Content:
|
21 |
+
Extract and summarize all significant visible text on the page, excluding
|
22 |
+
navigation, footer, or boilerplate content.
|
23 |
+
Focus on paragraphs, lists, and other important written content that convey
|
24 |
+
the main message of the page.
|
25 |
+
Links and URLs:
|
26 |
+
|
27 |
+
Identify and summarize the key hyperlinks (URLs) on the page, including
|
28 |
+
important internal and external links.
|
29 |
+
Provide a brief description of what each link points to, where relevant.
|
30 |
+
Images:
|
31 |
+
|
32 |
+
Identify and summarize the images on the page, providing descriptions of their
|
33 |
+
content based on any available alt text or surrounding context (avoid listing
|
34 |
+
technical attributes such as "src" or "alt").
|
35 |
+
Summarize the role or context of the images as they appear in the content.
|
36 |
+
Tables and Lists:
|
37 |
+
|
38 |
+
If there are any tables or lists on the page, summarize their content in a
|
39 |
+
readable format, focusing on the main data or information being presented.
|
40 |
+
Other Relevant Information:
|
41 |
+
|
42 |
+
Identify and summarize any other key elements of the page, such as embedded
|
43 |
+
videos, forms, or other types of media or content that contribute to the main
|
44 |
+
purpose of the page.
|
45 |
+
Avoid including any code, scripts, or style details.
|
46 |
+
Important Notes:
|
47 |
+
Natural Language Focus: Your goal is to provide a structured summary in natural
|
48 |
+
language, focusing on the content that a human reader would find useful and
|
49 |
+
relevant.
|
50 |
+
Omit Technical Details: Do not include any technical details about the HTML
|
51 |
+
structure, such as tags, classes, or scripts. Focus purely on the meaningful
|
52 |
+
content and information presented to the user.
|
53 |
+
Ensure that the extracted information is clear, concise, and presented in a
|
54 |
+
logical order, helping the reader easily understand the key points of the
|
55 |
+
webpage's content.
|
llm_inference/tests/__init__.py
ADDED
File without changes
|
main.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI
|
2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
3 |
+
from fastapi import HTTPException
|
4 |
+
from llm_inference.src.llm_inference.inference import html_parser, required_content_bool
|
5 |
+
|
6 |
+
app = FastAPI()

# Configure CORS
# Wildcard origins must not be combined with credentialed requests: the CORS
# middleware documentation requires explicit origins/methods/headers whenever
# allow_credentials is True. Credentials stay disabled while the origin list
# is open. When specific origins are listed here, they can be re-enabled if
# the client needs them.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace with specific origins
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
16 |
+
|
17 |
+
@app.post("/run_prompt")
async def api_run_prompt(content: str, user_req: str):
    """Summarise the given HTML, then report whether it matches the request.

    `content` is passed to `html_parser`; its result and `user_req` are
    passed to `required_content_bool`. The boolean outcome is returned under
    the "response" key. Any exception from either step is converted into an
    HTTP 500 whose detail is the exception text.

    NOTE(review): both arguments are plain `str` parameters, which are
    presumably read from the query string - confirm that this is intended for
    full HTML documents.
    """
    try:
        page_summary = await html_parser(content)
        matches_request = await required_content_bool(page_summary, user_req)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {"response": matches_request}
|
25 |
+
|
26 |
+
if __name__ == "__main__":
    import uvicorn

    # Bind to all interfaces: uvicorn's default host is 127.0.0.1, which is
    # not reachable from outside a container.
    uvicorn.run(app, host="0.0.0.0", port=7860)
|
requirements.txt
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Runtime dependencies of main.py and the llm_inference package.
#
# The previous contents were a freeze of an operating-system Python
# environment (entries such as python-apt==2.9.0+ubuntu1, ubuntu-pro-client,
# ufw and unattended-upgrades) and did not list the packages the code
# actually imports.
# TODO(review): pin versions to match llm_inference/poetry.lock.
fastapi
groq
python-dotenv
uvicorn
|