docsa_HD
committed on
Commit
•
081f7f6
1
Parent(s):
1861746
Edit
Browse files
main.py
CHANGED
@@ -1,53 +1,25 @@
|
|
1 |
import os
|
2 |
import re
|
3 |
import gradio as gr
|
4 |
-
import pandas as pd
|
5 |
import requests
|
6 |
-
import json
|
7 |
-
import faiss
|
8 |
import nest_asyncio
|
9 |
import sys
|
10 |
import boto3
|
11 |
|
12 |
from pathlib import Path
|
13 |
from bs4 import BeautifulSoup
|
14 |
-
from typing import Union, List
|
15 |
-
import asyncio
|
16 |
from llama_index.core import (
|
17 |
-
StorageContext,
|
18 |
-
ServiceContext,
|
19 |
-
VectorStoreIndex,
|
20 |
Settings,
|
21 |
-
load_index_from_storage
|
22 |
)
|
23 |
-
|
24 |
-
from llama_index.core.llms import ChatMessage
|
25 |
-
from llama_index.core.schema import IndexNode
|
26 |
-
from llama_index.core.storage.docstore import SimpleDocumentStore
|
27 |
from llama_index.retrievers.bm25 import BM25Retriever
|
28 |
-
from llama_index.embeddings.openai import OpenAIEmbedding
|
29 |
-
# from llama_index.vector_stores.faiss import FaissVectorStore
|
30 |
from llama_index.core.retrievers import QueryFusionRetriever
|
31 |
-
from llama_index.core.workflow import Event, Context, Workflow, StartEvent, StopEvent, step
|
32 |
-
from llama_index.core.schema import NodeWithScore
|
33 |
-
from llama_index.core.prompts import PromptTemplate
|
34 |
-
from llama_index.core.response_synthesizers import ResponseMode, get_response_synthesizer
|
35 |
-
|
36 |
-
from prompts import CITATION_QA_TEMPLATE, CITATION_REFINE_TEMPLATE
|
37 |
|
38 |
|
39 |
from dotenv import load_dotenv
|
40 |
|
41 |
load_dotenv()
|
42 |
|
43 |
-
# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
44 |
-
|
45 |
-
# os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
|
46 |
-
#
|
47 |
-
# embed_model = OpenAIEmbedding(model_name="text-embedding-3-small")
|
48 |
-
# Settings.embed_model = embed_model
|
49 |
-
Settings.context_window = 20000
|
50 |
-
Settings.chunk_size = 2048
|
51 |
Settings.similarity_top_k = 20
|
52 |
|
53 |
# Параметри S3
|
@@ -92,31 +64,10 @@ def download_s3_folder(bucket_name, prefix, local_dir):
|
|
92 |
download_s3_folder(BUCKET_NAME, PREFIX_RETRIEVER, LOCAL_DIR)
|
93 |
|
94 |
|
95 |
-
# Apply nest_asyncio to handle nested async calls
|
96 |
-
nest_asyncio.apply()
|
97 |
-
|
98 |
-
import re
|
99 |
-
import gradio as gr
|
100 |
-
import nest_asyncio
|
101 |
-
from pathlib import Path
|
102 |
-
import requests
|
103 |
-
from bs4 import BeautifulSoup
|
104 |
-
import sys
|
105 |
-
|
106 |
nest_asyncio.apply()
|
107 |
|
108 |
state_nodes = gr.State()
|
109 |
|
110 |
-
import re
|
111 |
-
import gradio as gr
|
112 |
-
import nest_asyncio
|
113 |
-
from pathlib import Path
|
114 |
-
import requests
|
115 |
-
from bs4 import BeautifulSoup
|
116 |
-
import sys
|
117 |
-
|
118 |
-
|
119 |
-
nest_asyncio.apply()
|
120 |
|
121 |
def parse_doc_ids(doc_ids):
|
122 |
if doc_ids is None:
|
@@ -169,7 +120,6 @@ def initialize_components():
|
|
169 |
|
170 |
global retriever_bm25
|
171 |
|
172 |
-
docstore = SimpleDocumentStore.from_persist_path(str(persist_path / "docstore_es_filter.json"))
|
173 |
bm25_retriever = BM25Retriever.from_persist_dir(str(persist_path / "bm25_retriever_es"))
|
174 |
|
175 |
retriever_bm25 = QueryFusionRetriever(
|
@@ -216,7 +166,7 @@ async def search_without_ai_action(url):
|
|
216 |
links = get_links_html(doc_ids)
|
217 |
links_lp = get_links_html_lp(lp_ids)
|
218 |
|
219 |
-
search_output_content += f"\n[
|
220 |
|
221 |
return search_output_content, nodes
|
222 |
except Exception as e:
|
@@ -233,7 +183,7 @@ async def search_without_ai_action_text(question_input):
|
|
233 |
lp_ids = node.node.metadata.get('lp_id')
|
234 |
links = get_links_html(doc_ids)
|
235 |
links_lp = get_links_html_lp(lp_ids)
|
236 |
-
search_output_content += f"\n[
|
237 |
|
238 |
|
239 |
return search_output_content, nodes
|
@@ -245,7 +195,7 @@ def create_gradio_interface():
|
|
245 |
with gr.Blocks() as app:
|
246 |
gr.Markdown("# Знаходьте правові позиції Верховного Суду")
|
247 |
|
248 |
-
input_field = gr.Textbox(label="Введіть текст або посилання на судове рішення", lines=
|
249 |
search_button = gr.Button("Пошук", interactive=False)
|
250 |
warning_message = gr.Markdown(visible=False)
|
251 |
|
|
|
1 |
import os
|
2 |
import re
|
3 |
import gradio as gr
|
|
|
4 |
import requests
|
|
|
|
|
5 |
import nest_asyncio
|
6 |
import sys
|
7 |
import boto3
|
8 |
|
9 |
from pathlib import Path
|
10 |
from bs4 import BeautifulSoup
|
|
|
|
|
11 |
from llama_index.core import (
|
|
|
|
|
|
|
12 |
Settings,
|
|
|
13 |
)
|
14 |
+
|
|
|
|
|
|
|
15 |
from llama_index.retrievers.bm25 import BM25Retriever
|
|
|
|
|
16 |
from llama_index.core.retrievers import QueryFusionRetriever
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
|
19 |
from dotenv import load_dotenv
|
20 |
|
21 |
load_dotenv()
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
Settings.similarity_top_k = 20
|
24 |
|
25 |
# Параметри S3
|
|
|
64 |
download_s3_folder(BUCKET_NAME, PREFIX_RETRIEVER, LOCAL_DIR)
|
65 |
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
nest_asyncio.apply()
|
68 |
|
69 |
state_nodes = gr.State()
|
70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
def parse_doc_ids(doc_ids):
|
73 |
if doc_ids is None:
|
|
|
120 |
|
121 |
global retriever_bm25
|
122 |
|
|
|
123 |
bm25_retriever = BM25Retriever.from_persist_dir(str(persist_path / "bm25_retriever_es"))
|
124 |
|
125 |
retriever_bm25 = QueryFusionRetriever(
|
|
|
166 |
links = get_links_html(doc_ids)
|
167 |
links_lp = get_links_html_lp(lp_ids)
|
168 |
|
169 |
+
search_output_content += f"\n[{index}] *{source_title}* ⚖️ {links_lp} | {links} 👉 Score: {node.score} \n"
|
170 |
|
171 |
return search_output_content, nodes
|
172 |
except Exception as e:
|
|
|
183 |
lp_ids = node.node.metadata.get('lp_id')
|
184 |
links = get_links_html(doc_ids)
|
185 |
links_lp = get_links_html_lp(lp_ids)
|
186 |
+
search_output_content += f"\n[{index}] *{source_title}* ⚖️ {links_lp} | {links} 👉 Score: {node.score} \n"
|
187 |
|
188 |
|
189 |
return search_output_content, nodes
|
|
|
195 |
with gr.Blocks() as app:
|
196 |
gr.Markdown("# Знаходьте правові позиції Верховного Суду")
|
197 |
|
198 |
+
input_field = gr.Textbox(label="Введіть текст або посилання на судове рішення", lines=1)
|
199 |
search_button = gr.Button("Пошук", interactive=False)
|
200 |
warning_message = gr.Markdown(visible=False)
|
201 |
|