docsa_HD committed on
Commit
081f7f6
1 Parent(s): 1861746
Files changed (1) hide show
  1. main.py +4 -54
main.py CHANGED
@@ -1,53 +1,25 @@
1
  import os
2
  import re
3
  import gradio as gr
4
- import pandas as pd
5
  import requests
6
- import json
7
- import faiss
8
  import nest_asyncio
9
  import sys
10
  import boto3
11
 
12
  from pathlib import Path
13
  from bs4 import BeautifulSoup
14
- from typing import Union, List
15
- import asyncio
16
  from llama_index.core import (
17
- StorageContext,
18
- ServiceContext,
19
- VectorStoreIndex,
20
  Settings,
21
- load_index_from_storage
22
  )
23
- from llama_index.llms.openai import OpenAI
24
- from llama_index.core.llms import ChatMessage
25
- from llama_index.core.schema import IndexNode
26
- from llama_index.core.storage.docstore import SimpleDocumentStore
27
  from llama_index.retrievers.bm25 import BM25Retriever
28
- from llama_index.embeddings.openai import OpenAIEmbedding
29
- # from llama_index.vector_stores.faiss import FaissVectorStore
30
  from llama_index.core.retrievers import QueryFusionRetriever
31
- from llama_index.core.workflow import Event, Context, Workflow, StartEvent, StopEvent, step
32
- from llama_index.core.schema import NodeWithScore
33
- from llama_index.core.prompts import PromptTemplate
34
- from llama_index.core.response_synthesizers import ResponseMode, get_response_synthesizer
35
-
36
- from prompts import CITATION_QA_TEMPLATE, CITATION_REFINE_TEMPLATE
37
 
38
 
39
  from dotenv import load_dotenv
40
 
41
  load_dotenv()
42
 
43
- # OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
44
-
45
- # os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
46
- #
47
- # embed_model = OpenAIEmbedding(model_name="text-embedding-3-small")
48
- # Settings.embed_model = embed_model
49
- Settings.context_window = 20000
50
- Settings.chunk_size = 2048
51
  Settings.similarity_top_k = 20
52
 
53
  # Параметри S3
@@ -92,31 +64,10 @@ def download_s3_folder(bucket_name, prefix, local_dir):
92
  download_s3_folder(BUCKET_NAME, PREFIX_RETRIEVER, LOCAL_DIR)
93
 
94
 
95
- # Apply nest_asyncio to handle nested async calls
96
- nest_asyncio.apply()
97
-
98
- import re
99
- import gradio as gr
100
- import nest_asyncio
101
- from pathlib import Path
102
- import requests
103
- from bs4 import BeautifulSoup
104
- import sys
105
-
106
  nest_asyncio.apply()
107
 
108
  state_nodes = gr.State()
109
 
110
- import re
111
- import gradio as gr
112
- import nest_asyncio
113
- from pathlib import Path
114
- import requests
115
- from bs4 import BeautifulSoup
116
- import sys
117
-
118
-
119
- nest_asyncio.apply()
120
 
121
  def parse_doc_ids(doc_ids):
122
  if doc_ids is None:
@@ -169,7 +120,6 @@ def initialize_components():
169
 
170
  global retriever_bm25
171
 
172
- docstore = SimpleDocumentStore.from_persist_path(str(persist_path / "docstore_es_filter.json"))
173
  bm25_retriever = BM25Retriever.from_persist_dir(str(persist_path / "bm25_retriever_es"))
174
 
175
  retriever_bm25 = QueryFusionRetriever(
@@ -216,7 +166,7 @@ async def search_without_ai_action(url):
216
  links = get_links_html(doc_ids)
217
  links_lp = get_links_html_lp(lp_ids)
218
 
219
- search_output_content += f"\n[ {index}] *{source_title}* {links_lp} | {links} 👉 Score: {node.score} \n"
220
 
221
  return search_output_content, nodes
222
  except Exception as e:
@@ -233,7 +183,7 @@ async def search_without_ai_action_text(question_input):
233
  lp_ids = node.node.metadata.get('lp_id')
234
  links = get_links_html(doc_ids)
235
  links_lp = get_links_html_lp(lp_ids)
236
- search_output_content += f"\n[ {index}] *{source_title}* {links_lp} | {links} 👉 Score: {node.score} \n"
237
 
238
 
239
  return search_output_content, nodes
@@ -245,7 +195,7 @@ def create_gradio_interface():
245
  with gr.Blocks() as app:
246
  gr.Markdown("# Знаходьте правові позиції Верховного Суду")
247
 
248
- input_field = gr.Textbox(label="Введіть текст або посилання на судове рішення", lines=2)
249
  search_button = gr.Button("Пошук", interactive=False)
250
  warning_message = gr.Markdown(visible=False)
251
 
 
1
  import os
2
  import re
3
  import gradio as gr
 
4
  import requests
 
 
5
  import nest_asyncio
6
  import sys
7
  import boto3
8
 
9
  from pathlib import Path
10
  from bs4 import BeautifulSoup
 
 
11
  from llama_index.core import (
 
 
 
12
  Settings,
 
13
  )
14
+
 
 
 
15
  from llama_index.retrievers.bm25 import BM25Retriever
 
 
16
  from llama_index.core.retrievers import QueryFusionRetriever
 
 
 
 
 
 
17
 
18
 
19
  from dotenv import load_dotenv
20
 
21
  load_dotenv()
22
 
 
 
 
 
 
 
 
 
23
  Settings.similarity_top_k = 20
24
 
25
  # Параметри S3
 
64
  download_s3_folder(BUCKET_NAME, PREFIX_RETRIEVER, LOCAL_DIR)
65
 
66
 
 
 
 
 
 
 
 
 
 
 
 
67
  nest_asyncio.apply()
68
 
69
  state_nodes = gr.State()
70
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  def parse_doc_ids(doc_ids):
73
  if doc_ids is None:
 
120
 
121
  global retriever_bm25
122
 
 
123
  bm25_retriever = BM25Retriever.from_persist_dir(str(persist_path / "bm25_retriever_es"))
124
 
125
  retriever_bm25 = QueryFusionRetriever(
 
166
  links = get_links_html(doc_ids)
167
  links_lp = get_links_html_lp(lp_ids)
168
 
169
+ search_output_content += f"\n[{index}] *{source_title}* ⚖️ {links_lp} | {links} 👉 Score: {node.score} \n"
170
 
171
  return search_output_content, nodes
172
  except Exception as e:
 
183
  lp_ids = node.node.metadata.get('lp_id')
184
  links = get_links_html(doc_ids)
185
  links_lp = get_links_html_lp(lp_ids)
186
+ search_output_content += f"\n[{index}] *{source_title}* ⚖️ {links_lp} | {links} 👉 Score: {node.score} \n"
187
 
188
 
189
  return search_output_content, nodes
 
195
  with gr.Blocks() as app:
196
  gr.Markdown("# Знаходьте правові позиції Верховного Суду")
197
 
198
+ input_field = gr.Textbox(label="Введіть текст або посилання на судове рішення", lines=1)
199
  search_button = gr.Button("Пошук", interactive=False)
200
  warning_message = gr.Markdown(visible=False)
201