Update app.py
app.py CHANGED
```diff
@@ -7,6 +7,7 @@ from threading import Thread
 from sentence_transformers import SentenceTransformer
 import faiss
 import fitz  # PyMuPDF
+import spaces
 
 # Get the Hugging Face token from environment variables
 token = os.environ.get("HF_TOKEN")
@@ -21,6 +22,7 @@ index = None
 data = None
 
 # Lazy-load the embedding model
+@spaces.GPU
 def load_embedding_model():
     global ST
     if ST is None:
@@ -28,6 +30,7 @@ def load_embedding_model():
     return ST
 
 # Lazy-load the LLaMA model and tokenizer
+@spaces.GPU
 def load_model():
     global model, tokenizer
     if model is None or tokenizer is None:
@@ -42,6 +45,7 @@ def load_model():
     return model, tokenizer
 
 # Extract text from the PDF and lazy-load the embeddings
+@spaces.GPU
 def load_law_data():
     global law_sentences, law_embeddings, index
     if law_sentences is None or law_embeddings is None or index is None:
@@ -59,6 +63,7 @@ def load_law_data():
         index.add(law_embeddings)
 
 # Lazy-load the legal consultation dataset from Hugging Face
+@spaces.GPU
 def load_dataset_data():
     global data
     if data is None:
@@ -68,7 +73,9 @@ def load_dataset_data():
         data.add_faiss_index(column="question_embedding")
     return data
 
+
 # Law document search function
+@spaces.GPU
 def search_law(query, k=5):
     load_law_data()  # Lazy-load the PDF text and embeddings
     query_embedding = load_embedding_model().encode([query])
@@ -76,6 +83,7 @@ def search_law(query, k=5):
     return [(law_sentences[i], D[0][idx]) for idx, i in enumerate(I[0])]
 
 # Legal consultation data search function
+@spaces.GPU
 def search_qa(query, k=3):
     dataset_data = load_dataset_data()
     scores, retrieved_examples = dataset_data.get_nearest_examples(
@@ -84,6 +92,7 @@ def search_qa(query, k=3):
     return [retrieved_examples["answer"][i] for i in range(k)]
 
 # Build the final prompt
+@spaces.GPU
 def format_prompt(prompt, law_docs, qa_docs):
     PROMPT = f"Question: {prompt}\n\nLegal Context:\n"
     for doc in law_docs:
@@ -94,6 +103,7 @@ def format_prompt(prompt, law_docs, qa_docs):
     return PROMPT
 
 # Chatbot response function
+@spaces.GPU
 def talk(prompt, history):
     law_results = search_law(prompt, k=3)
     qa_results = search_qa(prompt, k=3)
@@ -157,4 +167,4 @@ demo = gr.ChatInterface(
 )
 
 # Launch the Gradio demo
 demo.launch(debug=True, server_port=7860)
```
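For reference, `spaces` is the helper library for Hugging Face ZeroGPU Spaces: `@spaces.GPU` requests a GPU slice for the duration of each call to the decorated function and releases it afterward. Below is a minimal sketch of the documented pattern, assuming a ZeroGPU Space; the checkpoint and the `generate` function are illustrative, not taken from this app.

```python
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative checkpoint, not the one this app loads.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
model.to("cuda")  # on ZeroGPU, CUDA placement is deferred until a GPU call runs

@spaces.GPU(duration=120)  # optional: request up to 120 s of GPU per call
def generate(prompt: str) -> str:
    # A GPU is attached only while this call is in flight.
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=256)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
```

The decorator only pays off on functions that actually execute CUDA work; wrapping CPU-only helpers such as prompt formatting adds queueing overhead without any GPU use. Python also permits decorators only on `def` and `class` statements, so module-level calls like `demo.launch(...)` always run undecorated.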