Update app.py
app.py CHANGED
@@ -1,3 +1,4 @@
+import spaces
 import gradio as gr
 from datasets import load_dataset
 import os
@@ -8,6 +9,7 @@ from sentence_transformers import SentenceTransformer
 import faiss
 import fitz  # PyMuPDF
 
+
 # Load the Hugging Face token from the environment variable
 token = os.environ.get("HF_TOKEN")
 
@@ -62,13 +64,15 @@ You are given the extracted parts of legal documents and a question. Provide a c
 If you don't know the answer, just say "I do not know." Don't make up an answer.
 you must answer korean."""
 
-# Legal document search
+# Legal document search
+@spaces.GPU
 def search_law(query, k=5):
     query_embedding = ST.encode([query])
     D, I = index.search(query_embedding, k)
     return [(law_sentences[i], D[0][idx]) for idx, i in enumerate(I[0])]
 
 # Legal consultation data search function
+@spaces.GPU
 def search_qa(query, k=3):
     scores, retrieved_examples = data.get_nearest_examples(
         "question_embedding", ST.encode(query), k=k
@@ -86,6 +90,7 @@ def format_prompt(prompt, law_docs, qa_docs):
     return PROMPT
 
 # Chatbot response function
+@spaces.GPU
 def talk(prompt, history):
     law_results = search_law(prompt, k=3)
     qa_results = search_qa(prompt, k=3)
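
For context on the new decorator: on a ZeroGPU Space, Hugging Face preinstalls the `spaces` package, and any function that needs a GPU is wrapped with `spaces.GPU` so a device is attached only while that function runs; the package is generally imported before CUDA-initializing libraries, which is why the commit places the import on line 1. Below is a minimal sketch of the pattern, not the actual app: the embedding model name and the toy handler are assumptions.

import spaces                      # ZeroGPU helper; imported before GPU-using libraries
import gradio as gr
from sentence_transformers import SentenceTransformer

# Placeholder embedding model; the model app.py actually loads is not shown in this diff.
ST = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

@spaces.GPU  # a GPU is allocated only for the duration of this call
def embed(texts):
    return ST.encode(texts)

def answer(query):
    # Toy handler: show the first few embedding dimensions for the query.
    return str(embed([query])[0][:5])

gr.Interface(fn=answer, inputs="text", outputs="text").launch()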
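The three decorated functions sit on top of two retrieval paths that are only partly visible in this diff: a raw FAISS index over `law_sentences` (used by `search_law`) and a `datasets` FAISS index over a `question_embedding` column (used by `search_qa`). The following is a rough, self-contained sketch of both patterns with hypothetical toy data and the same placeholder sentence-transformer as above; it is not the setup code from app.py.

import faiss
import numpy as np
from datasets import Dataset
from sentence_transformers import SentenceTransformer

ST = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")  # placeholder model

# Path 1: plain FAISS index over a list of sentences (mirrors search_law).
law_sentences = ["Article 1 ...", "Article 2 ...", "Article 3 ..."]  # hypothetical data
law_embeddings = np.asarray(ST.encode(law_sentences), dtype="float32")
index = faiss.IndexFlatL2(law_embeddings.shape[1])
index.add(law_embeddings)

def search_law(query, k=5):
    k = min(k, index.ntotal)  # clamp so toy data never yields invalid ids
    query_embedding = np.asarray(ST.encode([query]), dtype="float32")
    D, I = index.search(query_embedding, k)
    return [(law_sentences[i], D[0][idx]) for idx, i in enumerate(I[0])]

# Path 2: the datasets library's built-in FAISS support (mirrors search_qa).
data = Dataset.from_dict({
    "question": [
        "What notice period does a lease require?",
        "Who is liable for repairs?",
        "How is a deposit returned?",
    ],
    "answer": ["Hypothetical answer 1.", "Hypothetical answer 2.", "Hypothetical answer 3."],
})
data = data.map(lambda ex: {"question_embedding": ST.encode(ex["question"])})
data.add_faiss_index(column="question_embedding")

def search_qa(query, k=3):
    scores, retrieved_examples = data.get_nearest_examples(
        "question_embedding", ST.encode(query), k=k
    )
    return scores, retrieved_examples

print(search_law("lease termination", k=2))
print(search_qa("lease notice period", k=1))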