EnverLee committed on
Commit
f4d4afc
β€’
1 Parent(s): 0a1b8ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -0
app.py CHANGED
@@ -7,6 +7,7 @@ from threading import Thread
7
  from sentence_transformers import SentenceTransformer
8
  import faiss
9
  import fitz # PyMuPDF
 
10
 
11
  # ν™˜κ²½ λ³€μˆ˜μ—μ„œ Hugging Face 토큰 κ°€μ Έμ˜€κΈ°
12
  token = os.environ.get("HF_TOKEN")
@@ -21,6 +22,7 @@ index = None
21
  data = None
22
 
23
  # μž„λ² λ”© λͺ¨λΈ Lazy Loading
 
24
  def load_embedding_model():
25
  global ST
26
  if ST is None:
@@ -28,6 +30,7 @@ def load_embedding_model():
28
  return ST
29
 
30
  # LLaMA λͺ¨λΈ 및 ν† ν¬λ‚˜μ΄μ € Lazy Loading
 
31
  def load_model():
32
  global model, tokenizer
33
  if model is None or tokenizer is None:
@@ -42,6 +45,7 @@ def load_model():
42
  return model, tokenizer
43
 
44
  # PDFμ—μ„œ ν…μŠ€νŠΈ μΆ”μΆœ 및 μž„λ² λ”© Lazy Loading
 
45
  def load_law_data():
46
  global law_sentences, law_embeddings, index
47
  if law_sentences is None or law_embeddings is None or index is None:
@@ -59,6 +63,7 @@ def load_law_data():
59
  index.add(law_embeddings)
60
 
61
  # Hugging Faceμ—μ„œ 법λ₯  상담 데이터셋 λ‘œλ“œ (Lazy Loading)
 
62
  def load_dataset_data():
63
  global data
64
  if data is None:
@@ -68,7 +73,9 @@ def load_dataset_data():
68
  data.add_faiss_index(column="question_embedding")
69
  return data
70
 
 
71
  # 법λ₯  λ¬Έμ„œ 검색 ν•¨μˆ˜
 
72
  def search_law(query, k=5):
73
  load_law_data() # PDF ν…μŠ€νŠΈμ™€ μž„λ² λ”© Lazy Loading
74
  query_embedding = load_embedding_model().encode([query])
@@ -76,6 +83,7 @@ def search_law(query, k=5):
76
  return [(law_sentences[i], D[0][idx]) for idx, i in enumerate(I[0])]
77
 
78
  # 법λ₯  상담 데이터 검색 ν•¨μˆ˜
 
79
  def search_qa(query, k=3):
80
  dataset_data = load_dataset_data()
81
  scores, retrieved_examples = dataset_data.get_nearest_examples(
@@ -84,6 +92,7 @@ def search_qa(query, k=3):
84
  return [retrieved_examples["answer"][i] for i in range(k)]
85
 
86
  # μ΅œμ’… ν”„λ‘¬ν”„νŠΈ 생성
 
87
  def format_prompt(prompt, law_docs, qa_docs):
88
  PROMPT = f"Question: {prompt}\n\nLegal Context:\n"
89
  for doc in law_docs:
@@ -94,6 +103,7 @@ def format_prompt(prompt, law_docs, qa_docs):
94
  return PROMPT
95
 
96
  # 챗봇 응닡 ν•¨μˆ˜
 
97
  def talk(prompt, history):
98
  law_results = search_law(prompt, k=3)
99
  qa_results = search_qa(prompt, k=3)
@@ -157,4 +167,5 @@ demo = gr.ChatInterface(
157
  )
158
 
159
  # Gradio 데λͺ¨ μ‹€ν–‰
 
160
  demo.launch(debug=True, server_port=7860)
 
7
  from sentence_transformers import SentenceTransformer
8
  import faiss
9
  import fitz # PyMuPDF
10
+ import spaces
11
 
12
  # ν™˜κ²½ λ³€μˆ˜μ—μ„œ Hugging Face 토큰 κ°€μ Έμ˜€κΈ°
13
  token = os.environ.get("HF_TOKEN")
 
22
  data = None
23
 
24
  # μž„λ² λ”© λͺ¨λΈ Lazy Loading
25
+ @spaces.GPU
26
  def load_embedding_model():
27
  global ST
28
  if ST is None:
 
30
  return ST
31
 
32
  # LLaMA λͺ¨λΈ 및 ν† ν¬λ‚˜μ΄μ € Lazy Loading
33
+ @spaces.GPU
34
  def load_model():
35
  global model, tokenizer
36
  if model is None or tokenizer is None:
 
45
  return model, tokenizer
46
 
47
  # PDFμ—μ„œ ν…μŠ€νŠΈ μΆ”μΆœ 및 μž„λ² λ”© Lazy Loading
48
+ @spaces.GPU
49
  def load_law_data():
50
  global law_sentences, law_embeddings, index
51
  if law_sentences is None or law_embeddings is None or index is None:
 
63
  index.add(law_embeddings)
64
 
65
  # Hugging Faceμ—μ„œ 법λ₯  상담 데이터셋 λ‘œλ“œ (Lazy Loading)
66
+ @spaces.GPU
67
  def load_dataset_data():
68
  global data
69
  if data is None:
 
73
  data.add_faiss_index(column="question_embedding")
74
  return data
75
 
76
+
77
  # 법λ₯  λ¬Έμ„œ 검색 ν•¨μˆ˜
78
+ @spaces.GPU
79
  def search_law(query, k=5):
80
  load_law_data() # PDF ν…μŠ€νŠΈμ™€ μž„λ² λ”© Lazy Loading
81
  query_embedding = load_embedding_model().encode([query])
 
83
  return [(law_sentences[i], D[0][idx]) for idx, i in enumerate(I[0])]
84
 
85
  # 법λ₯  상담 데이터 검색 ν•¨μˆ˜
86
+ @spaces.GPU
87
  def search_qa(query, k=3):
88
  dataset_data = load_dataset_data()
89
  scores, retrieved_examples = dataset_data.get_nearest_examples(
 
92
  return [retrieved_examples["answer"][i] for i in range(k)]
93
 
94
  # μ΅œμ’… ν”„λ‘¬ν”„νŠΈ 생성
95
+ @spaces.GPU
96
  def format_prompt(prompt, law_docs, qa_docs):
97
  PROMPT = f"Question: {prompt}\n\nLegal Context:\n"
98
  for doc in law_docs:
 
103
  return PROMPT
104
 
105
  # 챗봇 응닡 ν•¨μˆ˜
106
+ @spaces.GPU
107
  def talk(prompt, history):
108
  law_results = search_law(prompt, k=3)
109
  qa_results = search_qa(prompt, k=3)
 
167
  )
168
 
169
# Run the Gradio demo.
# NOTE(review): the previous revision placed `@spaces.GPU` directly above this
# statement. A decorator is only valid before a `def`/`class`, so decorating a
# bare `demo.launch(...)` call is a SyntaxError and the app cannot start.
# `@spaces.GPU` belongs on the inference functions (e.g. `talk`), not on the
# server-launch call, which does no GPU work — so the decorator is removed here.
demo.launch(debug=True, server_port=7860)