EnverLee committed on
Commit
0fbf7d5
·
verified ·
1 Parent(s): 7863ba4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -13
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  from datasets import load_dataset
3
  import os
4
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
5
  import torch
6
  from threading import Thread
7
  from sentence_transformers import SentenceTransformer
@@ -11,7 +11,6 @@ import fitz # PyMuPDF
11
  # 환경 변수에서 Hugging Face 토큰 가져오기
12
  token = os.environ.get("HF_TOKEN")
13
 
14
-
15
  # 임베딩 모델 로드
16
  ST = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
17
 
@@ -43,17 +42,13 @@ data = dataset["train"]
43
  data = data.map(lambda x: {"question_embedding": ST.encode(x["question"])}, batched=True)
44
  data.add_faiss_index(column="question_embedding")
45
 
46
- # LLaMA 모델 설정
47
  model_id = "google/gemma-2-2b-it"
48
- bnb_config = BitsAndBytesConfig(
49
- load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
50
- )
51
  tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
52
  model = AutoModelForCausalLM.from_pretrained(
53
  model_id,
54
- torch_dtype=torch.bfloat16,
55
  device_map="auto",
56
- quantization_config=bnb_config,
57
  token=token
58
  )
59
 
@@ -62,7 +57,6 @@ You are given the extracted parts of legal documents and a question. Provide a c
62
  If you don't know the answer, just say "I do not know." Don't make up an answer.
63
  you must answer korean.
64
  You're a LAWEYE legal advisor bot. Your job is to provide korean legal assistance by asking questions to korean speaker, then offering advice or guidance based on the information and law provisions provided. Make sure you only respond with one question at a time.
65
-
66
  Example 1:
67
  User: I need help with a contract dispute.
68
  Assistant: Hello! I'm your friendly GPT legal advisor bot (v0.1.0), and I'm here to help you with your contract dispute by asking you a series of questions. You can ask for help, more details, or a summary at any time. Let's get started! What is the nature of the contract in question?
@@ -72,7 +66,6 @@ User: California
72
  Assistant: When did you enter into the lease agreement?
73
  User: January 1st, 2022
74
  ...
75
-
76
  Example 2:
77
  User: I need help with a copyright issue.
78
  Assistant: Hi there! I'm your legal advisor bot, and I'll be assisting you with your copyright issue by asking some questions. You can request help, more details, or a summary at any time. Let's begin! What is the copyrighted material you are concerned about?
@@ -184,7 +177,8 @@ Damages: Present evidence of the financial harm you suffered due to the infringe
184
  Injunction: Request a court order to stop the defendant from continuing to infringe on your intellectual property rights.
185
  ...
186
 
187
- Begin by introducing yourself, next tell them they can ask for help or more details or a summary at any time, and start by asking what they need help with."""
 
188
 
189
  # 법률 문서 검색 함수
190
  def search_law(query, k=5):
@@ -203,7 +197,7 @@ def search_qa(query, k=3):
203
  def format_prompt(prompt, law_docs, qa_docs):
204
  PROMPT = f"Question: {prompt}\n\nLegal Context:\n"
205
  for doc in law_docs:
206
- PROMPT += f"{doc[0]}\n" # Assuming doc[0] contains the relevant text
207
  PROMPT += "\nLegal QA:\n"
208
  for doc in qa_docs:
209
  PROMPT += f"{doc}\n"
@@ -271,4 +265,4 @@ demo = gr.ChatInterface(
271
  )
272
 
273
  # Gradio 데모 실행
274
- demo.launch(debug=True)
 
1
  import gradio as gr
2
  from datasets import load_dataset
3
  import os
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
5
  import torch
6
  from threading import Thread
7
  from sentence_transformers import SentenceTransformer
 
11
  # 환경 변수에서 Hugging Face 토큰 가져오기
12
  token = os.environ.get("HF_TOKEN")
13
 
 
14
  # 임베딩 모델 로드
15
  ST = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
16
 
 
42
  data = data.map(lambda x: {"question_embedding": ST.encode(x["question"])}, batched=True)
43
  data.add_faiss_index(column="question_embedding")
44
 
45
+ # LLaMA 모델 설정 (양자화 없이)
46
  model_id = "google/gemma-2-2b-it"
 
 
 
47
  tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
48
  model = AutoModelForCausalLM.from_pretrained(
49
  model_id,
50
+ torch_dtype=torch.bfloat16, # 양자화 없이 bfloat16 사용
51
  device_map="auto",
 
52
  token=token
53
  )
54
 
 
57
  If you don't know the answer, just say "I do not know." Don't make up an answer.
58
  you must answer korean.
59
  You're a LAWEYE legal advisor bot. Your job is to provide korean legal assistance by asking questions to korean speaker, then offering advice or guidance based on the information and law provisions provided. Make sure you only respond with one question at a time.
 
60
  Example 1:
61
  User: I need help with a contract dispute.
62
  Assistant: Hello! I'm your friendly GPT legal advisor bot (v0.1.0), and I'm here to help you with your contract dispute by asking you a series of questions. You can ask for help, more details, or a summary at any time. Let's get started! What is the nature of the contract in question?
 
66
  Assistant: When did you enter into the lease agreement?
67
  User: January 1st, 2022
68
  ...
 
69
  Example 2:
70
  User: I need help with a copyright issue.
71
  Assistant: Hi there! I'm your legal advisor bot, and I'll be assisting you with your copyright issue by asking some questions. You can request help, more details, or a summary at any time. Let's begin! What is the copyrighted material you are concerned about?
 
177
  Injunction: Request a court order to stop the defendant from continuing to infringe on your intellectual property rights.
178
  ...
179
 
180
+ Begin by introducing yourself, next tell them they can ask for help or more details or a summary at any time, and start by asking what they need help with.
181
+ you must answer korean."""
182
 
183
  # 법률 문서 검색 함수
184
  def search_law(query, k=5):
 
197
  def format_prompt(prompt, law_docs, qa_docs):
198
  PROMPT = f"Question: {prompt}\n\nLegal Context:\n"
199
  for doc in law_docs:
200
+ PROMPT += f"{doc[0]}\n"
201
  PROMPT += "\nLegal QA:\n"
202
  for doc in qa_docs:
203
  PROMPT += f"{doc}\n"
 
265
  )
266
 
267
  # Gradio 데모 실행
268
+ demo.launch(debug=True)