hewoo committed on
Commit
ee37e7f
•
1 Parent(s): a91fe67

Update app.py

Files changed (1)
  1. app.py +21 -53
app.py CHANGED
@@ -2,86 +2,54 @@ import streamlit as st
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
  from sentence_transformers import SentenceTransformer
  from langchain.vectorstores import Chroma
- import gc
- import psutil

- # Model ID (must be a publicly available model)
  model_id = "hewoo/hehehehe"

- # Memory monitoring helper
- def monitor_memory():
-     memory_info = psutil.virtual_memory()
-     st.write(f"Current memory usage: {memory_info.percent}%")

- # Load the model and pipeline once, cached by Streamlit
- @st.cache_resource
- def load_model():
-     tokenizer = AutoTokenizer.from_pretrained(model_id)
-     model = AutoModelForCausalLM.from_pretrained(model_id)
-     return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150, temperature=0.5, top_p=0.85, top_k=40, repetition_penalty=1.2)

- # Custom embedding class
  class CustomEmbedding:
      def __init__(self, model):
          self.model = model

      def embed_query(self, text):
-         return self.model.encode(text, convert_to_tensor=False).tolist()

      def embed_documents(self, texts):
-         return [self.model.encode(text, convert_to_tensor=False).tolist() for text in texts]

- # Korean embedding model and vector store setup
- @st.cache_resource
- def load_embedding_model():
-     return SentenceTransformer("jhgan/ko-sroberta-multitask")

- @st.cache_resource
- def load_vectorstore(_embedding_model):  # renamed with a leading underscore so st.cache_resource does not try to hash the argument
-     embedding_function = CustomEmbedding(_embedding_model)
-     return Chroma(persist_directory="./chroma_batch_vectors", embedding_function=embedding_function)

  # Generate a response to the user's question
  def generate_response(user_input):
-     retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
      search_results = retriever.get_relevant_documents(user_input)
      context = "\n".join([result.page_content for result in search_results])
-
-     prompt = f"""The following is a Korean-language assistant that answers users' questions.
- Write an accurate and detailed answer to the user's question in Korean, based on the given context.
- If the context contains no relevant information, answer "Sorry, but I cannot find an answer to that question."
-
- Context:
- {context}
-
- Question:
- {user_input}
-
- Answer:"""
-
-     response = pipe(prompt)[0]["generated_text"]
      return response

- # Load the model and embedding models
- pipe = load_model()
- embedding_model = load_embedding_model()
- vectorstore = load_vectorstore(embedding_model)
-
  # Streamlit app UI
- st.title("Chatbot Demo")
  st.write("This chatbot uses the Llama 3.2-3B model. Please enter a question.")

- monitor_memory()  # check memory usage
-
  # Get user input
  user_input = st.text_input("Question")
  if user_input:
      response = generate_response(user_input)
      st.write("Chatbot response:", response)
-     monitor_memory()  # refresh the memory readout
-
-     # Free memory
-     del response
-     gc.collect()

  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
  from sentence_transformers import SentenceTransformer
  from langchain.vectorstores import Chroma
+ import os

+ # Hugging Face model ID
  model_id = "hewoo/hehehehe"
+ token = os.getenv("HF_API_TOKEN")  # the user can be prompted for a Hugging Face API token if required

+ # Load the model and tokenizer
+ tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
+ model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=token)

+ # Set up the text-generation pipeline
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150, temperature=0.5, top_p=0.85, top_k=40, repetition_penalty=1.2)

+ # Create the custom embedding class
  class CustomEmbedding:
      def __init__(self, model):
          self.model = model

      def embed_query(self, text):
+         return self.model.encode(text, convert_to_tensor=True).tolist()

      def embed_documents(self, texts):
+         return [self.model.encode(text, convert_to_tensor=True).tolist() for text in texts]

+ # Embedding model and vector store setup
+ embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+ embedding_function = CustomEmbedding(embedding_model)

+ # Set up the Chroma vector store
+ persist_directory = "./chroma_batch_vectors"  # may need adjusting for the Spaces environment
+ vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embedding_function)
+ retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

  # Generate a response to the user's question
  def generate_response(user_input):
      search_results = retriever.get_relevant_documents(user_input)
      context = "\n".join([result.page_content for result in search_results])
+     input_text = f"Context: {context}\nQuestion: {user_input}"
+     response = pipe(input_text)[0]["generated_text"]
      return response
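
# Note (not part of this commit): a "text-generation" pipeline returns the
# prompt followed by the continuation by default, so the value returned above
# still contains the whole prompt. If only the model's answer should be shown,
# the prompt prefix can be stripped, e.g.:
#     response = pipe(input_text)[0]["generated_text"][len(input_text):].strip()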

  # Streamlit app UI
+ st.title("Chatbot test")
  st.write("This chatbot uses the Llama 3.2-3B model. Please enter a question.")

  # Get user input
  user_input = st.text_input("Question")
  if user_input:
      response = generate_response(user_input)
      st.write("Chatbot response:", response)
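
One caveat when running this revision: the @st.cache_resource wrappers from the previous version are gone, so the tokenizer, model, pipeline, and vector store are rebuilt on every Streamlit rerun. A minimal sketch of how the heavyweight loads could be cached again, reusing the names from the diff above (an assumption about intent, not part of the commit):

@st.cache_resource
def load_pipeline():
    # Runs once per process; Streamlit returns the cached object on reruns.
    tok = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
    mdl = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=token)
    return pipeline("text-generation", model=mdl, tokenizer=tok, max_new_tokens=150, temperature=0.5, top_p=0.85, top_k=40, repetition_penalty=1.2)

pipe = load_pipeline()

On recent transformers releases, use_auth_token is deprecated in favor of the token keyword argument; the call is otherwise identical.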
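
Why the small CustomEmbedding adapter is enough here: LangChain's Chroma wrapper only calls embed_documents and embed_query on whatever object is passed as embedding_function, so any class providing those two methods works. A quick sanity check with a hypothetical sample sentence (not part of the commit):

sample = ["a short retrieval test sentence"]  # hypothetical input
doc_vecs = embedding_function.embed_documents(sample)
query_vec = embedding_function.embed_query(sample[0])
assert len(doc_vecs) == 1                         # one vector per input text
assert len(doc_vecs[0]) == len(query_vec) == 384  # all-MiniLM-L6-v2 produces 384-dim vectors

Note that encode(..., convert_to_tensor=True).tolist() round-trips through a torch tensor; convert_to_tensor=False, as in the previous revision, returns a NumPy array and avoids the extra hop.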