hewoo committed on
Commit a91fe67 • 1 Parent(s): 6ab87db

Update app.py

Files changed (1)
app.py +20 -18
app.py CHANGED
@@ -2,19 +2,23 @@ import streamlit as st
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from sentence_transformers import SentenceTransformer
 from langchain.vectorstores import Chroma
-import os
-import psutil
 import gc
+import psutil
 
-# Hugging Face model ID
+# Model ID (must be a public model)
 model_id = "hewoo/hehehehe"
 
-# Load model and pipeline using the cache
+# Memory monitoring function
+def monitor_memory():
+    memory_info = psutil.virtual_memory()
+    st.write(f"Current memory usage: {memory_info.percent}%")
+
+# Load the model and pipeline with caching
 @st.cache_resource
 def load_model():
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     model = AutoModelForCausalLM.from_pretrained(model_id)
-    return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150)
+    return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150, temperature=0.5, top_p=0.85, top_k=40, repetition_penalty=1.2)
 
 # Custom embedding class
 class CustomEmbedding:
@@ -22,18 +26,18 @@ class CustomEmbedding:
         self.model = model
 
     def embed_query(self, text):
-        return self.model.encode(text, convert_to_tensor=True).tolist()
+        return self.model.encode(text, convert_to_tensor=False).tolist()
 
     def embed_documents(self, texts):
-        return [self.model.encode(text, convert_to_tensor=True).tolist() for text in texts]
+        return [self.model.encode(text, convert_to_tensor=False).tolist() for text in texts]
 
-# Embedding model and vector store setup (cached)
+# Korean embedding model and vector store setup
 @st.cache_resource
 def load_embedding_model():
     return SentenceTransformer("jhgan/ko-sroberta-multitask")
 
 @st.cache_resource
-def load_vectorstore(_embedding_model):
+def load_vectorstore(_embedding_model):  # renamed to _embedding_model
     embedding_function = CustomEmbedding(_embedding_model)
     return Chroma(persist_directory="./chroma_batch_vectors", embedding_function=embedding_function)
 
@@ -56,8 +60,7 @@ def generate_response(user_input):
 Answer:"""
 
     response = pipe(prompt)[0]["generated_text"]
-    response = response.split("Answer:")[-1].strip()
-    return response, context
+    return response
 
 # Load the model and embedding model
 pipe = load_model()
@@ -66,20 +69,19 @@ vectorstore = load_vectorstore(embedding_model)
 
 # Streamlit app UI
 st.title("Chatbot Demo")
-st.write("A chatbot using a Korean language model. Please enter a question.")
+st.write("A chatbot using the Llama 3.2-3B model. Please enter a question.")
+
+monitor_memory()  # check memory usage
 
 # Get user input
 user_input = st.text_input("Question")
 if user_input:
-    response, context = generate_response(user_input)
+    response = generate_response(user_input)
     st.write("Chatbot response:", response)
-    st.write("Context used:", context)
-
-    # Monitor memory status
-    memory_usage = psutil.virtual_memory().used / (1024 ** 3)
-    st.write(f"Current memory usage: {memory_usage:.2f} GB")
+    monitor_memory()  # update memory status
 
     # Free memory
     del response
     gc.collect()
+
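A note on generate_response: the hunk above shows only its tail, so the retrieval step never appears in this diff. Below is a minimal sketch of how the hidden half presumably works with the vectorstore and pipe objects defined in this file; the similarity_search call, the k=3 value, and the prompt wording are assumptions, not the author's code.

# Hypothetical reconstruction of the retrieval half of generate_response.
# Relies on the module-level vectorstore and pipe created later in app.py.
def generate_response(user_input):
    # Fetch the most similar stored chunks from Chroma (k=3 is an assumption)
    docs = vectorstore.similarity_search(user_input, k=3)
    context = "\n".join(doc.page_content for doc in docs)

    prompt = f"""Answer the question using only the context below.

Context: {context}

Question: {user_input}

Answer:"""

    response = pipe(prompt)[0]["generated_text"]
    return response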
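The comment added to load_vectorstore is worth a word: st.cache_resource builds its cache key by hashing every argument, and a SentenceTransformer model is not hashable, so passing it under a plain name raises UnhashableParamError. A leading underscore tells Streamlit to leave that argument out of the key. A minimal sketch (HeavyModel and build_store are illustrative names, not from this repo):

import streamlit as st

class HeavyModel:
    """Stand-in for an unhashable object such as a SentenceTransformer model."""

@st.cache_resource
def build_store(_model, persist_dir):
    # _model (leading underscore) is skipped when Streamlit computes the
    # cache key; persist_dir is hashed and becomes part of the key.
    return {"model": _model, "dir": persist_dir}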
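One behavioral consequence of dropping the split("Answer:") post-processing: transformers' text-generation pipeline returns the prompt followed by the continuation by default, so the app now displays the full prompt in the chat response. If that is unwanted, the pipeline's return_full_text flag keeps only the newly generated text:

# Ask the pipeline for the continuation only, without the echoed prompt
response = pipe(prompt, return_full_text=False)[0]["generated_text"]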
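On the convert_to_tensor change in CustomEmbedding: with convert_to_tensor=False, SentenceTransformer.encode returns a NumPy array rather than a torch tensor, so .tolist() yields plain Python floats without a tensor round-trip. A quick check against the same embedding model used here:

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("jhgan/ko-sroberta-multitask")
vec = model.encode("안녕하세요", convert_to_tensor=False)
print(type(vec))          # <class 'numpy.ndarray'>
print(len(vec.tolist()))  # embedding dimensionality, as a plain list of floats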