Update app.py
app.py
CHANGED
@@ -3,17 +3,18 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from sentence_transformers import SentenceTransformer
 from langchain.vectorstores import Chroma
 import os
+import psutil
+import time
 
 # Hugging Face model ID
 model_id = "hewoo/hehehehe"
-
-# Load the model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
-model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=token)
+# Load the model and tokenizer (no auth token)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id)
 
 # Set up the text generation pipeline
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150, temperature=0.
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150, temperature=0.3, top_p=0.85, top_k=40, repetition_penalty=1.2)
 
 # Define a custom embedding class
 class CustomEmbedding:
@@ -27,29 +28,49 @@ class CustomEmbedding:
         return [self.model.encode(text, convert_to_tensor=True).tolist() for text in texts]
 
 # Set up the embedding model and vector store
-embedding_model = SentenceTransformer("
+embedding_model = SentenceTransformer("jhgan/ko-sroberta-multitask")
 embedding_function = CustomEmbedding(embedding_model)
 
 # Set up the Chroma vector store
-persist_directory = "./chroma_batch_vectors"
+persist_directory = "./chroma_batch_vectors"
 vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embedding_function)
 retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
 
 # Function that generates a response to a question
 def generate_response(user_input):
+    start_time = time.time()  # record the start time
+
+    # Retrieve documents and build the context
     search_results = retriever.get_relevant_documents(user_input)
     context = "\n".join([result.page_content for result in search_results])
-    input_text = f"
+    input_text = f"""The following is an assistant that answers only in Korean.
+Based on the provided context, write an accurate and detailed answer to the user's question in Korean.
+
+Context: {context}
+Question: {user_input}
+Answer:"""
+
+    # Generate the response
    response = pipe(input_text)[0]["generated_text"]
-
+
+    end_time = time.time()  # record the end time
+    response_time = end_time - start_time  # compute the response time
+
+    # Monitor memory usage
+    memory_info = psutil.virtual_memory()
+    memory_usage = memory_info.percent  # memory usage as a percentage (%)
+
+    return response, response_time, memory_usage
 
 # Streamlit app UI
-st.title("Chatbot
+st.title("Chatbot Demo")
 st.write("This is a chatbot using the Llama 3.2-3B model. Please enter your question.")
 
 # Get user input
 user_input = st.text_input("Question")
 if user_input:
-    response = generate_response(user_input)
+    response, response_time, memory_usage = generate_response(user_input)
     st.write("Chatbot response:", response)
+    st.write(f"Response time: {response_time:.2f} seconds")
+    st.write(f"Current memory usage: {memory_usage}%")
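
For reference, the second hunk begins inside the `CustomEmbedding` class, so only its final `embed_documents` return line is visible above. Below is a minimal sketch of what the full class presumably looks like, assuming the `embed_documents`/`embed_query` interface that LangChain's `Chroma` expects of an `embedding_function`; everything except the visible `return` line is an assumption.

```python
from sentence_transformers import SentenceTransformer

# Hypothetical reconstruction: only the embed_documents return line
# appears in the diff context; the rest follows the embed_documents/
# embed_query interface LangChain embedding functions implement.
class CustomEmbedding:
    def __init__(self, model: SentenceTransformer):
        self.model = model

    def embed_documents(self, texts):
        # This line is visible verbatim in the diff context above.
        return [self.model.encode(text, convert_to_tensor=True).tolist() for text in texts]

    def embed_query(self, text):
        # Assumed single-text counterpart used for retriever queries.
        return self.model.encode(text, convert_to_tensor=True).tolist()
```

One caveat when reading `generate_response`: by default a transformers text-generation pipeline returns the prompt together with the completion, so `generated_text` here will include the whole `input_text` prefix. Passing `return_full_text=False` to `pipeline(...)`, or slicing `input_text` off the result, would yield only the model's answer.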