|
import streamlit as st |
|
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline |
|
from sentence_transformers import SentenceTransformer |
|
from langchain.vectorstores import Chroma |
|
import os |
|
import psutil |
|
import time |
|
|
|
|
|
# Hugging Face Hub repo of the fine-tuned causal LM used for generation.
model_id = "hewoo/hehehehe"


@st.cache_resource
def _load_generation_pipeline(repo_id):
    """Load the tokenizer, model and text-generation pipeline exactly once.

    Streamlit re-executes this whole script on every user interaction;
    without caching, the full model would be re-loaded each time. The
    sampling parameters mirror the original inline pipeline() call.
    """
    tok = AutoTokenizer.from_pretrained(repo_id)
    lm = AutoModelForCausalLM.from_pretrained(repo_id)
    gen = pipeline(
        "text-generation",
        model=lm,
        tokenizer=tok,
        max_new_tokens=150,
        temperature=0.3,
        top_p=0.85,
        top_k=40,
        repetition_penalty=1.2,
    )
    return tok, lm, gen


# Module-level names kept so any code referencing them directly still works.
tokenizer, model, pipe = _load_generation_pipeline(model_id)
|
|
|
|
|
class CustomEmbedding:
    """Adapter exposing a SentenceTransformer through the LangChain
    embedding interface (``embed_query`` / ``embed_documents``).

    Chroma calls these methods and expects plain Python lists of floats.
    """

    def __init__(self, model):
        # Any object with an .encode(text_or_texts) -> tensor/array method
        # (e.g. SentenceTransformer) works here.
        self.model = model

    def embed_query(self, text):
        """Embed a single query string; returns a flat list of floats."""
        return self.model.encode(text, convert_to_tensor=True).tolist()

    def embed_documents(self, texts):
        """Embed a batch of documents; returns a list of float lists.

        Encodes all texts in ONE encode() call so SentenceTransformer can
        batch the forward pass, instead of running the model once per
        document as a Python loop would. The returned value is identical
        (a 2-D tensor's tolist() is a list of per-document lists).
        """
        return self.model.encode(texts, convert_to_tensor=True).tolist()
|
|
|
|
|
# Directory where the pre-built Chroma index was persisted.
persist_directory = "./chroma_batch_vectors"


@st.cache_resource
def _load_retrieval_stack(persist_dir):
    """Build the embedding model, vector store and retriever exactly once.

    Cached so Streamlit script re-runs do not reload the Korean
    SentenceTransformer or reopen the Chroma collection on every
    user interaction.
    """
    st_model = SentenceTransformer("jhgan/ko-sroberta-multitask")
    emb = CustomEmbedding(st_model)
    store = Chroma(persist_directory=persist_dir, embedding_function=emb)
    # k=3: retrieve the three most similar chunks per query.
    return st_model, emb, store, store.as_retriever(search_kwargs={"k": 3})


# Module-level names kept so any code referencing them directly still works.
embedding_model, embedding_function, vectorstore, retriever = _load_retrieval_stack(
    persist_directory
)
|
|
|
|
|
def generate_response(user_input):
    """Answer *user_input* with retrieval-augmented generation.

    Retrieves the top-k context chunks from the Chroma vector store,
    builds a Korean prompt around them, and runs the cached LM pipeline.

    Returns:
        tuple: (response_text, response_time_seconds, memory_usage_percent)
            where memory usage is the system-wide percentage from psutil,
            not this process's own footprint.
    """
    start_time = time.time()

    # Most similar stored chunks for this question (k configured on the
    # retriever at module level).
    search_results = retriever.get_relevant_documents(user_input)
    context = "\n".join([result.page_content for result in search_results])

    input_text = f"""μλλ νκ΅μ΄λ‘λ§ λ΅λ³νλ μ΄μμ€ν΄νΈμ
λλ€.

μ¬μ©μμ μ§λ¬Έμ λν΄ μ 곡λ λ§₯λ½μ λ°νμΌλ‘ μ ννκ³ μμΈν λ΅λ³μ νκ΅μ΄λ‘ μμ±νμΈμ.



λ§₯λ½: {context}

μ§λ¬Έ: {user_input}

λ΅λ³:"""

    # The text-generation pipeline returns prompt + completion by default;
    # slice off the prompt so callers only see the newly generated answer
    # instead of the full instructions/context block.
    full_text = pipe(input_text)[0]["generated_text"]
    response = full_text[len(input_text):].strip()

    end_time = time.time()
    response_time = end_time - start_time

    memory_info = psutil.virtual_memory()
    memory_usage = memory_info.percent

    return response, response_time, memory_usage
|
|
|
|
|
st.title("μ±λ΄ λ°λͺ¨") |
|
st.write("Llama 3.2-3B λͺ¨λΈμ μ¬μ©ν μ±λ΄μ
λλ€. μ§λ¬Έμ μ
λ ₯ν΄ μ£ΌμΈμ.") |
|
|
|
|
|
user_input = st.text_input("μ§λ¬Έ") |
|
if user_input: |
|
response, response_time, memory_usage = generate_response(user_input) |
|
st.write("μ±λ΄ μλ΅:", response) |
|
st.write(f"μλ΅ μκ°: {response_time:.2f}μ΄") |
|
st.write(f"νμ¬ λ©λͺ¨λ¦¬ μ¬μ©λ: {memory_usage}%") |
|
|
|
|