from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain_pinecone import PineconeVectorStore
from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv, find_dotenv
import os
from pinecone import Pinecone, PodSpec

load_dotenv(find_dotenv())
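# The .env file loaded above is expected to provide the two keys read later
# via os.getenv / os.environ (key names taken from this file; values are
# placeholders):
#
#   GEMINI_API_KEY=<your Google AI Studio API key>
#   PINECONE_API_KEY=<your Pinecone API key>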
class ChatbotMemory:
    # Load the knowledge base and split it into overlapping chunks so each
    # one fits comfortably in the embedding model's input window.
    loader = TextLoader('dataset.txt', autodetect_encoding=True)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=103)
    docs = text_splitter.split_documents(documents)

    # Gemini embeddings; "models/embedding-001" produces 768-dimensional
    # vectors, matching the index dimension below.
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type="retrieval_query",
        google_api_key=os.getenv("GEMINI_API_KEY"),
    )

    pinecone = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
    index_name = "chatbot"

    # Create the index and embed the documents on first run; on later runs,
    # reuse the already-populated index.
    if index_name not in pinecone.list_indexes().names():
        pinecone.create_index(
            name=index_name,
            metric="cosine",
            dimension=768,
            spec=PodSpec(environment="gcp-starter"),
        )
        docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
    else:
        docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

    llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))
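    # Quick retrieval check (illustrative only, not part of the original):
    #   docsearch.similarity_search("Who is Rohan Shaw?", k=3)
    # would return the three chunks closest to the query in embedding space.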
    # Question contextualization: given prior chat history, rewrite the
    # latest user question into a standalone one so the retriever gets a
    # self-contained query. The chain is built once here rather than being
    # reconstructed (LLM included) on every call.
    contextualize_q_system_prompt = """Given a chat history and the latest user question \
    which might reference context in the chat history, formulate a standalone question \
    which can be understood without the chat history. Do NOT answer the question, \
    just reformulate it if needed and otherwise return it as is."""
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )
    contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

    def contextualized_question(input: dict):
        # Class attributes are not visible inside this nested scope, so the
        # chain is reached through the class name (resolved at call time).
        if input.get("chat_history"):
            return ChatbotMemory.contextualize_q_chain
        return input["question"]
    template = """
    INSTRUCTION: Act as PRAN, the personal assistant of Rohan Shaw. This is a \
    conversation with a user who wants their questions about Rohan Shaw and his \
    professional and personal life answered. Use the CONTEXT to answer the QUESTION \
    in a helpful manner. If you don't know an ANSWER, say you don't know. \
    Always follow general guardrails before generating any response. \
    Always try to keep the conversation in the context of Rohan Shaw. Keep your replies \
    short, compassionate, and informative. \
    Give the answer from the CONTEXT. \
    You should help the user get their query resolved, while also increasing engagement \
    and promoting Rohan Shaw.
    CONTEXT: {context}
    QUESTION: {question}
    ANSWER:
    """
    # Final answering prompt: system instructions, the running chat history,
    # and the (possibly rewritten) user question.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", template),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )
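    # Small helper (an addition here, mirroring the usual LangChain RAG
    # pattern): join the retrieved Documents into plain text so {context}
    # receives readable prose rather than raw Document reprs.
    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)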
    # Full RAG chain: contextualize the question, retrieve and format the
    # supporting chunks, then answer with Gemini.
    rag_chain = (
        RunnablePassthrough.assign(
            context=contextualized_question | docsearch.as_retriever() | format_docs
        )
        | prompt
        | llm
    )