ArturG9 committed on
Commit
27e1e01
1 Parent(s): 86010a6

Upload 2 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ mistral-7b-v0.1-layla-v4-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
Stramlit_app.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import sys
4
+ import shutil
5
+ from langchain import HuggingFacePipeline,HuggingFaceHub,HuggingFaceTextGenInference
6
+ from langchain.text_splitter import TokenTextSplitter,RecursiveCharacterTextSplitter
7
+ from langchain.document_loaders import PyPDFLoader
8
+ from langchain.document_loaders.pdf import PyPDFDirectoryLoader
9
+ from langchain_community.embeddings import HuggingFaceEmbeddings
10
+ from transformers import pipeline
11
+ import torch
12
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,AutoModelForSeq2SeqLM
13
+ from langchain.retrievers.self_query.base import SelfQueryRetriever
14
+ from langchain.chains.query_constructor.base import AttributeInfo
15
+ from langchain.retrievers import ContextualCompressionRetriever
16
+ from langchain.retrievers.document_compressors import LLMChainExtractor
17
+ from langchain_huggingface.llms import HuggingFacePipeline
18
+ from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
19
+ from langchain.vectorstores import DocArrayInMemorySearch
20
+ from langchain.document_loaders import TextLoader
21
+ from langchain.chains import RetrievalQA, ConversationalRetrievalChain
22
+ from langchain.memory import ConversationBufferMemory
23
+ from transformers import DistilBertTokenizer, DistilBertForQuestionAnswering
24
+ from langchain.chains import create_history_aware_retriever, create_retrieval_chain
25
+ from langchain.chains.combine_documents import create_stuff_documents_chain
26
+ import panel as pn
27
+ from langchain_core.runnables.history import RunnableWithMessageHistory
28
+ from langchain_core.chat_history import BaseChatMessageHistory
29
+ from langchain_community.chat_message_histories import ChatMessageHistory
30
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
31
+ from langchain_community.llms import Aphrodite
32
+ from typing import Callable, Dict, List, Optional, Union
33
+ from langchain.vectorstores import Chroma
34
+ import re
35
+ import streamlit as st
36
+ from langchain_community.llms import llamacpp
37
+ from utills import split_docs, retriever_from_chroma, history_aware_retriever,chroma_db
38
+ from langchain_community.chat_message_histories.streamlit import StreamlitChatMessageHistory
39
+
40
+
41
+
42
+
43
+
44
+
45
# Resolve every resource relative to this script so the app does not depend
# on the current working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(script_dir, "data")
# The file name must not start with a path separator: os.path.join() throws
# away all earlier components when a later one is absolute, which made
# model_path point at the filesystem root instead of script_dir.
# NOTE(review): the weights tracked in this repository are named
# "mistral-7b-v0.1-layla-v4-Q4_K_M.gguf" (no ".2" suffix) - confirm which
# file name is intended.
model_path = os.path.join(script_dir, "mistral-7b-v0.1-layla-v4-Q4_K_M.gguf.2")
# Not referenced anywhere else in this script; kept as a module-level name.
store = {}
49
+
50
# Embedding model handed to the vector-store builder further down.
# It runs on the CPU and is asked to normalise the vectors it produces.
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)
hf = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)
57
+
58
+
59
+
60
+
61
# Load every .txt file found directly under data_path into one list.
documents = []
for filename in os.listdir(data_path):
    if not filename.endswith(".txt"):
        continue
    file_path = os.path.join(data_path, filename)
    # Extend the accumulator with this file's documents. The loader result
    # must not be assigned to `documents` itself: that discarded everything
    # loaded so far and then appended the current file to itself.
    documents.extend(TextLoader(file_path).load())

# Chunk the corpus, index the chunks and expose the index as a retriever.
# The three helpers come from the project-local `utills` module.
# NOTE(review): 450/20 look like chunk size/overlap and "mmr"/6 like the
# search type and result count - confirm against the helper signatures.
docs = split_docs(documents, 450, 20)
# Bound to its own name so the imported chroma_db() helper is not shadowed
# by the store it returns.
vector_store = chroma_db(docs, hf)
retriever = retriever_from_chroma(vector_store, "mmr", 6)
77
+
78
+
79
# Imported here because the import section of this script never brings these
# two names into scope; without them the CallbackManager line raises NameError.
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler

# The embedding model is already constructed earlier in this script as `hf`
# with exactly these settings, so it is not rebuilt here; a second
# construction only repeated the model load.

# Callback manager wrapping a stdout-streaming handler, passed to the LLM.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Local GGUF model served through llama.cpp.
llm = llamacpp.LlamaCpp(
    model_path=model_path,
    n_gpu_layers=0,  # no layers offloaded to a GPU
    temperature=0.1,
    top_p=0.5,
    n_ctx=31000,
    max_tokens=250,
    repeat_penalty=1.7,
    # NOTE(review): the first stop entry is an empty string - confirm it is
    # intentional and does not cut generation short.
    stop=["", "Instruction:", "### Instruction:", "###<user>", "</user>"],
    callback_manager=callback_manager,
    verbose=False,
)
103
+
104
+
105
# System prompt for the step that rewrites the latest user question into a
# standalone question before retrieval.
contextualize_q_system_prompt = """Given a context, chat history and the latest user question
which maybe reference context in the chat history, formulate a standalone question
which can be understood without the chat history. Do NOT answer the question,
just reformulate it if needed and otherwise return it as is."""

# Built by the project-local helper from the LLM, the base retriever and the
# rewriting prompt.
ha_retriever = history_aware_retriever(
    llm,
    retriever,
    contextualize_q_system_prompt,
)

# System prompt for the answering step; {context} receives the retrieved text.
qa_system_prompt = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Be as informative as possible, be polite and formal.\n{context}"""

_qa_messages = [
    ("system", qa_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(_qa_messages)

# Retrieval feeds the "stuff documents" answering chain.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain = create_retrieval_chain(ha_retriever, question_answer_chain)

# Chat history stored under a fixed key.
msgs = StreamlitChatMessageHistory(key="special_app_key")


def _session_history(session_id):
    """Return the one shared history object, whatever session id is given."""
    return msgs


conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    _session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)
133
+
134
+
135
+
136
+
137
+
138
+
139
def display_chat_history(chat_history):
    """Render every stored message as a Streamlit chat bubble.

    Args:
        chat_history: Object whose ``messages`` attribute is iterated; each
            item must expose ``type`` (used as the bubble's role) and
            ``content`` (the text written into the bubble).
    """
    for message in chat_history.messages:
        bubble = st.chat_message(message.type)
        bubble.write(message.content)
143
+
144
def display_documents(docs, on_click=None):
    """Show retrieved documents, each with an optional "expand" button.

    Args:
        docs: Sequence of documents to render. Items exposing a
            ``page_content`` attribute are rendered through it; anything
            else is passed to Streamlit unchanged. ``None`` or an empty
            sequence renders nothing.
        on_click: Optional callable invoked with the zero-based index of a
            document when that document's button is pressed.
    """
    if not docs:
        return

    for i, document in enumerate(docs):
        st.write(f"**Docs {i+1}**")
        # Render the document's text rather than the object itself; plain
        # strings have no page_content and are shown unchanged.
        # (presumably the items are LangChain Document objects - verify
        # against the caller)
        text = getattr(document, "page_content", document)
        # NOTE(review): unsafe_allow_html renders any HTML in the text
        # unescaped - confirm the document sources are trusted.
        st.markdown(text, unsafe_allow_html=True)
        if on_click and st.button(f"Expand Article {i+1}"):
            on_click(i)
153
+
154
def main(conversational_rag_chain):
    """Run the Streamlit chat UI on top of a history-aware RAG chain.

    Args:
        conversational_rag_chain: Runnable invoked with ``{"input": text}``
            and a ``session_id`` config. Its result is read as a mapping
            with an ``"answer"`` entry and, optionally, a ``"context"``
            entry holding the retrieved documents.
    """
    # Use the same session-state key as the module-level history the chain
    # writes to, so the transcript shown here is the one the chain maintains.
    # The history object persists itself in st.session_state under that key,
    # so no separate session-state bookkeeping is needed.
    msgs = StreamlitChatMessageHistory(key="special_app_key")

    st.title("Conversational RAG Chatbot")

    # Replay the conversation so far.
    display_chat_history(msgs)

    prompt = st.chat_input()
    if not prompt:
        return

    st.chat_message("human").write(prompt)

    # The chain is configured with input_messages_key="input", so the user
    # text must be sent under that key.
    config = {"configurable": {"session_id": "any"}}
    response = conversational_rag_chain.invoke({"input": prompt}, config)

    # The chain returns a mapping, not a message object: the generated text
    # is under "answer" and the retrieved documents under "context".
    st.chat_message("ai").write(response["answer"])

    docs = response.get("context")
    if docs:
        def expand_document(index):
            # Placeholder for real expansion logic (e.g. show extra details).
            st.write(f"Expanding document {index+1}...")

        display_documents(docs, expand_document)


if __name__ == "__main__":
    # main() requires the chain; calling it without arguments raised TypeError.
    main(conversational_rag_chain)
mistral-7b-v0.1-layla-v4-Q4_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bb0f255d9735ac472dedb5e0cf734da8793e46383de26e823554e7138486d9f
3
+ size 4368438944