import streamlit as st |
from langchain.text_splitter import RecursiveCharacterTextSplitter |
from langchain.document_loaders import PyPDFLoader, YoutubeLoader |
from langchain.embeddings import HuggingFaceEmbeddings |
from langchain.prompts import PromptTemplate |
from langchain.vectorstores import FAISS |
from langchain.llms import CTransformers |
import os |
import torch |
@st.cache_resource
def _load_llm(use_gpu):
    """Build the local Llama-2 chat model once and reuse it on later calls.

    Loading the quantized weights from disk is by far the slowest step, so the
    instance is kept in Streamlit's resource cache instead of being rebuilt
    for every question.

    NOTE(review): cached resources are shared between sessions; confirm the
    model object is safe to share if the app serves concurrent users.

    Args:
        use_gpu: When true, offload 10 layers to the GPU and cap generation at
            1024 new tokens; otherwise run on CPU with up to 2048 new tokens.

    Returns:
        The configured ``CTransformers`` LLM.
    """
    config = {
        'max_new_tokens': 1024 if use_gpu else 2048,
        'context_length': 4096,
        'temperature': 0.05,
    }
    if use_gpu:
        config['gpu_layers'] = 10
    return CTransformers(model='models/llama-2-7b-chat.ggmlv3.q8_0.bin',
                         model_type='llama',
                         config=config)


def model_response(query, docs_page_content):
    """Answer *query* from the retrieved context and render the answer.

    The prompt instructs the model to rely only on ``docs_page_content`` and
    to say "I don't know" when the context is insufficient. The answer is
    written into a bordered Streamlit container; nothing is returned.

    Reads the module-level ``doc_type`` to decide whether the temporary PDF
    copy at ``files/pdf_file.pdf`` should be deleted after generation.

    Args:
        query: The user's question.
        docs_page_content: Text of the chunks retrieved for the question.
    """
    llm = _load_llm(torch.cuda.is_available())

    template = """
You are a helpful assistant that that can answer questions about documents using the provided transcripts. Answer the following question: {query} by searching the following transcript: {docs_page_content}
Only use the factual information from the transcript to answer the question. If you feel like you don't have enough information to answer the question, say "I don't know". Your answers should be verbose and detailed.
"""
    prompt = PromptTemplate(input_variables=["query", "docs_page_content"],
                            template=template)
    response = llm(prompt.format(query=query, docs_page_content=docs_page_content))

    # The PDF copy has been consumed by now, so remove it whether or not the
    # model produced text. A missing file is not an error here.
    if doc_type == 'PDF File':
        try:
            os.remove("files/pdf_file.pdf")
        except FileNotFoundError:
            pass

    if not response:
        st.warning('The model returned an empty answer. Try rephrasing your question.')
        return

    container = st.container(border=True)
    container.write(response)
def similarity_finder(data):
    """Return the text of the chunks in *data* most similar to the question.

    The loaded documents are split into 300-character chunks without overlap,
    indexed in an in-memory FAISS store, and searched for the four chunks
    closest to the question.

    Reads two module-level names rather than taking them as parameters:
    ``query`` (the current question) and ``embeddings`` (the embedding model).

    Args:
        data: Documents as returned by a LangChain loader's ``load()``.

    Returns:
        The page contents of the matching chunks joined by single spaces, or
        an empty string when *data* yields no chunks at all.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=0)
    docs = text_splitter.split_documents(data)
    if not docs:
        # NOTE(review): building a FAISS index from zero documents is expected
        # to fail, so a source with no extractable text (for example a scanned
        # PDF) yields an empty context instead of a crash.
        return ''

    db = FAISS.from_documents(docs, embeddings)
    query_similarity = db.similarity_search(query, k=4)
    return " ".join(d.page_content for d in query_similarity)
# Context passed to the model when no retrieval has been performed yet.
docs_page_content = ' '


@st.cache_resource
def _load_embeddings():
    """Create the HuggingFace embedding model once and reuse it.

    Streamlit re-executes the whole script on every interaction; caching the
    instance avoids re-instantiating the embedding model on each rerun.
    """
    return HuggingFaceEmbeddings()


# Module-level name read by similarity_finder().
embeddings = _load_embeddings()

# Page header.
st.title('InsightFlow')
st.markdown(' ##### Conversational Learning Companion')
st.markdown(' ###### This web app helps you understand the concepts better.')

# Source selector; the placeholder first option forces an explicit choice.
with st.sidebar:
    st.subheader('Inputs Tab', divider=True)
    doc_type = st.radio('Select source:', ('Select Source', 'Youtube Video', 'PDF File'))
# Per-source flow: collect the question in the main area and the source in the
# sidebar, then load, retrieve, and answer only after Submit is pressed with
# valid inputs. `query` stays a module-level name because similarity_finder()
# reads it as a global.
if doc_type == 'Youtube Video':
    query = st.text_area('What is your question?', placeholder='Ask question from the video')
    with st.sidebar:
        yt_link = st.text_input('YouTube Link:', placeholder='https://www.youtube.com/watch?v=AbCxYz')
        submitted = st.button('Submit')

    if submitted:
        # An empty text input is an empty string, not None, so test truthiness.
        link = (yt_link or '').strip()
        if not link:
            st.warning('Please enter a YouTube link.')
        elif not query.strip():
            st.warning('Please enter a question.')
        else:
            try:
                loader = YoutubeLoader.from_youtube_url(link)
            except ValueError:
                # NOTE(review): from_youtube_url is expected to raise
                # ValueError when no video id can be extracted from the URL.
                st.error('That does not look like a valid YouTube link.')
            else:
                data = loader.load()
                docs_page_content = similarity_finder(data)
                model_response(query, docs_page_content)

elif doc_type == 'PDF File':
    query = st.text_area('What is your question?', '', placeholder='Ask question from the uploaded pdf')
    with st.sidebar:
        pdf_file = st.file_uploader("Choose a PDF file:", accept_multiple_files=False, type=['pdf'])
        submitted = st.button('Submit')

    if submitted:
        if pdf_file is None:
            st.warning('Please upload a PDF file.')
        elif not query.strip():
            st.warning('Please enter a question.')
        else:
            # PyPDFLoader is given a path, so persist the upload first. The
            # target directory may not exist on a fresh checkout.
            os.makedirs('files', exist_ok=True)
            with open('files/pdf_file.pdf', "wb") as f:
                # getvalue() returns the full upload regardless of the
                # buffer's current read position.
                f.write(pdf_file.getvalue())
            loader = PyPDFLoader("files/pdf_file.pdf")
            data = loader.load()
            docs_page_content = similarity_finder(data)
            model_response(query, docs_page_content)

elif doc_type == 'Select Source':
    with st.sidebar:
        if st.button('Submit'):
            st.write('Please choose a source.')