Spaces:
Sleeping
Sleeping
File size: 7,775 Bytes
92bebf5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
import os
import random

import pinecone
import streamlit as st
import tiktoken
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone, Chroma
def _require_env(name):
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: If the variable is unset or empty, so a missing secret
            is reported by name instead of failing later inside a client.
    """
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(
            f"Missing required environment variable {name!r}; "
            "configure it as a secret of the deployment."
        )
    return value


# Secrets are read from the environment so that no credential lives in source
# control. Any key that was ever committed to this file should be rotated.
openai_api_key = _require_env("OPENAI_API_KEY")

# Pinecone API key and environment
api_key = _require_env("PINECONE_API_KEY")
environment = os.environ.get("PINECONE_ENVIRONMENT", "gcp-starter")

# Initialize Pinecone
pinecone.init(api_key=api_key, environment=environment)

# Define the name of the Pinecone index
index_name = 'mi-resource-qa'

# Initialize the OpenAI embeddings object used for every vector-store lookup
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
# Define functions
def insert_or_fetch_embeddings(index_name):
    """Open an existing Pinecone index as a LangChain vector store.

    Despite the name, nothing is inserted here: the index must already exist.

    Args:
        index_name: Name of the Pinecone index to open.

    Returns:
        The result of ``Pinecone.from_existing_index`` for ``index_name`` and
        the module-level ``embeddings`` object.

    Raises:
        ValueError: If ``index_name`` is not listed by
            ``pinecone.list_indexes()``.
    """
    # Guard clause: fail early when the index is unknown.
    if index_name not in pinecone.list_indexes():
        raise ValueError(f"Index {index_name} does not exist. Please create it before fetching.")
    return Pinecone.from_existing_index(index_name, embeddings)
# Initialize or fetch Pinecone vector store.
# Streamlit re-executes this script on every user interaction, so the lookup is
# wrapped in st.cache_resource: the index listing and store construction run
# once per index name and the resulting object is reused across reruns.
@st.cache_resource(show_spinner=False)
def _load_vector_store(name):
    """Return the vector store for ``name``, cached across script reruns."""
    return insert_or_fetch_embeddings(name)


vector_store = _load_vector_store(index_name)
# Define the metadata for filtering
# metadata = {'source': '/Users/cheynelevesseur/Desktop/Python_Code/Projects/LLM/Intensifying Literacy Instruction - Essential Practices (NATIONAL).pdf'}
def calculate_embedding_cost(text):
    """Count the tokens in ``text`` and estimate what embedding them costs.

    Tokenisation uses the tiktoken encoding registered for
    ``text-embedding-ada-002``; the cost is computed at 0.0004 USD per
    1000 tokens.

    Args:
        text: The string to be tokenised.

    Returns:
        A ``(token_count, cost_in_usd)`` tuple.
    """
    import tiktoken

    encoder = tiktoken.encoding_for_model('text-embedding-ada-002')
    token_count = len(encoder.encode(text))
    cost_usd = token_count / 1000 * 0.0004
    return token_count, cost_usd
def ask_with_memory(vector_store, query, chat_history=None):
    """Answer ``query`` from ``vector_store`` while keeping conversational context.

    Args:
        vector_store: Object exposing ``as_retriever``; the three most similar
            chunks are retrieved for each question.
        query: The user's question.
        chat_history: List of ``(question, answer)`` tuples from earlier turns.
            When omitted, the list stored in ``st.session_state['history']``
            is used. The list is extended in place with the new exchange.

    Returns:
        The answer text produced by the chain.
    """
    from langchain.chains import ConversationalRetrievalChain

    # A ``[]`` default would be shared between calls; resolve the default at
    # call time and honour a history list that the caller passes explicitly.
    if chat_history is None:
        chat_history = st.session_state['history']

    llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=1, openai_api_key=openai_api_key)
    retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': 3})
    chain = ConversationalRetrievalChain.from_llm(llm, retriever)
    result = chain({'question': query, 'chat_history': chat_history})

    # Record the exchange as a (question, answer) tuple for the next turn.
    chat_history.append((query, result['answer']))
    return result['answer']
# Create the per-session chat history on the first run of the script;
# later reruns keep whatever has been accumulated so far.
if 'history' not in st.session_state:
    st.session_state.history = []
# # STREAMLIT APPLICATION SETUP WITH PASSWORD
# Define the correct password
# correct_password = "MiBLSi"
# Page header: logo, application title, and the name of the indexed resource.
# NOTE(review): the logo path is absolute and points into a user's home
# directory; it resolves only where that exact path exists - confirm for the
# deployment target.
logo_path = '/Users/cheynelevesseur/Desktop/Python_Code/Projects/LLM/Streamlit_Document_Reader_Simple/MTSS.ai_Logo.png'
image_width = 300  # logo width in pixels
st.image(logo_path, width=image_width)

st.subheader('Ink QA™ | Dynamic PDFs')

# The resource name is rendered through Markdown so it can be bold.
resource_banner = """
Resource: **Intensifying Literacy Instruction: Essential Practices**
"""
st.markdown(resource_banner, unsafe_allow_html=True)
# Sidebar: branding images, a link to the source PDF, an audio introduction,
# and the citation with the document's table of contents.
with st.sidebar:
    # Both images are shown at the same fixed width, one under the other.
    for sidebar_image in (
        '/Users/cheynelevesseur/Desktop/Python_Code/Projects/LLM/Streamlit_Document_Reader_Simple/mimtss.png',
        '/Users/cheynelevesseur/Desktop/Python_Code/Projects/LLM/Streamlit_Document_Reader_Simple/Literacy_Cover.png',
    ):
        st.image(sidebar_image, width=200)

    st.link_button("View | Download", "https://mimtsstac.org/sites/default/files/session-documents/Intensifying%20Literacy%20Instruction%20-%20Essential%20Practices%20%28NATIONAL%29.pdf")

    # Heading shown above the audio player
    Audio_Header_text = """
**Tune into Dr. St. Martin's introduction**"""
    st.markdown(Audio_Header_text)

    # Audio introduction, played from the beginning
    audio_file_path = '/Users/cheynelevesseur/Desktop/Python_Code/Projects/LLM/Streamlit_Document_Reader_Simple/Audio_Introduction_Literacy.m4a'
    st.audio(audio_file_path, format='audio/mp4', start_time=0)

    # Citation and table of contents, written as Markdown
    citation_Content_text = """
**Citation**
St. Martin, K., Vaughn, S., Troia, G., Fien, & H., Coyne, M. (2023). *Intensifying literacy instruction: Essential practices, Version 2.0*. Lansing, MI: MiMTSS Technical Assistance Center, Michigan Department of Education.
**Table of Contents**
* **Introduction**: pg. 1
* **Intensifying Literacy Instruction: Essential Practices**: pg. 4
* **Purpose**: pg. 4
* **Practice 1**: Knowledge and Use of a Learning Progression for Developing Skilled Readers and Writers: pg. 6
* **Practice 2**: Design and Use of an Intervention Platform as the Foundation for Effective Intervention: pg. 13
* **Practice 3**: On-going Data-Based Decision Making for Providing and Intensifying Interventions: pg. 16
* **Practice 4**: Adaptations to Increase the Instructional Intensity of the Intervention: pg. 20
* **Practice 5**: Infrastructures to Support Students with Significant and Persistent Literacy Needs: pg. 24
* **Motivation and Engagement**: pg. 28
* **Considerations for Understanding How Students' Learning and Behavior are Enhanced**: pg. 28
* **Summary**: pg. 29
* **Endnotes**: pg. 30
* **Acknowledgment**: pg. 39
"""
    st.markdown(citation_Content_text)
# if password == correct_password:
# Example prompts; one of them is shown as greyed-out hint text in the input box.
placeholders = [
    'Example: Summarize the article in 200 words or less',
    'Example: What are the essential practices?',
    'Example: I am a teacher, why is this resource important?',
    'Example: How can this resource support my instruction in reading and writing?',
    'Example: Does this resource align with the learning progression for developing skilled readers and writers?',
    'Example: How does this resource address the needs of students scoring below the 20th percentile?',
    'Example: Are there assessment tools included in this resource to monitor student progress?',
    'Example: Does this resource provide guidance on data collection and analysis for monitoring student outcomes?',
    "Example: How can this resource be used to support students' social-emotional development?",
    "Example: How does this resource align with the district's literacy goals and objectives?",
    'Example: What research and evidence support the effectiveness of this resource?',
    'Example: Does this resource provide guidance on implementation fidelity',
]

# Pick the hint once per session so it does not change on every rerun.
if 'placeholder' not in st.session_state:
    st.session_state['placeholder'] = random.choice(placeholders)

# The question box starts empty and shows the session's hint text.
q = st.text_input(
    label='Ask a question or make a request ',
    value='',
    placeholder=st.session_state['placeholder'],
)

k = 3  # Set k to 3
# # Initialize chat history if not present
# if 'history' not in st.session_state:
# st.session_state.history = []
# Answer the submitted question, then show the answer and the transcript.
# NOTE(review): block nesting was reconstructed from a listing without
# indentation; everything below is assumed to run only when a question was
# entered - confirm against the deployed file.
if q:
    with st.spinner('Thinking...'):
        answer = ask_with_memory(vector_store, q, st.session_state.history)

        # Latest answer, followed by the standing disclaimer
        st.text_area('Response: ', value=answer, height=400, key="response_text_area")
        st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')

        # Build the transcript with the most recent exchange first.
        transcript_entries = []
        for exchange in reversed(st.session_state.history):
            transcript_entries.append(f"Q: {exchange[0]}\nA: {exchange[1]}")
        history_text = "\n\n".join(transcript_entries)

        # Display chat history
        st.text_area('Chat History', value=history_text, height=800)