# groq-llama3
#
# Running
#
# File size: 6,747 Bytes

import os
from dotenv import find_dotenv, load_dotenv
import streamlit as st
from groq import Groq

# Load environment variables from a .env file, if one can be located
load_dotenv(find_dotenv())

# Set up Streamlit page configuration (kept ahead of every other Streamlit call)
st.set_page_config(
    page_icon="📃",
    layout="wide",
    page_title="Groq & LLaMA3x Chat Bot"
)

# App Title
st.title("Groq Chat with LLaMA3x")

# Read the API key up front so a missing or empty key produces a readable
# message on the page instead of a failure further down the script.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    st.error(
        "GROQ_API_KEY is not set. Add it to the environment or to a .env "
        "file, then reload the app."
    )
    # Nothing below can work without a client, so end this script run here.
    st.stop()

# Initialize the Groq client using the API key from the environment variables
client = Groq(api_key=groq_api_key)

# Cache only successful model lookups. The cached helper lets exceptions
# propagate, so a transient API failure is retried on the next run instead of
# an empty fallback list being stored in the cache.
@st.cache_data
def _fetch_models_cached():
    """
    Returns the `data` list from `client.models.list()`.
    Raises whatever the client raises; nothing is caught here on purpose.
    """
    return client.models.list().data

def fetch_available_models():
    """
    Fetches the available models from the Groq API.
    Returns a list of models or an empty list if there's an error.

    Successful results are served from the cache; on failure the error is
    shown on the page and the empty fallback is not cached.
    """
    try:
        return _fetch_models_cached()
    except Exception as e:
        st.error(f"Error fetching models: {e}")
        return []

# Keep the cache-control handle reachable on the public name, as it was when
# the public function itself carried the decorator.
fetch_available_models.clear = _fetch_models_cached.clear

# Fetch the model list and keep only the entries whose id begins with "llama-3"
available_models = fetch_available_models()
filtered_models = list(
    filter(lambda entry: entry.id.startswith('llama-3'), available_models)
)

# Index the retained models by id, with the metadata used by the sidebar
models = {
    entry.id: dict(name=entry.id, tokens=4000, developer=entry.owned_by)
    for entry in filtered_models
}

# Seed session state on the first run only; existing values are left untouched
for state_key, initial_value in (("messages", []), ("selected_model", None)):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

# Sidebar: Controls
# Defines the names the chat section reads afterwards: model_option,
# max_tokens, stream_mode and usage_box.
with st.sidebar:

    # Powered by Groq logo
    st.markdown(
        """
        <a href="https://groq.com" target="_blank" rel="noopener noreferrer">
            <img
                src="https://groq.com/wp-content/uploads/2024/03/PBG-mark1-color.svg"
                alt="Powered by Groq for fast inference."
                width="100%"
            />
        </a>
        """,
        unsafe_allow_html=True
    )
    st.markdown("---")

    # Define a function to clear messages when the model changes
    def reset_chat_on_model_change():
        """Empty the stored conversation; used as the selectbox on_change callback."""
        st.session_state.messages = []

    # Smallest response budget offered; also the fallback when no model exists.
    min_tokens = 200

    if models:
        # Model selection dropdown
        model_option = st.selectbox(
            "Choose a model:",
            options=list(models.keys()),
            format_func=lambda x: f"{models[x]['name']} ({models[x]['developer']})",
            on_change=reset_chat_on_model_change,  # Reset chat when model changes
        )

        # Token limit slider
        max_tokens_range = models[model_option]["tokens"]
        if max_tokens_range > min_tokens:
            max_tokens = st.slider(
                "Max Tokens:",
                min_value=min_tokens,
                max_value=max_tokens_range,
                # Default to half the limit, but never below the slider minimum
                # (the floor and min_value now share one constant).
                value=max(min_tokens, int(max_tokens_range * 0.5)),
                step=256,
                help=f"Adjust the maximum number of tokens for the response. Maximum for the selected model: {max_tokens_range}"
            )
        else:
            # The limit leaves no range to choose from; use it as-is.
            max_tokens = max_tokens_range
    else:
        st.warning("No available models to select.")
        model_option = None
        max_tokens = min_tokens

    # Additional options
    stream_mode = st.checkbox("Enable Streaming", value=True)

    # Button to clear the chat
    if st.button("Clear Chat"):
        st.session_state.messages = []

    st.markdown("### Usage Summary")
    # Placeholder that the chat section fills once a response reports usage
    usage_box = st.empty()

    # Disclaimer
    st.markdown(
        """
        -----
        ⚠️ **Important:**  
        *The responses provided by this application are generated automatically using an AI model.  
        Users are responsible for verifying the accuracy of the information before relying on it.  
        Always cross-check facts and data for critical decisions.*
        """
    )

# Main Chat Interface

def _format_seconds(value):
    """Return `value` rounded to 5 places with a " secs" suffix, or "n/a" when it is None."""
    if value is None:
        return "n/a"
    return f"{round(value, 5)} secs"

def _format_usage_summary(usage_data):
    """
    Build the markdown text for the sidebar usage box.

    Fields are read with getattr so that a missing or None token count or
    timing renders as "n/a" instead of raising while the text is built.
    """
    def field(name):
        found = getattr(usage_data, name, None)
        return "n/a" if found is None else found

    return (
        f"**Token Usage:**\n"
        f"- Prompt Tokens: {field('prompt_tokens')}\n"
        f"- Response Tokens: {field('completion_tokens')}\n"
        f"- Total Tokens: {field('total_tokens')}\n\n"
        f"**Timings:**\n"
        f"- Prompt Time: {_format_seconds(getattr(usage_data, 'prompt_time', None))}\n"
        f"- Response Time: {_format_seconds(getattr(usage_data, 'completion_time', None))}\n"
        f"- Total Time: {_format_seconds(getattr(usage_data, 'total_time', None))}"
    )

st.markdown("### Chat Interface")

# Display the chat history
for message in st.session_state.messages:
    avatar = "🔋" if message["role"] == "assistant" else "🧑‍💻"
    with st.chat_message(message["role"], avatar=avatar):
        st.markdown(message["content"])

# Capture user input
user_input = st.chat_input("Enter your message here...")

if user_input:
    # Append the user input to the session state
    st.session_state.messages.append({"role": "user", "content": user_input})
    with st.chat_message("user", avatar="🧑‍💻"):
        st.markdown(user_input)

    if not model_option:
        # No model could be selected in the sidebar, so skip the API call.
        st.error("Error generating the response: no model is selected.")
    else:
        # Generate a response using the selected model
        try:
            full_response = ""
            usage_data = None

            # One request for both modes; only the `stream` flag differs.
            chat_completion = client.chat.completions.create(
                model=model_option,
                messages=[
                    {"role": m["role"], "content": m["content"]}
                    for m in st.session_state.messages
                ],
                max_tokens=max_tokens,
                stream=stream_mode
            )

            with st.chat_message("assistant", avatar="🔋"):
                if stream_mode:
                    response_placeholder = st.empty()

                    for chunk in chat_completion:
                        # Skip chunks that carry no choices before indexing.
                        if chunk.choices:
                            piece = chunk.choices[0].delta.content
                            if piece:
                                full_response += piece
                                response_placeholder.markdown(full_response)

                        # NOTE(review): Groq is documented to attach usage to a
                        # streamed chunk under `x_groq.usage`; read defensively
                        # so absence of the field is harmless.
                        streamed_usage = getattr(
                            getattr(chunk, "x_groq", None), "usage", None
                        )
                        if streamed_usage:
                            usage_data = streamed_usage
                else:
                    # Treat missing content as an empty reply.
                    full_response = chat_completion.choices[0].message.content or ""
                    usage_data = chat_completion.usage
                    st.markdown(full_response)

            # Append the assistant's response to the session state before the
            # usage box is touched, so the reply is kept in history even if
            # rendering the summary fails.
            st.session_state.messages.append(
                {"role": "assistant", "content": full_response}
            )

            if usage_data:
                usage_box.markdown(_format_usage_summary(usage_data))

        except Exception as e:
            st.error(f"Error generating the response: {e}")