import json
import os

import streamlit as st
import torch
import transformers
from huggingface_hub import login

# CONSTANTS
MAX_NEW_TOKENS = 256
SYSTEM_MESSAGE = "You are a helpful, knowledgeable assistant."

# ENV VARS
# Point the sentence-transformers cache at the working directory to avoid
# permission errors with transformers and HF models.
os.environ["SENTENCE_TRANSFORMERS_HOME"] = "."
token = os.getenv("HF_TOKEN_WRITE")  # Must be a write token

# STREAMLIT UI AREA
st.write("## Ask your Local LLM")
text_input = st.text_input("Query", value="Why is the sky blue?")
submit = st.button("Submit")

# MODEL AREA
# Use the token to authenticate with the Hugging Face Hub.
login(
    token=token,
    write_permission=True,  # Must be True when we pass in our own write token;
    # otherwise we get Permission Denied.
)

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"


@st.cache_resource
def load_model():
    # Cache the pipeline across Streamlit reruns so the model loads only once.
    return transformers.pipeline(
        "text-generation",
        model=model_id,
        model_kwargs={"torch_dtype": torch.bfloat16},
        device_map="auto",
    )


pipeline = load_model()

# Restore the chat history from disk, falling back to a fresh conversation.
message_store_path = "messages.jsonl"
messages = [
    {"role": "system", "content": SYSTEM_MESSAGE},
]
if os.path.exists(message_store_path):
    with open(message_store_path, "r", encoding="utf-8") as f:
        messages = [json.loads(line) for line in f]
print(messages)  # Debug: show the restored history in the console.


@st.cache_data  # Cache responses so identical queries aren't re-run.
def infer(message: str, messages: list[dict]) -> str:
    """Run one turn of chat inference and persist the updated history.

    Params:
        message: Most recent query to the LLM.
        messages: Chat history up to the current point, formatted like
            {"role": "user", "content": "What is your name?"}
    """
    messages.append({"role": "user", "content": message})
    # Perform inference. The chat pipeline returns one result per input;
    # its "generated_text" holds the full conversation with the assistant's
    # reply appended as the final message.
    output = pipeline(messages, max_new_tokens=MAX_NEW_TOKENS)
    reply = output[0]["generated_text"][-1]["content"]
    messages.append({"role": "assistant", "content": reply})
    # Save the newly updated messages object, one JSON message per line.
    with open(message_store_path, "w", encoding="utf-8") as f:
        for msg in messages:
            json.dump(msg, f)
            f.write("\n")
    return reply


if submit:
    response = infer(text_input, messages)
    st.write(response)
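
# USAGE (a sketch of how to run this app; assumes the file is saved as app.py,
# that HF_TOKEN_WRITE holds a write-scoped token from hf.co/settings/tokens,
# and that the account has been granted access to the gated Llama 3.1 repo):
#
#   export HF_TOKEN_WRITE=hf_...
#   streamlit run app.py
#
# Delete messages.jsonl to reset the stored chat history.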