test / app.py
ngrigg's picture
upload
1232177
raw
history blame
2.74 kB
import streamlit as st
import pandas as pd
import asyncio
from llama_models import process_text_local
from dotenv import load_dotenv
import os
# Load environment variables from a local .env file (if one exists) so the
# lookup below works the same locally and in a deployed environment.
load_dotenv()

# Fail fast at import time when the credential is missing, before any UI is
# rendered or any file is processed.
api_key = os.getenv("HUGGINGFACE_API_KEY")
if api_key is None:
    raise ValueError("Hugging Face API key is not set. Please add it as a secret in your Hugging Face Space settings.")

# Security: never echo the secret itself. Anything printed here ends up in the
# process logs, which may be readable by people who should not see the key.
# Only confirm that a value was found.
print("Hugging Face API key loaded.")
async def process_csv(
    file,
    *,
    sample_size: int = 5,
    model_name: str = "instruction-pretrain/finance-Llama3-8B",
) -> pd.DataFrame:
    """Run the text model over the first rows of a header-less CSV.

    The CSV is read with ``header=None`` and the first column is treated as
    the list of descriptions. Only the first ``sample_size`` descriptions are
    sent to ``process_text_local`` (awaited one at a time, in order); every
    remaining row receives an empty-string prediction so the new column has
    the same length as the DataFrame.

    Args:
        file: Anything ``pd.read_csv`` accepts (path or file-like object).
        sample_size: Maximum number of leading rows to process. Values larger
            than the number of rows, or negative values, are clamped into
            ``[0, number of rows]``. Defaults to 5, the previous fixed value.
        model_name: Model identifier forwarded to ``process_text_local``.
            Defaults to the previously hard-coded model.

    Returns:
        The parsed DataFrame with an added ``'predictions'`` column and all
        column labels converted to strings.

    Errors raised by ``pd.read_csv`` or ``process_text_local`` are not caught
    here and propagate to the caller.
    """
    print("Reading CSV file...")
    df = pd.read_csv(file, header=None)  # Read the CSV file without a header
    print("CSV file read successfully.")

    descriptions = df[0].tolist()  # Access the first column directly
    total = len(descriptions)

    # Clamp the requested size so slicing and the padding below stay
    # consistent even for out-of-range requests.
    sample_count = max(0, min(sample_size, total))
    descriptions_subset = descriptions[:sample_count]

    print(f"Model name: {model_name}")
    print(f"Processing {sample_count} descriptions out of {total} total descriptions.")

    results = []
    for i, desc in enumerate(descriptions_subset):
        print(f"Processing description {i+1}/{sample_count}...")
        result = await process_text_local(model_name, desc)
        # Log only a short preview; assumes the result is sliceable
        # (presumably a str) -- TODO confirm against process_text_local.
        print(f"Description {i+1} processed. Result: {result[:50]}...")
        results.append(result)

    # Fill the rest of the results with empty strings to match the length of
    # the DataFrame, otherwise the column assignment below would fail.
    results.extend([''] * (total - sample_count))

    print("Assigning results to DataFrame...")
    df['predictions'] = results
    # header=None produces integer column labels; convert all labels to
    # strings to avoid warnings about mixed label types.
    df.columns = df.columns.astype(str)
    print("Results assigned to DataFrame successfully.")
    print(df.head())  # Print first few rows of the DataFrame to verify
    return df
# ---- Streamlit page: upload a CSV, run predictions on demand, offer a download ----
st.title("Finance Model Deployment")
st.write("""
### Upload a CSV file with company descriptions to extract key products, geographies, and important keywords:
""")

uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

# Short-circuit keeps the "Predict" button hidden until a file has been chosen;
# the body runs only on the rerun triggered by clicking that button.
if uploaded_file is not None and st.button("Predict"):
    with st.spinner("Processing..."):
        print("Starting CSV processing...")
        # process_csv is a coroutine; drive it to completion synchronously.
        df = asyncio.run(process_csv(uploaded_file))
        print("CSV processing completed. Displaying results.")

        st.write(df)

        # Serialise once, without the index, as UTF-8 bytes for the download.
        csv_bytes = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Download Predictions as CSV",
            data=csv_bytes,
            file_name='predictions.csv',
            mime='text/csv',
        )
        print("Results displayed and download button created.")