File size: 2,740 Bytes
d387575 8270298 1232177 8270298 d387575 8270298 2d0dc09 1232177 2d0dc09 8270298 73e820c 8270298 73e820c 8270298 cfa4436 8270298 cfa4436 73e820c 8270298 73e820c 1232177 73e820c 8270298 cfa4436 73e820c 8270298 73e820c 2d0dc09 8270298 73e820c 8270298 73e820c 8270298 73e820c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import streamlit as st
import pandas as pd
import asyncio
from llama_models import process_text_local
from dotenv import load_dotenv
import os
# Load environment variables from a local .env file (if one exists).
load_dotenv()

# Fail fast at startup when the Hugging Face credential is missing.
api_key = os.getenv("HUGGINGFACE_API_KEY")
if api_key is None:
    raise ValueError("Hugging Face API key is not set. Please add it as a secret in your Hugging Face Space settings.")

# SECURITY: never echo the secret itself -- stdout ends up in the hosting
# logs. Only confirm that a key was found.
print("Hugging Face API key loaded.")
async def process_csv(
    file,
    sample_size=5,
    model_name="instruction-pretrain/finance-Llama3-8B",
):
    """Run the text model over the leading rows of a header-less CSV.

    The first column of the CSV is treated as the list of descriptions.
    Only the first ``sample_size`` descriptions are passed to
    ``process_text_local``; every remaining row gets an empty-string
    prediction so the new column has one entry per DataFrame row.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.
        sample_size: Maximum number of leading rows to process. Values
            below zero are treated as zero.
        model_name: Model identifier forwarded to ``process_text_local``.

    Returns:
        The parsed DataFrame, with all column labels converted to strings
        and an added ``predictions`` column.
    """
    print("Reading CSV file...")
    df = pd.read_csv(file, header=None)  # the file has no header row
    print("CSV file read successfully.")

    descriptions = df[0].tolist()  # first column holds the descriptions
    total = len(descriptions)
    # Clamp to [0, total] so the padding length below can never go negative.
    limit = max(0, min(sample_size, total))

    print(f"Model name: {model_name}")
    print(f"Processing {limit} descriptions out of {total} total descriptions.")

    results = []
    for position, desc in enumerate(descriptions[:limit], start=1):
        print(f"Processing description {position}/{limit}...")
        result = await process_text_local(model_name, desc)
        # str() keeps the preview safe if the helper returns a non-string.
        print(f"Description {position} processed. Result: {str(result)[:50]}...")
        results.append(result)

    # Pad with empty strings so the column matches the DataFrame length.
    results.extend([''] * (total - limit))

    print("Assigning results to DataFrame...")
    df['predictions'] = results
    # header=None yields integer labels; mixing them with 'predictions'
    # triggers warnings downstream, so make every label a string.
    df.columns = df.columns.astype(str)
    print("Results assigned to DataFrame successfully.")
    print(df.head())  # quick visual check in the logs
    return df
# Streamlit page: upload a CSV, run the model on it, show and export results.
st.title("Finance Model Deployment")
st.write("""
### Upload a CSV file with company descriptions to extract key products, geographies, and important keywords:
""")

uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

# Short-circuit `and`: the Predict button is only rendered once a file exists.
if uploaded_file is not None and st.button("Predict"):
    with st.spinner("Processing..."):
        print("Starting CSV processing...")
        # process_csv is a coroutine; run it to completion synchronously.
        df = asyncio.run(process_csv(uploaded_file))
        print("CSV processing completed. Displaying results.")

        st.write(df)

        # Serialise once, without the index column, for the download widget.
        csv_bytes = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Download Predictions as CSV",
            data=csv_bytes,
            file_name='predictions.csv',
            mime='text/csv',
        )
        print("Results displayed and download button created.")
|