File size: 2,740 Bytes
d387575
8270298
 
1232177
8270298
 
d387575
8270298
 
 
2d0dc09
 
1232177
 
2d0dc09
 
8270298
73e820c
8270298
73e820c
 
8270298
 
cfa4436
8270298
cfa4436
73e820c
 
8270298
 
73e820c
 
1232177
73e820c
8270298
 
cfa4436
 
 
73e820c
8270298
73e820c
 
2d0dc09
8270298
 
 
 
 
 
 
 
 
 
 
 
 
73e820c
8270298
73e820c
8270298
 
 
 
 
 
 
73e820c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import streamlit as st
import pandas as pd
import asyncio
from llama_models import process_text_local
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Ensure API key is loaded correctly. An empty value is rejected as well as a
# missing one, since an empty string cannot be a usable key.
api_key = os.getenv("HUGGINGFACE_API_KEY")
if not api_key:
    raise ValueError("Hugging Face API key is not set. Please add it as a secret in your Hugging Face Space settings.")
# Confirm the key was found without echoing the secret itself: anything
# printed here ends up in the application's stdout/logs.
print("Hugging Face API key loaded.")

async def process_csv(file, sample_size=5, model_name="instruction-pretrain/finance-Llama3-8B"):
    """Run the model over the first column of a header-less CSV.

    Only the leading ``sample_size`` rows are sent to the model; every other
    row receives an empty-string prediction so the new column has the same
    length as the DataFrame.

    Args:
        file: Anything ``pandas.read_csv`` accepts (it is passed straight
            through). The file is read without a header row.
        sample_size: Maximum number of leading rows to process. ``None``
            processes every row; negative values are treated as 0.
        model_name: Model identifier forwarded to ``process_text_local``.

    Returns:
        The parsed DataFrame with all column names converted to strings and
        an added ``predictions`` column.
    """
    print("Reading CSV file...")
    df = pd.read_csv(file, header=None)  # Read the CSV file without a header
    print("CSV file read successfully.")

    descriptions = df[0].tolist()  # Access the first column directly
    total = len(descriptions)
    # Clamp the requested sample to the valid range [0, total].
    limit = total if sample_size is None else max(0, min(sample_size, total))

    print(f"Model name: {model_name}")
    print(f"Processing {limit} descriptions out of {total} total descriptions.")

    results = []
    for position, desc in enumerate(descriptions[:limit], start=1):
        # Blank cells are parsed by pandas as NaN; there is nothing to send to
        # the model for them, so record an empty prediction instead.
        if pd.isna(desc) or not str(desc).strip():
            print(f"Description {position} is empty; skipping.")
            results.append('')
            continue
        print(f"Processing description {position}/{limit}...")
        result = await process_text_local(model_name, desc)
        # str() keeps the preview working even if the result is not a string.
        print(f"Description {position} processed. Result: {str(result)[:50]}...")
        results.append(result)

    # Fill the rest of the results with empty strings to match the length of the DataFrame
    results.extend([''] * (total - limit))

    print("Assigning results to DataFrame...")
    df['predictions'] = results
    df.columns = df.columns.astype(str)  # Convert column names to strings to avoid warnings
    print("Results assigned to DataFrame successfully.")
    print(df.head())  # Print first few rows of the DataFrame to verify
    return df

# Streamlit page: upload a CSV, run the model on it, show and offer the
# predictions for download.
st.title("Finance Model Deployment")

st.write("""
### Upload a CSV file with company descriptions to extract key products, geographies, and important keywords:
""")

uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is not None:
    if st.button("Predict"):
        with st.spinner("Processing..."):
            print("Starting CSV processing...")
            predictions = asyncio.run(process_csv(uploaded_file))
            print("CSV processing completed. Displaying results.")
        # Keep the result across reruns. Any widget interaction (including a
        # click on the download button below) reruns this script, and on that
        # rerun st.button("Predict") is False again, so output rendered only
        # inside the button branch would disappear.
        st.session_state["predictions"] = {
            "source": uploaded_file.name,
            "df": predictions,
        }

    cached = st.session_state.get("predictions")
    # Only show results that belong to the currently uploaded file, so a new
    # upload does not display predictions computed for a previous one.
    if cached is not None and cached["source"] == uploaded_file.name:
        df = cached["df"]
        st.write(df)
        st.download_button(
            label="Download Predictions as CSV",
            data=df.to_csv(index=False).encode('utf-8'),
            file_name='predictions.csv',
            mime='text/csv'
        )
        print("Results displayed and download button created.")