Add logging and fix column name types
Browse files
- app.py +14 -1
- llama_models.py +11 -2
app.py
CHANGED
@@ -9,22 +9,32 @@ import os
|
|
9 |
load_dotenv()
|
10 |
|
11 |
async def process_csv(file):
    """Run the finance model over the first few descriptions of a CSV.

    The CSV is read without a header row and its first column is treated
    as the list of descriptions. At most five descriptions are sent to
    ``process_text``; every remaining row receives an empty prediction so
    the ``predictions`` column matches the DataFrame length.

    Args:
        file: Anything ``pd.read_csv`` accepts (path or file-like object).

    Returns:
        The parsed DataFrame with an added ``predictions`` column and all
        column labels converted to strings. An empty input yields an empty
        DataFrame with the same two columns.
    """
    try:
        df = pd.read_csv(file, header=None)  # No header row in the input
    except pd.errors.EmptyDataError:
        # An empty upload has no columns to parse; return an empty result
        # instead of surfacing a traceback to the user.
        df = pd.DataFrame({0: pd.Series(dtype=object)})

    descriptions = df[0].tolist()  # First column holds the descriptions
    sample_size = min(5, len(descriptions))  # Adjust sample size as needed

    model_name = "instruction-pretrain/finance-Llama3-8B"  # or any other model you want to use

    results = []
    for desc in descriptions[:sample_size]:
        if pd.isna(desc):
            # A blank cell parses as NaN; there is nothing to send.
            results.append('')
            continue
        # Non-string cells (e.g. numbers) are stringified because
        # process_text builds a text prompt from its argument.
        results.append(await process_text(model_name, str(desc)))

    # Fill the rest of the results with empty strings to match the length of the DataFrame
    results.extend([''] * (len(descriptions) - sample_size))

    df['predictions'] = results
    # header=None produces integer labels; normalise to strings so the
    # frame does not mix int and str column names.
    df.columns = df.columns.astype(str)
    return df
|
29 |
|
30 |
st.title("Finance Model Deployment")
|
@@ -38,7 +48,9 @@ uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
|
|
38 |
if uploaded_file is not None:
|
39 |
if st.button("Predict"):
|
40 |
with st.spinner("Processing..."):
|
|
|
41 |
df = asyncio.run(process_csv(uploaded_file))
|
|
|
42 |
st.write(df)
|
43 |
st.download_button(
|
44 |
label="Download Predictions as CSV",
|
@@ -46,3 +58,4 @@ if uploaded_file is not None:
|
|
46 |
file_name='predictions.csv',
|
47 |
mime='text/csv'
|
48 |
)
|
|
|
|
9 |
load_dotenv()
|
10 |
|
11 |
async def process_csv(file):
    """Run the finance model over the first few descriptions of a CSV.

    The CSV is read without a header row and its first column is treated
    as the list of descriptions. At most five descriptions are sent, one
    after another, to ``process_text``; every remaining row receives an
    empty prediction so the ``predictions`` column matches the DataFrame
    length. Progress is reported with ``print``.

    Args:
        file: Anything ``pd.read_csv`` accepts (path or file-like object).

    Returns:
        The parsed DataFrame with an added ``predictions`` column and all
        column labels converted to strings. An empty input yields an empty
        DataFrame with the same two columns.
    """
    print("Reading CSV file...")
    try:
        df = pd.read_csv(file, header=None)  # Read the CSV file without a header
    except pd.errors.EmptyDataError:
        # An empty upload has no columns to parse; continue with an empty
        # frame instead of surfacing a traceback to the user.
        print("CSV file is empty; nothing to process.")
        df = pd.DataFrame({0: pd.Series(dtype=object)})
    else:
        print("CSV file read successfully.")

    descriptions = df[0].tolist()  # Access the first column directly
    sample_size = min(5, len(descriptions))  # Adjust sample size as needed

    model_name = "instruction-pretrain/finance-Llama3-8B"  # or any other model you want to use
    print(f"Model name: {model_name}")
    print(f"Processing {sample_size} descriptions out of {len(descriptions)} total descriptions.")

    results = []
    for position, desc in enumerate(descriptions[:sample_size], start=1):
        print(f"Processing description {position}/{sample_size}...")
        if pd.isna(desc):
            # A blank cell parses as NaN; there is nothing to send.
            print(f"Description {position} is empty; skipping.")
            results.append('')
            continue
        # Non-string cells (e.g. numbers) are stringified because
        # process_text slices and interpolates its text argument.
        result = await process_text(model_name, str(desc))
        # str() guards the preview in case process_text returns a
        # non-string value (its non-list branch is not visible here).
        print(f"Description {position} processed. Result: {str(result)[:50]}...")  # Print first 50 characters of the result
        results.append(result)

    # Fill the rest of the results with empty strings to match the length of the DataFrame
    results.extend([''] * (len(descriptions) - sample_size))

    print("Assigning results to DataFrame...")
    df['predictions'] = results
    df.columns = df.columns.astype(str)  # Convert column names to strings to avoid warnings
    print("Results assigned to DataFrame successfully.")
    return df
|
39 |
|
40 |
st.title("Finance Model Deployment")
|
|
|
48 |
if uploaded_file is not None:
|
49 |
if st.button("Predict"):
|
50 |
with st.spinner("Processing..."):
|
51 |
+
print("Starting CSV processing...")
|
52 |
df = asyncio.run(process_csv(uploaded_file))
|
53 |
+
print("CSV processing completed. Displaying results.")
|
54 |
st.write(df)
|
55 |
st.download_button(
|
56 |
label="Download Predictions as CSV",
|
|
|
58 |
file_name='predictions.csv',
|
59 |
mime='text/csv'
|
60 |
)
|
61 |
+
print("Results displayed and download button created.")
|
llama_models.py
CHANGED
@@ -3,10 +3,16 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
3 |
import aiohttp
|
4 |
|
5 |
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
|
|
|
|
|
6 |
|
7 |
def load_model(model_name):
|
8 |
-
tokenizer
|
9 |
-
|
|
|
|
|
|
|
|
|
10 |
return tokenizer, model
|
11 |
|
12 |
async def process_text(model_name, text):
|
@@ -14,10 +20,13 @@ async def process_text(model_name, text):
|
|
14 |
prompt = f"Given the following company description, extract key products, geographies, and important keywords:\n\n{text}\n\nProducts, geographies, and keywords:"
|
15 |
|
16 |
async with aiohttp.ClientSession() as session:
|
|
|
17 |
async with session.post(f"https://api-inference.huggingface.co/models/{model_name}",
|
18 |
headers={"Authorization": f"Bearer {HUGGINGFACE_API_KEY}"},
|
19 |
json={"inputs": prompt}) as response:
|
|
|
20 |
result = await response.json()
|
|
|
21 |
if isinstance(result, list) and len(result) > 0:
|
22 |
return result[0].get('generated_text', '').strip()
|
23 |
elif isinstance(result, dict):
|
|
|
3 |
import aiohttp
|
4 |
|
5 |
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
|
6 |
+
# Module-level cache for the most recently loaded model/tokenizer pair.
model = None
tokenizer = None
# Name the cached pair was loaded from; None until the first load.
_loaded_model_name = None


def load_model(model_name):
    """Return the tokenizer and causal LM for ``model_name``, loading on demand.

    The pair is cached in module globals. A later call with the same name
    reuses the cache; a call with a different name reloads, so the caller
    never receives a model other than the one requested.

    Args:
        model_name: Identifier passed to ``AutoTokenizer.from_pretrained``
            and ``AutoModelForCausalLM.from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    global tokenizer, model, _loaded_model_name
    # Compare against None explicitly: truthiness of a tokenizer/model
    # object is not a reliable "is it loaded?" signal.
    cache_miss = (
        tokenizer is None
        or model is None
        or _loaded_model_name != model_name
    )
    if cache_miss:
        print("Loading model and tokenizer...")
        # Load into locals first so a failure part-way through does not
        # leave the cache holding a mismatched tokenizer/model pair.
        fresh_tokenizer = AutoTokenizer.from_pretrained(model_name)
        fresh_model = AutoModelForCausalLM.from_pretrained(model_name)
        tokenizer, model = fresh_tokenizer, fresh_model
        _loaded_model_name = model_name
        print("Model and tokenizer loaded successfully.")
    return tokenizer, model
|
17 |
|
18 |
async def process_text(model_name, text):
|
|
|
20 |
prompt = f"Given the following company description, extract key products, geographies, and important keywords:\n\n{text}\n\nProducts, geographies, and keywords:"
|
21 |
|
22 |
async with aiohttp.ClientSession() as session:
|
23 |
+
print(f"Sending request to model API for text: {text[:50]}...")
|
24 |
async with session.post(f"https://api-inference.huggingface.co/models/{model_name}",
|
25 |
headers={"Authorization": f"Bearer {HUGGINGFACE_API_KEY}"},
|
26 |
json={"inputs": prompt}) as response:
|
27 |
+
print(f"Received response with status code: {response.status}")
|
28 |
result = await response.json()
|
29 |
+
print(f"Raw API response: {result}")
|
30 |
if isinstance(result, list) and len(result) > 0:
|
31 |
return result[0].get('generated_text', '').strip()
|
32 |
elif isinstance(result, dict):
|