ngrigg committed on
Commit 73e820c
1 Parent(s): cfa4436

Add logging and fix column name types

Files changed (2)
  1. app.py +14 -1
  2. llama_models.py +11 -2
app.py CHANGED
@@ -9,22 +9,32 @@ import os
 load_dotenv()
 
 async def process_csv(file):
+    print("Reading CSV file...")
     df = pd.read_csv(file, header=None)  # Read the CSV file without a header
+    print("CSV file read successfully.")
+
     descriptions = df[0].tolist()  # Access the first column directly
     SAMPLE_SIZE = min(5, len(descriptions))  # Adjust sample size as needed
     descriptions_subset = descriptions[:SAMPLE_SIZE]
 
     model_name = "instruction-pretrain/finance-Llama3-8B"  # or any other model you want to use
+    print(f"Model name: {model_name}")
+    print(f"Processing {SAMPLE_SIZE} descriptions out of {len(descriptions)} total descriptions.")
 
     results = []
-    for desc in descriptions_subset:
+    for i, desc in enumerate(descriptions_subset):
+        print(f"Processing description {i+1}/{SAMPLE_SIZE}...")
         result = await process_text(model_name, desc)
+        print(f"Description {i+1} processed. Result: {result[:50]}...")  # Print first 50 characters of the result
         results.append(result)
 
     # Fill the rest of the results with empty strings to match the length of the DataFrame
     results.extend([''] * (len(descriptions) - SAMPLE_SIZE))
 
+    print("Assigning results to DataFrame...")
     df['predictions'] = results
+    df.columns = df.columns.astype(str)  # Convert column names to strings to avoid warnings
+    print("Results assigned to DataFrame successfully.")
     return df
 
 st.title("Finance Model Deployment")
@@ -38,7 +48,9 @@ uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
 if uploaded_file is not None:
     if st.button("Predict"):
         with st.spinner("Processing..."):
+            print("Starting CSV processing...")
             df = asyncio.run(process_csv(uploaded_file))
+            print("CSV processing completed. Displaying results.")
             st.write(df)
             st.download_button(
                 label="Download Predictions as CSV",
@@ -46,3 +58,4 @@ if uploaded_file is not None:
                 file_name='predictions.csv',
                 mime='text/csv'
             )
+            print("Results displayed and download button created.")
llama_models.py CHANGED
@@ -3,10 +3,16 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import aiohttp
 
 HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
+model = None
+tokenizer = None
 
 def load_model(model_name):
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(model_name)  # Use AutoModelForCausalLM for Llama
+    global tokenizer, model
+    if not tokenizer or not model:
+        print("Loading model and tokenizer...")
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModelForCausalLM.from_pretrained(model_name)
+        print("Model and tokenizer loaded successfully.")
     return tokenizer, model
 
 async def process_text(model_name, text):
@@ -14,10 +20,13 @@ async def process_text(model_name, text):
     prompt = f"Given the following company description, extract key products, geographies, and important keywords:\n\n{text}\n\nProducts, geographies, and keywords:"
 
     async with aiohttp.ClientSession() as session:
+        print(f"Sending request to model API for text: {text[:50]}...")
         async with session.post(f"https://api-inference.huggingface.co/models/{model_name}",
                                 headers={"Authorization": f"Bearer {HUGGINGFACE_API_KEY}"},
                                 json={"inputs": prompt}) as response:
+            print(f"Received response with status code: {response.status}")
             result = await response.json()
+            print(f"Raw API response: {result}")
             if isinstance(result, list) and len(result) > 0:
                 return result[0].get('generated_text', '').strip()
             elif isinstance(result, dict):
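
The hunk cuts off inside the `elif isinstance(result, dict):` branch, so the committed handling of that case isn't visible here. For orientation only, a hedged sketch of how that branch is commonly written against the Hugging Face Inference API, which returns a dict such as {"error": ..., "estimated_time": ...} while a model is still cold-loading (the function name and fallback behavior are illustrative assumptions, not the repo's code):

def extract_generated_text(result):
    # Success shape: a list of {"generated_text": ...} dicts.
    if isinstance(result, list) and len(result) > 0:
        return result[0].get('generated_text', '').strip()
    # Assumed failure shape: a dict, typically {"error": ..., "estimated_time": ...}
    # while the model is loading; surface it rather than failing silently.
    if isinstance(result, dict):
        return f"API error: {result.get('error', 'unexpected response shape')}"
    return ''

Separately, a design note on the new `load_model`: the cache is not keyed on `model_name`, so once the globals are populated, a call with a different model name still returns the originally loaded pair. That is harmless for this single-model app, but worth knowing if the helper is reused elsewhere.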