asadAbdullah committed on
Commit
6d55797
1 Parent(s): b120a8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -40
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # App
2
  # Import required libraries
3
  import os
4
  import pandas as pd
@@ -7,51 +6,47 @@ from transformers import pipeline
7
  from sentence_transformers import SentenceTransformer, util
8
  import requests
9
  import json
10
- from pyngrok import ngrok
11
 
12
- # Set up Hugging Face API token
 
 
 
13
 
14
# Hugging Face API token; set HF_API_KEY in the environment / Space secrets.
api_key = os.environ.get("HF_API_KEY")

# Load the CSV dataset (expected to sit alongside app.py).
data = pd.read_csv('genetic-Final.csv')
19
 
 
 
 
 
20
 
21
  # Initialize Sentence Transformer model for RAG-based retrieval
22
  retriever_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
23
 
24
# Drop the leftover unnamed index/padding columns produced by the CSV export.
data = data.drop(columns=['Unnamed: 0', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13'])

# Combine the clinically relevant columns into a single free-text field so
# one sentence embedding can represent the whole row; fillna('') keeps
# missing cells from poisoning the string concatenation with NaN.
data['combined_description'] = (
    data['Symptoms'].fillna('') + " " +
    data['Severity Level'].fillna('') + " " +
    data['Risk Assessment'].fillna('') + " " +
    data['Treatment Options'].fillna('') + " " +
    data['Suggested Medical Tests'].fillna('') + " " +
    data['Minimum Values for Medical Tests'].fillna('') + " " +
    data['Emergency Treatment'].fillna('')
)

# NOTE(review): the duplicate
# SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2') construction
# was removed — retriever_model is already initialized above, and re-loading
# the model doubles startup cost with no behavioral change. The dead
# commented-out per-row encode line was dropped for the same reason.
45
def generate_embedding(description):
    """Encode a description string into a plain list of floats.

    Falsy input (empty string or None) yields an empty list so the
    embeddings column stays well-formed for downstream similarity code.
    """
    return retriever_model.encode(description).tolist() if description else []
50
 
51
- # Generate embeddings for the combined description
52
- data['embeddings'] = data['combined_description'].apply(generate_embedding)
 
53
 
54
- # Function to retrieve relevant information from CSV dataset based on user query
55
  def get_relevant_info(query, top_k=3):
56
  query_embedding = retriever_model.encode(query)
57
  similarities = [util.cos_sim(query_embedding, doc_emb)[0][0].item() for doc_emb in data['embeddings']]
@@ -61,11 +56,20 @@ def get_relevant_info(query, top_k=3):
61
  # Function to generate response using Hugging Face Model API
62
def generate_response(input_text):
    """Query the hosted Llama3-Med42-8B model and return its generated text.

    Raises requests.HTTPError on a non-2xx response (previously an HTTP
    error body fell through to a confusing KeyError) and KeyError if the
    HUGGINGFACEHUB_API_TOKEN environment variable is unset.
    """
    api_url = "https://api-inference.huggingface.co/models/m42-health/Llama3-Med42-8B"
    headers = {"Authorization": f"Bearer {os.environ['HUGGINGFACEHUB_API_TOKEN']}"}
    payload = {"inputs": input_text}

    response = requests.post(api_url, headers=headers, json=payload)
    # Fail fast on HTTP errors instead of crashing later on a malformed body.
    response.raise_for_status()
    # response.json() handles charset detection; replaces the manual
    # json.loads(response.content.decode("utf-8")) round-trip.
    return response.json()[0]["generated_text"]
 
 
 
 
 
 
 
 
 
69
 
70
  # Streamlit UI for the Chatbot
71
  def main():
@@ -86,7 +90,7 @@ def main():
86
  relevant_info = get_relevant_info(user_query)
87
  st.write("#### Relevant Medical Information:")
88
  for i, row in relevant_info.iterrows():
89
- st.write(f"- {row['description']}")
90
 
91
  # Generate a response from the Llama3-Med42-8B model
92
  response = generate_response(user_query)
@@ -95,10 +99,10 @@ def main():
95
 
96
  # Process the uploaded file (if any)
97
  if uploaded_file:
98
- # Display analysis of the uploaded report file
99
  st.write("### Uploaded Report Analysis:")
100
  report_text = "Extracted report content here" # Placeholder for file processing logic
101
  st.write(report_text)
102
 
103
  if __name__ == "__main__":
104
- main()
 
 
1
  # Import required libraries
2
  import os
3
  import pandas as pd
 
6
  from sentence_transformers import SentenceTransformer, util
7
  import requests
8
  import json
 
9
 
10
+ # Configure Hugging Face API token securely
11
+ # Set this in Hugging Face Space Secrets instead of directly in code
12
+ # os.environ["HUGGINGFACEHUB_API_TOKEN"] = "your_hugging_face_api_token" # Avoid hardcoding
13
+ api_key = os.getenv("HF_API_KEY")
14
 
15
+ # Load the CSV dataset (place the CSV in the same directory as app.py in Hugging Face Spaces)
16
+ # Ensure the dataset is uploaded in your Space or provide a fallback mechanism
 
 
 
17
 
18
# Load the CSV dataset; it must be uploaded next to app.py in the Space.
try:
    data = pd.read_csv('genetic-Final.csv')
except FileNotFoundError:
    st.error("Dataset file not found. Please upload it to this directory.")
    # Halt the script run here: without `data`, every later statement would
    # raise NameError. st.stop() ends this Streamlit run cleanly after the
    # error message is shown.
    st.stop()
22
 
23
  # Initialize Sentence Transformer model for RAG-based retrieval
24
  retriever_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
25
 
26
# Build one free-text field from the clinically relevant columns so a single
# embedding can represent the whole row. Skipped when the column already
# exists (e.g. a preprocessed CSV was provided).
if 'combined_description' not in data.columns:
    _text_columns = [
        'Symptoms',
        'Severity Level',
        'Risk Assessment',
        'Treatment Options',
        'Suggested Medical Tests',
        'Minimum Values for Medical Tests',
        'Emergency Treatment',
    ]
    # Equivalent to chaining the columns with ' + " " + '; fillna('') keeps
    # missing cells from turning the concatenation into NaN.
    _combined = data[_text_columns[0]].fillna('')
    for _col in _text_columns[1:]:
        _combined = _combined + " " + data[_col].fillna('')
    data['combined_description'] = _combined
37
+
38
+ # Function to generate embeddings safely for each row
 
 
 
 
 
 
 
 
39
def generate_embedding(description):
    """Return the sentence embedding for ``description`` as a plain list.

    Guard clause: falsy input (empty string or None) maps to [] so the
    embeddings column never contains None/NaN entries.
    """
    if not description:
        return []
    return retriever_model.encode(description).tolist()
44
 
45
# Compute embeddings once per session; reuse the column when it already exists.
if 'embeddings' not in data.columns:
    data['embeddings'] = data['combined_description'].map(generate_embedding)
48
 
49
+ # Function to retrieve relevant information based on user query
50
  def get_relevant_info(query, top_k=3):
51
  query_embedding = retriever_model.encode(query)
52
  similarities = [util.cos_sim(query_embedding, doc_emb)[0][0].item() for doc_emb in data['embeddings']]
 
56
  # Function to generate response using Hugging Face Model API
57
def generate_response(input_text):
    """Query the hosted Llama3-Med42-8B model and return its generated text.

    On failure, surfaces a Streamlit error and returns a user-facing
    apology string instead of raising, so the chat UI keeps working.
    """
    api_url = "https://api-inference.huggingface.co/models/m42-health/Llama3-Med42-8B"
    headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACEHUB_API_TOKEN')}"}
    payload = {"inputs": input_text}

    try:
        response = requests.post(api_url, headers=headers, json=payload)
        response_data = response.json()
        # Guard against an empty list before indexing [0] — previously an
        # empty API response raised IndexError and was swallowed by the
        # generic except branch with a misleading "API request" message.
        if (isinstance(response_data, list) and response_data
                and "generated_text" in response_data[0]):
            return response_data[0]["generated_text"]
        st.error("Unexpected response structure from API.")
        return "Sorry, I couldn't generate a response. Please try again."
    except Exception as e:
        st.error(f"Error during API request: {e}")
        return "Sorry, there was an error processing your request. Please try again."
73
 
74
  # Streamlit UI for the Chatbot
75
  def main():
 
90
  relevant_info = get_relevant_info(user_query)
91
  st.write("#### Relevant Medical Information:")
92
  for i, row in relevant_info.iterrows():
93
+ st.write(f"- {row['combined_description']}") # Adjust to show meaningful info
94
 
95
  # Generate a response from the Llama3-Med42-8B model
96
  response = generate_response(user_query)
 
99
 
100
  # Process the uploaded file (if any)
101
  if uploaded_file:
102
+ # Display analysis of the uploaded report file (process based on file type)
103
  st.write("### Uploaded Report Analysis:")
104
  report_text = "Extracted report content here" # Placeholder for file processing logic
105
  st.write(report_text)
106
 
107
  if __name__ == "__main__":
108
+ main()