Spaces:
Sleeping
Sleeping
File size: 8,849 Bytes
728b290 7e5c1c8 6d55797 728b290 7e5c1c8 6d55797 7e5c1c8 6d55797 728b290 7e5c1c8 6d55797 f69047d 7e5c1c8 44d57c3 6d55797 7e5c1c8 728b290 7e5c1c8 728b290 7e5c1c8 7ed7e20 7e5c1c8 7ed7e20 7e5c1c8 f69047d 728b290 7e5c1c8 f69047d 728b290 6d55797 7e5c1c8 6d55797 7e5c1c8 6d55797 7e5c1c8 728b290 7ed7e20 7e5c1c8 728b290 7e5c1c8 728b290 7e5c1c8 728b290 7e5c1c8 728b290 7e5c1c8 728b290 7e5c1c8 728b290 7e5c1c8 728b290 7e5c1c8 728b290 6d55797 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 |
# Import required libraries
import os
import pandas as pd
import streamlit as st
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
import requests
import json
# Hugging Face API token, read from the environment so it never lives in the
# source. os.getenv returns None when HF_API_KEY is not set.
api_key = os.getenv("HF_API_KEY")

# Load the CSV dataset (place the CSV in the same directory as app.py in Hugging Face Spaces)
try:
    data = pd.read_csv('genetic-Final.csv')  # Ensure the dataset filename is correct
except FileNotFoundError:
    st.error("Dataset file not found. Please upload it to this directory.")
    # Every statement below needs `data`; halt this script run here instead of
    # falling through to a NameError on the next line that touches it.
    st.stop()

# Initialize Sentence Transformer model for RAG-based retrieval
retriever_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Build one free-text description per row by joining the descriptive columns
# with single spaces; missing cells become empty strings.
# NOTE(review): string concatenation assumes these columns hold text - confirm
# against the CSV.
if 'combined_description' not in data.columns:
    data['combined_description'] = (
        data['Symptoms'].fillna('') + " " +
        data['Severity Level'].fillna('') + " " +
        data['Risk Assessment'].fillna('') + " " +
        data['Treatment Options'].fillna('') + " " +
        data['Suggested Medical Tests'].fillna('') + " " +
        data['Minimum Values for Medical Tests'].fillna('') + " " +
        data['Emergency Treatment'].fillna('')
    )

# Relative importance of each column when scoring a query against a row.
column_weights = {
    'Symptoms': 0.4,
    'Severity Level': 0.2,
    'Risk Assessment': 0.1,
    'Treatment Options': 0.15,
    'Suggested Medical Tests': 0.05,
    'Minimum Values for Medical Tests': 0.05,
    'Emergency Treatment': 0.05,
}

# Precompute one embedding per cell for every weighted column, stored as a
# plain list in a sibling "<column>_embeddings" column.
for col in column_weights:
    if f"{col}_embeddings" not in data.columns:
        data[f"{col}_embeddings"] = data[col].fillna("").apply(
            lambda x: retriever_model.encode(x).tolist()
        )
# Function to retrieve relevant information with weighted scoring
def get_weighted_relevant_info(query, top_k=3):
    """Return the ``top_k`` rows of ``data`` with the highest weighted similarity to ``query``.

    Each row's score is the sum, over the columns in ``column_weights``, of the
    cosine similarity between the query embedding and that column's stored
    embedding, multiplied by the column's weight. Columns whose stored
    embedding is empty contribute nothing.

    Args:
        query: Text to search for.
        top_k: Number of rows to return.

    Returns:
        The selected rows of ``data``, ordered from highest to lowest score.
    """
    query_vec = retriever_model.encode(query)

    def _row_score(record):
        # Accumulate in dictionary order with a plain running total.
        total = 0
        for column, importance in column_weights.items():
            column_vec = record[f"{column}_embeddings"]
            if not column_vec:
                continue
            similarity = util.cos_sim(query_vec, column_vec)[0][0].item()
            total += similarity * importance
        return total

    scores = [_row_score(record) for _, record in data.iterrows()]
    # Stable descending sort over row positions, then keep the first top_k.
    ranked_positions = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
    return data.iloc[ranked_positions[:top_k]]
# Embed each combined description once; skipped when the column already exists.
if 'embeddings' not in data.columns:
    def _embed_description(text):
        """Return the embedding of ``text`` as a list, or ``[]`` for empty text."""
        if not text:
            return []
        return retriever_model.encode(text).tolist()

    data['embeddings'] = data['combined_description'].apply(_embed_description)
# Function to retrieve relevant information based on user query (non-weighted)
def get_relevant_info(query, top_k=3):
    """Return the ``top_k`` rows of ``data`` whose combined description best matches ``query``.

    Rows are ranked by cosine similarity between the query embedding and the
    row's precomputed ``embeddings`` entry (non-weighted retrieval).

    Args:
        query: Text to search for.
        top_k: Maximum number of rows to return.

    Returns:
        The selected rows of ``data``, ordered from most to least similar.
        Rows without an embedding are never returned, so fewer than ``top_k``
        rows may come back.
    """
    query_embedding = retriever_model.encode(query)

    scored = []
    for position, doc_emb in enumerate(data['embeddings']):
        # Rows with an empty description are stored with an empty embedding;
        # cos_sim cannot compare those against the query, so leave them out
        # of the ranking instead of failing the whole lookup.
        if len(doc_emb) == 0:
            continue
        score = util.cos_sim(query_embedding, doc_emb)[0][0].item()
        scored.append((score, position))

    # Stable sort: rows with equal scores keep their dataset order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    top_indices = [position for _, position in scored[:top_k]]
    return data.iloc[top_indices]
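# NOTE(review): the imports, setup, and retrieval helpers that follow repeat the section above verbatim; the response generation function is defined after them.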
# Import required libraries
import os
import pandas as pd
import streamlit as st
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
import requests
import json
# NOTE(review): this section repeats the setup block earlier in the file.
# Because `data` is re-read from disk here, any embedding columns computed
# before this point are discarded and computed again below.

# Hugging Face API token, read from the environment so it never lives in the
# source. os.getenv returns None when HF_API_KEY is not set.
api_key = os.getenv("HF_API_KEY")

# Load the CSV dataset (place the CSV in the same directory as app.py in Hugging Face Spaces)
try:
    data = pd.read_csv('genetic-Final.csv')  # Ensure the dataset filename is correct
except FileNotFoundError:
    st.error("Dataset file not found. Please upload it to this directory.")
    # Every statement below needs a freshly loaded `data`; halt this script
    # run here rather than continuing after reporting the error.
    st.stop()

# Initialize Sentence Transformer model for RAG-based retrieval
retriever_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Build one free-text description per row by joining the descriptive columns
# with single spaces; missing cells become empty strings.
# NOTE(review): string concatenation assumes these columns hold text - confirm
# against the CSV.
if 'combined_description' not in data.columns:
    data['combined_description'] = (
        data['Symptoms'].fillna('') + " " +
        data['Severity Level'].fillna('') + " " +
        data['Risk Assessment'].fillna('') + " " +
        data['Treatment Options'].fillna('') + " " +
        data['Suggested Medical Tests'].fillna('') + " " +
        data['Minimum Values for Medical Tests'].fillna('') + " " +
        data['Emergency Treatment'].fillna('')
    )

# Relative importance of each column when scoring a query against a row.
column_weights = {
    'Symptoms': 0.4,
    'Severity Level': 0.2,
    'Risk Assessment': 0.1,
    'Treatment Options': 0.15,
    'Suggested Medical Tests': 0.05,
    'Minimum Values for Medical Tests': 0.05,
    'Emergency Treatment': 0.05,
}

# Precompute one embedding per cell for every weighted column, stored as a
# plain list in a sibling "<column>_embeddings" column.
for col in column_weights:
    if f"{col}_embeddings" not in data.columns:
        data[f"{col}_embeddings"] = data[col].fillna("").apply(
            lambda x: retriever_model.encode(x).tolist()
        )
# Function to retrieve relevant information with weighted scoring
def get_weighted_relevant_info(query, top_k=3):
    """Rank the rows of ``data`` against ``query`` using per-column weights.

    For every row, the cosine similarity between the query embedding and each
    "<column>_embeddings" entry is multiplied by that column's weight from
    ``column_weights`` and added to the row's score. Empty embeddings are
    skipped.

    Args:
        query: Text to search for.
        top_k: Number of rows to return.

    Returns:
        The ``top_k`` highest-scoring rows of ``data``, best match first.
    """
    encoded_query = retriever_model.encode(query)

    row_scores = []
    for _, record in data.iterrows():
        running_total = 0
        for column, importance in column_weights.items():
            stored_vec = record[f"{column}_embeddings"]
            if stored_vec:
                similarity = util.cos_sim(encoded_query, stored_vec)[0][0].item()
                running_total += similarity * importance
        row_scores.append(running_total)

    # Pair each score with its row position; the stable descending sort keeps
    # dataset order among equal scores.
    by_score = sorted(enumerate(row_scores), key=lambda pair: pair[1], reverse=True)
    best_positions = [position for position, _ in by_score[:top_k]]
    return data.iloc[best_positions]
# Embed each combined description once; skipped when the column already exists.
if 'embeddings' not in data.columns:
    def _embed_description(text):
        """Return the embedding of ``text`` as a list, or ``[]`` for empty text."""
        if not text:
            return []
        return retriever_model.encode(text).tolist()

    data['embeddings'] = data['combined_description'].apply(_embed_description)
# Function to retrieve relevant information based on user query (non-weighted)
def get_relevant_info(query, top_k=3):
    """Return the ``top_k`` rows of ``data`` whose combined description best matches ``query``.

    Rows are ranked by cosine similarity between the query embedding and the
    row's precomputed ``embeddings`` entry (non-weighted retrieval).

    Args:
        query: Text to search for.
        top_k: Maximum number of rows to return.

    Returns:
        The selected rows of ``data``, ordered from most to least similar.
        Rows without an embedding are never returned, so fewer than ``top_k``
        rows may come back.
    """
    query_embedding = retriever_model.encode(query)

    scored = []
    for position, doc_emb in enumerate(data['embeddings']):
        # Rows with an empty description are stored with an empty embedding;
        # cos_sim cannot compare those against the query, so leave them out
        # of the ranking instead of failing the whole lookup.
        if len(doc_emb) == 0:
            continue
        score = util.cos_sim(query_embedding, doc_emb)[0][0].item()
        scored.append((score, position))

    # Stable sort: rows with equal scores keep their dataset order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    top_indices = [position for _, position in scored[:top_k]]
    return data.iloc[top_indices]
# Enhanced response generation function with debugging
def generate_response(input_text, relevant_info, timeout=60):
    """Ask the hosted Med42 model to answer ``input_text`` using retrieved context.

    The 'combined_description' values of ``relevant_info`` are joined with
    newlines and sent, together with the user query, to the Hugging Face
    Inference API. The raw API response is also written to the Streamlit page
    for debugging.

    Args:
        input_text: The user's question.
        relevant_info: DataFrame of retrieved rows with a
            'combined_description' column.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The generated text on success; otherwise a human-readable error
        string. Request and parsing failures are reported in the returned
        string rather than raised.
    """
    # Sending "Bearer None" can only fail; report the real cause instead.
    if not api_key:
        return "Error during API request: HF_API_KEY environment variable is not set."

    context = "\n".join(relevant_info['combined_description'].tolist())
    input_with_context = f"Context: {context}\n\nUser Query: {input_text}"
    api_url = "https://api-inference.huggingface.co/models/m42-health/Llama3-Med42-8B"
    headers = {"Authorization": f"Bearer {api_key}"}
    payload = {"inputs": input_with_context}

    try:
        # A timeout keeps the app from hanging forever on a stalled connection.
        response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as e:
        return f"Error during API request: {e}"

    st.write("API Raw Response:", response.text)  # Display raw response for debugging

    # The body is not guaranteed to be JSON (e.g. gateway error pages); fall
    # back to the raw text so the status code is still reported.
    try:
        response_data = response.json()
    except ValueError:
        response_data = None
    shown = response.text if response_data is None else response_data

    # Check response status
    if response.status_code != 200:
        return f"Error: API responded with status code {response.status_code}. Full response: {shown}"

    # Parse and validate response: expect a non-empty list whose first item
    # is a dict carrying "generated_text".
    if (
        isinstance(response_data, list)
        and response_data
        and isinstance(response_data[0], dict)
        and "generated_text" in response_data[0]
    ):
        return response_data[0]["generated_text"]
    return f"Unexpected response format from API. Full response: {shown}"
# Streamlit UI for the Chatbot
def main():
    """Render the chatbot page: sidebar inputs, retrieved matches, and the model's answer.

    When a query is entered, the weighted retriever selects matching dataset
    rows, their combined descriptions are listed, and the model's response is
    shown beneath them. When a file is uploaded, only a placeholder string is
    displayed; the file's content is not read.
    """
    st.title("Medical Report and Analysis Chatbot")

    sidebar = st.sidebar
    sidebar.header("Upload Medical Report or Enter Query")

    # Sidebar inputs: free-text question and an optional report upload.
    question = sidebar.text_input("Type your question or query")
    report_file = sidebar.file_uploader(
        "Upload a medical report (optional)",
        type=["txt", "pdf", "csv"],
    )

    # Answer the question if one was entered.
    if question:
        st.write("### FAQ and Responses:")

        # Show the dataset rows that best match the question.
        matches = get_weighted_relevant_info(question)
        for description in matches['combined_description']:
            st.write(f"- {description}")

        # Ask the model, passing the matched rows as context.
        answer = generate_response(question, matches)
        st.write("#### Model's Response:")
        st.write(answer)

    # Handle the uploaded report, if any.
    if report_file:
        st.write("### Uploaded Report Analysis:")
        placeholder_text = "Extracted report content here"  # Placeholder for file processing
        st.write(placeholder_text)
# Entry point: build the UI only when this file is run as the main script.
if __name__ == "__main__":
    main()
|