# Spaces: Sleeping
# App
# Import required libraries
import os | |
import pandas as pd | |
import streamlit as st | |
from transformers import pipeline | |
from sentence_transformers import SentenceTransformer, util | |
import requests | |
import json | |
from pyngrok import ngrok | |
# Hugging Face API token. HF_API_KEY is checked first; HUGGINGFACEHUB_API_TOKEN
# is accepted as a fallback because generate_response() reads that name, so
# configuring either secret is enough.
api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Location of the CSV dataset. The default is the original Colab path; set
# GENETIC_DATASET_PATH to point somewhere else without editing the code.
_DATASET_PATH = os.getenv("GENETIC_DATASET_PATH", '/content/genetic_diseases_dataset.csv')

# Text columns that are concatenated (in this order, separated by single
# spaces) into the field that gets embedded for retrieval.
_DESCRIPTION_COLUMNS = [
    'Symptoms',
    'Severity Level',
    'Risk Assessment',
    'Treatment Options',
    'Suggested Medical Tests',
    'Minimum Values for Medical Tests',
    'Emergency Treatment',
]

# NOTE(review): Streamlit re-executes module-level code on every interaction,
# so the dataset and model are reloaded each time - consider caching.

# Load the CSV dataset
data = pd.read_csv(_DATASET_PATH)

# Drop the unnamed filler columns. errors='ignore' keeps the load working
# when the CSV has already been cleaned and some of them are absent.
data = data.drop(
    columns=['Unnamed: 0', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13'],
    errors='ignore',
)

# Combine relevant columns into one combined description field. Missing
# values become '' and every column is cast to str first, so a numeric
# column cannot break the string concatenation.
_text_parts = [data[name].fillna('').astype(str) for name in _DESCRIPTION_COLUMNS]
data['combined_description'] = _text_parts[0].str.cat(_text_parts[1:], sep=' ')

# Sentence Transformer model used for both dataset and query embeddings
# (loaded once; the original loaded the same model twice).
retriever_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# Function to safely generate embeddings for each row
def generate_embedding(description):
    """Embed one description string with ``retriever_model``.

    Args:
        description: Text to embed. ``None``, NaN, any other non-string
            value and the empty string are all treated as "no text".

    Returns:
        The embedding converted to a plain list, or ``[]`` when there is
        no text to embed.
    """
    # NaN is a truthy float, so a bare truthiness test would pass it on to
    # the encoder; require a real, non-empty string instead.
    if not isinstance(description, str) or not description:
        return []
    # encode() yields a numpy array; store it as a list in the DataFrame.
    return retriever_model.encode(description).tolist()
# Embed every row's combined description and keep the result next to the
# row it came from, in the 'embeddings' column.
data['embeddings'] = (
    data['combined_description']
    .apply(generate_embedding)
)
# Function to retrieve relevant information from CSV dataset based on user query
def get_relevant_info(query, top_k=3):
    """Return the dataset rows most similar to *query*.

    The query is embedded with ``retriever_model`` and compared by cosine
    similarity against the precomputed ``data['embeddings']`` column.

    Args:
        query: Free-text user query.
        top_k: Maximum number of rows to return.

    Returns:
        Rows of ``data`` (selected with ``iloc``), ordered from most to
        least similar. Empty when no row has a usable embedding.
    """
    # generate_embedding() stores [] for rows without usable text. Those
    # cannot be compared with the query vector, so skip them while
    # remembering each remaining row's position for the final iloc lookup.
    positions = []
    embeddings = []
    for position, embedding in enumerate(data['embeddings']):
        if len(embedding) > 0:
            positions.append(position)
            embeddings.append(embedding)
    if not embeddings:
        return data.iloc[[]]

    query_embedding = retriever_model.encode(query)
    # Score all rows in one batched call instead of one call per row; row 0
    # of the result holds the similarities for the single query vector.
    scores = util.cos_sim(query_embedding, embeddings)[0].tolist()
    ranked = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
    return data.iloc[[positions[i] for i in ranked[:top_k]]]
# Function to generate response using Hugging Face Model API
def generate_response(input_text, timeout=60):
    """Generate text for *input_text* with the hosted Llama3-Med42-8B model.

    Args:
        input_text: Prompt sent to the Hugging Face Inference API.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The ``generated_text`` field of the first item in the API response.

    Raises:
        RuntimeError: If no API token is configured, or the API answers
            with anything other than a non-empty list of generations
            (for example an ``{"error": ...}`` payload).
        requests.RequestException: If the request fails, times out or
            comes back with an HTTP error status.
    """
    api_url = "https://api-inference.huggingface.co/models/m42-health/Llama3-Med42-8B"

    # HUGGINGFACEHUB_API_TOKEN is the name this function has always read;
    # HF_API_KEY is the name used by the module-level configuration.
    token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or os.environ.get("HF_API_KEY")
    if not token:
        raise RuntimeError(
            "No Hugging Face API token found; set HUGGINGFACEHUB_API_TOKEN or HF_API_KEY."
        )

    headers = {"Authorization": f"Bearer {token}"}
    payload = {"inputs": input_text}
    # Without a timeout a stalled connection would hang the app indefinitely.
    response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()

    result = response.json()
    is_generation_list = (
        isinstance(result, list)
        and len(result) > 0
        and isinstance(result[0], dict)
        and "generated_text" in result[0]
    )
    if not is_generation_list:
        raise RuntimeError(f"Unexpected response from the inference API: {result!r}")
    return result[0]["generated_text"]
# Streamlit UI for the Chatbot
def main():
    """Render the Streamlit chatbot page.

    The sidebar offers a free-text query box and an optional report upload.
    A query is answered with the most similar dataset rows followed by the
    hosted model's generated response; an uploaded file currently only
    triggers placeholder output.
    """
    st.title("Medical Report and Analysis Chatbot")
    st.sidebar.header("Upload Medical Report or Enter Query")

    # Text input for user queries
    user_query = st.sidebar.text_input("Type your question or query")

    # File uploader for medical report
    uploaded_file = st.sidebar.file_uploader(
        "Upload a medical report (optional)", type=["txt", "pdf", "csv"]
    )

    # Process the query if provided
    if user_query:
        st.write("### Query Response:")

        # Retrieve relevant information from dataset
        relevant_info = get_relevant_info(user_query)
        st.write("#### Relevant Medical Information:")
        for _, row in relevant_info.iterrows():
            # This file builds the text field as 'combined_description';
            # 'description' is used only if the CSV happens to provide it.
            if 'description' in row.index:
                description = row['description']
            else:
                description = row['combined_description']
            st.write(f"- {description}")

        # Generate a response from the Llama3-Med42-8B model. This is the
        # UI boundary: report a failure on the page instead of aborting the
        # run, so the upload section below is still rendered.
        try:
            response = generate_response(user_query)
        except Exception as exc:
            st.error(f"Could not generate a model response: {exc}")
        else:
            st.write("#### Model's Response:")
            st.write(response)

    # Process the uploaded file (if any)
    if uploaded_file:
        # Display analysis of the uploaded report file
        st.write("### Uploaded Report Analysis:")
        report_text = "Extracted report content here"  # Placeholder for file processing logic
        st.write(report_text)


if __name__ == "__main__":
    main()