Spaces:
Sleeping
Sleeping
asadAbdullah
commited on
Commit
•
728b290
1
Parent(s):
aa2d4a6
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# App
# Import required libraries
import os
import pandas as pd
import streamlit as st
from transformers import pipeline  # NOTE(review): unused in this file; kept in case other code relies on it
from sentence_transformers import SentenceTransformer, util
import requests
import json
from pyngrok import ngrok  # NOTE(review): unused in this file; kept in case other code relies on it

# Set up Hugging Face API token (read from the environment, never hard-coded).
api_key = os.getenv("HF_API_KEY")  # Replace with your Hugging Face API token

# Load the CSV dataset.
# NOTE(review): '/content/...' is a Google Colab path — confirm it exists in the
# deployment environment (e.g. a Hugging Face Space) or make it configurable.
data = pd.read_csv('/content/genetic_diseases_dataset.csv')

# Initialize the Sentence Transformer model ONCE for RAG-based retrieval.
# (The original loaded the exact same model a second time further down,
# doubling startup time and memory for no benefit; the duplicate was removed.)
retriever_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Drop unnecessary columns (stray 'Unnamed' columns produced by the CSV export).
data = data.drop(columns=['Unnamed: 0', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13'])

# Combine relevant columns into one combined description field used for retrieval.
# fillna('') keeps missing cells from turning the whole concatenation into NaN.
data['combined_description'] = (
    data['Symptoms'].fillna('') + " " +
    data['Severity Level'].fillna('') + " " +
    data['Risk Assessment'].fillna('') + " " +
    data['Treatment Options'].fillna('') + " " +
    data['Suggested Medical Tests'].fillna('') + " " +
    data['Minimum Values for Medical Tests'].fillna('') + " " +
    data['Emergency Treatment'].fillna('')
)
# Function to safely generate embeddings for each row
def generate_embedding(description):
    """Return the sentence embedding of *description* as a plain Python list.

    Empty, None, or NaN descriptions yield an empty list so downstream
    similarity code can recognize and skip them.
    """
    # BUGFIX: the original used a bare truthiness test, which the comment
    # claimed filtered NaN — but float('nan') is truthy in Python, so NaN
    # slipped through to encode(). Requiring a non-empty str closes that hole
    # (NaN and None are not str instances).
    if isinstance(description, str) and description:
        # Convert the numpy array to a list so it serializes cleanly.
        return retriever_model.encode(description).tolist()
    return []
# Embed every combined description; Series.map applies the callable elementwise,
# exactly like .apply with a scalar function.
data['embeddings'] = data['combined_description'].map(generate_embedding)
# Function to retrieve relevant information from CSV dataset based on user query
def get_relevant_info(query, top_k=3):
    """Return the top_k dataset rows whose combined description is most
    similar (cosine similarity) to *query*.

    Args:
        query: free-text user question.
        top_k: number of rows to return (default 3).

    Returns:
        A DataFrame slice of the best-matching rows, highest similarity first.
    """
    query_embedding = retriever_model.encode(query)

    similarities = []
    for doc_emb in data['embeddings']:
        # Rows with an empty description were stored as an empty list by
        # generate_embedding; give them the lowest possible score instead of
        # letting cos_sim crash on an empty input.
        if len(doc_emb) == 0:
            similarities.append(float('-inf'))
        else:
            similarities.append(util.cos_sim(query_embedding, doc_emb)[0][0].item())

    # Indices of the top_k highest-similarity rows.
    top_indices = sorted(range(len(similarities)), key=lambda i: similarities[i], reverse=True)[:top_k]
    return data.iloc[top_indices]
# Function to generate response using Hugging Face Model API
def generate_response(input_text):
    """Send *input_text* to the hosted Llama3-Med42-8B inference endpoint and
    return the generated text.

    Raises:
        KeyError: if neither HUGGINGFACEHUB_API_TOKEN nor HF_API_KEY is set.
        requests.HTTPError: if the API answers with a non-2xx status.
        requests.Timeout: if the API does not respond within 60 seconds.
    """
    api_url = "https://api-inference.huggingface.co/models/m42-health/Llama3-Med42-8B"
    # Keep the token name the original code used, but fall back to HF_API_KEY —
    # the variable the rest of this app actually reads at startup.
    token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or os.environ["HF_API_KEY"]
    headers = {"Authorization": f"Bearer {token}"}
    payload = {"inputs": input_text}

    # A timeout keeps the Streamlit app from hanging forever on a stuck request.
    response = requests.post(api_url, headers=headers, json=payload, timeout=60)
    # Surface API errors (401, 503 model-loading, ...) explicitly rather than
    # failing later with an opaque JSON/Key error on the error body.
    response.raise_for_status()
    return response.json()[0]["generated_text"]
# Streamlit UI for the Chatbot
def main():
    """Render the Streamlit chatbot UI: query input, optional report upload,
    retrieved dataset rows, and the model's generated answer."""
    st.title("Medical Report and Analysis Chatbot")
    st.sidebar.header("Upload Medical Report or Enter Query")

    # Text input for user queries
    user_query = st.sidebar.text_input("Type your question or query")

    # File uploader for medical report
    uploaded_file = st.sidebar.file_uploader("Upload a medical report (optional)", type=["txt", "pdf", "csv"])

    # Process the query if provided
    if user_query:
        st.write("### Query Response:")

        # Retrieve relevant information from dataset
        relevant_info = get_relevant_info(user_query)
        st.write("#### Relevant Medical Information:")
        for i, row in relevant_info.iterrows():
            # BUGFIX: the preprocessing step builds 'combined_description';
            # no 'description' column exists after load, so the original line
            # raised KeyError on every query.
            st.write(f"- {row['combined_description']}")

        # Generate a response from the Llama3-Med42-8B model
        response = generate_response(user_query)
        st.write("#### Model's Response:")
        st.write(response)

    # Process the uploaded file (if any)
    if uploaded_file:
        # Display analysis of the uploaded report file
        st.write("### Uploaded Report Analysis:")
        report_text = "Extracted report content here"  # Placeholder for file processing logic
        st.write(report_text)
# Run the app only when executed as a script (not when imported as a module).
if __name__ == "__main__":
    main()