File size: 3,692 Bytes
2a1a9a9
728b290
 
 
 
 
 
 
 
2a1a9a9
728b290
2a1a9a9
6d55797
728b290
6feb2e4
2a1a9a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7e5c1c8
 
2a1a9a9
6feb2e4
2a1a9a9
 
6feb2e4
 
7ed7e20
6feb2e4
2a1a9a9
7e5c1c8
2a1a9a9
7e5c1c8
 
 
 
 
 
2a1a9a9
 
 
 
 
7ed7e20
2a1a9a9
 
7ed7e20
7e5c1c8
728b290
7e5c1c8
 
728b290
 
7e5c1c8
728b290
7e5c1c8
728b290
 
 
 
6feb2e4
7e5c1c8
6feb2e4
 
 
7e5c1c8
2a1a9a9
728b290
2a1a9a9
 
7e5c1c8
 
728b290
7e5c1c8
728b290
2a1a9a9
728b290
6feb2e4
728b290
 
2a1a9a9
728b290
6d55797
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97

# Import required libraries
import os
import pandas as pd
import streamlit as st
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
import requests
import json
from pyngrok import ngrok

# Hugging Face API token, read once at startup (may be None if unset).
api_key = os.getenv("HF_API_KEY")

# Load the genetic-condition dataset and discard the CSV export artifacts.
data = pd.read_csv('genetic-Final.csv')
data = data.drop(columns=['Unnamed: 0', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13'])

# Build one free-text field per row by joining the descriptive columns with
# single spaces; missing cells contribute an empty string.
_description_columns = [
    'Symptoms',
    'Severity Level',
    'Risk Assessment',
    'Treatment Options',
    'Suggested Medical Tests',
    'Minimum Values for Medical Tests',
    'Emergency Treatment',
]
_combined = data[_description_columns[0]].fillna('')
for _col in _description_columns[1:]:
    _combined = _combined + " " + data[_col].fillna('')
data['combined_description'] = _combined

# Sentence-embedding model used for both the corpus rows and incoming queries.
retriever_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Function to safely generate embeddings for each row
def generate_embedding(description):
    if description:  # Check if the description is not empty or NaN
        return retriever_model.encode(description).tolist()  # Convert the numpy array to list
    else:
        return []

# Generate embeddings for the combined description
data['embeddings'] = data['combined_description'].apply(generate_embedding)

# Function to retrieve relevant information from CSV dataset based on user query
def get_relevant_info(query, top_k=3):
    """Return the *top_k* dataset rows most similar to *query*.

    Rows whose precomputed embedding is empty (generate_embedding yields []
    for missing description text) are ranked last with -inf similarity
    instead of crashing the cosine-similarity call.
    """
    query_embedding = retriever_model.encode(query)
    similarities = [
        util.cos_sim(query_embedding, doc_emb)[0][0].item() if len(doc_emb) else float("-inf")
        for doc_emb in data['embeddings']
    ]
    # Indices of the top_k highest-similarity rows, best first.
    top_indices = sorted(range(len(similarities)), key=lambda i: similarities[i], reverse=True)[:top_k]
    return data.iloc[top_indices]

# Function to generate response using Hugging Face Model API
def generate_response(input_text):
    """Query the hosted Llama3-Med42-8B model and return its generated text.

    Raises:
        RuntimeError: if no API token is configured, or the API returns an
            error / unexpected payload shape.
        requests.HTTPError: if the HTTP request fails (4xx/5xx).
    """
    api_url = "https://api-inference.huggingface.co/models/m42-health/Llama3-Med42-8B"
    # Accept either env var: HUGGINGFACEHUB_API_TOKEN (original behavior) or
    # the HF_API_KEY read at the top of this script — previously a missing
    # HUGGINGFACEHUB_API_TOKEN crashed with an opaque KeyError.
    token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_API_KEY")
    if not token:
        raise RuntimeError(
            "No Hugging Face API token set (HUGGINGFACEHUB_API_TOKEN or HF_API_KEY)"
        )
    headers = {"Authorization": f"Bearer {token}"}
    payload = {"inputs": input_text}

    response = requests.post(api_url, headers=headers, json=payload, timeout=60)
    response.raise_for_status()  # surface HTTP errors instead of a confusing parse failure
    result = response.json()
    # Successful text-generation responses are a list of {"generated_text": ...}.
    if isinstance(result, list) and result and "generated_text" in result[0]:
        return result[0]["generated_text"]
    raise RuntimeError(f"Unexpected API response: {result}")

# Streamlit UI for the Chatbot
def main():
    """Render the chatbot page: a sidebar query box plus an optional report upload."""
    st.title("Medical Report and Analysis Chatbot")
    st.sidebar.header("Upload Medical Report or Enter Query")

    # Sidebar widgets (order matters for layout): free-text question, then
    # an optional report file.
    query = st.sidebar.text_input("Type your question or query")
    report_file = st.sidebar.file_uploader("Upload a medical report (optional)", type=["txt", "pdf", "csv"])

    if query:
        st.write("### Query Response:")

        # First surface the closest rows from the CSV dataset...
        matches = get_relevant_info(query)
        st.write("#### Relevant Medical Information:")
        for _, match in matches.iterrows():
            st.write(f"- {match['combined_description']}")

        # ...then ask the hosted Llama3-Med42-8B model for an answer.
        model_reply = generate_response(query)
        st.write("#### Model's Response:")
        st.write(model_reply)

    if report_file:
        st.write("### Uploaded Report Analysis:")
        # Placeholder — real text extraction for txt/pdf/csv is not implemented yet.
        report_text = "Extracted report content here"
        st.write(report_text)

# Script entry point. NOTE(review): the original comment mentioned starting
# the app "in Colab using ngrok", but no ngrok tunnel is opened anywhere in
# this file (pyngrok is imported yet unused) — confirm whether a tunnel
# should be created before calling main().
if __name__ == "__main__":
    main()