File size: 9,788 Bytes
ca3430a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3fb9e3a
ca3430a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3fb9e3a
ca3430a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8efaf4f
ca3430a
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
import streamlit as st
import numpy as np
import pandas as pd
import os
os.environ["CUDA_VISIBLE_DEVICES"]="-1"  ### load on cpu if GPU is making issue
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from tensorflow.keras.models import load_model
import time
# from PIL import Image

# Page chrome: browser tab title/icon, a small CSS tweak, and the headings.
# NOTE(review): Streamlit emoji shortcodes are usually written ":dna:" -- confirm
# that the bare "dna" value renders the intended icon.
st.set_page_config(page_title="TCR-ESM",page_icon="dna")

# CSS that sets the top padding of the main section to 2rem.
hide_streamlit_style = """
<style>
    #root > div:nth-child(1) > div > div > div > div > section > div {padding-top: 2rem;}
</style>

"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# image = Image.open('TCR-ESM.png')
# st.image(image)

st.title('TCR-ESM')
st.subheader('a webserver accompanying our work on predicting TCR-peptide-MHC binding with large protein model (ESM1v) embeddings')

# Which training database's models to use for the prediction.
dataset = st.radio("Please select the Training Database", ('MCPAS', 'VDJDB'), horizontal=True)

# Which combination of inputs the prediction is made from.
task = st.radio(
    "Please select the Prediction Task",
    (
        "TCR\u03B1-TCR\u03B2-Peptide-MHC",
        "TCR\u03B1-TCR\u03B2-Peptide",
        "TCR\u03B1-Peptide-MHC",
        "TCR\u03B2-Peptide-MHC",
        "TCR\u03B1-Peptide",
        "TCR\u03B2-Peptide",
    ),
    horizontal=True,
)

# Offer the bundled example inputs; the button's return value is not needed.
with open("sample_input_data.zip", "rb") as sample_zip:
    st.download_button(
        label="Download Sample Input Data",
        data=sample_zip,
        file_name="sample_input_data.zip",
        mime="application/octet-stream",
    )

############## get numpy files
# Prompt shown by the uploader for each kind of embedding file.
_UPLOAD_LABELS = {
    'alpha': "Choose the .npy file containing TCR\u03B1 Embeddings",
    'beta': "Choose the .npy file containing TCR\u03B2 Embeddings",
    'pepti': "Choose the .npy file containing Peptide Embeddings",
    'mhc': "Choose the .npy file containing MHC Embeddings",
}

# For every prediction task: its short code and the uploaders to render, in
# order, as (embedding kind, widget key) pairs.
_UPLOAD_LAYOUT = {
    "TCR\u03B1-TCR\u03B2-Peptide-MHC": ('abpm', (('alpha', 101), ('beta', 103), ('pepti', 109), ('mhc', 113))),
    "TCR\u03B1-TCR\u03B2-Peptide": ('abp', (('alpha', 127), ('beta', 131), ('pepti', 137))),
    "TCR\u03B1-Peptide-MHC": ('apm', (('alpha', 139), ('pepti', 149), ('mhc', 151))),
    "TCR\u03B2-Peptide-MHC": ('bpm', (('beta', 157), ('pepti', 163), ('mhc', 167))),
    "TCR\u03B1-Peptide": ('ap', (('alpha', 173), ('pepti', 179))),
    "TCR\u03B2-Peptide": ('bp', (('beta', 181), ('pepti', 191))),
}

_layout = _UPLOAD_LAYOUT.get(task)
if _layout is not None:
    shorttask, _slots = _layout
    # One uploader per required input; `group` keeps them in display order.
    group = tuple(
        st.file_uploader(_UPLOAD_LABELS[_kind], key=_widget_key)
        for _kind, _widget_key in _slots
    )





##################### ML predict function
# Saved model file for every supported (dataset, short task code) pair.
_MODEL_PATHS = {
    ('mcpas', 'abp'): 'models/mcpas/bestmodel_alphabetapeptide.hdf5',
    # NOTE(review): "ptptide" looks like a typo, but the path is kept unchanged
    # because it has to match the file on disk -- confirm before renaming.
    ('mcpas', 'abpm'): 'models/mcpas/bestmodel_alphabetaptptidemhc.hdf5',
    ('mcpas', 'ap'): 'models/mcpas/bestmodel_alphapeptide.hdf5',
    ('mcpas', 'bp'): 'models/mcpas/bestmodel_betapeptide.hdf5',
    ('mcpas', 'apm'): 'models/mcpas/bestmodel_alphapeptidemhc.hdf5',
    ('mcpas', 'bpm'): 'models/mcpas/bestmodel_betapeptidemhc.hdf5',
    ('vdjdb', 'abp'): 'models/vdjdb/bestmodel_alphabetapeptide.hdf5',
    ('vdjdb', 'abpm'): 'models/vdjdb/bestmodel_alphabetapeptidemhc.hdf5',
    ('vdjdb', 'ap'): 'models/vdjdb/bestmodel_alphapeptide.hdf5',
    ('vdjdb', 'bp'): 'models/vdjdb/bestmodel_betapeptide.hdf5',
    ('vdjdb', 'apm'): 'models/vdjdb/bestmodel_alphapeptidemhc.hdf5',
    ('vdjdb', 'bpm'): 'models/vdjdb/bestmodel_betapeptidemhc.hdf5',
}


# @st.cache_data
def predict_on_batch_output(dataset, shorttask, group):
    """Load the embeddings in *group*, run the matching model, return its output.

    Args:
        dataset: 'MCPAS' or 'VDJDB' (the lower-case spellings also work).
        shorttask: task code, one of 'abp', 'abpm', 'ap', 'bp', 'apm', 'bpm'.
            Each letter stands for one input file.
        group: the inputs for ``np.load`` (paths or file-like objects), in the
            same order as the letters of *shorttask*.

    Returns:
        The model's ``predict_on_batch`` result passed through ``np.squeeze``.

    Raises:
        ValueError: if the dataset/task pair is not supported, if *group* does
            not hold exactly one entry per letter of *shorttask*, or if any
            entry is ``None`` (nothing uploaded).
    """
    if dataset == 'MCPAS':
        dataset = 'mcpas'
    elif dataset == 'VDJDB':
        dataset = 'vdjdb'

    try:
        model_path = _MODEL_PATHS[(dataset, shorttask)]
    except (KeyError, TypeError):
        raise ValueError(
            f'Unsupported dataset/task combination: {dataset!r}, {shorttask!r}'
        ) from None

    sources = tuple(group)
    if len(sources) != len(shorttask):
        raise ValueError(
            f'Task {shorttask!r} needs {len(shorttask)} input files, got {len(sources)}'
        )
    if any(source is None for source in sources):
        raise ValueError('One or more input files are missing')

    # Read the inputs before loading the model so bad files fail fast.
    arrays = [np.load(source) for source in sources]
    model = load_model(model_path, compile=False)
    output = model.predict_on_batch(arrays)

    return np.squeeze(output)

# @st.cache_data
def convert_df(df):
    """Return *df* rendered as CSV text and encoded as UTF-8 bytes."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')


#####################
if st.button('Submit'):
    if any(upload is None for upload in group):
        # An uploader with no file selected yields None; nothing to predict on yet.
        st.error('Please ensure you have uploaded the files before pressing the Submit button', icon="🚨")
    else:
        try:
            # Keep the spinner around the actual work so it shows while the model runs.
            with st.spinner('Calculating ...'):
                res = predict_on_batch_output(dataset, shorttask, group)
        except Exception as err:
            # Report the real cause instead of blaming missing uploads.
            st.error(f'Prediction failed: {err}', icon="🚨")
        else:
            # The squeezed result can be 0-d (e.g. a single-sample batch), which
            # pandas cannot turn into a column; promote it to at least 1-d.
            rounded = np.atleast_1d(np.round(res, 4))
            st.write("Binding Probabilities")
            st.dataframe(rounded, use_container_width=True, height=500)
            csv = convert_df(pd.DataFrame(rounded, columns=['output']))
            st.download_button(label="Download Predictions",data=csv,file_name='tcresm_predictions.csv', mime='text/csv')
    


if st.button("Clear All"):
    # The legacy `st.cache` decorator does not provide `clear()`; the clearing
    # entry points are on `st.cache_data` and `st.cache_resource`. Look them up
    # defensively so this also runs on Streamlit versions that lack them.
    for cache_name in ("cache_data", "cache_resource"):
        cache_api = getattr(st, cache_name, None)
        if cache_api is not None:
            cache_api.clear()



st.caption('Developed By: Shashank Yadav : shashank[at]arizona.edu', unsafe_allow_html=True)