Spaces:
Runtime error
Runtime error
Add profile app.py
Browse files- app.py +104 -0
- requirements.txt +2 -0
app.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import AlbertForMaskedLM, AlbertTokenizer, pipeline
|
2 |
+
import numpy as np
|
3 |
+
import gradio as gr
|
4 |
+
|
5 |
+
|
6 |
+
## Load the model
# Both the tokenizer and the masked-LM weights come from the same checkpoint.
# NOTE(review): AlbertTokenizer / AlbertForMaskedLM presumably need the
# `sentencepiece` and `torch` packages at runtime -- confirm they are
# installed in the deployment environment.
_MODEL_ID = "Rostlab/prot_albert"

tokenizer = AlbertTokenizer.from_pretrained(_MODEL_ID, do_lower_case=False)
model = AlbertForMaskedLM.from_pretrained(_MODEL_ID)

## Fill-mask pipeline
# top_k=21 requests 21 candidate tokens for each masked position.
fill_mask = pipeline(
    "fill-mask",
    model=model,
    tokenizer=tokenizer,
    top_k=21,
)
|
12 |
+
|
13 |
+
## Initialization
# The 20 residue letters, in the column order shared by `header` and `Hash1`.
_AMINO_ACIDS = "VLIMFWYGAPSTCHRKQEND"

# Column names: three leading fields, one column per residue letter, then
# eight trailing fields.
header = [
    'SeqNo', 'PDB', 'No',
    *_AMINO_ACIDS,
    'NOCC', 'NDEL', 'NINS', 'ENTROPY', 'RELENT', 'WEIGHT', 'CHAIN', 'AUTHCHAIN',
]

# Sequence positions (0-based) to leave out of the predicted profile.
rem = []

# Residue letter -> row index in the predicted profile; 'X' takes the last row.
Hash1 = {residue: row for row, residue in enumerate(_AMINO_ACIDS + 'X')}
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
def ReadfastaFile(filename):
    """Read a FASTA file and return its sequence as one upper-case string.

    Any line containing ``>`` is treated as a header and skipped. All other
    lines are right-stripped and concatenated in file order, so a file with
    several records yields a single joined sequence.

    Args:
        filename: Path of the FASTA file to read.

    Returns:
        The concatenated sequence, upper-cased; ``""`` for an empty file or a
        file that contains only header lines.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # `with` guarantees the handle is closed even if reading fails part-way.
    with open(filename, "r") as fasta:
        chunks = [line.rstrip() for line in fasta if ">" not in line]
    return "".join(chunks).upper()
|
57 |
+
|
58 |
+
|
59 |
+
def Predict_profile(sequence, header=header, rem=rem, Hash1=Hash1):
    """Predict a per-position residue profile by masking one residue at a time.

    For each position of ``sequence`` that is not listed in ``rem``, the
    residue is replaced by ``[MASK]``, the space-separated sequence is passed
    to the module-level ``fill_mask`` pipeline, and every returned candidate
    whose token is a key of ``Hash1`` has its score stored as a truncated
    integer percentage (``int(score * 100)``).

    Args:
        sequence: Sequence string; each character is treated as one token.
        header: Unused; kept so existing callers that pass it keep working.
        rem: 0-based positions to skip. Their columns are removed from the
            returned array.
        Hash1: Mapping from residue letter to row index of the profile.

    Returns:
        A ``numpy`` array with ``len(Hash1)`` rows and one column per
        position of ``sequence`` that is not in ``rem``.
    """
    skipped = set(rem)  # O(1) membership test inside the loop
    residues = list(sequence)
    pred_Profile = np.zeros((len(Hash1), len(residues)))

    # Iterate over the positions of `sequence`; the original looped over an
    # undefined name `S`, which raised NameError on every call.
    for i in range(len(residues)):
        if i in skipped:
            continue

        masked = residues.copy()
        masked[i] = '[MASK]'
        predictions = fill_mask(" ".join(masked))

        for prediction in predictions:
            # Strip the "▁" prefix from the token text (presumably the
            # tokenizer's word-boundary marker) before the lookup.
            token = prediction['token_str'].replace("▁", "")
            if token not in Hash1:
                # Report candidates that have no row in the profile.
                print(i, token)
            else:
                pred_Profile[Hash1[token], i] = int(prediction['score'] * 100)

        print(i)  # progress output: index of the position just processed

    if len(rem) != 0:
        # Skipped positions were never filled in; drop their columns.
        pred_Profile = np.delete(pred_Profile, rem, 1)
    return pred_Profile
|
94 |
+
|
95 |
+
|
96 |
+
|
97 |
+
|
98 |
+
|
99 |
+
# Web UI: one text input (the sequence) passed to Predict_profile, with the
# result shown as text.
iface = gr.Interface(
    fn=Predict_profile,
    inputs=["text"],
    outputs="text",
    description="Please enter the sequence",
    # Title spelling fixed ("ProtAbert" -> "ProtAlbert") to match the
    # Rostlab/prot_albert checkpoint loaded by this script.
    title="Protein profile prediction using ProtAlbert",
)
iface.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
numpy
|
2 |
+
transformers
|