Spaces:
Running
on
T4
Running
on
T4
simonduerr
committed on
Commit
•
3055c36
1
Parent(s):
2aa7536
Update app.py
Browse files
app.py
CHANGED
@@ -157,7 +157,7 @@ def make_tied_positions_for_homomers(pdb_dict_list):
|
|
157 |
return my_dict
|
158 |
|
159 |
|
160 |
-
def align_structures(pdb1, pdb2, lenRes, index):
|
161 |
"""Take two structure and superimpose pdb1 on pdb2"""
|
162 |
import Bio.PDB
|
163 |
import subprocess
|
@@ -173,13 +173,13 @@ def align_structures(pdb1, pdb2, lenRes, index):
|
|
173 |
|
174 |
io = Bio.PDB.PDBIO()
|
175 |
io.set_structure(ref_structure)
|
176 |
-
io.save(f"outputs/reference.pdb")
|
177 |
io.set_structure(sample_structure)
|
178 |
-
io.save(f"outputs/out_{index}_aligned.pdb")
|
179 |
# Doing this to get around biopython CEALIGN bug
|
180 |
# subprocess.call("pymol -c -Q -r cealign.pml", shell=True)
|
181 |
|
182 |
-
return aligner.rms, "outputs/reference.pdb", f"outputs/out_{index}_aligned.pdb"
|
183 |
|
184 |
|
185 |
def save_pdb(outs, filename, LEN):
|
@@ -198,7 +198,7 @@ def save_pdb(outs, filename, LEN):
|
|
198 |
|
199 |
|
200 |
@ray.remote(num_gpus=1, max_calls=1)
|
201 |
-
def run_alphafold(sequences, num_recycles):
|
202 |
recycles = int(num_recycles)
|
203 |
RUNNER, OPT = setup_af(sequences[0])
|
204 |
plddts = []
|
@@ -232,7 +232,8 @@ def run_alphafold(sequences, num_recycles):
|
|
232 |
outs, f"/home/duerr/phd/08_Code/ProteinMPNN/outputs/out_{i}.pdb", LEN
|
233 |
)
|
234 |
else:
|
235 |
-
|
|
|
236 |
return plddts, paes, LEN
|
237 |
|
238 |
|
@@ -320,8 +321,10 @@ def preprocess_mol(pdb_code="", filepath=""):
|
|
320 |
os.system(f"wget -qnc https://files.rcsb.org/view/{pdb_code}.pdb")
|
321 |
print(os.getcwd())
|
322 |
print(os.listdir())
|
|
|
323 |
mol = Molecule(f"{pdb_code}.pdb")
|
324 |
-
|
|
|
325 |
# clean messy files and only include protein itself
|
326 |
mol.filter("protein")
|
327 |
# renumber using moleculekit 0...len(protein)
|
@@ -334,8 +337,10 @@ def preprocess_mol(pdb_code="", filepath=""):
|
|
334 |
indexes.append(j)
|
335 |
j += 1
|
336 |
df["proteinMPNN_index"] = indexes
|
337 |
-
|
338 |
-
|
|
|
|
|
339 |
|
340 |
|
341 |
def assign_sasa(mol):
|
@@ -822,13 +827,14 @@ def update_AF(seq_dict, pdb, num_recycles, selectedResidues):
|
|
822 |
plt.figure(),
|
823 |
plt.figure(),
|
824 |
)
|
|
|
825 |
|
826 |
-
plddts, paes, num_res = ray.get(run_alphafold.remote(allSeqs, num_recycles))
|
827 |
|
828 |
sequences = {}
|
829 |
for i in range(lenSeqs):
|
830 |
rms, input_pdb, aligned_pdb = align_structures(
|
831 |
-
pdb, f"outputs/out_{i}.pdb", num_res, i
|
832 |
)
|
833 |
sequences[i] = {
|
834 |
"Seq": i,
|
@@ -896,6 +902,7 @@ def update_AF(seq_dict, pdb, num_recycles, selectedResidues):
|
|
896 |
selectedResidues,
|
897 |
allSeqs,
|
898 |
sequences,
|
|
|
899 |
),
|
900 |
plotAF_plddt,
|
901 |
pae_plots,
|
@@ -913,10 +920,10 @@ def read_mol(molpath):
|
|
913 |
|
914 |
|
915 |
def molecule(
|
916 |
-
input_pdb, aligned_pdb, lenSeqs, num_res, selectedResidues, allSeqs, sequences
|
917 |
):
|
918 |
|
919 |
-
mol = read_mol("outputs/reference.pdb")
|
920 |
options = ""
|
921 |
pred_mol = "["
|
922 |
seqdata = "{"
|
@@ -937,7 +944,7 @@ def molecule(
|
|
937 |
+ '"}'
|
938 |
)
|
939 |
options += f'<option {selected} value="{i}">sequence {i} </option>' # RMSD {sequences[i]["RMSD"]}, score {sequences[i]["Score"]}, recovery {sequences[i]["Recovery"]} pLDDT {sequences[i]["Mean pLDDT"]}
|
940 |
-
p = f"outputs/out_{i}_aligned.pdb"
|
941 |
pred_mol += f"`{read_mol(p)}`"
|
942 |
selected = ""
|
943 |
if i != lenSeqs - 1:
|
|
|
157 |
return my_dict
|
158 |
|
159 |
|
160 |
+
def align_structures(pdb1, pdb2, lenRes, index, random_dir):
|
161 |
"""Take two structure and superimpose pdb1 on pdb2"""
|
162 |
import Bio.PDB
|
163 |
import subprocess
|
|
|
173 |
|
174 |
io = Bio.PDB.PDBIO()
|
175 |
io.set_structure(ref_structure)
|
176 |
+
io.save(f"{random_dir}/outputs/reference.pdb")
|
177 |
io.set_structure(sample_structure)
|
178 |
+
io.save(f"{random_dir}/outputs/out_{index}_aligned.pdb")
|
179 |
# Doing this to get around biopython CEALIGN bug
|
180 |
# subprocess.call("pymol -c -Q -r cealign.pml", shell=True)
|
181 |
|
182 |
+
return aligner.rms, f"{random_dir}/outputs/reference.pdb", f"{random_dir}/outputs/out_{index}_aligned.pdb"
|
183 |
|
184 |
|
185 |
def save_pdb(outs, filename, LEN):
|
|
|
198 |
|
199 |
|
200 |
@ray.remote(num_gpus=1, max_calls=1)
|
201 |
+
def run_alphafold(sequences, num_recycles, random_dir):
|
202 |
recycles = int(num_recycles)
|
203 |
RUNNER, OPT = setup_af(sequences[0])
|
204 |
plddts = []
|
|
|
232 |
outs, f"/home/duerr/phd/08_Code/ProteinMPNN/outputs/out_{i}.pdb", LEN
|
233 |
)
|
234 |
else:
|
235 |
+
print(f"saving to {random_dir.name}")
|
236 |
+
save_pdb(outs, f"{random_dir.name}/outputs/out_{i}.pdb", LEN)
|
237 |
return plddts, paes, LEN
|
238 |
|
239 |
|
|
|
321 |
os.system(f"wget -qnc https://files.rcsb.org/view/{pdb_code}.pdb")
|
322 |
print(os.getcwd())
|
323 |
print(os.listdir())
|
324 |
+
print(os.system(f"cat {pdb_code}.pdb"))
|
325 |
mol = Molecule(f"{pdb_code}.pdb")
|
326 |
+
tf_original = tempfile.NamedTemporaryFile(delete=False)
|
327 |
+
mol.write(tf_original.name)
|
328 |
# clean messy files and only include protein itself
|
329 |
mol.filter("protein")
|
330 |
# renumber using moleculekit 0...len(protein)
|
|
|
337 |
indexes.append(j)
|
338 |
j += 1
|
339 |
df["proteinMPNN_index"] = indexes
|
340 |
+
tf_cleaned = tempfile.NamedTemporaryFile(delete=False)
|
341 |
+
|
342 |
+
mol.write(tf_cleaned.name)
|
343 |
+
return tf_cleaned.name, df
|
344 |
|
345 |
|
346 |
def assign_sasa(mol):
|
|
|
827 |
plt.figure(),
|
828 |
plt.figure(),
|
829 |
)
|
830 |
+
random_dir = tempfile.NamedTemporaryDir(delete=False)
|
831 |
|
832 |
+
plddts, paes, num_res = ray.get(run_alphafold.remote(allSeqs, num_recycles, random_dir ))
|
833 |
|
834 |
sequences = {}
|
835 |
for i in range(lenSeqs):
|
836 |
rms, input_pdb, aligned_pdb = align_structures(
|
837 |
+
pdb, f"{random_dir}/outputs/out_{i}.pdb", num_res, i, random_dir.name
|
838 |
)
|
839 |
sequences[i] = {
|
840 |
"Seq": i,
|
|
|
902 |
selectedResidues,
|
903 |
allSeqs,
|
904 |
sequences,
|
905 |
+
random_dir.name
|
906 |
),
|
907 |
plotAF_plddt,
|
908 |
pae_plots,
|
|
|
920 |
|
921 |
|
922 |
def molecule(
|
923 |
+
input_pdb, aligned_pdb, lenSeqs, num_res, selectedResidues, allSeqs, sequences, random_dir
|
924 |
):
|
925 |
|
926 |
+
mol = read_mol(f"{random_dir}/outputs/reference.pdb")
|
927 |
options = ""
|
928 |
pred_mol = "["
|
929 |
seqdata = "{"
|
|
|
944 |
+ '"}'
|
945 |
)
|
946 |
options += f'<option {selected} value="{i}">sequence {i} </option>' # RMSD {sequences[i]["RMSD"]}, score {sequences[i]["Score"]}, recovery {sequences[i]["Recovery"]} pLDDT {sequences[i]["Mean pLDDT"]}
|
947 |
+
p = f"{random_dir}/outputs/out_{i}_aligned.pdb"
|
948 |
pred_mol += f"`{read_mol(p)}`"
|
949 |
selected = ""
|
950 |
if i != lenSeqs - 1:
|