Spaces:
Running
on
T4
Running
on
T4
Simon Duerr
committed on
Commit
•
85bd48b
1
Parent(s):
e65166b
add fast af
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- LICENSE +1 -1
- af_backprop/README.md +6 -0
- af_backprop/alphafold/__init__.py +14 -0
- af_backprop/alphafold/common/__init__.py +14 -0
- af_backprop/alphafold/common/confidence.py +155 -0
- af_backprop/alphafold/common/protein.py +229 -0
- af_backprop/alphafold/common/residue_constants.py +911 -0
- af_backprop/alphafold/data/__init__.py +14 -0
- af_backprop/alphafold/data/mmcif_parsing.py +384 -0
- af_backprop/alphafold/data/parsers.py +364 -0
- af_backprop/alphafold/data/pipeline.py +209 -0
- af_backprop/alphafold/data/prep_inputs.py +133 -0
- af_backprop/alphafold/data/templates.py +910 -0
- af_backprop/alphafold/data/tools/__init__.py +14 -0
- af_backprop/alphafold/data/tools/hhblits.py +155 -0
- af_backprop/alphafold/data/tools/hhsearch.py +91 -0
- af_backprop/alphafold/data/tools/hmmbuild.py +138 -0
- af_backprop/alphafold/data/tools/hmmsearch.py +90 -0
- af_backprop/alphafold/data/tools/jackhmmer.py +198 -0
- af_backprop/alphafold/data/tools/kalign.py +104 -0
- af_backprop/alphafold/data/tools/utils.py +40 -0
- af_backprop/alphafold/model/__init__.py +14 -0
- af_backprop/alphafold/model/all_atom.py +1155 -0
- af_backprop/alphafold/model/common_modules.py +84 -0
- af_backprop/alphafold/model/config.py +412 -0
- af_backprop/alphafold/model/data.py +39 -0
- af_backprop/alphafold/model/features.py +102 -0
- af_backprop/alphafold/model/folding.py +1016 -0
- af_backprop/alphafold/model/layer_stack.py +274 -0
- af_backprop/alphafold/model/lddt.py +88 -0
- af_backprop/alphafold/model/mapping.py +218 -0
- af_backprop/alphafold/model/model.py +145 -0
- af_backprop/alphafold/model/modules.py +2164 -0
- af_backprop/alphafold/model/prng.py +70 -0
- af_backprop/alphafold/model/quat_affine.py +459 -0
- af_backprop/alphafold/model/r3.py +320 -0
- af_backprop/alphafold/model/tf/__init__.py +14 -0
- af_backprop/alphafold/model/tf/data_transforms.py +625 -0
- af_backprop/alphafold/model/tf/input_pipeline.py +166 -0
- af_backprop/alphafold/model/tf/protein_features.py +129 -0
- af_backprop/alphafold/model/tf/proteins_dataset.py +166 -0
- af_backprop/alphafold/model/tf/shape_helpers.py +47 -0
- af_backprop/alphafold/model/tf/shape_placeholders.py +20 -0
- af_backprop/alphafold/model/tf/utils.py +47 -0
- af_backprop/alphafold/model/utils.py +81 -0
- af_backprop/examples/AlphaFold_single.ipynb +311 -0
- af_backprop/examples/af_design.ipynb +41 -0
- af_backprop/examples/fixbb_design.ipynb +29 -0
- af_backprop/examples/sc_hall/1QJG.pdb +1156 -0
- af_backprop/examples/sc_hall/1QJS_starting.pdb +880 -0
LICENSE
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
MIT License
|
2 |
|
3 |
-
Copyright (c) 2022 Justas Dauparas, Simon Duerr
|
4 |
|
5 |
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
of this software and associated documentation files (the "Software"), to deal
|
|
|
1 |
MIT License
|
2 |
|
3 |
+
Copyright (c) 2022 Justas Dauparas, Sergey Ovchinnikov, Simon Duerr
|
4 |
|
5 |
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
of this software and associated documentation files (the "Software"), to deal
|
af_backprop/README.md
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# af_backprop
|
2 |
+
various modifications to alphafold to allow backprop through the model
|
3 |
+
|
4 |
+
### projects that use af_backprop
|
5 |
+
- [SMURF](https://github.com/spetti/SMURF): End-to-end learning of multiple sequence alignments with differentiable Smith-Waterman
|
6 |
+
- [ColabDesign](https://github.com/sokrypton/ColabDesign): Making Protein Design accessible to all via Google Colab!
|
af_backprop/alphafold/__init__.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
"""An implementation of the inference pipeline of AlphaFold v2.0."""
|
af_backprop/alphafold/common/__init__.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
"""Common data types and constants used within Alphafold."""
|
af_backprop/alphafold/common/confidence.py
ADDED
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Functions for processing confidence metrics."""
|
16 |
+
|
17 |
+
from typing import Dict, Optional, Tuple
|
18 |
+
import numpy as np
|
19 |
+
import scipy.special
|
20 |
+
|
21 |
+
|
22 |
+
def compute_plddt(logits: np.ndarray) -> np.ndarray:
    """Converts PredictedLDDTHead logits into a per-residue pLDDT score.

    The lDDT range [0, 1] is split into `num_bins` equal-width bins; the score
    is the softmax-weighted average of the bin centers, rescaled to [0, 100].

    Args:
      logits: [num_res, num_bins] output from the PredictedLDDTHead.

    Returns:
      plddt: [num_res] per-residue pLDDT.
    """
    width = 1.0 / logits.shape[-1]
    # Centers sit half a bin above each lower edge: 0.5w, 1.5w, ...
    centers = np.arange(start=0.5 * width, stop=1.0, step=width)
    weighted = scipy.special.softmax(logits, axis=-1) * centers[None, :]
    return weighted.sum(axis=-1) * 100
|
37 |
+
|
38 |
+
|
39 |
+
def _calculate_bin_centers(breaks: np.ndarray):
    """Derives bin centers from evenly spaced bin edges.

    The spacing is taken from the first two edges, so `breaks` needs at least
    two entries.

    Args:
      breaks: [num_bins - 1] the error bin edges.

    Returns:
      bin_centers: [num_bins] the error bin centers.
    """
    spacing = breaks[1] - breaks[0]

    # Shift every edge up by half a bin to land on its center.
    centers = breaks + spacing / 2
    # One extra catch-all bin sits a full step beyond the last center.
    overflow_center = centers[-1] + spacing
    return np.concatenate([centers, [overflow_center]], axis=0)
|
56 |
+
|
57 |
+
|
58 |
+
def _calculate_expected_aligned_error(
        alignment_confidence_breaks: np.ndarray,
        aligned_distance_error_probs: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
    """Computes the expected aligned distance error for each residue pair.

    Args:
      alignment_confidence_breaks: [num_bins - 1] the error bin edges.
      aligned_distance_error_probs: [num_res, num_res, num_bins] the predicted
        probs for each error bin, for each pair of residues.

    Returns:
      predicted_aligned_error: [num_res, num_res] the expected aligned distance
        error for each pair of residues.
      max_predicted_aligned_error: The maximum predicted error possible, i.e.
        the center of the final (catch-all) bin.
    """
    centers = _calculate_bin_centers(alignment_confidence_breaks)

    # Expectation over the bin axis: sum_k p_k * center_k.
    expected_error = (aligned_distance_error_probs * centers).sum(axis=-1)
    largest_error = np.asarray(centers[-1])
    return expected_error, largest_error
|
78 |
+
|
79 |
+
|
80 |
+
def compute_predicted_aligned_error(
        logits: np.ndarray,
        breaks: np.ndarray) -> Dict[str, np.ndarray]:
    """Derives aligned-error confidence metrics from raw logits.

    Args:
      logits: [num_res, num_res, num_bins] the logits output from
        PredictedAlignedErrorHead.
      breaks: [num_bins - 1] the error bin edges.

    Returns:
      A dict with three entries:
        aligned_confidence_probs: [num_res, num_res, num_bins] the predicted
          aligned error probabilities over bins for each residue pair.
        predicted_aligned_error: [num_res, num_res] the expected aligned
          distance error for each pair of residues.
        max_predicted_aligned_error: The maximum predicted error possible.
    """
    probs = scipy.special.softmax(logits, axis=-1)
    expected_error, largest_error = _calculate_expected_aligned_error(
        alignment_confidence_breaks=breaks,
        aligned_distance_error_probs=probs)

    metrics = {'aligned_confidence_probs': probs}
    metrics['predicted_aligned_error'] = expected_error
    metrics['max_predicted_aligned_error'] = largest_error
    return metrics
|
109 |
+
|
110 |
+
|
111 |
+
def predicted_tm_score(
        logits: np.ndarray,
        breaks: np.ndarray,
        residue_weights: Optional[np.ndarray] = None) -> np.ndarray:
    """Computes predicted TM alignment score.

    Args:
      logits: [num_res, num_res, num_bins] the logits output from
        PredictedAlignedErrorHead.
      breaks: [num_bins - 1] the error bin edges (one catch-all bin is appended
        when the bin centers are derived, giving num_bins centers).
      residue_weights: [num_res] the per residue weights to use for the
        expectation. Defaults to a weight of 1 for every residue.

    Returns:
      ptm_score: the predicted TM alignment score, as a 0-d array.
    """

    # residue_weights has to be in [0, 1], but can be floating-point, i.e. the
    # exp. resolved head's probability.
    if residue_weights is None:
        residue_weights = np.ones(logits.shape[0])

    bin_centers = _calculate_bin_centers(breaks)

    num_res = np.sum(residue_weights)
    # Clip num_res to avoid negative/undefined d0.
    clipped_num_res = max(num_res, 19)

    # Compute d_0(num_res) as defined by TM-score, eqn. (5) in
    # http://zhanglab.ccmb.med.umich.edu/papers/2004_3.pdf
    # Zhang & Skolnick "Scoring function for automated
    # assessment of protein structure template quality" 2004
    d0 = 1.24 * (clipped_num_res - 15) ** (1. / 3) - 1.8

    # Convert logits to probs.
    probs = scipy.special.softmax(logits, axis=-1)

    # TM-score term for every bin.
    tm_per_bin = 1. / (1 + np.square(bin_centers) / np.square(d0))
    # E_distances tm(distance): expectation over the bin axis.
    predicted_tm_term = np.sum(probs * tm_per_bin, axis=-1)

    # Reuse the weight total computed above; the epsilon guards against an
    # all-zero weight vector.
    normed_residue_mask = residue_weights / (1e-8 + num_res)
    per_alignment = np.sum(predicted_tm_term * normed_residue_mask, axis=-1)
    # Report the score of the best alignment, where "best" is chosen among
    # residues in proportion to their weight.
    return np.asarray(
        per_alignment[(per_alignment * residue_weights).argmax()])
|
af_backprop/alphafold/common/protein.py
ADDED
@@ -0,0 +1,229 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Protein data type."""
|
16 |
+
import dataclasses
|
17 |
+
import io
|
18 |
+
from typing import Any, Mapping, Optional
|
19 |
+
from alphafold.common import residue_constants
|
20 |
+
from Bio.PDB import PDBParser
|
21 |
+
import numpy as np
|
22 |
+
|
23 |
+
FeatureDict = Mapping[str, np.ndarray]
|
24 |
+
ModelOutput = Mapping[str, Any] # Is a nested dict.
|
25 |
+
|
26 |
+
|
27 |
+
@dataclasses.dataclass(frozen=True)
class Protein:
    """Immutable container for a single-chain protein structure."""

    # Atom coordinates in angstroms (Cartesian). The atom-type axis follows the
    # ordering of residue_constants.atom_types.
    atom_positions: np.ndarray  # [num_res, num_atom_type, 3]

    # Integer amino-acid type per residue, in the range 0..20 where 20 stands
    # for the unknown residue 'X'.
    aatype: np.ndarray  # [num_res]

    # Float mask: 1.0 where an atom is present, 0.0 where it is absent. Intended
    # for masking losses.
    atom_mask: np.ndarray  # [num_res, num_atom_type]

    # Residue numbering as it appears in the PDB; may have gaps and need not
    # start at zero.
    residue_index: np.ndarray  # [num_res]

    # B-factors (temperature factors) in square angstroms, describing the
    # displacement of each residue from its ground-truth mean position.
    b_factors: np.ndarray  # [num_res, num_atom_type]
|
50 |
+
|
51 |
+
|
52 |
+
def from_pdb_string(pdb_str: str, chain_id: Optional[str] = None) -> Protein:
    """Parses PDB text into a `Protein`.

    WARNING: All non-standard residue types will be converted into UNK. All
      non-standard atoms will be ignored.

    Args:
      pdb_str: The contents of the pdb file
      chain_id: If None, then the pdb file must contain a single chain (which
        will be parsed). If chain_id is specified (e.g. A), then only that
        chain is parsed.

    Returns:
      A new `Protein` parsed from the pdb contents.

    Raises:
      ValueError: If the PDB does not hold exactly one model, if no chain_id
        is given and there is not exactly one chain, or if a residue carries
        an insertion code.
    """
    structure = PDBParser(QUIET=True).get_structure(
        'none', io.StringIO(pdb_str))
    models = list(structure.get_models())
    if len(models) != 1:
        raise ValueError(
            f'Only single model PDBs are supported. Found {len(models)} models.')
    model = models[0]

    if chain_id is None:
        chains = list(model.get_chains())
        if len(chains) != 1:
            raise ValueError(
                'Only single chain PDBs are supported when chain_id not specified. '
                f'Found {len(chains)} chains.')
        chain = chains[0]
    else:
        chain = model[chain_id]

    num_atom_types = residue_constants.atom_type_num
    atom_positions = []
    aatype = []
    atom_mask = []
    residue_index = []
    b_factors = []

    for res in chain:
        # res.id[2] holds the insertion code; a single blank means "none".
        if res.id[2] != ' ':
            raise ValueError(
                f'PDB contains an insertion code at chain {chain.id} and residue '
                f'index {res.id[1]}. These are not supported.')

        # Unknown three-letter codes fall back to 'X', which in turn maps to
        # the index just past the standard residue types.
        one_letter = residue_constants.restype_3to1.get(res.resname, 'X')
        restype_idx = residue_constants.restype_order.get(
            one_letter, residue_constants.restype_num)

        pos = np.zeros((num_atom_types, 3))
        mask = np.zeros((num_atom_types,))
        res_b_factors = np.zeros((num_atom_types,))
        for atom in res:
            if atom.name in residue_constants.atom_types:
                slot = residue_constants.atom_order[atom.name]
                pos[slot] = atom.coord
                mask[slot] = 1.
                res_b_factors[slot] = atom.bfactor

        # Keep the residue only if at least one known atom was reported.
        if np.sum(mask) >= 0.5:
            aatype.append(restype_idx)
            atom_positions.append(pos)
            atom_mask.append(mask)
            residue_index.append(res.id[1])
            b_factors.append(res_b_factors)

    return Protein(
        atom_positions=np.array(atom_positions),
        atom_mask=np.array(atom_mask),
        aatype=np.array(aatype),
        residue_index=np.array(residue_index),
        b_factors=np.array(b_factors))
|
125 |
+
|
126 |
+
|
127 |
+
def to_pdb(prot: Protein) -> str:
    """Converts a `Protein` instance to a PDB string.

    The output holds a single model with a single chain 'A': one ATOM record
    per atom whose mask is >= 0.5, followed by TER, ENDMDL and END records.

    Args:
      prot: The protein to convert to PDB.

    Returns:
      PDB string.

    Raises:
      ValueError: If any entry of `prot.aatype` exceeds
        `residue_constants.restype_num`.
    """
    restypes = residue_constants.restypes + ['X']

    def res_1to3(r):
        # Map a residue-type index to its three-letter code ('UNK' if unknown).
        return residue_constants.restype_1to3.get(restypes[r], 'UNK')

    atom_types = residue_constants.atom_types

    atom_mask = prot.atom_mask
    aatype = prot.aatype
    atom_positions = prot.atom_positions
    residue_index = prot.residue_index.astype(np.int32)
    b_factors = prot.b_factors

    if np.any(aatype > residue_constants.restype_num):
        raise ValueError('Invalid aatypes.')

    # PDB is a columnar format, every space matters here! The blank column
    # runs are built from explicit widths so the layout cannot be silently
    # corrupted by whitespace normalisation of this source file.
    model_gap = ' ' * 8     # MODEL: columns 6-13, serial right-justified to 14.
    coord_gap = ' ' * 3     # ATOM: columns 28-30, between iCode and x.
    element_gap = ' ' * 10  # ATOM: columns 67-76, between tempFactor and element.
    ter_gap = ' ' * 6       # TER: columns 12-17, between serial and resName.

    pdb_lines = ['MODEL' + model_gap + '1']
    atom_index = 1
    chain_id = 'A'
    # Add all atom sites.
    for i in range(aatype.shape[0]):
        res_name_3 = res_1to3(aatype[i])
        for atom_name, pos, mask, b_factor in zip(
                atom_types, atom_positions[i], atom_mask[i], b_factors[i]):
            if mask < 0.5:
                continue

            record_type = 'ATOM'
            # Names shorter than four characters start in the second column of
            # the four-wide atom-name field.
            name = atom_name if len(atom_name) == 4 else f' {atom_name}'
            alt_loc = ''
            insertion_code = ''
            occupancy = 1.00
            element = atom_name[0]  # Protein supports only C, N, O, S, this works.
            charge = ''
            atom_line = (f'{record_type:<6}{atom_index:>5} {name:<4}{alt_loc:>1}'
                         f'{res_name_3:>3} {chain_id:>1}'
                         f'{residue_index[i]:>4}{insertion_code:>1}{coord_gap}'
                         f'{pos[0]:>8.3f}{pos[1]:>8.3f}{pos[2]:>8.3f}'
                         f'{occupancy:>6.2f}{b_factor:>6.2f}{element_gap}'
                         f'{element:>2}{charge:>2}')
            pdb_lines.append(atom_line)
            atom_index += 1

    # Close the chain.
    chain_end = 'TER'
    chain_termination_line = (
        f'{chain_end:<6}{atom_index:>5}{ter_gap}{res_1to3(aatype[-1]):>3} '
        f'{chain_id:>1}{residue_index[-1]:>4}')
    pdb_lines.append(chain_termination_line)
    pdb_lines.append('ENDMDL')

    pdb_lines.append('END')
    pdb_lines.append('')
    return '\n'.join(pdb_lines)
|
190 |
+
|
191 |
+
|
192 |
+
def ideal_atom_mask(prot: Protein) -> np.ndarray:
    """Looks up the atom mask implied by the amino-acid sequence alone.

    `Protein.atom_mask` typically reflects which atoms were reported in the
    PDB. This function instead returns, for each residue, the mask of heavy
    atoms that should be present for that residue type.

    Args:
      prot: `Protein` whose fields are `numpy.ndarray` objects.

    Returns:
      An ideal atom mask.
    """
    standard_masks = residue_constants.STANDARD_ATOM_MASK
    # Index by residue type to select one mask row per residue.
    return standard_masks[prot.aatype]
|
206 |
+
|
207 |
+
|
208 |
+
def from_prediction(features: FeatureDict, result: ModelOutput,
                    b_factors: Optional[np.ndarray] = None) -> Protein:
    """Builds a `Protein` from model inputs and structure-module outputs.

    Args:
      features: Dictionary holding model inputs.
      result: Dictionary holding model outputs.
      b_factors: (Optional) B-factors to use for the protein. When omitted,
        zeros shaped like the final atom mask are used.

    Returns:
      A protein instance.
    """
    structure = result['structure_module']
    if b_factors is None:
        b_factors = np.zeros_like(structure['final_atom_mask'])

    # Only the first entry along the leading axis of each feature is used.
    residue_types = features['aatype'][0]
    coordinates = structure['final_atom_positions']
    presence_mask = structure['final_atom_mask']
    # Shift the feature's residue indices up by one for the Protein record.
    numbering = features['residue_index'][0] + 1

    return Protein(
        aatype=residue_types,
        atom_positions=coordinates,
        atom_mask=presence_mask,
        residue_index=numbering,
        b_factors=b_factors)
|
af_backprop/alphafold/common/residue_constants.py
ADDED
@@ -0,0 +1,911 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Constants used in AlphaFold."""
|
16 |
+
|
17 |
+
import collections
|
18 |
+
import functools
|
19 |
+
from typing import List, Mapping, Tuple
|
20 |
+
|
21 |
+
import numpy as np
|
22 |
+
import tree
|
23 |
+
|
24 |
+
# Internal import (35fd).
|
25 |
+
|
26 |
+
|
27 |
+
# Distance from one CA to next CA [trans configuration: omega = 180].
|
28 |
+
ca_ca = 3.80209737096
|
29 |
+
|
30 |
+
# Format: The list for each AA type contains chi1, chi2, chi3, chi4 in
|
31 |
+
# this order (or a relevant subset from chi1 onwards). ALA and GLY don't have
|
32 |
+
# chi angles so their chi angle lists are empty.
|
33 |
+
chi_angles_atoms = {
|
34 |
+
'ALA': [],
|
35 |
+
# Chi5 in arginine is always 0 +- 5 degrees, so ignore it.
|
36 |
+
'ARG': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'],
|
37 |
+
['CB', 'CG', 'CD', 'NE'], ['CG', 'CD', 'NE', 'CZ']],
|
38 |
+
'ASN': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'OD1']],
|
39 |
+
'ASP': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'OD1']],
|
40 |
+
'CYS': [['N', 'CA', 'CB', 'SG']],
|
41 |
+
'GLN': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'],
|
42 |
+
['CB', 'CG', 'CD', 'OE1']],
|
43 |
+
'GLU': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'],
|
44 |
+
['CB', 'CG', 'CD', 'OE1']],
|
45 |
+
'GLY': [],
|
46 |
+
'HIS': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'ND1']],
|
47 |
+
'ILE': [['N', 'CA', 'CB', 'CG1'], ['CA', 'CB', 'CG1', 'CD1']],
|
48 |
+
'LEU': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']],
|
49 |
+
'LYS': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'],
|
50 |
+
['CB', 'CG', 'CD', 'CE'], ['CG', 'CD', 'CE', 'NZ']],
|
51 |
+
'MET': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'SD'],
|
52 |
+
['CB', 'CG', 'SD', 'CE']],
|
53 |
+
'PHE': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']],
|
54 |
+
'PRO': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD']],
|
55 |
+
'SER': [['N', 'CA', 'CB', 'OG']],
|
56 |
+
'THR': [['N', 'CA', 'CB', 'OG1']],
|
57 |
+
'TRP': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']],
|
58 |
+
'TYR': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']],
|
59 |
+
'VAL': [['N', 'CA', 'CB', 'CG1']],
|
60 |
+
}
|
61 |
+
|
62 |
+
# If chi angles given in fixed-length array, this matrix determines how to mask
|
63 |
+
# them for each AA type. The order is as per restype_order (see below).
|
64 |
+
chi_angles_mask = [
|
65 |
+
[0.0, 0.0, 0.0, 0.0], # ALA
|
66 |
+
[1.0, 1.0, 1.0, 1.0], # ARG
|
67 |
+
[1.0, 1.0, 0.0, 0.0], # ASN
|
68 |
+
[1.0, 1.0, 0.0, 0.0], # ASP
|
69 |
+
[1.0, 0.0, 0.0, 0.0], # CYS
|
70 |
+
[1.0, 1.0, 1.0, 0.0], # GLN
|
71 |
+
[1.0, 1.0, 1.0, 0.0], # GLU
|
72 |
+
[0.0, 0.0, 0.0, 0.0], # GLY
|
73 |
+
[1.0, 1.0, 0.0, 0.0], # HIS
|
74 |
+
[1.0, 1.0, 0.0, 0.0], # ILE
|
75 |
+
[1.0, 1.0, 0.0, 0.0], # LEU
|
76 |
+
[1.0, 1.0, 1.0, 1.0], # LYS
|
77 |
+
[1.0, 1.0, 1.0, 0.0], # MET
|
78 |
+
[1.0, 1.0, 0.0, 0.0], # PHE
|
79 |
+
[1.0, 1.0, 0.0, 0.0], # PRO
|
80 |
+
[1.0, 0.0, 0.0, 0.0], # SER
|
81 |
+
[1.0, 0.0, 0.0, 0.0], # THR
|
82 |
+
[1.0, 1.0, 0.0, 0.0], # TRP
|
83 |
+
[1.0, 1.0, 0.0, 0.0], # TYR
|
84 |
+
[1.0, 0.0, 0.0, 0.0], # VAL
|
85 |
+
]
|
86 |
+
|
87 |
+
# The following chi angles are pi periodic: they can be rotated by a multiple
|
88 |
+
# of pi without affecting the structure.
|
89 |
+
chi_pi_periodic = [
|
90 |
+
[0.0, 0.0, 0.0, 0.0], # ALA
|
91 |
+
[0.0, 0.0, 0.0, 0.0], # ARG
|
92 |
+
[0.0, 0.0, 0.0, 0.0], # ASN
|
93 |
+
[0.0, 1.0, 0.0, 0.0], # ASP
|
94 |
+
[0.0, 0.0, 0.0, 0.0], # CYS
|
95 |
+
[0.0, 0.0, 0.0, 0.0], # GLN
|
96 |
+
[0.0, 0.0, 1.0, 0.0], # GLU
|
97 |
+
[0.0, 0.0, 0.0, 0.0], # GLY
|
98 |
+
[0.0, 0.0, 0.0, 0.0], # HIS
|
99 |
+
[0.0, 0.0, 0.0, 0.0], # ILE
|
100 |
+
[0.0, 0.0, 0.0, 0.0], # LEU
|
101 |
+
[0.0, 0.0, 0.0, 0.0], # LYS
|
102 |
+
[0.0, 0.0, 0.0, 0.0], # MET
|
103 |
+
[0.0, 1.0, 0.0, 0.0], # PHE
|
104 |
+
[0.0, 0.0, 0.0, 0.0], # PRO
|
105 |
+
[0.0, 0.0, 0.0, 0.0], # SER
|
106 |
+
[0.0, 0.0, 0.0, 0.0], # THR
|
107 |
+
[0.0, 0.0, 0.0, 0.0], # TRP
|
108 |
+
[0.0, 1.0, 0.0, 0.0], # TYR
|
109 |
+
[0.0, 0.0, 0.0, 0.0], # VAL
|
110 |
+
[0.0, 0.0, 0.0, 0.0], # UNK
|
111 |
+
]
|
112 |
+
|
113 |
+
# Atoms positions relative to the 8 rigid groups, defined by the pre-omega, phi,
|
114 |
+
# psi and chi angles:
|
115 |
+
# 0: 'backbone group',
|
116 |
+
# 1: 'pre-omega-group', (empty)
|
117 |
+
# 2: 'phi-group', (currently empty, because it defines only hydrogens)
|
118 |
+
# 3: 'psi-group',
|
119 |
+
# 4,5,6,7: 'chi1,2,3,4-group'
|
120 |
+
# The atom positions are relative to the axis-end-atom of the corresponding
|
121 |
+
# rotation axis. The x-axis is in direction of the rotation axis, and the y-axis
|
122 |
+
# is defined such that the dihedral-angle-defining atom (the last entry in
|
123 |
+
# chi_angles_atoms above) is in the xy-plane (with a positive y-coordinate).
|
124 |
+
# format: [atomname, group_idx, rel_position]
|
125 |
+
rigid_group_atom_positions = {
|
126 |
+
'ALA': [
|
127 |
+
['N', 0, (-0.525, 1.363, 0.000)],
|
128 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
129 |
+
['C', 0, (1.526, -0.000, -0.000)],
|
130 |
+
['CB', 0, (-0.529, -0.774, -1.205)],
|
131 |
+
['O', 3, (0.627, 1.062, 0.000)],
|
132 |
+
],
|
133 |
+
'ARG': [
|
134 |
+
['N', 0, (-0.524, 1.362, -0.000)],
|
135 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
136 |
+
['C', 0, (1.525, -0.000, -0.000)],
|
137 |
+
['CB', 0, (-0.524, -0.778, -1.209)],
|
138 |
+
['O', 3, (0.626, 1.062, 0.000)],
|
139 |
+
['CG', 4, (0.616, 1.390, -0.000)],
|
140 |
+
['CD', 5, (0.564, 1.414, 0.000)],
|
141 |
+
['NE', 6, (0.539, 1.357, -0.000)],
|
142 |
+
['NH1', 7, (0.206, 2.301, 0.000)],
|
143 |
+
['NH2', 7, (2.078, 0.978, -0.000)],
|
144 |
+
['CZ', 7, (0.758, 1.093, -0.000)],
|
145 |
+
],
|
146 |
+
'ASN': [
|
147 |
+
['N', 0, (-0.536, 1.357, 0.000)],
|
148 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
149 |
+
['C', 0, (1.526, -0.000, -0.000)],
|
150 |
+
['CB', 0, (-0.531, -0.787, -1.200)],
|
151 |
+
['O', 3, (0.625, 1.062, 0.000)],
|
152 |
+
['CG', 4, (0.584, 1.399, 0.000)],
|
153 |
+
['ND2', 5, (0.593, -1.188, 0.001)],
|
154 |
+
['OD1', 5, (0.633, 1.059, 0.000)],
|
155 |
+
],
|
156 |
+
'ASP': [
|
157 |
+
['N', 0, (-0.525, 1.362, -0.000)],
|
158 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
159 |
+
['C', 0, (1.527, 0.000, -0.000)],
|
160 |
+
['CB', 0, (-0.526, -0.778, -1.208)],
|
161 |
+
['O', 3, (0.626, 1.062, -0.000)],
|
162 |
+
['CG', 4, (0.593, 1.398, -0.000)],
|
163 |
+
['OD1', 5, (0.610, 1.091, 0.000)],
|
164 |
+
['OD2', 5, (0.592, -1.101, -0.003)],
|
165 |
+
],
|
166 |
+
'CYS': [
|
167 |
+
['N', 0, (-0.522, 1.362, -0.000)],
|
168 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
169 |
+
['C', 0, (1.524, 0.000, 0.000)],
|
170 |
+
['CB', 0, (-0.519, -0.773, -1.212)],
|
171 |
+
['O', 3, (0.625, 1.062, -0.000)],
|
172 |
+
['SG', 4, (0.728, 1.653, 0.000)],
|
173 |
+
],
|
174 |
+
'GLN': [
|
175 |
+
['N', 0, (-0.526, 1.361, -0.000)],
|
176 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
177 |
+
['C', 0, (1.526, 0.000, 0.000)],
|
178 |
+
['CB', 0, (-0.525, -0.779, -1.207)],
|
179 |
+
['O', 3, (0.626, 1.062, -0.000)],
|
180 |
+
['CG', 4, (0.615, 1.393, 0.000)],
|
181 |
+
['CD', 5, (0.587, 1.399, -0.000)],
|
182 |
+
['NE2', 6, (0.593, -1.189, -0.001)],
|
183 |
+
['OE1', 6, (0.634, 1.060, 0.000)],
|
184 |
+
],
|
185 |
+
'GLU': [
|
186 |
+
['N', 0, (-0.528, 1.361, 0.000)],
|
187 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
188 |
+
['C', 0, (1.526, -0.000, -0.000)],
|
189 |
+
['CB', 0, (-0.526, -0.781, -1.207)],
|
190 |
+
['O', 3, (0.626, 1.062, 0.000)],
|
191 |
+
['CG', 4, (0.615, 1.392, 0.000)],
|
192 |
+
['CD', 5, (0.600, 1.397, 0.000)],
|
193 |
+
['OE1', 6, (0.607, 1.095, -0.000)],
|
194 |
+
['OE2', 6, (0.589, -1.104, -0.001)],
|
195 |
+
],
|
196 |
+
'GLY': [
|
197 |
+
['N', 0, (-0.572, 1.337, 0.000)],
|
198 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
199 |
+
['C', 0, (1.517, -0.000, -0.000)],
|
200 |
+
['O', 3, (0.626, 1.062, -0.000)],
|
201 |
+
],
|
202 |
+
'HIS': [
|
203 |
+
['N', 0, (-0.527, 1.360, 0.000)],
|
204 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
205 |
+
['C', 0, (1.525, 0.000, 0.000)],
|
206 |
+
['CB', 0, (-0.525, -0.778, -1.208)],
|
207 |
+
['O', 3, (0.625, 1.063, 0.000)],
|
208 |
+
['CG', 4, (0.600, 1.370, -0.000)],
|
209 |
+
['CD2', 5, (0.889, -1.021, 0.003)],
|
210 |
+
['ND1', 5, (0.744, 1.160, -0.000)],
|
211 |
+
['CE1', 5, (2.030, 0.851, 0.002)],
|
212 |
+
['NE2', 5, (2.145, -0.466, 0.004)],
|
213 |
+
],
|
214 |
+
'ILE': [
|
215 |
+
['N', 0, (-0.493, 1.373, -0.000)],
|
216 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
217 |
+
['C', 0, (1.527, -0.000, -0.000)],
|
218 |
+
['CB', 0, (-0.536, -0.793, -1.213)],
|
219 |
+
['O', 3, (0.627, 1.062, -0.000)],
|
220 |
+
['CG1', 4, (0.534, 1.437, -0.000)],
|
221 |
+
['CG2', 4, (0.540, -0.785, -1.199)],
|
222 |
+
['CD1', 5, (0.619, 1.391, 0.000)],
|
223 |
+
],
|
224 |
+
'LEU': [
|
225 |
+
['N', 0, (-0.520, 1.363, 0.000)],
|
226 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
227 |
+
['C', 0, (1.525, -0.000, -0.000)],
|
228 |
+
['CB', 0, (-0.522, -0.773, -1.214)],
|
229 |
+
['O', 3, (0.625, 1.063, -0.000)],
|
230 |
+
['CG', 4, (0.678, 1.371, 0.000)],
|
231 |
+
['CD1', 5, (0.530, 1.430, -0.000)],
|
232 |
+
['CD2', 5, (0.535, -0.774, 1.200)],
|
233 |
+
],
|
234 |
+
'LYS': [
|
235 |
+
['N', 0, (-0.526, 1.362, -0.000)],
|
236 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
237 |
+
['C', 0, (1.526, 0.000, 0.000)],
|
238 |
+
['CB', 0, (-0.524, -0.778, -1.208)],
|
239 |
+
['O', 3, (0.626, 1.062, -0.000)],
|
240 |
+
['CG', 4, (0.619, 1.390, 0.000)],
|
241 |
+
['CD', 5, (0.559, 1.417, 0.000)],
|
242 |
+
['CE', 6, (0.560, 1.416, 0.000)],
|
243 |
+
['NZ', 7, (0.554, 1.387, 0.000)],
|
244 |
+
],
|
245 |
+
'MET': [
|
246 |
+
['N', 0, (-0.521, 1.364, -0.000)],
|
247 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
248 |
+
['C', 0, (1.525, 0.000, 0.000)],
|
249 |
+
['CB', 0, (-0.523, -0.776, -1.210)],
|
250 |
+
['O', 3, (0.625, 1.062, -0.000)],
|
251 |
+
['CG', 4, (0.613, 1.391, -0.000)],
|
252 |
+
['SD', 5, (0.703, 1.695, 0.000)],
|
253 |
+
['CE', 6, (0.320, 1.786, -0.000)],
|
254 |
+
],
|
255 |
+
'PHE': [
|
256 |
+
['N', 0, (-0.518, 1.363, 0.000)],
|
257 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
258 |
+
['C', 0, (1.524, 0.000, -0.000)],
|
259 |
+
['CB', 0, (-0.525, -0.776, -1.212)],
|
260 |
+
['O', 3, (0.626, 1.062, -0.000)],
|
261 |
+
['CG', 4, (0.607, 1.377, 0.000)],
|
262 |
+
['CD1', 5, (0.709, 1.195, -0.000)],
|
263 |
+
['CD2', 5, (0.706, -1.196, 0.000)],
|
264 |
+
['CE1', 5, (2.102, 1.198, -0.000)],
|
265 |
+
['CE2', 5, (2.098, -1.201, -0.000)],
|
266 |
+
['CZ', 5, (2.794, -0.003, -0.001)],
|
267 |
+
],
|
268 |
+
'PRO': [
|
269 |
+
['N', 0, (-0.566, 1.351, -0.000)],
|
270 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
271 |
+
['C', 0, (1.527, -0.000, 0.000)],
|
272 |
+
['CB', 0, (-0.546, -0.611, -1.293)],
|
273 |
+
['O', 3, (0.621, 1.066, 0.000)],
|
274 |
+
['CG', 4, (0.382, 1.445, 0.0)],
|
275 |
+
# ['CD', 5, (0.427, 1.440, 0.0)],
|
276 |
+
['CD', 5, (0.477, 1.424, 0.0)], # manually made angle 2 degrees larger
|
277 |
+
],
|
278 |
+
'SER': [
|
279 |
+
['N', 0, (-0.529, 1.360, -0.000)],
|
280 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
281 |
+
['C', 0, (1.525, -0.000, -0.000)],
|
282 |
+
['CB', 0, (-0.518, -0.777, -1.211)],
|
283 |
+
['O', 3, (0.626, 1.062, -0.000)],
|
284 |
+
['OG', 4, (0.503, 1.325, 0.000)],
|
285 |
+
],
|
286 |
+
'THR': [
|
287 |
+
['N', 0, (-0.517, 1.364, 0.000)],
|
288 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
289 |
+
['C', 0, (1.526, 0.000, -0.000)],
|
290 |
+
['CB', 0, (-0.516, -0.793, -1.215)],
|
291 |
+
['O', 3, (0.626, 1.062, 0.000)],
|
292 |
+
['CG2', 4, (0.550, -0.718, -1.228)],
|
293 |
+
['OG1', 4, (0.472, 1.353, 0.000)],
|
294 |
+
],
|
295 |
+
'TRP': [
|
296 |
+
['N', 0, (-0.521, 1.363, 0.000)],
|
297 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
298 |
+
['C', 0, (1.525, -0.000, 0.000)],
|
299 |
+
['CB', 0, (-0.523, -0.776, -1.212)],
|
300 |
+
['O', 3, (0.627, 1.062, 0.000)],
|
301 |
+
['CG', 4, (0.609, 1.370, -0.000)],
|
302 |
+
['CD1', 5, (0.824, 1.091, 0.000)],
|
303 |
+
['CD2', 5, (0.854, -1.148, -0.005)],
|
304 |
+
['CE2', 5, (2.186, -0.678, -0.007)],
|
305 |
+
['CE3', 5, (0.622, -2.530, -0.007)],
|
306 |
+
['NE1', 5, (2.140, 0.690, -0.004)],
|
307 |
+
['CH2', 5, (3.028, -2.890, -0.013)],
|
308 |
+
['CZ2', 5, (3.283, -1.543, -0.011)],
|
309 |
+
['CZ3', 5, (1.715, -3.389, -0.011)],
|
310 |
+
],
|
311 |
+
'TYR': [
|
312 |
+
['N', 0, (-0.522, 1.362, 0.000)],
|
313 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
314 |
+
['C', 0, (1.524, -0.000, -0.000)],
|
315 |
+
['CB', 0, (-0.522, -0.776, -1.213)],
|
316 |
+
['O', 3, (0.627, 1.062, -0.000)],
|
317 |
+
['CG', 4, (0.607, 1.382, -0.000)],
|
318 |
+
['CD1', 5, (0.716, 1.195, -0.000)],
|
319 |
+
['CD2', 5, (0.713, -1.194, -0.001)],
|
320 |
+
['CE1', 5, (2.107, 1.200, -0.002)],
|
321 |
+
['CE2', 5, (2.104, -1.201, -0.003)],
|
322 |
+
['OH', 5, (4.168, -0.002, -0.005)],
|
323 |
+
['CZ', 5, (2.791, -0.001, -0.003)],
|
324 |
+
],
|
325 |
+
'VAL': [
|
326 |
+
['N', 0, (-0.494, 1.373, -0.000)],
|
327 |
+
['CA', 0, (0.000, 0.000, 0.000)],
|
328 |
+
['C', 0, (1.527, -0.000, -0.000)],
|
329 |
+
['CB', 0, (-0.533, -0.795, -1.213)],
|
330 |
+
['O', 3, (0.627, 1.062, -0.000)],
|
331 |
+
['CG1', 4, (0.540, 1.429, -0.000)],
|
332 |
+
['CG2', 4, (0.533, -0.776, 1.203)],
|
333 |
+
],
|
334 |
+
}
|
335 |
+
|
336 |
+
# A list of atoms (excluding hydrogen) for each AA type. PDB naming convention.
|
337 |
+
residue_atoms = {
|
338 |
+
'ALA': ['C', 'CA', 'CB', 'N', 'O'],
|
339 |
+
'ARG': ['C', 'CA', 'CB', 'CG', 'CD', 'CZ', 'N', 'NE', 'O', 'NH1', 'NH2'],
|
340 |
+
'ASP': ['C', 'CA', 'CB', 'CG', 'N', 'O', 'OD1', 'OD2'],
|
341 |
+
'ASN': ['C', 'CA', 'CB', 'CG', 'N', 'ND2', 'O', 'OD1'],
|
342 |
+
'CYS': ['C', 'CA', 'CB', 'N', 'O', 'SG'],
|
343 |
+
'GLU': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'O', 'OE1', 'OE2'],
|
344 |
+
'GLN': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'NE2', 'O', 'OE1'],
|
345 |
+
'GLY': ['C', 'CA', 'N', 'O'],
|
346 |
+
'HIS': ['C', 'CA', 'CB', 'CG', 'CD2', 'CE1', 'N', 'ND1', 'NE2', 'O'],
|
347 |
+
'ILE': ['C', 'CA', 'CB', 'CG1', 'CG2', 'CD1', 'N', 'O'],
|
348 |
+
'LEU': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'N', 'O'],
|
349 |
+
'LYS': ['C', 'CA', 'CB', 'CG', 'CD', 'CE', 'N', 'NZ', 'O'],
|
350 |
+
'MET': ['C', 'CA', 'CB', 'CG', 'CE', 'N', 'O', 'SD'],
|
351 |
+
'PHE': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'N', 'O'],
|
352 |
+
'PRO': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'O'],
|
353 |
+
'SER': ['C', 'CA', 'CB', 'N', 'O', 'OG'],
|
354 |
+
'THR': ['C', 'CA', 'CB', 'CG2', 'N', 'O', 'OG1'],
|
355 |
+
'TRP': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE2', 'CE3', 'CZ2', 'CZ3',
|
356 |
+
'CH2', 'N', 'NE1', 'O'],
|
357 |
+
'TYR': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'N', 'O',
|
358 |
+
'OH'],
|
359 |
+
'VAL': ['C', 'CA', 'CB', 'CG1', 'CG2', 'N', 'O']
|
360 |
+
}
|
361 |
+
|
362 |
+
# Naming swaps for ambiguous atom names.
|
363 |
+
# Due to symmetries in the amino acids the naming of atoms is ambiguous in
|
364 |
+
# 4 of the 20 amino acids.
|
365 |
+
# (The LDDT paper lists 7 amino acids as ambiguous, but the naming ambiguities
|
366 |
+
# in LEU, VAL and ARG can be resolved by using the 3d constellations of
|
367 |
+
# the 'ambiguous' atoms and their neighbours)
|
368 |
+
residue_atom_renaming_swaps = {
|
369 |
+
'ASP': {'OD1': 'OD2'},
|
370 |
+
'GLU': {'OE1': 'OE2'},
|
371 |
+
'PHE': {'CD1': 'CD2', 'CE1': 'CE2'},
|
372 |
+
'TYR': {'CD1': 'CD2', 'CE1': 'CE2'},
|
373 |
+
}
|
374 |
+
|
375 |
+
# Van der Waals radii [Angstrom] of the atoms (from Wikipedia)
|
376 |
+
van_der_waals_radius = {
|
377 |
+
'C': 1.7,
|
378 |
+
'N': 1.55,
|
379 |
+
'O': 1.52,
|
380 |
+
'S': 1.8,
|
381 |
+
}
|
382 |
+
|
383 |
+
# Record types for the stereo-chemical literature values.
# NOTE: the third atom field of BondAngle is spelled `atom3name` (no
# underscore); it is part of the public interface and must stay as is.
Bond = collections.namedtuple(
    'Bond', ['atom1_name', 'atom2_name', 'length', 'stddev'])
BondAngle = collections.namedtuple(
    'BondAngle',
    ['atom1_name', 'atom2_name', 'atom3name', 'angle_rad', 'stddev'])


@functools.lru_cache(maxsize=None)
def load_stereo_chemical_props() -> Tuple[Mapping[str, List[Bond]],
                                          Mapping[str, List[Bond]],
                                          Mapping[str, List[BondAngle]]]:
  """Load stereo_chemical_props.txt into a nice structure.

  Load literature values for bond lengths and bond angles and translate
  bond angles into the length of the opposite edge of the triangle
  ("residue_virtual_bonds").

  The file is looked up at 'alphafold/common/stereo_chemical_props.txt'
  relative to the current working directory first. If it is not there, a
  file named 'stereo_chemical_props.txt' located next to this module is used
  instead, so that loading does not depend on where the process was started.

  The parser expects: one header line, bond rows of the form
  `<atom1>-<atom2> <resname> <length> <stddev>` terminated by a line
  containing only '-', one empty line, one header line, then angle rows of
  the form `<atom1>-<atom2>-<atom3> <resname> <degrees> <stddev_degrees>`
  terminated by a line containing only '-'.

  The result is memoized, so the file is read at most once per process.

  Returns:
    residue_bonds: dict that maps resname --> list of Bond tuples
    residue_virtual_bonds: dict that maps resname --> list of Bond tuples
    residue_bond_angles: dict that maps resname --> list of BondAngle tuples

  Raises:
    FileNotFoundError: If the file exists at neither location.
    KeyError: If a bond angle refers to a bond that has no length entry.
  """
  import os  # Function-scope import; only needed for the path fallback.

  stereo_chemical_props_path = (
      'alphafold/common/stereo_chemical_props.txt')
  if not os.path.exists(stereo_chemical_props_path):
    # Fall back to a copy that sits next to this module. If that does not
    # exist either, keep the original path so that open() raises the same
    # FileNotFoundError as before.
    module_file = globals().get('__file__')
    if module_file:
      sibling_path = os.path.join(
          os.path.dirname(os.path.abspath(module_file)),
          'stereo_chemical_props.txt')
      if os.path.exists(sibling_path):
        stereo_chemical_props_path = sibling_path
  with open(stereo_chemical_props_path, 'rt') as f:
    stereo_chemical_props = f.read()
  lines_iter = iter(stereo_chemical_props.splitlines())

  # Load bond lengths.
  residue_bonds = {}
  next(lines_iter)  # Skip header line.
  for line in lines_iter:
    if line.strip() == '-':
      break
    bond, resname, length, stddev = line.split()
    atom1, atom2 = bond.split('-')
    residue_bonds.setdefault(resname, []).append(
        Bond(atom1, atom2, float(length), float(stddev)))
  residue_bonds['UNK'] = []

  # Load bond angles (stored in radians).
  residue_bond_angles = {}
  next(lines_iter)  # Skip empty line.
  next(lines_iter)  # Skip header line.
  for line in lines_iter:
    if line.strip() == '-':
      break
    bond, resname, angle_degree, stddev_degree = line.split()
    atom1, atom2, atom3 = bond.split('-')
    residue_bond_angles.setdefault(resname, []).append(
        BondAngle(atom1, atom2, atom3,
                  float(angle_degree) / 180. * np.pi,
                  float(stddev_degree) / 180. * np.pi))
  residue_bond_angles['UNK'] = []

  def make_bond_key(atom1_name, atom2_name):
    """Unique key to lookup bonds."""
    # Sorting makes the key independent of the atom order.
    return '-'.join(sorted([atom1_name, atom2_name]))

  # Translate bond angles into distances ("virtual bonds").
  residue_virtual_bonds = {}
  for resname, bond_angles in residue_bond_angles.items():
    # Create a fast lookup dict for bond lengths.
    bond_cache = {}
    for b in residue_bonds[resname]:
      bond_cache[make_bond_key(b.atom1_name, b.atom2_name)] = b
    residue_virtual_bonds[resname] = []
    for ba in bond_angles:
      bond1 = bond_cache[make_bond_key(ba.atom1_name, ba.atom2_name)]
      bond2 = bond_cache[make_bond_key(ba.atom2_name, ba.atom3name)]

      # Compute distance between atom1 and atom3 using the law of cosines
      # c^2 = a^2 + b^2 - 2ab*cos(gamma).
      gamma = ba.angle_rad
      length = np.sqrt(bond1.length**2 + bond2.length**2
                       - 2 * bond1.length * bond2.length * np.cos(gamma))

      # Propagation of uncertainty assuming uncorrelated errors.
      # dl_outer is d(sqrt(u))/du evaluated at u = length**2.
      dl_outer = 0.5 / length
      dl_dgamma = (2 * bond1.length * bond2.length * np.sin(gamma)) * dl_outer
      dl_db1 = (2 * bond1.length - 2 * bond2.length * np.cos(gamma)) * dl_outer
      dl_db2 = (2 * bond2.length - 2 * bond1.length * np.cos(gamma)) * dl_outer
      stddev = np.sqrt((dl_dgamma * ba.stddev)**2 +
                       (dl_db1 * bond1.stddev)**2 +
                       (dl_db2 * bond2.stddev)**2)
      residue_virtual_bonds[resname].append(
          Bond(ba.atom1_name, ba.atom3name, length, stddev))

  return (residue_bonds,
          residue_virtual_bonds,
          residue_bond_angles)
|
477 |
+
|
478 |
+
|
479 |
+
# Between-residue bond lengths for general bonds (first element) and for Proline
|
480 |
+
# (second element).
|
481 |
+
between_res_bond_length_c_n = [1.329, 1.341]
|
482 |
+
between_res_bond_length_stddev_c_n = [0.014, 0.016]
|
483 |
+
|
484 |
+
# Between-residue cos_angles.
|
485 |
+
between_res_cos_angles_c_n_ca = [-0.5203, 0.0353] # degrees: 121.352 +- 2.315
|
486 |
+
between_res_cos_angles_ca_c_n = [-0.4473, 0.0311] # degrees: 116.568 +- 1.995
|
487 |
+
|
488 |
+
# This mapping is used when we need to store atom data in a format that requires
|
489 |
+
# fixed atom data size for every residue (e.g. a numpy array).
|
490 |
+
atom_types = [
|
491 |
+
'N', 'CA', 'C', 'CB', 'O', 'CG', 'CG1', 'CG2', 'OG', 'OG1', 'SG', 'CD',
|
492 |
+
'CD1', 'CD2', 'ND1', 'ND2', 'OD1', 'OD2', 'SD', 'CE', 'CE1', 'CE2', 'CE3',
|
493 |
+
'NE', 'NE1', 'NE2', 'OE1', 'OE2', 'CH2', 'NH1', 'NH2', 'OH', 'CZ', 'CZ2',
|
494 |
+
'CZ3', 'NZ', 'OXT'
|
495 |
+
]
|
496 |
+
atom_order = {atom_type: i for i, atom_type in enumerate(atom_types)}
|
497 |
+
atom_type_num = len(atom_types) # := 37.
|
498 |
+
|
499 |
+
# A compact atom encoding with 14 columns
|
500 |
+
# pylint: disable=line-too-long
|
501 |
+
# pylint: disable=bad-whitespace
|
502 |
+
restype_name_to_atom14_names = {
|
503 |
+
'ALA': ['N', 'CA', 'C', 'O', 'CB', '', '', '', '', '', '', '', '', ''],
|
504 |
+
'ARG': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'NE', 'CZ', 'NH1', 'NH2', '', '', ''],
|
505 |
+
'ASN': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'OD1', 'ND2', '', '', '', '', '', ''],
|
506 |
+
'ASP': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'OD1', 'OD2', '', '', '', '', '', ''],
|
507 |
+
'CYS': ['N', 'CA', 'C', 'O', 'CB', 'SG', '', '', '', '', '', '', '', ''],
|
508 |
+
'GLN': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'OE1', 'NE2', '', '', '', '', ''],
|
509 |
+
'GLU': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'OE1', 'OE2', '', '', '', '', ''],
|
510 |
+
'GLY': ['N', 'CA', 'C', 'O', '', '', '', '', '', '', '', '', '', ''],
|
511 |
+
'HIS': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'ND1', 'CD2', 'CE1', 'NE2', '', '', '', ''],
|
512 |
+
'ILE': ['N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2', 'CD1', '', '', '', '', '', ''],
|
513 |
+
'LEU': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', '', '', '', '', '', ''],
|
514 |
+
'LYS': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'CE', 'NZ', '', '', '', '', ''],
|
515 |
+
'MET': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'SD', 'CE', '', '', '', '', '', ''],
|
516 |
+
'PHE': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', '', '', ''],
|
517 |
+
'PRO': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', '', '', '', '', '', '', ''],
|
518 |
+
'SER': ['N', 'CA', 'C', 'O', 'CB', 'OG', '', '', '', '', '', '', '', ''],
|
519 |
+
'THR': ['N', 'CA', 'C', 'O', 'CB', 'OG1', 'CG2', '', '', '', '', '', '', ''],
|
520 |
+
'TRP': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'NE1', 'CE2', 'CE3', 'CZ2', 'CZ3', 'CH2'],
|
521 |
+
'TYR': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'OH', '', ''],
|
522 |
+
'VAL': ['N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2', '', '', '', '', '', '', ''],
|
523 |
+
'UNK': ['', '', '', '', '', '', '', '', '', '', '', '', '', ''],
|
524 |
+
|
525 |
+
}
|
526 |
+
# pylint: enable=line-too-long
|
527 |
+
# pylint: enable=bad-whitespace
|
528 |
+
|
529 |
+
|
530 |
+
# This is the standard residue order when coding AA type as a number.
|
531 |
+
# Reproduce it by taking 3-letter AA codes and sorting them alphabetically.
|
532 |
+
restypes = [
|
533 |
+
'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P',
|
534 |
+
'S', 'T', 'W', 'Y', 'V'
|
535 |
+
]
|
536 |
+
restype_order = {restype: i for i, restype in enumerate(restypes)}
|
537 |
+
restype_num = len(restypes) # := 20.
|
538 |
+
unk_restype_index = restype_num # Catch-all index for unknown restypes.
|
539 |
+
|
540 |
+
restypes_with_x = restypes + ['X']
|
541 |
+
restype_order_with_x = {restype: i for i, restype in enumerate(restypes_with_x)}
|
542 |
+
|
543 |
+
|
544 |
+
def sequence_to_onehot(
    sequence: str,
    mapping: Mapping[str, int],
    map_unknown_to_x: bool = False) -> np.ndarray:
  """One-hot encodes an amino acid sequence.

  Args:
    sequence: An amino acid sequence.
    mapping: A dictionary mapping amino acids to integers. Its values must
      cover 0..max without gaps (several keys may share a value).
    map_unknown_to_x: If True, every upper-case alphabetic character missing
      from the mapping is encoded as the unknown amino acid 'X'; the mapping
      must then contain 'X'. If False, a character missing from the mapping
      is an error.

  Returns:
    An int32 numpy array of shape (seq_len, num_unique_aas) in which row i
    has a single 1 in the column assigned to sequence[i].

  Raises:
    ValueError: If the mapping doesn't contain values from 0 to
      num_unique_aas - 1 without any gaps, or if map_unknown_to_x is True and
      the sequence contains a character that is not an upper-case letter.
    KeyError: If map_unknown_to_x is False and a character is missing from
      the mapping, or if map_unknown_to_x is True and 'X' is missing from it.
  """
  width = max(mapping.values()) + 1

  distinct_ids = sorted(set(mapping.values()))
  if distinct_ids != list(range(width)):
    raise ValueError('The mapping must have values from 0 to num_unique_aas-1 '
                     'without any gaps. Got: %s' % sorted(mapping.values()))

  encoded = np.zeros((len(sequence), width), dtype=np.int32)

  for row, residue in enumerate(sequence):
    if not map_unknown_to_x:
      # Strict mode: unknown characters surface as KeyError.
      column = mapping[residue]
    elif residue.isalpha() and residue.isupper():
      # mapping['X'] is looked up eagerly, so 'X' has to be present even
      # when `residue` itself is a known key.
      column = mapping.get(residue, mapping['X'])
    else:
      raise ValueError(f'Invalid character in the sequence: {residue}')
    encoded[row, column] = 1

  return encoded
|
585 |
+
|
586 |
+
|
587 |
+
restype_1to3 = {
|
588 |
+
'A': 'ALA',
|
589 |
+
'R': 'ARG',
|
590 |
+
'N': 'ASN',
|
591 |
+
'D': 'ASP',
|
592 |
+
'C': 'CYS',
|
593 |
+
'Q': 'GLN',
|
594 |
+
'E': 'GLU',
|
595 |
+
'G': 'GLY',
|
596 |
+
'H': 'HIS',
|
597 |
+
'I': 'ILE',
|
598 |
+
'L': 'LEU',
|
599 |
+
'K': 'LYS',
|
600 |
+
'M': 'MET',
|
601 |
+
'F': 'PHE',
|
602 |
+
'P': 'PRO',
|
603 |
+
'S': 'SER',
|
604 |
+
'T': 'THR',
|
605 |
+
'W': 'TRP',
|
606 |
+
'Y': 'TYR',
|
607 |
+
'V': 'VAL',
|
608 |
+
}
|
609 |
+
|
610 |
+
|
611 |
+
# NB: restype_3to1 differs from Bio.PDB.protein_letters_3to1 by being a simple
|
612 |
+
# 1-to-1 mapping of 3 letter names to one letter names. The latter contains
|
613 |
+
# many more, and less common, three letter names as keys and maps many of these
|
614 |
+
# to the same one letter name (including 'X' and 'U' which we don't use here).
|
615 |
+
restype_3to1 = {v: k for k, v in restype_1to3.items()}
|
616 |
+
|
617 |
+
# Define a restype name for all unknown residues.
|
618 |
+
unk_restype = 'UNK'
|
619 |
+
|
620 |
+
resnames = [restype_1to3[r] for r in restypes] + [unk_restype]
|
621 |
+
resname_to_idx = {resname: i for i, resname in enumerate(resnames)}
|
622 |
+
|
623 |
+
|
624 |
+
# The mapping here uses hhblits convention, so that B is mapped to D, J and O
|
625 |
+
# are mapped to X, U is mapped to C, and Z is mapped to E. Other than that the
|
626 |
+
# remaining 20 amino acids are kept in alphabetical order.
|
627 |
+
# There are 2 non-amino acid codes, X (representing any amino acid) and
|
628 |
+
# "-" representing a missing amino acid in an alignment. The id for these
|
629 |
+
# codes is put at the end (20 and 21) so that they can easily be ignored if
|
630 |
+
# desired.
|
631 |
+
HHBLITS_AA_TO_ID = {
|
632 |
+
'A': 0,
|
633 |
+
'B': 2,
|
634 |
+
'C': 1,
|
635 |
+
'D': 2,
|
636 |
+
'E': 3,
|
637 |
+
'F': 4,
|
638 |
+
'G': 5,
|
639 |
+
'H': 6,
|
640 |
+
'I': 7,
|
641 |
+
'J': 20,
|
642 |
+
'K': 8,
|
643 |
+
'L': 9,
|
644 |
+
'M': 10,
|
645 |
+
'N': 11,
|
646 |
+
'O': 20,
|
647 |
+
'P': 12,
|
648 |
+
'Q': 13,
|
649 |
+
'R': 14,
|
650 |
+
'S': 15,
|
651 |
+
'T': 16,
|
652 |
+
'U': 1,
|
653 |
+
'V': 17,
|
654 |
+
'W': 18,
|
655 |
+
'X': 20,
|
656 |
+
'Y': 19,
|
657 |
+
'Z': 3,
|
658 |
+
'-': 21,
|
659 |
+
}
|
660 |
+
|
661 |
+
# Partial inversion of HHBLITS_AA_TO_ID.
|
662 |
+
ID_TO_HHBLITS_AA = {
|
663 |
+
0: 'A',
|
664 |
+
1: 'C', # Also U.
|
665 |
+
2: 'D', # Also B.
|
666 |
+
3: 'E', # Also Z.
|
667 |
+
4: 'F',
|
668 |
+
5: 'G',
|
669 |
+
6: 'H',
|
670 |
+
7: 'I',
|
671 |
+
8: 'K',
|
672 |
+
9: 'L',
|
673 |
+
10: 'M',
|
674 |
+
11: 'N',
|
675 |
+
12: 'P',
|
676 |
+
13: 'Q',
|
677 |
+
14: 'R',
|
678 |
+
15: 'S',
|
679 |
+
16: 'T',
|
680 |
+
17: 'V',
|
681 |
+
18: 'W',
|
682 |
+
19: 'Y',
|
683 |
+
20: 'X', # Includes J and O.
|
684 |
+
21: '-',
|
685 |
+
}
|
686 |
+
|
687 |
+
# Our residue alphabet extended by the unknown residue and the alignment gap.
restypes_with_x_and_gap = restypes + ['X', '-']
# Entry i is the position in restypes_with_x_and_gap of the letter that
# ID_TO_HHBLITS_AA assigns to hhblits id i.
MAP_HHBLITS_AATYPE_TO_OUR_AATYPE = tuple([
    restypes_with_x_and_gap.index(ID_TO_HHBLITS_AA[hhblits_id])
    for hhblits_id in range(len(restypes_with_x_and_gap))
])
|
691 |
+
|
692 |
+
|
693 |
+
def _make_standard_atom_mask() -> np.ndarray:
  """Returns [num_res_types, num_atom_types] mask array.

  Entry [r, a] is 1 when residue_atoms lists atom type `a` for residue type
  `r`, else 0. The array has restype_num + 1 rows; the final row, standing
  for the unknown residue type, is never written and stays all zeros.
  """
  atom_mask = np.zeros([restype_num + 1, atom_type_num], dtype=np.int32)
  for row, letter in enumerate(restypes):
    for name in residue_atoms[restype_1to3[letter]]:
      atom_mask[row, atom_order[name]] = 1
  return atom_mask
|
704 |
+
|
705 |
+
|
706 |
+
STANDARD_ATOM_MASK = _make_standard_atom_mask()
|
707 |
+
|
708 |
+
|
709 |
+
# A one hot representation for the first and second atoms defining the axis
|
710 |
+
# of rotation for each chi-angle in each residue.
|
711 |
+
def chi_angle_atom(atom_index: int) -> np.ndarray:
  """Define chi-angle rigid groups via one-hot representations.

  For every residue type and each of its (up to four) chi angles, takes the
  atom at position `atom_index` of the chi angle's atom quadruple in
  chi_angles_atoms and encodes it as a one-hot vector over atom_types.

  Args:
    atom_index: Position (0..3) inside each chi-angle atom quadruple.

  Returns:
    Array with axes (residue type, atom type, chi angle). The residue axis
    follows `restypes` with one trailing all-zero entry for residue `X`.
  """
  # Atom-type index per chi angle, padded with -1 up to four entries.
  padded_indices = {}
  for resname, quads in chi_angles_atoms.items():
    picked = [atom_types.index(quad[atom_index]) for quad in quads]
    picked += [-1] * (4 - len(picked))
    padded_indices[resname] = picked

  # Indexing the identity matrix with -1 selects its last row, so padded
  # slots become a one-hot on the last entry of atom_types, not zeros.
  # NOTE(review): presumably callers mask unused chi slots -- confirm.
  identity = np.eye(atom_type_num)
  stacked = [identity[padded_indices[restype_1to3[letter]]]
             for letter in restypes]
  stacked.append(np.zeros([4, atom_type_num]))  # Add zeros for residue `X`.

  # Stack to (residue, chi, atom type), then swap the last two axes.
  return np.transpose(np.stack(stacked, axis=0), [0, 2, 1])
|
731 |
+
|
732 |
+
chi_atom_1_one_hot = chi_angle_atom(1)
|
733 |
+
chi_atom_2_one_hot = chi_angle_atom(2)
|
734 |
+
|
735 |
+
# An array like chi_angles_atoms but using indices rather than names.
|
736 |
+
# For each residue type in `restypes` order: the chi-angle atom quadruples
# with atom names replaced by their atom_order index, padded with
# [0, 0, 0, 0] rows so that every residue has exactly four quadruples.
chi_angles_atom_indices = np.array([
    [[atom_order[atom_name] for atom_name in quad]
     for quad in chi_angles_atoms[restype_1to3[r]]]
    + [[0, 0, 0, 0]] * (4 - len(chi_angles_atoms[restype_1to3[r]]))
    for r in restypes])
|
742 |
+
|
743 |
+
# Mapping from (res_name, atom_name) pairs to the atom's chi group index
|
744 |
+
# and atom index within that group.
|
745 |
+
# Keys are (res_name, atom_name); each value lists every
# (chi group index, position within that group's atom quadruple) at which
# the atom occurs in chi_angles_atoms.
chi_groups_for_atom = {}
for res_name, chi_angle_atoms_for_res in chi_angles_atoms.items():
  for chi_group_i, chi_group in enumerate(chi_angle_atoms_for_res):
    for atom_i, atom in enumerate(chi_group):
      chi_groups_for_atom.setdefault((res_name, atom), []).append(
          (chi_group_i, atom_i))
|
751 |
+
|
752 |
+
|
753 |
+
def _make_rigid_transformation_4x4(ex, ey, translation):
|
754 |
+
"""Create a rigid 4x4 transformation matrix from two axes and transl."""
|
755 |
+
# Normalize ex.
|
756 |
+
ex_normalized = ex / np.linalg.norm(ex)
|
757 |
+
|
758 |
+
# make ey perpendicular to ex
|
759 |
+
ey_normalized = ey - np.dot(ey, ex_normalized) * ex_normalized
|
760 |
+
ey_normalized /= np.linalg.norm(ey_normalized)
|
761 |
+
|
762 |
+
# compute ez as cross product
|
763 |
+
eznorm = np.cross(ex_normalized, ey_normalized)
|
764 |
+
m = np.stack([ex_normalized, ey_normalized, eznorm, translation]).transpose()
|
765 |
+
m = np.concatenate([m, [[0., 0., 0., 1.]]], axis=0)
|
766 |
+
return m
|
767 |
+
|
768 |
+
|
769 |
+
# create an array with (restype, atomtype) --> rigid_group_idx
# and an array with (restype, atomtype, coord) for the atom positions
# and compute affine transformation matrices (4,4) from one rigid group to the
# previous group
# These are allocated as zeros here and filled in place by
# _make_rigid_group_constants() below.
# NOTE: the integer arrays use the builtin `int` as dtype. The former `np.int`
# alias (which was just `int`) was deprecated in NumPy 1.20 and removed in
# NumPy 1.24, where it raises AttributeError at import time.
restype_atom37_to_rigid_group = np.zeros([21, 37], dtype=int)
restype_atom37_mask = np.zeros([21, 37], dtype=np.float32)
restype_atom37_rigid_group_positions = np.zeros([21, 37, 3], dtype=np.float32)
restype_atom14_to_rigid_group = np.zeros([21, 14], dtype=int)
restype_atom14_mask = np.zeros([21, 14], dtype=np.float32)
restype_atom14_rigid_group_positions = np.zeros([21, 14, 3], dtype=np.float32)
restype_rigid_group_default_frame = np.zeros([21, 8, 4, 4], dtype=np.float32)
|
780 |
+
|
781 |
+
###############################################
# Per-residue-type index maps between the atom14 and atom37 atom layouts.
restype_atom14_to_atom37 = []
restype_atom37_to_atom14 = []
for rt in restypes:
  atom_names = restype_name_to_atom14_names[restype_1to3[rt]]
  # Empty names are unused atom14 slots; they map to atom37 index 0.
  restype_atom14_to_atom37.append(
      [(atom_order[name] if name else 0) for name in atom_names])
  atom_name_to_idx14 = {name: i for i, name in enumerate(atom_names)}
  # atom37 types that this residue does not have map to atom14 index 0.
  restype_atom37_to_atom14.append(
      [(atom_name_to_idx14[name] if name in atom_name_to_idx14 else 0)
       for name in atom_types])
# One extra all-zero row after the per-residue rows (presumably the unknown
# residue type `X`, matching the 21-row arrays above - confirm with callers).
restype_atom14_to_atom37.append([0] * 14)
restype_atom37_to_atom14.append([0] * 37)
restype_atom14_to_atom37 = np.array(restype_atom14_to_atom37, dtype=np.int32)
restype_atom37_to_atom14 = np.array(restype_atom37_to_atom14, dtype=np.int32)
################################################
|
794 |
+
|
795 |
+
def _make_rigid_group_constants():
  """Fill the module-level rigid-group arrays in place.

  Populates `restype_atom37_to_rigid_group`, `restype_atom37_mask`,
  `restype_atom37_rigid_group_positions`, their atom14 counterparts and
  `restype_rigid_group_default_frame` from `rigid_group_atom_positions` and
  `chi_angles_atoms`. Only the rows of the residue types in `restypes` are
  written; any remaining row keeps its zero initialisation.
  """
  # Pass 1: per-atom group index, presence mask and local position, in both the
  # atom37 and the atom14 layouts.
  for restype, restype_letter in enumerate(restypes):
    resname = restype_1to3[restype_letter]
    for atomname, group_idx, atom_position in rigid_group_atom_positions[
        resname]:
      atomtype = atom_order[atomname]
      restype_atom37_to_rigid_group[restype, atomtype] = group_idx
      restype_atom37_mask[restype, atomtype] = 1
      restype_atom37_rigid_group_positions[restype, atomtype, :] = atom_position

      atom14idx = restype_name_to_atom14_names[resname].index(atomname)
      restype_atom14_to_rigid_group[restype, atom14idx] = group_idx
      restype_atom14_mask[restype, atom14idx] = 1
      restype_atom14_rigid_group_positions[restype,
                                           atom14idx, :] = atom_position

  # Pass 2: default 4x4 frame of each of the 8 rigid groups per residue type.
  for restype, restype_letter in enumerate(restypes):
    resname = restype_1to3[restype_letter]
    atom_positions = {name: np.array(pos) for name, _, pos
                      in rigid_group_atom_positions[resname]}

    # backbone to backbone is the identity transform
    restype_rigid_group_default_frame[restype, 0, :, :] = np.eye(4)

    # pre-omega-frame to backbone (currently dummy identity matrix)
    restype_rigid_group_default_frame[restype, 1, :, :] = np.eye(4)

    # phi-frame to backbone
    mat = _make_rigid_transformation_4x4(
        ex=atom_positions['N'] - atom_positions['CA'],
        ey=np.array([1., 0., 0.]),
        translation=atom_positions['N'])
    restype_rigid_group_default_frame[restype, 2, :, :] = mat

    # psi-frame to backbone
    mat = _make_rigid_transformation_4x4(
        ex=atom_positions['C'] - atom_positions['CA'],
        ey=atom_positions['CA'] - atom_positions['N'],
        translation=atom_positions['C'])
    restype_rigid_group_default_frame[restype, 3, :, :] = mat

    # chi1-frame to backbone
    if chi_angles_mask[restype][0]:
      base_atom_names = chi_angles_atoms[resname][0]
      base_atom_positions = [atom_positions[name] for name in base_atom_names]
      mat = _make_rigid_transformation_4x4(
          ex=base_atom_positions[2] - base_atom_positions[1],
          ey=base_atom_positions[0] - base_atom_positions[1],
          translation=base_atom_positions[2])
      restype_rigid_group_default_frame[restype, 4, :, :] = mat

    # chi2-frame to chi1-frame
    # chi3-frame to chi2-frame
    # chi4-frame to chi3-frame
    # luckily all rotation axes for the next frame start at (0,0,0) of the
    # previous frame
    for chi_idx in range(1, 4):
      if chi_angles_mask[restype][chi_idx]:
        axis_end_atom_name = chi_angles_atoms[resname][chi_idx][2]
        axis_end_atom_position = atom_positions[axis_end_atom_name]
        mat = _make_rigid_transformation_4x4(
            ex=axis_end_atom_position,
            ey=np.array([-1., 0., 0.]),
            translation=axis_end_atom_position)
        restype_rigid_group_default_frame[restype, 4 + chi_idx, :, :] = mat


# Run once at import time so the arrays above are ready for use.
_make_rigid_group_constants()
|
867 |
+
|
868 |
+
|
869 |
+
def make_atom14_dists_bounds(overlap_tolerance=1.5,
                             bond_length_tolerance_factor=15):
  """compute upper and lower bounds for bonds to assess violations.

  Args:
    overlap_tolerance: Amount subtracted from the sum of two atoms' van der
      Waals radii to obtain the clash lower bound for a non-bonded pair.
    bond_length_tolerance_factor: Number of standard deviations around the
      reference length allowed for bonded (and virtually bonded) pairs.

  Returns:
    Dict with float32 arrays 'lower_bound', 'upper_bound' and 'stddev', each
    of shape (21, 14, 14) indexed by (restype, atom14 index, atom14 index).
    Entries for unused atom14 slots and the diagonal stay zero.
  """
  restype_atom14_bond_lower_bound = np.zeros([21, 14, 14], np.float32)
  restype_atom14_bond_upper_bound = np.zeros([21, 14, 14], np.float32)
  restype_atom14_bond_stddev = np.zeros([21, 14, 14], np.float32)
  residue_bonds, residue_virtual_bonds, _ = load_stereo_chemical_props()
  for restype, restype_letter in enumerate(restypes):
    resname = restype_1to3[restype_letter]
    atom_list = restype_name_to_atom14_names[resname]

    # create lower and upper bounds for clashes
    for atom1_idx, atom1_name in enumerate(atom_list):
      if not atom1_name:
        continue
      # Radii are looked up by the first character of the atom name.
      atom1_radius = van_der_waals_radius[atom1_name[0]]
      for atom2_idx, atom2_name in enumerate(atom_list):
        if (not atom2_name) or atom1_idx == atom2_idx:
          continue
        atom2_radius = van_der_waals_radius[atom2_name[0]]
        lower = atom1_radius + atom2_radius - overlap_tolerance
        upper = 1e10  # Effectively no upper bound for non-bonded pairs.
        restype_atom14_bond_lower_bound[restype, atom1_idx, atom2_idx] = lower
        restype_atom14_bond_lower_bound[restype, atom2_idx, atom1_idx] = lower
        restype_atom14_bond_upper_bound[restype, atom1_idx, atom2_idx] = upper
        restype_atom14_bond_upper_bound[restype, atom2_idx, atom1_idx] = upper

    # overwrite lower and upper bounds for bonds and angles
    for b in residue_bonds[resname] + residue_virtual_bonds[resname]:
      atom1_idx = atom_list.index(b.atom1_name)
      atom2_idx = atom_list.index(b.atom2_name)
      lower = b.length - bond_length_tolerance_factor * b.stddev
      upper = b.length + bond_length_tolerance_factor * b.stddev
      restype_atom14_bond_lower_bound[restype, atom1_idx, atom2_idx] = lower
      restype_atom14_bond_lower_bound[restype, atom2_idx, atom1_idx] = lower
      restype_atom14_bond_upper_bound[restype, atom1_idx, atom2_idx] = upper
      restype_atom14_bond_upper_bound[restype, atom2_idx, atom1_idx] = upper
      restype_atom14_bond_stddev[restype, atom1_idx, atom2_idx] = b.stddev
      restype_atom14_bond_stddev[restype, atom2_idx, atom1_idx] = b.stddev
  return {'lower_bound': restype_atom14_bond_lower_bound,  # shape (21,14,14)
          'upper_bound': restype_atom14_bond_upper_bound,  # shape (21,14,14)
          'stddev': restype_atom14_bond_stddev,  # shape (21,14,14)
         }
|
af_backprop/alphafold/data/__init__.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
"""Data pipeline for model features."""
|
af_backprop/alphafold/data/mmcif_parsing.py
ADDED
@@ -0,0 +1,384 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Parses the mmCIF file format."""
|
16 |
+
import collections
|
17 |
+
import dataclasses
|
18 |
+
import io
|
19 |
+
from typing import Any, Mapping, Optional, Sequence, Tuple
|
20 |
+
|
21 |
+
from absl import logging
|
22 |
+
from Bio import PDB
|
23 |
+
from Bio.Data import SCOPData
|
24 |
+
|
25 |
+
# Type aliases:
ChainId = str
PdbHeader = Mapping[str, Any]
PdbStructure = PDB.Structure.Structure
SeqRes = str
# Parsed mmCIF data: every data item name maps to its list of string values.
MmCIFDict = Mapping[str, Sequence[str]]
|
31 |
+
|
32 |
+
|
33 |
+
@dataclasses.dataclass(frozen=True)
class Monomer:
  """One entry of a polymer sequence: a component id and its sequence number."""
  id: str  # Taken from `_entity_poly_seq.mon_id` in _get_protein_chains.
  num: int  # Taken from `_entity_poly_seq.num`, converted to int.
|
37 |
+
|
38 |
+
|
39 |
+
# Note - mmCIF format provides no guarantees on the type of author-assigned
# sequence numbers. They need not be integers.
@dataclasses.dataclass(frozen=True)
class AtomSite:
  """One row of the mmCIF `_atom_site` loop, as built by _get_atom_site_list.

  NOTE: instances are constructed positionally from the raw parsed mmCIF
  columns, so every field actually holds a string at runtime regardless of
  the annotation below (parse() compares `model_num` against '1' and calls
  int() on `mmcif_seq_num`).
  """
  residue_name: str
  author_chain_id: str
  mmcif_chain_id: str
  author_seq_num: str
  mmcif_seq_num: int
  insertion_code: str
  hetatm_atom: str
  model_num: int
|
51 |
+
|
52 |
+
|
53 |
+
# Used to map SEQRES index to a residue in the structure.
@dataclasses.dataclass(frozen=True)
class ResiduePosition:
  """Author-assigned address of a residue within the structure."""
  chain_id: str  # Author chain id.
  residue_number: int  # Author sequence number, converted to int by parse().
  insertion_code: str  # ' ' when the mmCIF insertion code is unset.
|
59 |
+
|
60 |
+
|
61 |
+
@dataclasses.dataclass(frozen=True)
class ResidueAtPosition:
  """What occupies one SEQRES index: a resolved residue or a missing one."""
  position: Optional[ResiduePosition]  # None when the residue is missing.
  name: str  # Residue / component name.
  is_missing: bool  # True when no atom of model 1 was seen for this index.
  hetflag: str  # ' ', 'W' (water) or 'H_<name>' as assigned in parse().
|
67 |
+
|
68 |
+
|
69 |
+
@dataclasses.dataclass(frozen=True)
class MmcifObject:
  """Representation of a parsed mmCIF file.

  Contains:
    file_id: A meaningful name, e.g. a pdb_id. Should be unique amongst all
      files being processed.
    header: Biopython header.
    structure: Biopython structure.
    chain_to_seqres: Dict mapping chain_id to 1 letter amino acid sequence. E.g.
      {'A': 'ABCDEFG'}
    seqres_to_structure: Dict; for each chain_id contains a mapping between
      SEQRES index and a ResidueAtPosition. e.g. {'A': {0: ResidueAtPosition,
                                                        1: ResidueAtPosition,
                                                        ...}}
    raw_string: The raw string used to construct the MmcifObject.
  """
  file_id: str
  header: PdbHeader
  structure: PdbStructure
  chain_to_seqres: Mapping[ChainId, SeqRes]
  seqres_to_structure: Mapping[ChainId, Mapping[int, ResidueAtPosition]]
  # NOTE: parse() in this module stores the parsed mmCIF dict here, not the
  # raw file text.
  raw_string: Any
|
92 |
+
|
93 |
+
|
94 |
+
@dataclasses.dataclass(frozen=True)
class ParsingResult:
  """Returned by the parse function.

  Contains:
    mmcif_object: A MmcifObject, may be None if no chain could be successfully
      parsed.
    errors: A dict mapping (file_id, chain_id) to any exception generated.
  """
  mmcif_object: Optional[MmcifObject]
  errors: Mapping[Tuple[str, str], Any]
|
105 |
+
|
106 |
+
|
107 |
+
class ParseError(Exception):
  """An error indicating that an mmCIF file could not be parsed."""
|
109 |
+
|
110 |
+
|
111 |
+
def mmcif_loop_to_list(prefix: str,
                       parsed_info: MmCIFDict) -> Sequence[Mapping[str, str]]:
  """Extracts loop associated with a prefix from mmCIF data as a list.

  Reference for loop_ in mmCIF:
    http://mmcif.wwpdb.org/docs/tutorials/mechanics/pdbx-mmcif-syntax.html

  Args:
    prefix: Prefix shared by each of the data items in the loop.
      e.g. '_entity_poly_seq.', where the data items are _entity_poly_seq.num,
      _entity_poly_seq.mon_id. Should include the trailing period.
    parsed_info: A dict of parsed mmCIF data, e.g. _mmcif_dict from a Biopython
      parser.

  Returns:
    Returns a list of dicts; each dict represents 1 entry from an mmCIF loop.
    The list is empty when no key starts with `prefix`.
  """
  cols = []
  data = []
  for key, value in parsed_info.items():
    if key.startswith(prefix):
      cols.append(key)
      data.append(value)

  # All columns of one loop must have the same number of rows; otherwise zip()
  # below would silently truncate to the shortest column.
  assert all(len(xs) == len(data[0]) for xs in data), (
      'mmCIF error: Not all loops are the same length: %s' % cols)

  # Transpose column-wise data into one dict per row.
  return [dict(zip(cols, xs)) for xs in zip(*data)]
|
139 |
+
|
140 |
+
|
141 |
+
def mmcif_loop_to_dict(prefix: str,
                       index: str,
                       parsed_info: MmCIFDict,
                       ) -> Mapping[str, Mapping[str, str]]:
  """Extracts loop associated with a prefix from mmCIF data as a dictionary.

  Args:
    prefix: Prefix shared by each of the data items in the loop.
      e.g. '_entity_poly_seq.', where the data items are _entity_poly_seq.num,
      _entity_poly_seq.mon_id. Should include the trailing period.
    index: Which item of loop data should serve as the key.
    parsed_info: A dict of parsed mmCIF data, e.g. _mmcif_dict from a Biopython
      parser.

  Returns:
    Returns a dict of dicts; each dict represents 1 entry from an mmCIF loop,
    indexed by the index column. If several entries share the same index
    value, the last one wins.
  """
  entries = mmcif_loop_to_list(prefix, parsed_info)
  return {entry[index]: entry for entry in entries}
|
161 |
+
|
162 |
+
|
163 |
+
def parse(*,
          file_id: str,
          mmcif_string: str,
          catch_all_errors: bool = True) -> ParsingResult:
  """Entry point, parses an mmcif_string.

  Args:
    file_id: A string identifier for this file. Should be unique within the
      collection of files being processed.
    mmcif_string: Contents of an mmCIF file.
    catch_all_errors: If True, all exceptions are caught and error messages are
      returned as part of the ParsingResult. If False exceptions will be allowed
      to propagate.

  Returns:
    A ParsingResult. Its `mmcif_object` is None when no protein chain was found
    or when an exception was caught; `errors` then maps (file_id, '') to the
    message or exception.
  """
  errors = {}
  try:
    parser = PDB.MMCIFParser(QUIET=True)
    handle = io.StringIO(mmcif_string)
    full_structure = parser.get_structure('', handle)
    first_model_structure = _get_first_model(full_structure)
    # Extract the _mmcif_dict from the parser, which contains useful fields not
    # reflected in the Biopython structure.
    parsed_info = parser._mmcif_dict  # pylint:disable=protected-access

    # Ensure all values are lists, even if singletons.
    # (Only values of existing keys are replaced, so mutating while iterating
    # is safe here.)
    for key, value in parsed_info.items():
      if not isinstance(value, list):
        parsed_info[key] = [value]

    header = _get_header(parsed_info)

    # Determine the protein chains, and their start numbers according to the
    # internal mmCIF numbering scheme (likely but not guaranteed to be 1).
    valid_chains = _get_protein_chains(parsed_info=parsed_info)
    if not valid_chains:
      return ParsingResult(
          None, {(file_id, ''): 'No protein chains found in this file.'})
    seq_start_num = {chain_id: min([monomer.num for monomer in seq])
                     for chain_id, seq in valid_chains.items()}

    # Loop over the atoms for which we have coordinates. Populate two mappings:
    # -mmcif_to_author_chain_id (maps internal mmCIF chain ids to chain ids used
    # the authors / Biopython).
    # -seq_to_structure_mappings (maps idx into sequence to ResidueAtPosition).
    mmcif_to_author_chain_id = {}
    seq_to_structure_mappings = {}
    for atom in _get_atom_site_list(parsed_info):
      if atom.model_num != '1':
        # We only process the first model at the moment.
        continue

      mmcif_to_author_chain_id[atom.mmcif_chain_id] = atom.author_chain_id

      if atom.mmcif_chain_id in valid_chains:
        hetflag = ' '
        if atom.hetatm_atom == 'HETATM':
          # Water atoms are assigned a special hetflag of W in Biopython. We
          # need to do the same, so that this hetflag can be used to fetch
          # a residue from the Biopython structure by id.
          if atom.residue_name in ('HOH', 'WAT'):
            hetflag = 'W'
          else:
            hetflag = 'H_' + atom.residue_name
        insertion_code = atom.insertion_code
        if not _is_set(atom.insertion_code):
          insertion_code = ' '
        # int() raises ValueError for non-integer author sequence numbers; that
        # is handled by the broad except below like any other failure.
        position = ResiduePosition(chain_id=atom.author_chain_id,
                                   residue_number=int(atom.author_seq_num),
                                   insertion_code=insertion_code)
        seq_idx = int(atom.mmcif_seq_num) - seq_start_num[atom.mmcif_chain_id]
        current = seq_to_structure_mappings.get(atom.author_chain_id, {})
        current[seq_idx] = ResidueAtPosition(position=position,
                                             name=atom.residue_name,
                                             is_missing=False,
                                             hetflag=hetflag)
        seq_to_structure_mappings[atom.author_chain_id] = current

    # Add missing residue information to seq_to_structure_mappings.
    for chain_id, seq_info in valid_chains.items():
      author_chain = mmcif_to_author_chain_id[chain_id]
      current_mapping = seq_to_structure_mappings[author_chain]
      for idx, monomer in enumerate(seq_info):
        if idx not in current_mapping:
          current_mapping[idx] = ResidueAtPosition(position=None,
                                                   name=monomer.id,
                                                   is_missing=True,
                                                   hetflag=' ')

    # Build the one-letter sequence of every chain, keyed by author chain id.
    author_chain_to_sequence = {}
    for chain_id, seq_info in valid_chains.items():
      author_chain = mmcif_to_author_chain_id[chain_id]
      seq = []
      for monomer in seq_info:
        code = SCOPData.protein_letters_3to1.get(monomer.id, 'X')
        # Unknown components and multi-letter codes both become 'X'.
        seq.append(code if len(code) == 1 else 'X')
      seq = ''.join(seq)
      author_chain_to_sequence[author_chain] = seq

    mmcif_object = MmcifObject(
        file_id=file_id,
        header=header,
        structure=first_model_structure,
        chain_to_seqres=author_chain_to_sequence,
        seqres_to_structure=seq_to_structure_mappings,
        raw_string=parsed_info)

    return ParsingResult(mmcif_object=mmcif_object, errors=errors)
  except Exception as e:  # pylint:disable=broad-except
    # Deliberate catch-all at the parsing boundary: record the error and either
    # re-raise or report it through the result, depending on the flag.
    errors[(file_id, '')] = e
    if not catch_all_errors:
      raise
    return ParsingResult(mmcif_object=None, errors=errors)
|
278 |
+
|
279 |
+
|
280 |
+
def _get_first_model(structure: PdbStructure) -> PdbStructure:
|
281 |
+
"""Returns the first model in a Biopython structure."""
|
282 |
+
return next(structure.get_models())
|
283 |
+
|
284 |
+
_MIN_LENGTH_OF_CHAIN_TO_BE_COUNTED_AS_PEPTIDE = 21
|
285 |
+
|
286 |
+
|
287 |
+
def get_release_date(parsed_info: MmCIFDict) -> str:
  """Returns the oldest revision date.

  The dates are compared as strings, so this relies on them being in a
  lexicographically sortable format (presumably ISO `YYYY-MM-DD`).

  Raises:
    KeyError: if `_pdbx_audit_revision_history.revision_date` is absent.
  """
  revision_dates = parsed_info['_pdbx_audit_revision_history.revision_date']
  return min(revision_dates)
|
291 |
+
|
292 |
+
|
293 |
+
def _get_header(parsed_info: MmCIFDict) -> PdbHeader:
  """Returns a basic header containing method, release date and resolution.

  Args:
    parsed_info: A dict of parsed mmCIF data whose values are lists.

  Returns:
    Dict with 'structure_method' (comma-joined, lower-cased `_exptl.method`
    values), 'release_date' (only when revision history is present) and
    'resolution' (0.00 when no resolution could be parsed).
  """
  header = {}

  experiments = mmcif_loop_to_list('_exptl.', parsed_info)
  header['structure_method'] = ','.join([
      experiment['_exptl.method'].lower() for experiment in experiments])

  # Note: The release_date here corresponds to the oldest revision. We prefer to
  # use this for dataset filtering over the deposition_date.
  if '_pdbx_audit_revision_history.revision_date' in parsed_info:
    header['release_date'] = get_release_date(parsed_info)
  else:
    logging.warning('Could not determine release_date: %s',
                    parsed_info['_entry.id'])

  header['resolution'] = 0.00
  # Keys are listed in order of preference: stop at the first one that parses,
  # so a later key cannot overwrite a value taken from an earlier one.
  for res_key in ('_refine.ls_d_res_high', '_em_3d_reconstruction.resolution',
                  '_reflns.d_resolution_high'):
    if res_key in parsed_info:
      try:
        raw_resolution = parsed_info[res_key][0]
        header['resolution'] = float(raw_resolution)
        break
      except ValueError:
        # Unparseable value (e.g. '?'): fall through to the next key.
        logging.warning('Invalid resolution format: %s', parsed_info[res_key])

  return header
|
320 |
+
|
321 |
+
|
322 |
+
def _get_atom_site_list(parsed_info: MmCIFDict) -> Sequence[AtomSite]:
  """Returns list of atom sites; contains data not present in the structure.

  One AtomSite is built per row of the `_atom_site` loop. The column order
  below must match the field order of AtomSite, because the values are passed
  positionally.

  Raises:
    KeyError: if any of the required `_atom_site.*` columns is absent.
  """
  return [AtomSite(*site) for site in zip(  # pylint:disable=g-complex-comprehension
      parsed_info['_atom_site.label_comp_id'],
      parsed_info['_atom_site.auth_asym_id'],
      parsed_info['_atom_site.label_asym_id'],
      parsed_info['_atom_site.auth_seq_id'],
      parsed_info['_atom_site.label_seq_id'],
      parsed_info['_atom_site.pdbx_PDB_ins_code'],
      parsed_info['_atom_site.group_PDB'],
      parsed_info['_atom_site.pdbx_PDB_model_num'],
      )]
|
334 |
+
|
335 |
+
|
336 |
+
def _get_protein_chains(
    *, parsed_info: Mapping[str, Any]) -> Mapping[ChainId, Sequence[Monomer]]:
  """Extracts polymer information for protein chains only.

  Args:
    parsed_info: _mmcif_dict produced by the Biopython parser.

  Returns:
    A dict mapping mmcif chain id to a list of Monomers.
  """
  # Get polymer information for each entity in the structure.
  entity_poly_seqs = mmcif_loop_to_list('_entity_poly_seq.', parsed_info)

  polymers = collections.defaultdict(list)
  for entity_poly_seq in entity_poly_seqs:
    polymers[entity_poly_seq['_entity_poly_seq.entity_id']].append(
        Monomer(id=entity_poly_seq['_entity_poly_seq.mon_id'],
                num=int(entity_poly_seq['_entity_poly_seq.num'])))

  # Get chemical compositions. Will allow us to identify which of these polymers
  # are proteins.
  chem_comps = mmcif_loop_to_dict('_chem_comp.', '_chem_comp.id', parsed_info)

  # Get chains information for each entity. Necessary so that we can return a
  # dict keyed on chain id rather than entity.
  struct_asyms = mmcif_loop_to_list('_struct_asym.', parsed_info)

  entity_to_mmcif_chains = collections.defaultdict(list)
  for struct_asym in struct_asyms:
    chain_id = struct_asym['_struct_asym.id']
    entity_id = struct_asym['_struct_asym.entity_id']
    entity_to_mmcif_chains[entity_id].append(chain_id)

  # Identify and return the valid protein chains.
  valid_chains = {}
  for entity_id, seq_info in polymers.items():
    chain_ids = entity_to_mmcif_chains[entity_id]

    # Reject polymers without any peptide-like components, such as DNA/RNA.
    # A single peptide-type monomer is enough to keep the whole polymer; all
    # chains of that entity share the same monomer list object.
    if any(['peptide' in chem_comps[monomer.id]['_chem_comp.type']
            for monomer in seq_info]):
      for chain_id in chain_ids:
        valid_chains[chain_id] = seq_info
  return valid_chains
|
380 |
+
|
381 |
+
|
382 |
+
def _is_set(data: str) -> bool:
|
383 |
+
"""Returns False if data is a special mmCIF character indicating 'unset'."""
|
384 |
+
return data not in ('.', '?')
|
af_backprop/alphafold/data/parsers.py
ADDED
@@ -0,0 +1,364 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Functions for parsing various file formats."""
|
16 |
+
import collections
|
17 |
+
import dataclasses
|
18 |
+
import re
|
19 |
+
import string
|
20 |
+
from typing import Dict, Iterable, List, Optional, Sequence, Tuple
|
21 |
+
|
22 |
+
# deletion_matrix[i][j]: number of residues deleted from aligned sequence i at
# residue position j (see parse_stockholm).
DeletionMatrix = Sequence[Sequence[int]]
|
23 |
+
|
24 |
+
|
25 |
+
@dataclasses.dataclass(frozen=True)
class TemplateHit:
  """Class representing a template hit."""
  index: int
  name: str
  aligned_cols: int
  sum_probs: float
  query: str
  hit_sequence: str
  indices_query: List[int]
  indices_hit: List[int]
|
36 |
+
|
37 |
+
|
38 |
+
def parse_fasta(fasta_string: str) -> Tuple[Sequence[str], Sequence[str]]:
  """Parses FASTA string and returns list of strings with amino-acid sequences.

  Arguments:
    fasta_string: The string contents of a FASTA file.

  Returns:
    A tuple of two lists:
    * A list of sequences.
    * A list of sequence descriptions taken from the comment lines. In the
      same order as the sequences.

  Raises:
    IndexError: if a sequence line appears before the first '>' header line.
  """
  sequences = []
  descriptions = []
  index = -1  # Index of the record currently being filled.
  for line in fasta_string.splitlines():
    line = line.strip()
    if line.startswith('>'):
      index += 1
      descriptions.append(line[1:])  # Remove the '>' at the beginning.
      sequences.append('')
      continue
    elif not line:
      continue  # Skip blank lines.
    # Sequence data may be wrapped over several lines; concatenate them.
    sequences[index] += line

  return sequences, descriptions
|
65 |
+
|
66 |
+
|
67 |
+
def parse_stockholm(
    stockholm_string: str
) -> Tuple[Sequence[str], DeletionMatrix, Sequence[str]]:
  """Extracts the MSA, deletion matrix and row names from Stockholm text.

  Args:
    stockholm_string: The string contents of a stockholm file. The first
      sequence in the file should be the query sequence.

  Returns:
    A tuple of:
      * The aligned sequences with every column that is a gap in the query
        removed. Duplicates are not filtered out.
      * The deletion matrix as a list of lists, where
        `deletion_matrix[i][j]` is the number of residues deleted from
        aligned sequence i at residue position j.
      * The row names in order of first appearance, including the jackhmmer
        subsequence suffix.
  """
  # Concatenate the fragments of each named row across alignment blocks.
  rows = collections.OrderedDict()
  for raw_line in stockholm_string.splitlines():
    stripped = raw_line.strip()
    if stripped == '' or stripped.startswith(('#', '//')):
      continue
    label, fragment = stripped.split()
    rows[label] = rows.get(label, '') + fragment

  full_rows = list(rows.values())
  # The first row is taken to be the query; only its non-gap columns are kept.
  query = full_rows[0] if full_rows else ''
  kept_columns = [col for col, res in enumerate(query) if res != '-']

  msa = []
  deletion_matrix = []
  for row in full_rows:
    msa.append(''.join([row[col] for col in kept_columns]))

    # Residues sitting in query-gap columns are counted and attributed to the
    # next column in which the query has a residue.
    counts = []
    pending = 0
    for row_res, query_res in zip(row, query):
      if query_res != '-':
        counts.append(pending)
        pending = 0
      elif row_res != '-':
        pending += 1
    deletion_matrix.append(counts)

  return msa, deletion_matrix, list(rows.keys())
|
125 |
+
|
126 |
+
|
127 |
+
def parse_a3m(a3m_string: str) -> Tuple[Sequence[str], DeletionMatrix]:
  """Extracts the aligned sequences and deletion matrix from a3m text.

  Args:
    a3m_string: The string contents of a a3m file. The first sequence in the
      file should be the query sequence.

  Returns:
    A tuple of:
      * The sequences with all lowercase ASCII letters removed. Duplicates
        are not filtered out.
      * The deletion matrix as a list of lists, where
        `deletion_matrix[i][j]` is the number of residues deleted from
        aligned sequence i at residue position j.
  """
  rows, _ = parse_fasta(a3m_string)

  deletion_matrix = []
  for row in rows:
    counts = []
    pending = 0
    for symbol in row:
      if not symbol.islower():
        # Aligned column: record the run of lowercase symbols that preceded it.
        counts.append(pending)
        pending = 0
      else:
        pending += 1
    deletion_matrix.append(counts)

  # Make the MSA matrix out of aligned (deletion-free) sequences.
  drop_lowercase = str.maketrans('', '', string.ascii_lowercase)
  aligned_rows = [row.translate(drop_lowercase) for row in rows]
  return aligned_rows, deletion_matrix
|
159 |
+
|
160 |
+
|
161 |
+
def _convert_sto_seq_to_a3m(
    query_non_gaps: Sequence[bool], sto_seq: str) -> Iterable[str]:
  """Yields the a3m symbols for one Stockholm row.

  Args:
    query_non_gaps: One flag per alignment column, true where the query has a
      residue.
    sto_seq: The Stockholm row to convert.

  Yields:
    The row's symbol unchanged for columns where the query has a residue, the
    lowercased symbol for non-gap symbols in query-gap columns, and nothing
    for gaps in query-gap columns.
  """
  for is_query_res_non_gap, sequence_res in zip(query_non_gaps, sto_seq):
    if is_query_res_non_gap:
      yield sequence_res
    elif sequence_res != '-':
      yield sequence_res.lower()


def convert_stockholm_to_a3m(stockholm_format: str,
                             max_sequences: Optional[int] = None) -> str:
  """Converts MSA in Stockholm format to the A3M format.

  Args:
    stockholm_format: The string contents of a Stockholm file. The first
      sequence is assumed to be the query.
    max_sequences: If truthy, only the first `max_sequences` distinct sequence
      names are kept; later names are dropped. `None` or 0 keeps everything.

  Returns:
    The alignment as a3m text, one '>name description' header and one
    sequence line per kept row, with a terminating newline.

  Raises:
    ValueError: If the input contains no alignment rows.
  """
  lines = stockholm_format.splitlines()

  sequences = {}
  for line in lines:
    # Ignore blank lines, markup and end symbols - remainder are alignment
    # sequence parts.
    if not line.strip() or line.startswith(('#', '//')):
      continue
    seqname, aligned_seq = line.split(maxsplit=1)
    if seqname not in sequences:
      if max_sequences and len(sequences) >= max_sequences:
        continue
      sequences[seqname] = ''
    sequences[seqname] += aligned_seq

  if not sequences:
    raise ValueError('No alignment rows found in Stockholm input.')

  descriptions = {}
  for line in lines:
    if line[:4] != '#=GS':
      continue
    # Description row - example format is:
    # #=GS UniRef90_Q9H5Z4/4-78            DE [subseq from] cDNA: FLJ22755 ...
    columns = line.split(maxsplit=3)
    seqname, feature = columns[1:3]
    # Only descriptions of kept rows count; otherwise an annotation for a row
    # that is not in the alignment could trigger the early exit below before
    # the descriptions of real rows have been read.
    if feature != 'DE' or seqname not in sequences:
      continue
    descriptions[seqname] = columns[3] if len(columns) == 4 else ''
    if len(descriptions) == len(sequences):
      break

  # Convert sto format to a3m line by line.
  # query_sequence is assumed to be the first sequence.
  query_sequence = next(iter(sequences.values()))
  query_non_gaps = [res != '-' for res in query_sequence]
  a3m_sequences = {
      seqname: ''.join(_convert_sto_seq_to_a3m(query_non_gaps, sto_sequence))
      for seqname, sto_sequence in sequences.items()
  }

  fasta_chunks = (f">{k} {descriptions.get(k, '')}\n{a3m_sequences[k]}"
                  for k in a3m_sequences)
  return '\n'.join(fasta_chunks) + '\n'  # Include terminating newline.
|
216 |
+
|
217 |
+
|
218 |
+
def _get_hhr_line_regex_groups(
    regex_pattern: str, line: str) -> Sequence[Optional[str]]:
  """Matches `regex_pattern` at the start of `line` and returns its groups.

  Raises:
    RuntimeError: If the pattern does not match.
  """
  found = re.match(regex_pattern, line)
  if found is not None:
    return found.groups()
  raise RuntimeError(f'Could not parse query line {line}')
|
224 |
+
|
225 |
+
|
226 |
+
def _update_hhr_residue_indices_list(
    sequence: str, start_index: int, indices_list: List[int]):
  """Appends one residue index per symbol of `sequence` to `indices_list`.

  Gap symbols ('-') contribute -1. Every other symbol contributes the running
  index, which starts at `start_index` and advances by one per non-gap symbol.
  The list is modified in place and nothing is returned.
  """
  next_index = start_index
  for symbol in sequence:
    is_gap = symbol == '-'
    indices_list.append(-1 if is_gap else next_index)
    if not is_gap:
      next_index += 1
|
236 |
+
|
237 |
+
|
238 |
+
def _parse_hhr_hit(detailed_lines: Sequence[str]) -> TemplateHit:
  """Parses the detailed HMM HMM comparison section for a single Hit.

  This works on .hhr files generated from both HHBlits and HHSearch.

  Args:
    detailed_lines: A list of lines from a single comparison section between 2
      sequences (which each have their own HMM's). The first line ends with
      the hit number, the second holds the hit name, the third is the summary
      line and the rest are the alignment blocks.

  Returns:
    A TemplateHit with the information from that detailed comparison section.

  Raises:
    RuntimeError: If the summary line or a sequence line cannot be parsed.
    ValueError: If a summary field is not a valid number.
    AssertionError: If a block's length disagrees with its start/end indices,
      or a hit line's length disagrees with the preceding query line.
  """
  # Parse first 2 lines.
  number_of_hit = int(detailed_lines[0].split()[-1])
  name_hit = detailed_lines[1][1:]

  # Parse the summary line.
  pattern = (
      'Probab=(.*)[\t ]*E-value=(.*)[\t ]*Score=(.*)[\t ]*Aligned_cols=(.*)[\t'
      ' ]*Identities=(.*)%[\t ]*Similarity=(.*)[\t ]*Sum_probs=(.*)[\t '
      ']*Template_Neff=(.*)')
  match = re.match(pattern, detailed_lines[2])
  if match is None:
    raise RuntimeError(
        'Could not parse section: %s. Expected this: \n%s to contain summary.' %
        (detailed_lines, detailed_lines[2]))
  # Every field is still converted so that a malformed number raises, but only
  # Aligned_cols and Sum_probs are used below.
  (_, _, _, aligned_cols, _, _, sum_probs,
   _) = [float(x) for x in match.groups()]

  # The next section reads the detailed comparisons. These are in a 'human
  # readable' format which has a fixed length. The strategy employed is to
  # assume that each block starts with the query sequence line, and to parse
  # that with a regexp in order to deduce the fixed length used for that block.
  query = ''
  hit_sequence = ''
  indices_query = []
  indices_hit = []
  length_block = None

  for line in detailed_lines[3:]:
    # Parse the query sequence line, skipping the annotation rows.
    if (line.startswith('Q ') and
        not line.startswith(('Q ss_dssp', 'Q ss_pred', 'Q Consensus'))):
      # Thus the first 17 characters must be 'Q <query_name> ', and we can parse
      # everything after that.
      #              start    sequence       end       total_sequence_length
      patt = r'[\t ]*([0-9]*) ([A-Z-]*)[\t ]*([0-9]*) \([0-9]*\)'
      groups = _get_hhr_line_regex_groups(patt, line[17:])

      # Get the length of the parsed block using the start and finish indices,
      # and ensure it is the same as the actual block length.
      start = int(groups[0]) - 1  # Make index zero based.
      delta_query = groups[1]
      end = int(groups[2])
      num_insertions = len([x for x in delta_query if x == '-'])
      length_block = end - start + num_insertions
      assert length_block == len(delta_query)

      # Update the query sequence and indices list.
      query += delta_query
      _update_hhr_residue_indices_list(delta_query, start, indices_query)

    elif line.startswith('T '):
      # Parse the hit sequence, skipping the annotation rows.
      if not line.startswith(('T ss_dssp', 'T ss_pred', 'T Consensus')):
        # Thus the first 17 characters must be 'T <hit_name> ', and we can
        # parse everything after that.
        #              start    sequence       end     total_sequence_length
        patt = r'[\t ]*([0-9]*) ([A-Z-]*)[\t ]*[0-9]* \([0-9]*\)'
        groups = _get_hhr_line_regex_groups(patt, line[17:])
        start = int(groups[0]) - 1  # Make index zero based.
        delta_hit_sequence = groups[1]
        assert length_block == len(delta_hit_sequence)

        # Update the hit sequence and indices list.
        hit_sequence += delta_hit_sequence
        _update_hhr_residue_indices_list(delta_hit_sequence, start, indices_hit)

  return TemplateHit(
      index=number_of_hit,
      name=name_hit,
      aligned_cols=int(aligned_cols),
      sum_probs=sum_probs,
      query=query,
      hit_sequence=hit_sequence,
      indices_query=indices_query,
      indices_hit=indices_hit,
  )
|
332 |
+
|
333 |
+
|
334 |
+
def parse_hhr(hhr_string: str) -> Sequence[TemplateHit]:
  """Parses every hit paragraph of an HHR file into a list of TemplateHit."""
  lines = hhr_string.splitlines()

  # Each .hhr file starts with a results table, then has a sequence of hit
  # "paragraphs", each paragraph starting with a line 'No <hit number>'.
  starts = [pos for pos, text in enumerate(lines) if text.startswith('No ')]
  if not starts:
    return []

  # The end of the file closes the final paragraph.
  bounds = starts + [len(lines)]
  return [_parse_hhr_hit(lines[begin:end])
          for begin, end in zip(bounds, bounds[1:])]
|
350 |
+
|
351 |
+
|
352 |
+
def parse_e_values_from_tblout(tblout: str) -> Dict[str, float]:
  """Parse target to e-value mapping parsed from Jackhmmer tblout string.

  Args:
    tblout: The string contents of a tblout file. Lines starting with '#' and
      blank lines are ignored.

  Returns:
    A dict mapping each target name to its full-sequence E-value. It always
    contains the entry 'query' with value 0.

  Raises:
    IndexError: If a data line has fewer than five fields.
    ValueError: If the E-value field is not a valid float.
  """
  e_values = {'query': 0}
  # As per http://eddylab.org/software/hmmer/Userguide.pdf fields are
  # space-delimited. Relevant fields are (1) target name:  and
  # (5) E-value (full sequence) (numbering from 1).
  for line in tblout.splitlines():
    # Blank lines are skipped explicitly: indexing line[0] on an empty line
    # would raise IndexError.
    if not line.strip() or line.startswith('#'):
      continue
    fields = line.split()
    e_values[fields[0]] = float(fields[4])
  return e_values
|
af_backprop/alphafold/data/pipeline.py
ADDED
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Functions for building the input features for the AlphaFold model."""
|
16 |
+
|
17 |
+
import os
|
18 |
+
from typing import Mapping, Optional, Sequence
|
19 |
+
from absl import logging
|
20 |
+
from alphafold.common import residue_constants
|
21 |
+
from alphafold.data import parsers
|
22 |
+
from alphafold.data import templates
|
23 |
+
from alphafold.data.tools import hhblits
|
24 |
+
from alphafold.data.tools import hhsearch
|
25 |
+
from alphafold.data.tools import jackhmmer
|
26 |
+
import numpy as np
|
27 |
+
|
28 |
+
# Internal import (7716).
|
29 |
+
|
30 |
+
# A feature dictionary: maps feature names to numpy arrays.
FeatureDict = Mapping[str, np.ndarray]
|
31 |
+
|
32 |
+
|
33 |
+
def make_sequence_features(
    sequence: str, description: str, num_res: int) -> FeatureDict:
  """Builds the per-sequence entries of the feature dict.

  Args:
    sequence: The input sequence as a string.
    description: The description stored, UTF-8 encoded, under 'domain_name'.
    num_res: Length used for all per-residue arrays.

  Returns:
    A dict with the keys 'aatype', 'between_segment_residues', 'domain_name',
    'residue_index', 'seq_length' and 'sequence'.
  """
  onehot = residue_constants.sequence_to_onehot(
      sequence=sequence,
      mapping=residue_constants.restype_order_with_x,
      map_unknown_to_x=True)
  return {
      'aatype': onehot,
      'between_segment_residues': np.zeros((num_res,), dtype=np.int32),
      'domain_name': np.array([description.encode('utf-8')],
                              dtype=np.object_),
      'residue_index': np.array(range(num_res), dtype=np.int32),
      # The sequence length is repeated once per residue.
      'seq_length': np.array([num_res] * num_res, dtype=np.int32),
      'sequence': np.array([sequence.encode('utf-8')], dtype=np.object_),
  }
|
48 |
+
|
49 |
+
|
50 |
+
def make_msa_features(
    msas: Sequence[Sequence[str]],
    deletion_matrices: Sequence[parsers.DeletionMatrix]) -> FeatureDict:
  """Builds the MSA entries of the feature dict from one or more MSAs.

  Sequences are taken in order across all MSAs; a sequence string that has
  already been seen (in this or an earlier MSA) is skipped together with its
  deletion row.

  Args:
    msas: The MSAs, each a non-empty sequence of aligned sequence strings.
    deletion_matrices: One deletion matrix per MSA, indexed in parallel with
      `msas`.

  Returns:
    A dict with the keys 'deletion_matrix_int', 'msa' and 'num_alignments'.

  Raises:
    ValueError: If `msas` is empty or any MSA contains no sequences.
  """
  if not msas:
    raise ValueError('At least one MSA must be provided.')

  encoded_rows = []
  deletion_rows = []
  already_seen = set()
  for msa_idx, alignment in enumerate(msas):
    if not alignment:
      raise ValueError(f'MSA {msa_idx} must contain at least one sequence.')
    for row_idx, row in enumerate(alignment):
      if row not in already_seen:
        already_seen.add(row)
        encoded_rows.append(
            [residue_constants.HHBLITS_AA_TO_ID[aa] for aa in row])
        deletion_rows.append(deletion_matrices[msa_idx][row_idx])

  # The residue count is the length of the first sequence of the first MSA.
  query_length = len(msas[0][0])
  return {
      'deletion_matrix_int': np.array(deletion_rows, dtype=np.int32),
      'msa': np.array(encoded_rows, dtype=np.int32),
      # The deduplicated row count is repeated once per residue.
      'num_alignments': np.array(
          [len(encoded_rows)] * query_length, dtype=np.int32),
  }
|
79 |
+
|
80 |
+
|
81 |
+
class DataPipeline:
  """Runs the alignment tools and assembles the input features."""

  def __init__(self,
               jackhmmer_binary_path: str,
               hhblits_binary_path: str,
               hhsearch_binary_path: str,
               uniref90_database_path: str,
               mgnify_database_path: str,
               bfd_database_path: Optional[str],
               uniclust30_database_path: Optional[str],
               small_bfd_database_path: Optional[str],
               pdb70_database_path: str,
               template_featurizer: templates.TemplateHitFeaturizer,
               use_small_bfd: bool,
               mgnify_max_hits: int = 501,
               uniref_max_hits: int = 10000):
    """Creates the alignment-tool runners that `process` uses.

    Args:
      jackhmmer_binary_path: Binary used by every Jackhmmer runner.
      hhblits_binary_path: Binary used by the HHBlits runner.
      hhsearch_binary_path: Binary used by the HHSearch runner.
      uniref90_database_path: Database for the UniRef90 Jackhmmer runner.
      mgnify_database_path: Database for the MGnify Jackhmmer runner.
      bfd_database_path: First HHBlits database; only used when
        `use_small_bfd` is false.
      uniclust30_database_path: Second HHBlits database; only used when
        `use_small_bfd` is false.
      small_bfd_database_path: Database for the small-BFD Jackhmmer runner;
        only used when `use_small_bfd` is true.
      pdb70_database_path: Database for the HHSearch runner.
      template_featurizer: Object whose `get_templates` builds the template
        features.
      use_small_bfd: Selects Jackhmmer on small BFD instead of HHBlits on
        BFD + Uniclust30.
      mgnify_max_hits: Maximum number of MGnify MSA rows kept.
      uniref_max_hits: Maximum number of UniRef90 sequences passed on to the
        template search.
    """
    self._use_small_bfd = use_small_bfd
    self.jackhmmer_uniref90_runner = jackhmmer.Jackhmmer(
        binary_path=jackhmmer_binary_path,
        database_path=uniref90_database_path)
    if use_small_bfd:
      self.jackhmmer_small_bfd_runner = jackhmmer.Jackhmmer(
          binary_path=jackhmmer_binary_path,
          database_path=small_bfd_database_path)
    else:
      self.hhblits_bfd_uniclust_runner = hhblits.HHBlits(
          binary_path=hhblits_binary_path,
          databases=[bfd_database_path, uniclust30_database_path])
    self.jackhmmer_mgnify_runner = jackhmmer.Jackhmmer(
        binary_path=jackhmmer_binary_path,
        database_path=mgnify_database_path)
    self.hhsearch_pdb70_runner = hhsearch.HHSearch(
        binary_path=hhsearch_binary_path,
        databases=[pdb70_database_path])
    self.template_featurizer = template_featurizer
    self.mgnify_max_hits = mgnify_max_hits
    self.uniref_max_hits = uniref_max_hits

  def process(self, input_fasta_path: str, msa_output_dir: str) -> FeatureDict:
    """Runs alignment tools on the input sequence and creates features.

    Args:
      input_fasta_path: Path to a FASTA file holding exactly one sequence.
      msa_output_dir: Existing directory that receives the raw tool outputs
        (uniref90_hits.sto, mgnify_hits.sto, pdb70_hits.hhr and the BFD hits).

    Returns:
      The merged sequence, MSA and template feature dicts.

    Raises:
      ValueError: If the FASTA file does not contain exactly one sequence.
    """
    with open(input_fasta_path) as f:
      input_fasta_str = f.read()
    input_seqs, input_descs = parsers.parse_fasta(input_fasta_str)
    # Report the empty case separately: "more than one" would be misleading.
    if not input_seqs:
      raise ValueError(
          f'No input sequence found in {input_fasta_path}.')
    if len(input_seqs) > 1:
      raise ValueError(
          f'More than one input sequence found in {input_fasta_path}.')
    input_sequence = input_seqs[0]
    input_description = input_descs[0]
    num_res = len(input_sequence)

    jackhmmer_uniref90_result = self.jackhmmer_uniref90_runner.query(
        input_fasta_path)[0]
    jackhmmer_mgnify_result = self.jackhmmer_mgnify_runner.query(
        input_fasta_path)[0]

    # The template search runs on the (size-capped) UniRef90 MSA in a3m form.
    uniref90_msa_as_a3m = parsers.convert_stockholm_to_a3m(
        jackhmmer_uniref90_result['sto'], max_sequences=self.uniref_max_hits)
    hhsearch_result = self.hhsearch_pdb70_runner.query(uniref90_msa_as_a3m)

    uniref90_out_path = os.path.join(msa_output_dir, 'uniref90_hits.sto')
    with open(uniref90_out_path, 'w') as f:
      f.write(jackhmmer_uniref90_result['sto'])

    mgnify_out_path = os.path.join(msa_output_dir, 'mgnify_hits.sto')
    with open(mgnify_out_path, 'w') as f:
      f.write(jackhmmer_mgnify_result['sto'])

    pdb70_out_path = os.path.join(msa_output_dir, 'pdb70_hits.hhr')
    with open(pdb70_out_path, 'w') as f:
      f.write(hhsearch_result)

    uniref90_msa, uniref90_deletion_matrix, _ = parsers.parse_stockholm(
        jackhmmer_uniref90_result['sto'])
    mgnify_msa, mgnify_deletion_matrix, _ = parsers.parse_stockholm(
        jackhmmer_mgnify_result['sto'])
    hhsearch_hits = parsers.parse_hhr(hhsearch_result)
    mgnify_msa = mgnify_msa[:self.mgnify_max_hits]
    mgnify_deletion_matrix = mgnify_deletion_matrix[:self.mgnify_max_hits]

    if self._use_small_bfd:
      jackhmmer_small_bfd_result = self.jackhmmer_small_bfd_runner.query(
          input_fasta_path)[0]

      # NOTE(review): the file is named '.a3m' but the 'sto' result is what is
      # written. The name is left unchanged because other code may look for
      # it - confirm before renaming.
      bfd_out_path = os.path.join(msa_output_dir, 'small_bfd_hits.a3m')
      with open(bfd_out_path, 'w') as f:
        f.write(jackhmmer_small_bfd_result['sto'])

      bfd_msa, bfd_deletion_matrix, _ = parsers.parse_stockholm(
          jackhmmer_small_bfd_result['sto'])
    else:
      hhblits_bfd_uniclust_result = self.hhblits_bfd_uniclust_runner.query(
          input_fasta_path)

      bfd_out_path = os.path.join(msa_output_dir, 'bfd_uniclust_hits.a3m')
      with open(bfd_out_path, 'w') as f:
        f.write(hhblits_bfd_uniclust_result['a3m'])

      bfd_msa, bfd_deletion_matrix = parsers.parse_a3m(
          hhblits_bfd_uniclust_result['a3m'])

    templates_result = self.template_featurizer.get_templates(
        query_sequence=input_sequence,
        query_pdb_code=None,
        query_release_date=None,
        hits=hhsearch_hits)

    sequence_features = make_sequence_features(
        sequence=input_sequence,
        description=input_description,
        num_res=num_res)

    msa_features = make_msa_features(
        msas=(uniref90_msa, bfd_msa, mgnify_msa),
        deletion_matrices=(uniref90_deletion_matrix,
                           bfd_deletion_matrix,
                           mgnify_deletion_matrix))

    logging.info('Uniref90 MSA size: %d sequences.', len(uniref90_msa))
    logging.info('BFD MSA size: %d sequences.', len(bfd_msa))
    logging.info('MGnify MSA size: %d sequences.', len(mgnify_msa))
    logging.info('Final (deduplicated) MSA size: %d sequences.',
                 msa_features['num_alignments'][0])
    logging.info('Total number of templates (NB: this can include bad '
                 'templates and is later filtered to top 4): %d.',
                 templates_result.features['template_domain_names'].shape[0])

    return {**sequence_features, **msa_features, **templates_result.features}
|
af_backprop/alphafold/data/prep_inputs.py
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
from alphafold.common import residue_constants
|
3 |
+
|
4 |
+
def make_atom14_positions(prot):
  """Adds the dense atom14 features to `prot` in place and returns it.

  Reads `prot["aatype"]`, `prot["all_atom_mask"]` and
  `prot["all_atom_positions"]` and writes the atom14 masks and positions, the
  atom14 <-> atom37 index maps, the atom37 existence mask, the alternative
  (renamed) ground truth and the ambiguity mask.
  """
  rc = residue_constants
  aatype = prot["aatype"]

  # Per-residue-type lookup tables, one row per entry of `restypes`.
  atom14_to_atom37_rows = []  # mapping (restype, atom14) --> atom37
  atom37_to_atom14_rows = []  # mapping (restype, atom37) --> atom14
  atom14_mask_rows = []
  for letter in rc.restypes:
    names14 = rc.restype_name_to_atom14_names[rc.restype_1to3[letter]]
    # Empty names are unused atom14 slots; they map to index 0 and mask 0.
    atom14_to_atom37_rows.append(
        [rc.atom_order[name] if name else 0 for name in names14])
    slot_of = {name: slot for slot, name in enumerate(names14)}
    atom37_to_atom14_rows.append(
        [slot_of.get(name, 0) for name in rc.atom_types])
    atom14_mask_rows.append([1. if name else 0. for name in names14])

  # Add dummy mapping for restype 'UNK'.
  atom14_to_atom37_rows.append([0] * 14)
  atom37_to_atom14_rows.append([0] * 37)
  atom14_mask_rows.append([0.] * 14)

  atom14_to_atom37_table = np.array(atom14_to_atom37_rows, dtype=np.int32)
  atom37_to_atom14_table = np.array(atom37_to_atom14_rows, dtype=np.int32)
  atom14_mask_table = np.array(atom14_mask_rows, dtype=np.float32)

  # Select the table rows for this protein's residue types: the
  # (residx, atom14) --> atom37 index map and the atom14 existence mask.
  to_atom37 = atom14_to_atom37_table[aatype]
  exists14 = atom14_mask_table[aatype]

  # Mask of atom14 slots that have a known ground truth position.
  gathered_mask = np.take_along_axis(prot["all_atom_mask"], to_atom37, axis=1)
  gt_exists14 = exists14 * gathered_mask.astype(np.float32)

  # Gather the ground truth positions, zeroing the slots without ground truth.
  gathered_positions = np.take_along_axis(
      prot["all_atom_positions"], to_atom37[..., None], axis=1)
  gt_positions14 = gt_exists14[:, :, None] * gathered_positions

  prot["atom14_atom_exists"] = exists14
  prot["atom14_gt_exists"] = gt_exists14
  prot["atom14_gt_positions"] = gt_positions14

  prot["residx_atom14_to_atom37"] = to_atom37

  # Gather indices for mapping back from atom37 to atom14.
  prot["residx_atom37_to_atom14"] = atom37_to_atom14_table[aatype]

  # The corresponding atom37 existence mask.
  atom37_mask_table = np.zeros([21, 37], dtype=np.float32)
  for type_idx, letter in enumerate(rc.restypes):
    for atom in rc.residue_atoms[rc.restype_1to3[letter]]:
      atom37_mask_table[type_idx, rc.atom_order[atom]] = 1
  prot["atom37_atom_exists"] = atom37_mask_table[aatype]

  # The atom naming is ambiguous for some amino acids, so alternative ground
  # truth coordinates are provided in which the ambiguous names are swapped.
  names3 = [rc.restype_1to3[letter] for letter in rc.restypes]
  names3 += ["UNK"]

  # Matrices for renaming ambiguous atoms; identity unless swaps are listed.
  swap_matrix_by_name = {
      name: np.eye(14, dtype=np.float32) for name in names3}
  for resname, swap in rc.residue_atom_renaming_swaps.items():
    permutation = np.arange(14)
    for atom_a, atom_b in swap.items():
      slots = rc.restype_name_to_atom14_names[resname]
      slot_a = slots.index(atom_a)
      slot_b = slots.index(atom_b)
      permutation[slot_a] = slot_b
      permutation[slot_b] = slot_a
    matrix = np.zeros((14, 14), dtype=np.float32)
    matrix[np.arange(14), permutation] = 1.
    swap_matrix_by_name[resname] = matrix
  swap_matrices = np.stack([swap_matrix_by_name[name] for name in names3])

  # Pick the transformation matrices for the given residue sequence
  # shape (num_res, 14, 14).
  residue_swaps = swap_matrices[aatype]

  # Apply it to the ground truth positions. shape (num_res, 14, 3).
  prot["atom14_alt_gt_positions"] = np.einsum(
      "rac,rab->rbc", gt_positions14, residue_swaps)

  # The alternative mask differs from the ground truth mask when only one
  # atom of an ambiguous pair has a ground truth position.
  prot["atom14_alt_gt_exists"] = np.einsum(
      "ra,rab->rb", gt_exists14, residue_swaps)

  # Mark both atoms of every listed swap as ambiguous. shape: (21, 14).
  ambiguous_table = np.zeros((21, 14), dtype=np.float32)
  for resname, swap in rc.residue_atom_renaming_swaps.items():
    for atom_a, atom_b in swap.items():
      type_idx = rc.restype_order[rc.restype_3to1[resname]]
      slots = rc.restype_name_to_atom14_names[resname]
      slot_a = slots.index(atom_a)
      slot_b = slots.index(atom_b)
      ambiguous_table[type_idx, slot_a] = 1
      ambiguous_table[type_idx, slot_b] = 1

  # From this create an ambiguous_mask for the given sequence.
  prot["atom14_atom_is_ambiguous"] = ambiguous_table[aatype]

  return prot
|
af_backprop/alphafold/data/templates.py
ADDED
@@ -0,0 +1,910 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Functions for getting templates and calculating template features."""
|
16 |
+
import dataclasses
|
17 |
+
import datetime
|
18 |
+
import glob
|
19 |
+
import os
|
20 |
+
import re
|
21 |
+
from typing import Any, Dict, Mapping, Optional, Sequence, Tuple
|
22 |
+
|
23 |
+
from absl import logging
|
24 |
+
from alphafold.common import residue_constants
|
25 |
+
from alphafold.data import mmcif_parsing
|
26 |
+
from alphafold.data import parsers
|
27 |
+
from alphafold.data.tools import kalign
|
28 |
+
import numpy as np
|
29 |
+
|
30 |
+
# Internal import (7716).
|
31 |
+
|
32 |
+
|
33 |
+
class Error(Exception):
  """Common base class for the template-processing errors in this module."""


class NoChainsError(Error):
  """Raised when the template mmCIF contains no chains at all."""


class SequenceNotInTemplateError(Error):
  """Raised when the template mmCIF does not contain the hit sequence."""


class NoAtomDataInTemplateError(Error):
  """Raised when the template mmCIF provides no atom positions."""


class TemplateAtomMaskAllZerosError(Error):
  """Raised when every atom position of the template mmCIF is masked out."""


class QueryToTemplateAlignError(Error):
  """Raised when the query cannot be aligned to the template."""


class CaDistanceError(Error):
  """Raised when a CA atom distance is larger than the allowed threshold."""


class MultipleChainsError(Error):
  """Raised when a chain ID resolves to more than one chain."""
|
63 |
+
|
64 |
+
|
65 |
+
# Prefilter exceptions.
|
66 |
+
class PrefilterError(Exception):
  """Common base class for errors raised by the template prefilter."""


class DateError(PrefilterError):
  """Raised when the hit was released after the maximum allowed date."""


class PdbIdError(PrefilterError):
  """Raised when the hit has the same PDB ID as the query."""


class AlignRatioError(PrefilterError):
  """Raised when too small a fraction of the query is aligned to the hit."""


class DuplicateError(PrefilterError):
  """Raised when the hit is an exact subsequence of the query."""


class LengthError(PrefilterError):
  """Raised when the hit is too short."""
|
88 |
+
|
89 |
+
|
90 |
+
# Maps each template feature name to a NumPy-compatible dtype.
# The two string-valued features use the builtin `object`: the `np.object`
# alias was deprecated in NumPy 1.20 and removed in NumPy 1.24, where merely
# evaluating it raises AttributeError and breaks the import of this module.
TEMPLATE_FEATURES = {
    'template_aatype': np.float32,
    'template_all_atom_masks': np.float32,
    'template_all_atom_positions': np.float32,
    'template_domain_names': object,
    'template_sequence': object,
    'template_sum_probs': np.float32,
}
|
98 |
+
|
99 |
+
|
100 |
+
def _get_pdb_id_and_chain(hit: parsers.TemplateHit) -> Tuple[str, str]:
  """Extracts the PDB id and the chain id from the name of a template hit.

  Args:
    hit: Template hit whose `name` starts with `<PDB id>_<chain id>`.

  Returns:
    A `(pdb_id, chain_id)` tuple. The PDB id is lower-cased; the chain id is
    returned exactly as it appears in the hit name.

  Raises:
    ValueError: If `hit.name` does not start with `<PDB id>_<chain id>`.
  """
  # Four alphanumeric characters for the PDB id, an underscore, then a chain id
  # made of one or more alphanumeric characters ('.' stands for unknown).
  prefix = re.match(r'[a-zA-Z\d]{4}_[a-zA-Z0-9.]+', hit.name)
  if prefix is None:
    raise ValueError(f'hit.name did not start with PDBID_chain: {hit.name}')
  # Neither character class admits '_', so the match contains exactly one.
  pdb_id, _, chain_id = prefix.group(0).partition('_')
  return pdb_id.lower(), chain_id
|
108 |
+
|
109 |
+
|
110 |
+
def _is_after_cutoff(
    pdb_id: str,
    release_dates: Mapping[str, datetime.datetime],
    release_date_cutoff: Optional[datetime.datetime]) -> bool:
  """Tells whether a template was released later than the allowed cutoff.

  Args:
    pdb_id: 4 letter pdb code.
    release_dates: Mapping from PDB ids to their structure release dates.
    release_date_cutoff: Latest release date that is acceptable for the query.

  Returns:
    True when the release date of `pdb_id` is strictly after the cutoff.
    False otherwise, including when `pdb_id` has no known release date.

  Raises:
    ValueError: If `release_date_cutoff` is None.
  """
  if release_date_cutoff is None:
    raise ValueError('The release_date_cutoff must not be None.')

  if pdb_id not in release_dates:
    # This is only a cheap prefilter that cuts down the number of mmCIF files
    # to parse, so an unknown release date is logged and let through.
    logging.warning('Template structure not in release dates dict: %s', pdb_id)
    return False

  return release_dates[pdb_id] > release_date_cutoff
|
133 |
+
|
134 |
+
|
135 |
+
def _parse_obsolete(obsolete_file_path: str) -> Mapping[str, str]:
  """Reads the PDB list of obsolete entries into an old-id -> new-id mapping.

  Args:
    obsolete_file_path: Path to the PDB data file listing obsolete PDB ids.

  Returns:
    A dictionary from the lower-cased obsolete PDB id to the lower-cased id of
    the entry replacing it. Entries without a replacement are left out.
  """
  replacements = {}
  with open(obsolete_file_path) as f:
    for raw_line in f:
      entry = raw_line.strip()
      # Obsolete entries that name no successor are too short to hold one.
      if not (entry.startswith('OBSLTE') and len(entry) > 30):
        continue
      # Fixed-width record: the obsolete id sits in columns 20-23 and its
      # successor in columns 29-32, e.g.
      # 'OBSLTE    31-JUL-94 116L     216L'
      obsolete_id = entry[20:24].lower()
      successor_id = entry[29:33].lower()
      replacements[obsolete_id] = successor_id
  return replacements
|
149 |
+
|
150 |
+
|
151 |
+
def _parse_release_dates(path: str) -> Mapping[str, datetime.datetime]:
  """Parses release dates file, returns a mapping from PDBs to release dates.

  Each non-blank line is expected to look like `<pdb id>:<YYYY-MM-DD...>`. Only
  the first ten characters of the value are used, so a trailing time of day
  (which itself contains ':') is tolerated. Blank lines are skipped.

  Args:
    path: Path to the release dates file; its name must end with 'txt'.

  Returns:
    A dictionary mapping each (whitespace-stripped) PDB id to its release date.

  Raises:
    ValueError: If `path` does not end with 'txt', or if a non-blank line does
      not hold a parseable date.
  """
  if not path.endswith('txt'):
    raise ValueError('Invalid format of the release date file %s.' % path)

  release_dates = {}
  with open(path, 'r') as f:
    for line in f:
      # A blank (e.g. trailing) line carries no entry; skip it rather than
      # crash on it.
      if not line.strip():
        continue
      # Split on the first ':' only so that a value such as
      # '2019-12-31 00:00:00' is not broken into extra fields.
      pdb_id, _, date = line.partition(':')
      date = date.strip()
      # Python 3.6 doesn't have datetime.date.fromisoformat() which is about
      # 90x faster than strptime. However, splitting the string manually is
      # about 10x faster than strptime.
      release_dates[pdb_id.strip()] = datetime.datetime(
          year=int(date[:4]), month=int(date[5:7]), day=int(date[8:10]))
  return release_dates
|
167 |
+
|
168 |
+
|
169 |
+
def _assess_hhsearch_hit(
    hit: parsers.TemplateHit,
    hit_pdb_code: str,
    query_sequence: str,
    query_pdb_code: Optional[str],
    release_dates: Mapping[str, datetime.datetime],
    release_date_cutoff: datetime.datetime,
    max_subsequence_ratio: float = 0.95,
    min_align_ratio: float = 0.1) -> bool:
  """Prefilters a template hit without parsing its mmCIF file.

  The checks run in a fixed order (release date, PDB id, align ratio,
  duplicate, length) and the first one that fails raises.

  Args:
    hit: HhrHit for the template.
    hit_pdb_code: The 4 letter pdb code of the template hit. It may differ
      from the code in the hit itself because the original pdb might have
      become obsolete.
    query_sequence: Amino acid sequence of the query.
    query_pdb_code: 4 letter pdb code of the query, or None to skip the
      identical-PDB-id check.
    release_dates: Dictionary mapping pdb codes to their structure release
      dates.
    release_date_cutoff: Max release date that is valid for this query.
    max_subsequence_ratio: Exclude any exact matches with this much overlap.
    min_align_ratio: Minimum overlap between the template and query.

  Returns:
    True if the hit passed the prefilter. Raises an exception otherwise.

  Raises:
    DateError: If the hit date was after the max allowed date.
    PdbIdError: If the hit PDB ID was identical to the query.
    AlignRatioError: If the hit align ratio to the query was too small.
    DuplicateError: If the hit was an exact subsequence of the query.
    LengthError: If the hit was too short.
  """
  fraction_aligned = hit.aligned_cols / len(query_sequence)

  ungapped_template = hit.hit_sequence.replace('-', '')
  length_ratio = float(len(ungapped_template)) / len(query_sequence)

  # A template that is an exact, near full-length subsequence of the query is
  # treated as a duplicate; duplicate entries in the PDB database cause this.
  is_duplicate = (ungapped_template in query_sequence and
                  length_ratio > max_subsequence_ratio)

  if _is_after_cutoff(hit_pdb_code, release_dates, release_date_cutoff):
    raise DateError(f'Date ({release_dates[hit_pdb_code]}) > max template date '
                    f'({release_date_cutoff}).')

  same_pdb_as_query = (
      query_pdb_code is not None and
      query_pdb_code.lower() == hit_pdb_code.lower())
  if same_pdb_as_query:
    raise PdbIdError('PDB code identical to Query PDB code.')

  if fraction_aligned <= min_align_ratio:
    raise AlignRatioError('Proportion of residues aligned to query too small. '
                          f'Align ratio: {fraction_aligned}.')

  if is_duplicate:
    raise DuplicateError('Template is an exact subsequence of query with large '
                         f'coverage. Length ratio: {length_ratio}.')

  template_length = len(ungapped_template)
  if template_length < 10:
    raise LengthError(f'Template too short. Length: {template_length}.')

  return True
|
234 |
+
|
235 |
+
|
236 |
+
def _find_template_in_pdb(
    template_chain_id: str,
    template_sequence: str,
    mmcif_object: mmcif_parsing.MmcifObject) -> Tuple[str, str, int]:
  """Locates the template sequence among the chains of a parsed mmCIF.

  Three strategies are tried, and the first that succeeds wins:
    1. The chain named `template_chain_id` contains the sequence verbatim.
    2. Any chain contains the sequence verbatim.
    3. Any chain fuzzily matches the sequence, where an 'X' on either side
       acts as a wildcard.

  Args:
    template_chain_id: The template chain ID.
    template_sequence: The template chain sequence.
    mmcif_object: The PDB object to search for the template in.

  Returns:
    A tuple with:
    * The chain sequence that was found to match the template in the PDB object.
    * The ID of the chain that is being returned.
    * The offset where the template sequence starts in the chain sequence.

  Raises:
    SequenceNotInTemplateError: If none of the three strategies finds a match.
  """
  pdb_id = mmcif_object.file_id
  chains = mmcif_object.chain_to_seqres

  # Strategy 1: exact (sub)sequence match within the requested chain.
  requested_seqres = chains.get(template_chain_id)
  if requested_seqres:
    offset = requested_seqres.find(template_sequence)
    if offset != -1:
      logging.info(
          'Found an exact template match %s_%s.', pdb_id, template_chain_id)
      return requested_seqres, template_chain_id, offset

  # Strategy 2: exact (sub)sequence match within any chain.
  for chain_id, seqres in chains.items():
    if not seqres:
      continue
    offset = seqres.find(template_sequence)
    if offset != -1:
      logging.info('Found a sequence-only match %s_%s.', pdb_id, chain_id)
      return seqres, chain_id, offset

  # Strategy 3: fuzzy match with X as a wildcard. Non-capturing groups (?:_)
  # are used to stay clear of the 100 named groups limit.
  fuzzy_pattern = re.compile(''.join(
      '.' if aa == 'X' else '(?:%s|X)' % aa for aa in template_sequence))
  for chain_id, seqres in chains.items():
    fuzzy_match = fuzzy_pattern.search(seqres)
    if fuzzy_match:
      logging.info('Found a fuzzy sequence-only match %s_%s.', pdb_id, chain_id)
      return seqres, chain_id, fuzzy_match.start()

  # Nothing matched.
  raise SequenceNotInTemplateError(
      'Could not find the template sequence in %s_%s. Template sequence: %s, '
      'chain_to_seqres: %s' % (pdb_id, template_chain_id, template_sequence,
                               mmcif_object.chain_to_seqres))
|
298 |
+
|
299 |
+
|
300 |
+
def _realign_pdb_template_to_query(
    old_template_sequence: str,
    template_chain_id: str,
    mmcif_object: mmcif_parsing.MmcifObject,
    old_mapping: Mapping[int, int],
    kalign_binary_path: str) -> Tuple[str, Mapping[int, int]]:
  """Aligns template from the mmcif_object to the query.

  In case PDB70 contains a different version of the template sequence, we need
  to perform a realignment to the actual sequence that is in the mmCIF file.
  This method performs such realignment, but returns the new sequence and
  mapping only if the sequence in the mmCIF file is 90% identical to the old
  sequence.

  Note that the old_template_sequence comes from the hit, and contains only that
  part of the chain that matches with the query while the new_template_sequence
  is the full chain.

  Args:
    old_template_sequence: The template sequence that was returned by the PDB
      template search (typically done using HHSearch).
    template_chain_id: The template chain id was returned by the PDB template
      search (typically done using HHSearch). This is used to find the right
      chain in the mmcif_object chain_to_seqres mapping.
    mmcif_object: A mmcif_object which holds the actual template data.
    old_mapping: A mapping from the query sequence to the template sequence.
      This mapping will be used to compute the new mapping from the query
      sequence to the actual mmcif_object template sequence by aligning the
      old_template_sequence and the actual template sequence.
    kalign_binary_path: The path to a kalign executable.

  Returns:
    A tuple (new_template_sequence, new_query_to_template_mapping) where:
    * new_template_sequence is the actual template sequence that was found in
      the mmcif_object.
    * new_query_to_template_mapping is the new mapping from the query to the
      actual template found in the mmcif_object. Query residues whose old
      template residue has no counterpart in the new sequence map to -1.

  Raises:
    QueryToTemplateAlignError:
    * If the chain cannot be found in the mmcif_object.
    * If there was an error thrown by the alignment tool.
    * Or if the actual template sequence differs by more than 10% from the
      old_template_sequence.
  """
  aligner = kalign.Kalign(binary_path=kalign_binary_path)
  new_template_sequence = mmcif_object.chain_to_seqres.get(
      template_chain_id, '')

  # Sometimes the template chain id is unknown. But if there is only a single
  # sequence within the mmcif_object, it is safe to assume it is that one.
  if not new_template_sequence:
    if len(mmcif_object.chain_to_seqres) == 1:
      logging.info('Could not find %s in %s, but there is only 1 sequence, so '
                   'using that one.',
                   template_chain_id,
                   mmcif_object.file_id)
      new_template_sequence = list(mmcif_object.chain_to_seqres.values())[0]
    else:
      raise QueryToTemplateAlignError(
          f'Could not find chain {template_chain_id} in {mmcif_object.file_id}. '
          'If there are no mmCIF parsing errors, it is possible it was not a '
          'protein chain.')

  try:
    (old_aligned_template, new_aligned_template), _ = parsers.parse_a3m(
        aligner.align([old_template_sequence, new_template_sequence]))
  except Exception as e:
    # Chain explicitly so the aligner's own traceback is kept as the cause.
    raise QueryToTemplateAlignError(
        'Could not align old template %s to template %s (%s_%s). Error: %s' %
        (old_template_sequence, new_template_sequence, mmcif_object.file_id,
         template_chain_id, str(e))) from e

  logging.info('Old aligned template: %s\nNew aligned template: %s',
               old_aligned_template, new_aligned_template)

  # Walk the two aligned rows in lockstep, tracking the ungapped position in
  # each, and record every column where both rows hold a residue.
  old_to_new_template_mapping = {}
  old_template_index = -1
  new_template_index = -1
  num_same = 0
  for old_template_aa, new_template_aa in zip(
      old_aligned_template, new_aligned_template):
    if old_template_aa != '-':
      old_template_index += 1
    if new_template_aa != '-':
      new_template_index += 1
    if old_template_aa != '-' and new_template_aa != '-':
      old_to_new_template_mapping[old_template_index] = new_template_index
      if old_template_aa == new_template_aa:
        num_same += 1

  # Require at least 90 % sequence identity wrt to the shorter of the sequences.
  if float(num_same) / min(
      len(old_template_sequence), len(new_template_sequence)) < 0.9:
    raise QueryToTemplateAlignError(
        'Insufficient similarity of the sequence in the database: %s to the '
        'actual sequence in the mmCIF file %s_%s: %s. We require at least '
        '90 %% similarity wrt to the shorter of the sequences. This is not a '
        'problem unless you think this is a template that should be included.' %
        (old_template_sequence, mmcif_object.file_id, template_chain_id,
         new_template_sequence))

  # Compose query -> old template -> new template. -1 marks a query residue
  # whose old template residue was aligned against a gap.
  new_query_to_template_mapping = {}
  for query_index, old_template_index in old_mapping.items():
    new_query_to_template_mapping[query_index] = (
        old_to_new_template_mapping.get(old_template_index, -1))

  new_template_sequence = new_template_sequence.replace('-', '')

  return new_template_sequence, new_query_to_template_mapping
|
409 |
+
|
410 |
+
|
411 |
+
def _check_residue_distances(all_positions: np.ndarray,
                             all_positions_mask: np.ndarray,
                             max_ca_ca_distance: float):
  """Verifies that adjacent residues with a CA atom are not too far apart.

  Only pairs of directly consecutive residues whose CA atoms are both unmasked
  are compared.

  Args:
    all_positions: Per-residue atom coordinates, indexed by residue and then
      by atom type.
    all_positions_mask: Per-residue atom mask, indexed like `all_positions`;
      a truthy entry means the atom is present.
    max_ca_ca_distance: Largest CA-CA distance that is accepted.

  Raises:
    CaDistanceError: If two neighbouring unmasked CA atoms are further apart
      than `max_ca_ca_distance`.
  """
  ca_index = residue_constants.atom_order['CA']
  # CA coordinates of the directly preceding residue, or None when that
  # residue has no unmasked CA atom (or there is no preceding residue).
  previous_ca = None
  for i, (coords, mask) in enumerate(zip(all_positions, all_positions_mask)):
    if not bool(mask[ca_index]):
      previous_ca = None
      continue

    current_ca = coords[ca_index]
    if previous_ca is not None:
      distance = np.linalg.norm(current_ca - previous_ca)
      if distance > max_ca_ca_distance:
        raise CaDistanceError(
            'The distance between residues %d and %d is %f > limit %f.' % (
                i, i + 1, distance, max_ca_ca_distance))
    previous_ca = current_ca
|
430 |
+
|
431 |
+
|
432 |
+
def _get_atom_positions(
    mmcif_object: mmcif_parsing.MmcifObject,
    auth_chain_id: str,
    max_ca_ca_distance: float) -> Tuple[np.ndarray, np.ndarray]:
  """Collects atom coordinates and an atom mask for one chain of an mmCIF.

  Args:
    mmcif_object: Parsed mmCIF holding the structure and the SEQRES data.
    auth_chain_id: ID of the chain to read.
    max_ca_ca_distance: Largest accepted distance between the CA atoms of
      neighbouring residues, passed on to `_check_residue_distances`.

  Returns:
    A tuple `(all_positions, all_positions_mask)` with one row per SEQRES
    residue of the chain and one column per atom type. Rows of residues that
    are missing from the structure stay all zero.

  Raises:
    MultipleChainsError: If the structure does not contain exactly one chain
      with id `auth_chain_id`.
  """
  num_res = len(mmcif_object.chain_to_seqres[auth_chain_id])

  matching_chains = [
      candidate for candidate in mmcif_object.structure.get_chains()
      if candidate.id == auth_chain_id]
  if len(matching_chains) != 1:
    raise MultipleChainsError(
        f'Expected exactly one chain in structure with id {auth_chain_id}.')
  chain = matching_chains[0]

  num_atom_types = residue_constants.atom_type_num
  atom_order = residue_constants.atom_order
  all_positions = np.zeros([num_res, num_atom_types, 3])
  all_positions_mask = np.zeros([num_res, num_atom_types], dtype=np.int64)

  for res_index in range(num_res):
    res_at_position = mmcif_object.seqres_to_structure[auth_chain_id][res_index]
    if res_at_position.is_missing:
      # Nothing to copy: the output rows were initialised to zero.
      continue

    pos = np.zeros([num_atom_types, 3], dtype=np.float32)
    mask = np.zeros([num_atom_types], dtype=np.float32)
    residue_key = (res_at_position.hetflag,
                   res_at_position.position.residue_number,
                   res_at_position.position.insertion_code)
    res = chain[residue_key]
    for atom in res.get_atoms():
      atom_name = atom.get_name()
      x, y, z = atom.get_coord()
      if atom_name in atom_order:
        slot = atom_order[atom_name]
      elif atom_name.upper() == 'SE' and res.get_resname() == 'MSE':
        # Put the coordinates of the selenium atom in the sulphur column.
        slot = atom_order['SD']
      else:
        continue
      pos[slot] = [x, y, z]
      mask[slot] = 1.0

    all_positions[res_index] = pos
    all_positions_mask[res_index] = mask

  _check_residue_distances(
      all_positions, all_positions_mask, max_ca_ca_distance)
  return all_positions, all_positions_mask
|
473 |
+
|
474 |
+
|
475 |
+
def _extract_template_features(
    mmcif_object: mmcif_parsing.MmcifObject,
    pdb_id: str,
    mapping: Mapping[int, int],
    template_sequence: str,
    query_sequence: str,
    template_chain_id: str,
    kalign_binary_path: str) -> Tuple[Dict[str, Any], Optional[str]]:
  """Parses atom positions in the target structure and aligns with the query.

  Atoms for each residue in the template structure are indexed to coincide
  with their corresponding residue in the query sequence, according to the
  alignment mapping provided.

  Args:
    mmcif_object: mmcif_parsing.MmcifObject representing the template.
    pdb_id: PDB code for the template.
    mapping: Dictionary mapping indices in the query sequence to indices in
      the template sequence. A template index of -1 means the query residue
      has no template counterpart and is left as a gap.
    template_sequence: String describing the amino acid sequence for the
      template protein.
    query_sequence: String describing the amino acid sequence for the query
      protein.
    template_chain_id: String ID describing which chain in the structure proto
      should be used.
    kalign_binary_path: The path to a kalign executable used for template
        realignment.

  Returns:
    A tuple with:
    * A dictionary containing the extra features derived from the template
      protein structure.
    * A warning message if the hit was realigned to the actual mmCIF sequence.
      Otherwise None.

  Raises:
    NoChainsError: If the mmcif object doesn't contain any chains.
    SequenceNotInTemplateError: If the given chain id / sequence can't
      be found in the mmcif object.
    QueryToTemplateAlignError: If the actual template in the mmCIF file
      can't be aligned to the query.
    NoAtomDataInTemplateError: If the mmcif object doesn't contain
      atom positions.
    TemplateAtomMaskAllZerosError: If the mmcif object doesn't have any
      unmasked residues (this includes an empty mapping).
  """
  if mmcif_object is None or not mmcif_object.chain_to_seqres:
    raise NoChainsError('No chains in PDB: %s_%s' % (pdb_id, template_chain_id))

  warning = None
  try:
    seqres, chain_id, mapping_offset = _find_template_in_pdb(
        template_chain_id=template_chain_id,
        template_sequence=template_sequence,
        mmcif_object=mmcif_object)
  except SequenceNotInTemplateError:
    # If PDB70 contains a different version of the template, we use the sequence
    # from the mmcif_object.
    chain_id = template_chain_id
    warning = (
        f'The exact sequence {template_sequence} was not found in '
        f'{pdb_id}_{chain_id}. Realigning the template to the actual sequence.')
    logging.warning(warning)
    # This throws an exception if it fails to realign the hit.
    seqres, mapping = _realign_pdb_template_to_query(
        old_template_sequence=template_sequence,
        template_chain_id=template_chain_id,
        mmcif_object=mmcif_object,
        old_mapping=mapping,
        kalign_binary_path=kalign_binary_path)
    logging.info('Sequence in %s_%s: %s successfully realigned to %s',
                 pdb_id, chain_id, template_sequence, seqres)
    # The template sequence changed.
    template_sequence = seqres
    # No mapping offset, the query is aligned to the actual sequence.
    mapping_offset = 0

  try:
    # Essentially set to infinity - we don't want to reject templates unless
    # they're really really bad.
    all_atom_positions, all_atom_mask = _get_atom_positions(
        mmcif_object, chain_id, max_ca_ca_distance=150.0)
  except (CaDistanceError, KeyError) as ex:
    raise NoAtomDataInTemplateError(
        'Could not get atom data (%s_%s): %s' % (pdb_id, chain_id, str(ex))
        ) from ex

  # Start from "no template residue" for every query position: zero
  # coordinates, zero mask and a gap character.
  num_query_res = len(query_sequence)
  templates_all_atom_positions = [
      np.zeros((residue_constants.atom_type_num, 3))
      for _ in range(num_query_res)]
  templates_all_atom_masks = [
      np.zeros(residue_constants.atom_type_num) for _ in range(num_query_res)]
  output_templates_sequence = ['-'] * num_query_res

  for k, v in mapping.items():
    # The realignment emits -1 for query residues without a template
    # counterpart. Used as an index it would silently select the *last*
    # template residue, so such entries are left as gaps instead.
    if v == -1:
      continue
    template_index = v + mapping_offset
    templates_all_atom_positions[k] = all_atom_positions[template_index]
    templates_all_atom_masks[k] = all_atom_mask[template_index]
    output_templates_sequence[k] = template_sequence[v]

  # Alanine (AA with the lowest number of atoms) has 5 atoms (C, CA, CB, N, O).
  if np.sum(templates_all_atom_masks) < 5:
    # min()/max() of an empty mapping would raise ValueError and hide the
    # intended error type, so the range is only computed when it exists.
    if mapping:
      residue_range = '%d-%d' % (min(mapping.values()) + mapping_offset,
                                 max(mapping.values()) + mapping_offset)
    else:
      residue_range = 'none (empty mapping)'
    raise TemplateAtomMaskAllZerosError(
        'Template all atom mask was all zeros: %s_%s. Residue range: %s' %
        (pdb_id, chain_id, residue_range))

  output_templates_sequence = ''.join(output_templates_sequence)

  templates_aatype = residue_constants.sequence_to_onehot(
      output_templates_sequence, residue_constants.HHBLITS_AA_TO_ID)

  return (
      {
          'template_all_atom_positions': np.array(templates_all_atom_positions),
          'template_all_atom_masks': np.array(templates_all_atom_masks),
          'template_sequence': output_templates_sequence.encode(),
          'template_aatype': np.array(templates_aatype),
          'template_domain_names': f'{pdb_id.lower()}_{chain_id}'.encode(),
      },
      warning)
|
603 |
+
|
604 |
+
|
605 |
+
def _build_query_to_hit_index_mapping(
    hit_query_sequence: str,
    hit_sequence: str,
    indices_hit: Sequence[int],
    indices_query: Sequence[int],
    original_query_sequence: str) -> Mapping[int, int]:
  """Maps positions of the original query onto positions of the hit.

  hit_query_sequence and hit_sequence are the two gapped rows of one
  alignment. hit_query_sequence covers only the part of the original query
  that matched the hit, so the indices taken from the .hhr are shifted here
  to refer to the original query sequence.

  Args:
    hit_query_sequence: The portion of the query sequence that is in the .hhr
      hit
    hit_sequence: The portion of the hit sequence that is in the .hhr
    indices_hit: The indices for each aminoacid relative to the hit sequence
    indices_query: The indices for each aminoacid relative to the original query
      sequence
    original_query_sequence: String describing the original query sequence.

  Returns:
    Dictionary with indices in the original query sequence as keys and indices
    in the hit sequence as values.
  """
  # An empty hit has no aligned residues, hence nothing to map.
  if not hit_query_sequence:
    return {}

  def _rebase(indices):
    """Shifts indices so the smallest non-gap one is 0; gaps (-1) stay -1."""
    base = min(idx for idx in indices if idx > -1)
    return [idx - base if idx > -1 else -1 for idx in indices]

  # Strip the gaps, then locate the matched query part in the original query.
  ungapped_query = hit_query_sequence.replace('-', '')
  ungapped_hit_length = len(hit_sequence.replace('-', ''))
  query_offset = original_query_sequence.find(ungapped_query)
  query_length = len(original_query_sequence)

  rebased_hit = _rebase(indices_hit)
  rebased_query = _rebase(indices_query)

  # Pair the columns up, keeping only those where both rows hold a residue and
  # both resulting indices are in range.
  mapping = {}
  for query_idx, hit_idx in zip(rebased_query, rebased_hit):
    if hit_idx == -1 or query_idx == -1:
      continue
    original_query_idx = query_idx + query_offset
    if hit_idx < ungapped_hit_length and original_query_idx < query_length:
      mapping[original_query_idx] = hit_idx

  return mapping
|
660 |
+
|
661 |
+
|
662 |
+
@dataclasses.dataclass(frozen=True)
class SingleHitResult:
  """Immutable outcome of processing one template hit.

  Produced by `_process_single_hit` and consumed by
  `TemplateHitFeaturizer.get_templates`, which collects the error and warning
  strings and keeps the features when they are not None.
  """
  # Template features for the hit, or None when the hit was skipped or failed.
  features: Optional[Mapping[str, Any]]
  # Human-readable error message, or None when no error is reported.
  error: Optional[str]
  # Human-readable warning message, or None when no warning is reported.
  warning: Optional[str]
|
667 |
+
|
668 |
+
|
669 |
+
def _process_single_hit(
    query_sequence: str,
    query_pdb_code: Optional[str],
    hit: parsers.TemplateHit,
    mmcif_dir: str,
    max_template_date: datetime.datetime,
    release_dates: Mapping[str, datetime.datetime],
    obsolete_pdbs: Mapping[str, str],
    kalign_binary_path: str,
    strict_error_check: bool = False) -> SingleHitResult:
  """Tries to extract template features from a single HHSearch hit.

  The hit is first prefiltered, then the corresponding mmCIF file is read from
  `mmcif_dir` and parsed, its release date is checked against
  `max_template_date`, and finally the template features are extracted.

  Args:
    query_sequence: The query amino-acid sequence.
    query_pdb_code: Optional PDB code of the query; used for prefiltering and
      for log messages.
    hit: The template hit to process.
    mmcif_dir: Directory containing `<pdb_code>.cif` files.
    max_template_date: Templates released after this date are rejected.
    release_dates: Mapping from PDB code to release date.
    obsolete_pdbs: Mapping from obsolete PDB code to its replacement code.
    kalign_binary_path: Path to the kalign executable, forwarded to the feature
      extraction step.
    strict_error_check: If True, selected prefilter failures, too-recent
      templates and missing-data problems are reported as errors instead of
      being silently skipped or reported as warnings.

  Returns:
    A SingleHitResult. `features` is None whenever the hit was rejected or
    feature extraction failed.

  Raises:
    Whatever `open` raises if the mmCIF file cannot be read; that call is not
    wrapped in any handler here.
  """
  # Fail hard if we can't get the PDB ID and chain name from the hit.
  hit_pdb_code, hit_chain_id = _get_pdb_id_and_chain(hit)

  # An ID with no known release date may be obsolete; swap in its replacement.
  if hit_pdb_code not in release_dates:
    if hit_pdb_code in obsolete_pdbs:
      hit_pdb_code = obsolete_pdbs[hit_pdb_code]

  # Pass hit_pdb_code since it might have changed due to the pdb being obsolete.
  try:
    _assess_hhsearch_hit(
        hit=hit,
        hit_pdb_code=hit_pdb_code,
        query_sequence=query_sequence,
        query_pdb_code=query_pdb_code,
        release_dates=release_dates,
        release_date_cutoff=max_template_date)
  except PrefilterError as e:
    msg = f'hit {hit_pdb_code}_{hit_chain_id} did not pass prefilter: {str(e)}'
    logging.info('%s: %s', query_pdb_code, msg)
    if strict_error_check and isinstance(
        e, (DateError, PdbIdError, DuplicateError)):
      # In strict mode we treat some prefilter cases as errors.
      return SingleHitResult(features=None, error=msg, warning=None)

    # Otherwise a prefiltered hit is skipped without error or warning.
    return SingleHitResult(features=None, error=None, warning=None)

  mapping = _build_query_to_hit_index_mapping(
      hit.query, hit.hit_sequence, hit.indices_hit, hit.indices_query,
      query_sequence)

  # The mapping is from the query to the actual hit sequence, so we need to
  # remove gaps (which regardless have a missing confidence score).
  template_sequence = hit.hit_sequence.replace('-', '')

  cif_path = os.path.join(mmcif_dir, hit_pdb_code + '.cif')
  logging.info('Reading PDB entry from %s. Query: %s, template: %s',
               cif_path, query_sequence, template_sequence)
  # Fail if we can't find the mmCIF file.
  with open(cif_path, 'r') as cif_file:
    cif_string = cif_file.read()

  parsing_result = mmcif_parsing.parse(
      file_id=hit_pdb_code, mmcif_string=cif_string)

  # Re-check the release date using the date stored in the parsed mmCIF header.
  if parsing_result.mmcif_object is not None:
    hit_release_date = datetime.datetime.strptime(
        parsing_result.mmcif_object.header['release_date'], '%Y-%m-%d')
    if hit_release_date > max_template_date:
      error = ('Template %s date (%s) > max template date (%s).' %
               (hit_pdb_code, hit_release_date, max_template_date))
      if strict_error_check:
        return SingleHitResult(features=None, error=error, warning=None)
      else:
        # Non-strict mode: log and skip the hit without recording an error.
        logging.warning(error)
        return SingleHitResult(features=None, error=None, warning=None)

  try:
    features, realign_warning = _extract_template_features(
        mmcif_object=parsing_result.mmcif_object,
        pdb_id=hit_pdb_code,
        mapping=mapping,
        template_sequence=template_sequence,
        query_sequence=query_sequence,
        template_chain_id=hit_chain_id,
        kalign_binary_path=kalign_binary_path)
    features['template_sum_probs'] = [hit.sum_probs]

    # It is possible there were some errors when parsing the other chains in the
    # mmCIF file, but the template features for the chain we want were still
    # computed. In such case the mmCIF parsing errors are not relevant.
    return SingleHitResult(
        features=features, error=None, warning=realign_warning)
  except (NoChainsError, NoAtomDataInTemplateError,
          TemplateAtomMaskAllZerosError) as e:
    # These 3 errors indicate missing mmCIF experimental data rather than a
    # problem with the template search, so turn them into warnings.
    warning = ('%s_%s (sum_probs: %.2f, rank: %d): feature extracting errors: '
               '%s, mmCIF parsing errors: %s'
               % (hit_pdb_code, hit_chain_id, hit.sum_probs, hit.index,
                  str(e), parsing_result.errors))
    if strict_error_check:
      return SingleHitResult(features=None, error=warning, warning=None)
    else:
      return SingleHitResult(features=None, error=None, warning=warning)
  except Error as e:
    # Any other module-level Error is always reported as an error.
    error = ('%s_%s (sum_probs: %.2f, rank: %d): feature extracting errors: '
             '%s, mmCIF parsing errors: %s'
             % (hit_pdb_code, hit_chain_id, hit.sum_probs, hit.index,
                str(e), parsing_result.errors))
    return SingleHitResult(features=None, error=error, warning=None)
|
770 |
+
|
771 |
+
|
772 |
+
@dataclasses.dataclass(frozen=True)
class TemplateSearchResult:
  """Immutable aggregate result of a template search over many hits.

  Returned by `TemplateHitFeaturizer.get_templates`.
  """
  # Per-feature arrays: stacked over the accepted hits, or empty if none.
  features: Mapping[str, Any]
  # Error messages collected from the individual hits.
  errors: Sequence[str]
  # Warning messages collected from the individual hits.
  warnings: Sequence[str]
|
777 |
+
|
778 |
+
|
779 |
+
class TemplateHitFeaturizer:
  """A class for turning hhr hits to template features."""

  def __init__(
      self,
      mmcif_dir: str,
      max_template_date: str,
      max_hits: int,
      kalign_binary_path: str,
      release_dates_path: Optional[str],
      obsolete_pdbs_path: Optional[str],
      strict_error_check: bool = False):
    """Initializes the Template Search.

    Args:
      mmcif_dir: Path to a directory with mmCIF structures. Once a template ID
        is found by HHSearch, this directory is used to retrieve the template
        data.
      max_template_date: The maximum date permitted for template structures. No
        template with date higher than this date will be returned. In ISO8601
        date format, YYYY-MM-DD.
      max_hits: The maximum number of templates that will be returned.
      kalign_binary_path: The path to a kalign executable used for template
        realignment.
      release_dates_path: An optional path to a file with a mapping from PDB IDs
        to their release dates. Thanks to this we don't have to redundantly
        parse mmCIF files to get that information.
      obsolete_pdbs_path: An optional path to a file containing a mapping from
        obsolete PDB IDs to the PDB IDs of their replacements.
      strict_error_check: If True, then the following will be treated as errors:
        * If any template date is after the max_template_date.
        * If any template has identical PDB ID to the query.
        * If any template is a duplicate of the query.
        * Any feature computation errors.

    Raises:
      ValueError: If `mmcif_dir` contains no `*.cif` files, or if
        `max_template_date` is unset or not in YYYY-MM-DD format.
    """
    self._mmcif_dir = mmcif_dir
    if not glob.glob(os.path.join(self._mmcif_dir, '*.cif')):
      logging.error('Could not find CIFs in %s', self._mmcif_dir)
      raise ValueError(f'Could not find CIFs in {self._mmcif_dir}')

    try:
      self._max_template_date = datetime.datetime.strptime(
          max_template_date, '%Y-%m-%d')
    except (TypeError, ValueError) as e:
      # strptime raises TypeError for a non-string value such as None and
      # ValueError for a malformed string. The message below promises to cover
      # the "not set" case, so both are reported uniformly as ValueError.
      raise ValueError(
          'max_template_date must be set and have format YYYY-MM-DD.') from e
    self._max_hits = max_hits
    self._kalign_binary_path = kalign_binary_path
    self._strict_error_check = strict_error_check

    if release_dates_path:
      logging.info('Using precomputed release dates %s.', release_dates_path)
      self._release_dates = _parse_release_dates(release_dates_path)
    else:
      self._release_dates = {}

    if obsolete_pdbs_path:
      logging.info('Using precomputed obsolete pdbs %s.', obsolete_pdbs_path)
      self._obsolete_pdbs = _parse_obsolete(obsolete_pdbs_path)
    else:
      self._obsolete_pdbs = {}

  def get_templates(
      self,
      query_sequence: str,
      query_pdb_code: Optional[str],
      query_release_date: Optional[datetime.datetime],
      hits: Sequence[parsers.TemplateHit]) -> TemplateSearchResult:
    """Computes the templates for given query sequence (more details above).

    Hits are processed in order of decreasing `sum_probs` until `max_hits` of
    them have produced features.

    Args:
      query_sequence: The query amino-acid sequence.
      query_pdb_code: Optional PDB code of the query.
      query_release_date: Optional release date of the query. When given, the
        template cutoff is the earlier of the configured maximum template date
        and this date minus 60 days.
      hits: Template hits to consider.

    Returns:
      A TemplateSearchResult with the per-feature arrays of the accepted hits
      and the error and warning messages collected along the way.
    """
    logging.info('Searching for template for: %s', query_pdb_code)

    template_features = {name: [] for name in TEMPLATE_FEATURES}

    # Always use a max_template_date. Set to query_release_date minus 60 days
    # if that's earlier.
    template_cutoff_date = self._max_template_date
    if query_release_date:
      delta = datetime.timedelta(days=60)
      if query_release_date - delta < template_cutoff_date:
        template_cutoff_date = query_release_date - delta
      assert template_cutoff_date < query_release_date
    assert template_cutoff_date <= self._max_template_date

    num_hits = 0
    errors = []
    warnings = []

    for hit in sorted(hits, key=lambda x: x.sum_probs, reverse=True):
      # We got all the templates we wanted, stop processing hits.
      if num_hits >= self._max_hits:
        break

      result = _process_single_hit(
          query_sequence=query_sequence,
          query_pdb_code=query_pdb_code,
          hit=hit,
          mmcif_dir=self._mmcif_dir,
          max_template_date=template_cutoff_date,
          release_dates=self._release_dates,
          obsolete_pdbs=self._obsolete_pdbs,
          strict_error_check=self._strict_error_check,
          kalign_binary_path=self._kalign_binary_path)

      if result.error:
        errors.append(result.error)

      # There could be an error even if there are some results, e.g. thrown by
      # other unparsable chains in the same mmCIF file.
      if result.warning:
        warnings.append(result.warning)

      if result.features is None:
        logging.info('Skipped invalid hit %s, error: %s, warning: %s',
                     hit.name, result.error, result.warning)
      else:
        # Increment the hit counter, since we got features out of this hit.
        num_hits += 1
        for k in template_features:
          template_features[k].append(result.features[k])

    for name in template_features:
      if num_hits > 0:
        template_features[name] = np.stack(
            template_features[name], axis=0).astype(TEMPLATE_FEATURES[name])
      else:
        # Make sure the feature has correct dtype even if empty.
        template_features[name] = np.array([], dtype=TEMPLATE_FEATURES[name])

    return TemplateSearchResult(
        features=template_features, errors=errors, warnings=warnings)
|
af_backprop/alphafold/data/tools/__init__.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
"""Python wrappers for third party tools."""
|
af_backprop/alphafold/data/tools/hhblits.py
ADDED
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Library to run HHblits from Python."""
|
16 |
+
|
17 |
+
import glob
|
18 |
+
import os
|
19 |
+
import subprocess
|
20 |
+
from typing import Any, Mapping, Optional, Sequence
|
21 |
+
|
22 |
+
from absl import logging
|
23 |
+
from alphafold.data.tools import utils
|
24 |
+
# Internal import (7716).
|
25 |
+
|
26 |
+
|
27 |
+
_HHBLITS_DEFAULT_P = 20
|
28 |
+
_HHBLITS_DEFAULT_Z = 500
|
29 |
+
|
30 |
+
|
31 |
+
class HHBlits:
  """Runs the HHblits command-line tool and collects its output."""

  def __init__(self,
               *,
               binary_path: str,
               databases: Sequence[str],
               n_cpu: int = 4,
               n_iter: int = 3,
               e_value: float = 0.001,
               maxseq: int = 1_000_000,
               realign_max: int = 100_000,
               maxfilt: int = 100_000,
               min_prefilter_hits: int = 1000,
               all_seqs: bool = False,
               alt: Optional[int] = None,
               p: int = _HHBLITS_DEFAULT_P,
               z: int = _HHBLITS_DEFAULT_Z):
    """Stores the HHblits settings after checking the databases exist.

    Args:
      binary_path: Path to the HHblits executable.
      databases: HHblits database paths, each given as the common prefix of the
        database files (i.e. up to but not including _hhm.ffindex etc.).
      n_cpu: Number of CPUs handed to HHblits.
      n_iter: Number of HHblits iterations.
      e_value: The E-value, see the HHblits docs for details.
      maxseq: Maximum number of rows in an input alignment. Only supported in
        HHBlits version 3.1 and higher.
      realign_max: Max number of HMM-HMM hits to realign. HHblits default: 500.
      maxfilt: Max number of hits allowed to pass the 2nd prefilter.
        HHblits default: 20000.
      min_prefilter_hits: Min number of hits to pass prefilter.
        HHblits default: 100.
      all_seqs: Return all sequences in the MSA / do not filter the result MSA.
        HHblits default: False.
      alt: Show up to this many alternative alignments.
      p: Minimum Prob for a hit to be included in the output hhr file.
        HHblits default: 20.
      z: Hard cap on number of hits reported in the hhr file.
        HHblits default: 500. NB: the relevant HHblits flag is -Z not -z.

    Raises:
      ValueError: If no file matching `<database>_*` exists for a database.
    """
    self.binary_path = binary_path
    self.databases = databases

    for prefix in databases:
      if glob.glob(prefix + '_*'):
        continue
      logging.error('Could not find HHBlits database %s', prefix)
      raise ValueError(f'Could not find HHBlits database {prefix}')

    self.n_cpu = n_cpu
    self.n_iter = n_iter
    self.e_value = e_value
    self.maxseq = maxseq
    self.realign_max = realign_max
    self.maxfilt = maxfilt
    self.min_prefilter_hits = min_prefilter_hits
    self.all_seqs = all_seqs
    self.alt = alt
    self.p = p
    self.z = z

  def query(self, input_fasta_path: str) -> Mapping[str, Any]:
    """Runs HHblits on a FASTA file and returns the resulting alignment.

    Args:
      input_fasta_path: Path to the query FASTA file.

    Returns:
      A dict with the keys `a3m` (contents of the output A3M file), `output`
      (captured stdout), `stderr` (captured stderr), `n_iter` and `e_value`.

    Raises:
      RuntimeError: If HHblits exits with a non-zero return code.
    """
    with utils.tmpdir_manager(base_dir='/tmp') as work_dir:
      a3m_path = os.path.join(work_dir, 'output.a3m')

      cmd = [
          self.binary_path,
          '-i', input_fasta_path,
          '-cpu', str(self.n_cpu),
          '-oa3m', a3m_path,
          '-o', '/dev/null',
          '-n', str(self.n_iter),
          '-e', str(self.e_value),
          '-maxseq', str(self.maxseq),
          '-realign_max', str(self.realign_max),
          '-maxfilt', str(self.maxfilt),
          '-min_prefilter_hits', str(self.min_prefilter_hits),
      ]
      if self.all_seqs:
        cmd.append('-all')
      if self.alt:
        cmd.extend(['-alt', str(self.alt)])
      # -p and -Z are only passed when they differ from the HHblits defaults.
      if self.p != _HHBLITS_DEFAULT_P:
        cmd.extend(['-p', str(self.p)])
      if self.z != _HHBLITS_DEFAULT_Z:
        cmd.extend(['-Z', str(self.z)])
      # The database flags go last, one "-d <path>" pair per database.
      for database in self.databases:
        cmd.extend(['-d', database])

      logging.info('Launching subprocess "%s"', ' '.join(cmd))
      process = subprocess.Popen(
          cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

      with utils.timing('HHblits query'):
        stdout, stderr = process.communicate()
        retcode = process.wait()

      if retcode:
        # Logs have a 15k character limit, so log HHblits error line by line.
        logging.error('HHblits failed. HHblits stderr begin:')
        for raw_line in stderr.decode('utf-8').splitlines():
          stripped = raw_line.strip()
          if stripped:
            logging.error(stripped)
        logging.error('HHblits stderr end')
        raise RuntimeError('HHblits failed\nstdout:\n%s\n\nstderr:\n%s\n' % (
            stdout.decode('utf-8'), stderr[:500_000].decode('utf-8')))

      with open(a3m_path) as a3m_file:
        a3m = a3m_file.read()

    return {
        'a3m': a3m,
        'output': stdout,
        'stderr': stderr,
        'n_iter': self.n_iter,
        'e_value': self.e_value,
    }
|
af_backprop/alphafold/data/tools/hhsearch.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Library to run HHsearch from Python."""
|
16 |
+
|
17 |
+
import glob
|
18 |
+
import os
|
19 |
+
import subprocess
|
20 |
+
from typing import Sequence
|
21 |
+
|
22 |
+
from absl import logging
|
23 |
+
|
24 |
+
from alphafold.data.tools import utils
|
25 |
+
# Internal import (7716).
|
26 |
+
|
27 |
+
|
28 |
+
class HHSearch:
  """Runs the HHsearch command-line tool and returns its hhr output."""

  def __init__(self,
               *,
               binary_path: str,
               databases: Sequence[str],
               maxseq: int = 1_000_000):
    """Stores the HHsearch settings after checking the databases exist.

    Args:
      binary_path: Path to the HHsearch executable.
      databases: HHsearch database paths, each given as the common prefix of
        the database files (i.e. up to but not including _hhm.ffindex etc.).
      maxseq: Maximum number of rows in an input alignment. Only supported in
        HHBlits version 3.1 and higher.

    Raises:
      ValueError: If no file matching `<database>_*` exists for a database.
    """
    self.binary_path = binary_path
    self.databases = databases
    self.maxseq = maxseq

    for prefix in databases:
      if glob.glob(prefix + '_*'):
        continue
      logging.error('Could not find HHsearch database %s', prefix)
      raise ValueError(f'Could not find HHsearch database {prefix}')

  def query(self, a3m: str) -> str:
    """Searches the databases with the given A3M alignment.

    Args:
      a3m: The query alignment as an A3M-formatted string.

    Returns:
      The contents of the hhr file written by HHsearch.

    Raises:
      RuntimeError: If HHsearch exits with a non-zero return code.
    """
    with utils.tmpdir_manager(base_dir='/tmp') as work_dir:
      input_path = os.path.join(work_dir, 'query.a3m')
      hhr_path = os.path.join(work_dir, 'output.hhr')
      with open(input_path, 'w') as query_file:
        query_file.write(a3m)

      cmd = [
          self.binary_path,
          '-i', input_path,
          '-o', hhr_path,
          '-maxseq', str(self.maxseq),
      ]
      # The database flags go last, one "-d <path>" pair per database.
      for database in self.databases:
        cmd.extend(['-d', database])

      logging.info('Launching subprocess "%s"', ' '.join(cmd))
      process = subprocess.Popen(
          cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
      with utils.timing('HHsearch query'):
        stdout, stderr = process.communicate()
        retcode = process.wait()

      if retcode:
        # Stderr is truncated to prevent proto size errors in Beam.
        raise RuntimeError(
            'HHSearch failed:\nstdout:\n%s\n\nstderr:\n%s\n' % (
                stdout.decode('utf-8'), stderr[:100_000].decode('utf-8')))

      with open(hhr_path) as hhr_file:
        hhr = hhr_file.read()
    return hhr
|
af_backprop/alphafold/data/tools/hmmbuild.py
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""A Python wrapper for hmmbuild - construct HMM profiles from MSA."""
|
16 |
+
|
17 |
+
import os
|
18 |
+
import re
|
19 |
+
import subprocess
|
20 |
+
|
21 |
+
from absl import logging
|
22 |
+
from alphafold.data.tools import utils
|
23 |
+
# Internal import (7716).
|
24 |
+
|
25 |
+
|
26 |
+
class Hmmbuild(object):
  """Python wrapper of the hmmbuild binary."""

  def __init__(self,
               *,
               binary_path: str,
               singlemx: bool = False):
    """Initializes the Python hmmbuild wrapper.

    Args:
      binary_path: The path to the hmmbuild executable.
      singlemx: Whether to use --singlemx flag. If True, it forces HMMBuild to
        just use a common substitution score matrix.
    """
    self.binary_path = binary_path
    self.singlemx = singlemx

  def build_profile_from_sto(self, sto: str, model_construction='fast') -> str:
    """Builds a HMM for the aligned sequences given as a Stockholm string.

    Args:
      sto: A string with the aligned sequences in the Stockholm format.
      model_construction: Whether to use reference annotation in the msa to
        determine consensus columns ('hand') or default ('fast').

    Returns:
      A string with the profile in the HMM format.

    Raises:
      RuntimeError: If hmmbuild fails.
      ValueError: If model_construction is neither 'hand' nor 'fast'.
    """
    return self._build_profile(sto, model_construction=model_construction)

  def build_profile_from_a3m(self, a3m: str) -> str:
    """Builds a HMM for the aligned sequences given as an A3M string.

    Args:
      a3m: A string with the aligned sequences in the A3M format.

    Returns:
      A string with the profile in the HMM format.

    Raises:
      RuntimeError: If hmmbuild fails.
    """
    lines = []
    for line in a3m.splitlines():
      if not line.startswith('>'):
        line = re.sub('[a-z]+', '', line)  # Remove inserted residues.
      lines.append(line + '\n')
    msa = ''.join(lines)
    return self._build_profile(msa, model_construction='fast')

  def _build_profile(self, msa: str, model_construction: str = 'fast') -> str:
    """Builds a HMM for the aligned sequences given as an MSA string.

    Args:
      msa: A string with the aligned sequences, in A3M or STO format.
      model_construction: Whether to use reference annotation in the msa to
        determine consensus columns ('hand') or default ('fast').

    Returns:
      A string with the profile in the HMM format.

    Raises:
      RuntimeError: If hmmbuild fails.
      ValueError: If model_construction is neither 'hand' nor 'fast'.
    """
    if model_construction not in {'hand', 'fast'}:
      # The two fragments are joined by implicit concatenation, so the space
      # between "only" and "hand" has to be spelled out explicitly.
      raise ValueError(f'Invalid model_construction {model_construction} - only '
                       'hand and fast supported.')

    with utils.tmpdir_manager(base_dir='/tmp') as query_tmp_dir:
      input_query = os.path.join(query_tmp_dir, 'query.msa')
      output_hmm_path = os.path.join(query_tmp_dir, 'output.hmm')

      with open(input_query, 'w') as f:
        f.write(msa)

      cmd = [self.binary_path]
      # If adding flags, we have to do so before the output and input:

      if model_construction == 'hand':
        cmd.append(f'--{model_construction}')
      if self.singlemx:
        cmd.append('--singlemx')
      cmd.extend([
          '--amino',
          output_hmm_path,
          input_query,
      ])

      logging.info('Launching subprocess %s', cmd)
      process = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)

      with utils.timing('hmmbuild query'):
        stdout, stderr = process.communicate()
        retcode = process.wait()
        logging.info('hmmbuild stdout:\n%s\n\nstderr:\n%s\n',
                     stdout.decode('utf-8'), stderr.decode('utf-8'))

      if retcode:
        raise RuntimeError('hmmbuild failed\nstdout:\n%s\n\nstderr:\n%s\n'
                           % (stdout.decode('utf-8'), stderr.decode('utf-8')))

      with open(output_hmm_path, encoding='utf-8') as f:
        hmm = f.read()

    return hmm
|
af_backprop/alphafold/data/tools/hmmsearch.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""A Python wrapper for hmmsearch - search profile against a sequence db."""
|
16 |
+
|
17 |
+
import os
|
18 |
+
import subprocess
|
19 |
+
from typing import Optional, Sequence
|
20 |
+
|
21 |
+
from absl import logging
|
22 |
+
from alphafold.data.tools import utils
|
23 |
+
# Internal import (7716).
|
24 |
+
|
25 |
+
|
26 |
+
class Hmmsearch(object):
  """Runs the hmmsearch command-line tool against a sequence database."""

  def __init__(self,
               *,
               binary_path: str,
               database_path: str,
               flags: Optional[Sequence[str]] = None):
    """Stores the hmmsearch settings after checking the database exists.

    Args:
      binary_path: Path to the hmmsearch executable.
      database_path: Path to the hmmsearch database (FASTA format).
      flags: Optional extra command-line flags to pass to hmmsearch.

    Raises:
      ValueError: If `database_path` does not exist.
    """
    self.binary_path = binary_path
    self.database_path = database_path
    self.flags = flags

    database_found = os.path.exists(self.database_path)
    if not database_found:
      logging.error('Could not find hmmsearch database %s', database_path)
      raise ValueError(f'Could not find hmmsearch database {database_path}')

  def query(self, hmm: str) -> str:
    """Searches the database with the given HMM profile.

    Args:
      hmm: The query profile as a string; it is written to a temporary file
        and passed to hmmsearch.

    Returns:
      The contents of the alignment file that hmmsearch writes via `-A`.

    Raises:
      RuntimeError: If hmmsearch exits with a non-zero return code.
    """
    with utils.tmpdir_manager(base_dir='/tmp') as work_dir:
      hmm_input_path = os.path.join(work_dir, 'query.hmm')
      a3m_out_path = os.path.join(work_dir, 'output.a3m')
      with open(hmm_input_path, 'w') as hmm_file:
        hmm_file.write(hmm)

      # --noali keeps the alignment out of stdout.
      cmd = [self.binary_path, '--noali', '--cpu', '8']
      # User flags have to come before the output and input arguments.
      if self.flags:
        cmd += list(self.flags)
      cmd += ['-A', a3m_out_path, hmm_input_path, self.database_path]

      logging.info('Launching sub-process %s', cmd)
      process = subprocess.Popen(
          cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
      timing_label = (
          f'hmmsearch ({os.path.basename(self.database_path)}) query')
      with utils.timing(timing_label):
        stdout, stderr = process.communicate()
        retcode = process.wait()

      if retcode:
        raise RuntimeError(
            'hmmsearch failed:\nstdout:\n%s\n\nstderr:\n%s\n' % (
                stdout.decode('utf-8'), stderr.decode('utf-8')))

      with open(a3m_out_path) as out_file:
        a3m_out = out_file.read()

    return a3m_out
|
af_backprop/alphafold/data/tools/jackhmmer.py
ADDED
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Library to run Jackhmmer from Python."""
|
16 |
+
|
17 |
+
from concurrent import futures
|
18 |
+
import glob
|
19 |
+
import os
|
20 |
+
import subprocess
|
21 |
+
from typing import Any, Callable, Mapping, Optional, Sequence
|
22 |
+
from urllib import request
|
23 |
+
|
24 |
+
from absl import logging
|
25 |
+
|
26 |
+
from alphafold.data.tools import utils
|
27 |
+
# Internal import (7716).
|
28 |
+
|
29 |
+
|
30 |
+
class Jackhmmer:
  """Python wrapper of the Jackhmmer binary."""

  def __init__(self,
               *,
               binary_path: str,
               database_path: str,
               n_cpu: int = 8,
               n_iter: int = 1,
               e_value: float = 0.0001,
               z_value: Optional[int] = None,
               get_tblout: bool = False,
               filter_f1: float = 0.0005,
               filter_f2: float = 0.00005,
               filter_f3: float = 0.0000005,
               incdom_e: Optional[float] = None,
               dom_e: Optional[float] = None,
               num_streamed_chunks: Optional[int] = None,
               streaming_callback: Optional[Callable[[int], None]] = None):
    """Initializes the Python Jackhmmer wrapper.

    Args:
      binary_path: The path to the jackhmmer executable.
      database_path: The path to the jackhmmer database (FASTA format).
      n_cpu: The number of CPUs to give Jackhmmer.
      n_iter: The number of Jackhmmer iterations.
      e_value: The E-value, see Jackhmmer docs for more details.
      z_value: The Z-value, see Jackhmmer docs for more details.
      get_tblout: Whether to save tblout string.
      filter_f1: MSV and biased composition pre-filter, set to >1.0 to turn off.
      filter_f2: Viterbi pre-filter, set to >1.0 to turn off.
      filter_f3: Forward pre-filter, set to >1.0 to turn off.
      incdom_e: Domain e-value criteria for inclusion of domains in MSA/next
        round.
      dom_e: Domain e-value criteria for inclusion in tblout.
      num_streamed_chunks: Number of database chunks to stream over.
      streaming_callback: Callback function run after each chunk iteration with
        the iteration number as argument.

    Raises:
      ValueError: If the database does not exist locally and streaming is not
        requested (`num_streamed_chunks` is None).
    """
    self.binary_path = binary_path
    self.database_path = database_path
    self.num_streamed_chunks = num_streamed_chunks

    # When streaming, `database_path` is a remote prefix, so it is only
    # required to exist on disk in the non-streaming case.
    if not os.path.exists(self.database_path) and num_streamed_chunks is None:
      logging.error('Could not find Jackhmmer database %s', database_path)
      raise ValueError(f'Could not find Jackhmmer database {database_path}')

    self.n_cpu = n_cpu
    self.n_iter = n_iter
    self.e_value = e_value
    self.z_value = z_value
    self.filter_f1 = filter_f1
    self.filter_f2 = filter_f2
    self.filter_f3 = filter_f3
    self.incdom_e = incdom_e
    self.dom_e = dom_e
    self.get_tblout = get_tblout
    self.streaming_callback = streaming_callback

  def _query_chunk(self, input_fasta_path: str, database_path: str
                   ) -> Mapping[str, Any]:
    """Queries the database chunk using Jackhmmer.

    Args:
      input_fasta_path: Path of the query FASTA file.
      database_path: Path of the database (or database chunk) to search.

    Returns:
      A dict with the keys 'sto' (contents of the alignment file written via
      `-A`), 'tbl' (contents of the tblout file, or '' when `get_tblout` is
      off), 'stderr' (raw bytes captured from the process), 'n_iter' and
      'e_value'.

    Raises:
      RuntimeError: If Jackhmmer exits with a non-zero return code.
    """
    with utils.tmpdir_manager(base_dir='/tmp') as query_tmp_dir:
      sto_path = os.path.join(query_tmp_dir, 'output.sto')

      # The F1/F2/F3 are the expected proportion to pass each of the filtering
      # stages (which get progressively more expensive), reducing these
      # speeds up the pipeline at the expense of sensitivity.  They are
      # currently set very low to make querying Mgnify run in a reasonable
      # amount of time.
      cmd_flags = [
          # Don't pollute stdout with Jackhmmer output.
          '-o', '/dev/null',
          '-A', sto_path,
          '--noali',
          '--F1', str(self.filter_f1),
          '--F2', str(self.filter_f2),
          '--F3', str(self.filter_f3),
          '--incE', str(self.e_value),
          # Report only sequences with E-values <= x in per-sequence output.
          '-E', str(self.e_value),
          '--cpu', str(self.n_cpu),
          '-N', str(self.n_iter)
      ]
      if self.get_tblout:
        tblout_path = os.path.join(query_tmp_dir, 'tblout.txt')
        cmd_flags.extend(['--tblout', tblout_path])

      if self.z_value:
        cmd_flags.extend(['-Z', str(self.z_value)])

      if self.dom_e is not None:
        cmd_flags.extend(['--domE', str(self.dom_e)])

      if self.incdom_e is not None:
        cmd_flags.extend(['--incdomE', str(self.incdom_e)])

      cmd = [self.binary_path] + cmd_flags + [input_fasta_path,
                                             database_path]

      logging.info('Launching subprocess "%s"', ' '.join(cmd))
      process = subprocess.Popen(
          cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
      with utils.timing(
          f'Jackhmmer ({os.path.basename(database_path)}) query'):
        _, stderr = process.communicate()
        retcode = process.wait()

      if retcode:
        raise RuntimeError(
            'Jackhmmer failed\nstderr:\n%s\n' % stderr.decode('utf-8'))

      # Get e-values for each target name
      tbl = ''
      if self.get_tblout:
        with open(tblout_path) as f:
          tbl = f.read()

      with open(sto_path) as f:
        sto = f.read()

    raw_output = dict(
        sto=sto,
        tbl=tbl,
        stderr=stderr,
        n_iter=self.n_iter,
        e_value=self.e_value)

    return raw_output

  def query(self, input_fasta_path: str) -> Sequence[Mapping[str, Any]]:
    """Queries the database using Jackhmmer.

    Without streaming, the whole database is searched in one go. With
    streaming, chunks named `<database_path>.<i>` (i = 1..num_streamed_chunks)
    are fetched one by one into /tmp/ramdisk, searched and deleted again; the
    download of chunk i+1 overlaps with the search of chunk i.

    Args:
      input_fasta_path: Path of the query FASTA file.

    Returns:
      A list with one `_query_chunk` result per searched database (chunk).
    """
    if self.num_streamed_chunks is None:
      return [self._query_chunk(input_fasta_path, self.database_path)]

    db_basename = os.path.basename(self.database_path)
    db_remote_chunk = lambda db_idx: f'{self.database_path}.{db_idx}'
    db_local_chunk = lambda db_idx: f'/tmp/ramdisk/{db_basename}.{db_idx}'

    # Remove existing files to prevent OOM
    for f in glob.glob(db_local_chunk('[0-9]*')):
      try:
        os.remove(f)
      except OSError:
        # Best effort only: a stale chunk that cannot be removed is not fatal.
        logging.warning('OSError while deleting %s', f)

    # Download the (i+1)-th chunk while Jackhmmer is running on the i-th chunk
    with futures.ThreadPoolExecutor(max_workers=2) as executor:
      chunked_output = []
      future = None
      for i in range(1, self.num_streamed_chunks + 1):
        # Copy the chunk locally
        if i == 1:
          future = executor.submit(
              request.urlretrieve, db_remote_chunk(i), db_local_chunk(i))

        # There is nothing to prefetch for the last chunk. Resetting the
        # variable on every iteration keeps the hand-over below valid even for
        # databases that consist of a single chunk.
        next_future = None
        if i < self.num_streamed_chunks:
          next_future = executor.submit(
              request.urlretrieve, db_remote_chunk(i+1), db_local_chunk(i+1))

        # Run Jackhmmer with the chunk
        future.result()
        chunked_output.append(
            self._query_chunk(input_fasta_path, db_local_chunk(i)))

        # Remove the local copy of the chunk
        os.remove(db_local_chunk(i))
        future = next_future
        if self.streaming_callback:
          self.streaming_callback(i)
    return chunked_output
|
af_backprop/alphafold/data/tools/kalign.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""A Python wrapper for Kalign."""
|
16 |
+
import os
|
17 |
+
import subprocess
|
18 |
+
from typing import Sequence
|
19 |
+
|
20 |
+
from absl import logging
|
21 |
+
|
22 |
+
from alphafold.data.tools import utils
|
23 |
+
# Internal import (7716).
|
24 |
+
|
25 |
+
|
26 |
+
def _to_a3m(sequences: Sequence[str]) -> str:
|
27 |
+
"""Converts sequences to an a3m file."""
|
28 |
+
names = ['sequence %d' % i for i in range(1, len(sequences) + 1)]
|
29 |
+
a3m = []
|
30 |
+
for sequence, name in zip(sequences, names):
|
31 |
+
a3m.append(u'>' + name + u'\n')
|
32 |
+
a3m.append(sequence + u'\n')
|
33 |
+
return ''.join(a3m)
|
34 |
+
|
35 |
+
|
36 |
+
class Kalign:
  """Thin wrapper that runs the Kalign binary as a subprocess."""

  def __init__(self, *, binary_path: str):
    """Stores the location of the Kalign executable.

    The path is not validated here; a wrong path only surfaces once `align`
    tries to launch the binary.

    Args:
      binary_path: The path to the Kalign binary.
    """
    self.binary_path = binary_path

  def align(self, sequences: Sequence[str]) -> str:
    """Aligns the given sequences with Kalign.

    Args:
      sequences: Query sequence strings, each at least 6 residues long (Kalign
        requires this). The order of the sequences might alter the output
        slightly as a different alignment tree might get constructed.

    Returns:
      The contents of the alignment file written by Kalign.

    Raises:
      RuntimeError: If Kalign exits with a non-zero return code.
      ValueError: If any of the sequences is less than 6 residues long.
    """
    logging.info('Aligning %d sequences', len(sequences))

    # Reject the first sequence that is too short before touching the disk.
    too_short = next((s for s in sequences if len(s) < 6), None)
    if too_short is not None:
      raise ValueError('Kalign requires all sequences to be at least 6 '
                       'residues long. Got %s (%d residues).'
                       % (too_short, len(too_short)))

    with utils.tmpdir_manager(base_dir='/tmp') as work_dir:
      fasta_in_path = os.path.join(work_dir, 'input.fasta')
      a3m_out_path = os.path.join(work_dir, 'output.a3m')

      with open(fasta_in_path, 'w') as fasta_file:
        fasta_file.write(_to_a3m(sequences))

      cmd = [self.binary_path,
             '-i', fasta_in_path,
             '-o', a3m_out_path,
             '-format', 'fasta']

      logging.info('Launching subprocess "%s"', ' '.join(cmd))
      process = subprocess.Popen(
          cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

      with utils.timing('Kalign query'):
        stdout, stderr = process.communicate()
        retcode = process.wait()
        stdout_text = stdout.decode('utf-8')
        stderr_text = stderr.decode('utf-8')
        logging.info('Kalign stdout:\n%s\n\nstderr:\n%s\n',
                     stdout_text, stderr_text)

      if retcode:
        raise RuntimeError('Kalign failed\nstdout:\n%s\n\nstderr:\n%s\n'
                           % (stdout_text, stderr_text))

      # Read the result before the scratch directory is removed.
      with open(a3m_out_path) as a3m_file:
        return a3m_file.read()
|
af_backprop/alphafold/data/tools/utils.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
"""Common utilities for data pipeline tools."""
|
15 |
+
import contextlib
|
16 |
+
import shutil
|
17 |
+
import tempfile
|
18 |
+
import time
|
19 |
+
from typing import Optional
|
20 |
+
|
21 |
+
from absl import logging
|
22 |
+
|
23 |
+
|
24 |
+
@contextlib.contextmanager
def tmpdir_manager(base_dir: Optional[str] = None):
  """Yields a fresh temporary directory and removes it afterwards.

  Args:
    base_dir: Directory in which the temporary directory is created; passed
      to `tempfile.mkdtemp` as `dir`.

  Yields:
    The path of the newly created directory. It is deleted (errors ignored)
    when the `with` block is left, whether normally or through an exception.
  """
  scratch_dir = tempfile.mkdtemp(dir=base_dir)
  try:
    yield scratch_dir
  finally:
    # Cleanup is best effort; failures while deleting are swallowed.
    shutil.rmtree(scratch_dir, ignore_errors=True)
|
32 |
+
|
33 |
+
|
34 |
+
@contextlib.contextmanager
def timing(msg: str):
  """Logs the start of a block and, on normal exit, how long it took.

  The duration is measured with a monotonic clock so that system clock
  adjustments cannot produce negative or inflated timings.

  Args:
    msg: Label used in both log lines.

  Yields:
    Nothing. If the wrapped block raises, the exception propagates and the
    'Finished' line is not logged.
  """
  logging.info('Started %s', msg)
  tic = time.perf_counter()
  yield
  toc = time.perf_counter()
  logging.info('Finished %s in %.3f seconds', msg, toc - tic)
|
af_backprop/alphafold/model/__init__.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
"""Alphafold model."""
|
af_backprop/alphafold/model/all_atom.py
ADDED
@@ -0,0 +1,1155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Ops for all atom representations.
|
16 |
+
|
17 |
+
Generally we employ two different representations for all atom coordinates,
|
18 |
+
one is atom37 where each heavy atom corresponds to a given position in a 37
|
19 |
+
dimensional array. This mapping is non amino acid specific, but each slot
|
20 |
+
corresponds to an atom of a given name, for example slot 12 always corresponds
|
21 |
+
to 'C delta 1', positions that are not present for a given amino acid are
|
22 |
+
zeroed out and denoted by a mask.
|
23 |
+
The other representation we employ is called atom14, this is a more dense way
|
24 |
+
of representing atoms with 14 slots. Here a given slot will correspond to a
|
25 |
+
different kind of atom depending on amino acid type, for example slot 5
|
26 |
+
corresponds to 'N delta 2' for Asparagine, but to 'C delta 1' for Isoleucine.
|
27 |
+
14 is chosen because it is the maximum number of heavy atoms for any standard
|
28 |
+
amino acid.
|
29 |
+
The order of slots can be found in 'residue_constants.residue_atoms'.
|
30 |
+
Internally the model uses the atom14 representation because it is
|
31 |
+
computationally more efficient.
|
32 |
+
The internal atom14 representation is turned into the atom37 at the output of
|
33 |
+
the network to facilitate easier conversion to existing protein datastructures.
|
34 |
+
"""
|
35 |
+
|
36 |
+
from typing import Dict, Optional
|
37 |
+
from alphafold.common import residue_constants
|
38 |
+
|
39 |
+
from alphafold.model import r3
|
40 |
+
from alphafold.model import utils
|
41 |
+
import jax
|
42 |
+
import jax.numpy as jnp
|
43 |
+
import numpy as np
|
44 |
+
|
45 |
+
|
46 |
+
def squared_difference(x, y):
  """Returns the element-wise square of `x - y`."""
  delta = x - y
  return jnp.square(delta)
|
48 |
+
|
49 |
+
|
50 |
+
def get_chi_atom_indices():
  """Builds the table of atom indices that define each chi angle.

  Returns:
    An array with one entry per residue type in `residue_constants.restypes`
    plus a trailing entry for the unknown residue type; each entry holds 4 chi
    angles of 4 atom indices (looked up in `residue_constants.atom_order`).
    Chi angles that are not defined for a residue are filled with zeros.
  """
  atom_order = residue_constants.atom_order
  table = []
  for restype_letter in residue_constants.restypes:
    resname = residue_constants.restype_1to3[restype_letter]
    per_residue = [
        [atom_order[atom] for atom in chi_atoms]
        for chi_atoms in residue_constants.chi_angles_atoms[resname]
    ]
    # Pad up to 4 chi angles for residues that define fewer of them.
    num_missing = 4 - len(per_residue)
    per_residue.extend([0, 0, 0, 0] for _ in range(num_missing))
    table.append(per_residue)

  # Trailing all-zero entry for the UNKNOWN residue type.
  table.append([[0, 0, 0, 0]] * 4)

  return jnp.asarray(table)
|
74 |
+
|
75 |
+
|
76 |
+
def atom14_to_atom37(atom14_data: jnp.ndarray,  # (N, 14, ...)
                     batch: Dict[str, jnp.ndarray]
                    ) -> jnp.ndarray:  # (N, 37, ...)
  """Converts per-residue data from the atom14 to the atom37 layout.

  Args:
    atom14_data: Rank 2 or rank 3 array in atom14 layout.
    batch: Must contain 'residx_atom37_to_atom14' and 'atom37_atom_exists'.
      An integer-typed mapping is applied as a gather; any other dtype is
      applied as a contraction over the atom14 axis via einsum.

  Returns:
    The data in atom37 layout, multiplied by 'atom37_atom_exists'.
  """
  rank = len(atom14_data.shape)
  assert rank in [2, 3]
  assert 'residx_atom37_to_atom14' in batch
  assert 'atom37_atom_exists' in batch

  mapping = batch['residx_atom37_to_atom14']
  if not jnp.issubdtype(mapping.dtype, jnp.integer):
    atom37_data = jnp.einsum("na...,nba->nb...", atom14_data, mapping)
  else:
    atom37_data = utils.batched_gather(atom14_data, mapping, batch_dims=1)

  # Zero out slots of atoms that do not exist for the residue.
  exists = batch['atom37_atom_exists']
  if rank == 2:
    return atom37_data * exists
  if rank == 3:
    return atom37_data * exists[:, :, None].astype(atom37_data.dtype)
  return atom37_data
|
94 |
+
|
95 |
+
def atom37_to_atom14(
    atom37_data: jnp.ndarray,  # (N, 37, ...)
    batch: Dict[str, jnp.ndarray]) -> jnp.ndarray:  # (N, 14, ...)
  """Converts per-residue data from the atom37 to the atom14 layout.

  Args:
    atom37_data: Rank 2 or rank 3 array in atom37 layout.
    batch: Must contain 'residx_atom14_to_atom37' and 'atom14_atom_exists'.
      An integer-typed mapping is applied as a gather; any other dtype is
      applied as a contraction over the atom37 axis via einsum.

  Returns:
    The data in atom14 layout, multiplied by 'atom14_atom_exists'.
  """
  rank = len(atom37_data.shape)
  assert rank in [2, 3]
  assert 'residx_atom14_to_atom37' in batch
  assert 'atom14_atom_exists' in batch

  mapping = batch['residx_atom14_to_atom37']
  if not jnp.issubdtype(mapping.dtype, jnp.integer):
    atom14_data = jnp.einsum("na...,nba->nb...", atom37_data, mapping)
  else:
    atom14_data = utils.batched_gather(atom37_data, mapping, batch_dims=1)

  # Zero out slots of atoms that do not exist for the residue.
  exists = batch['atom14_atom_exists']
  if rank == 2:
    return atom14_data * exists.astype(atom14_data.dtype)
  if rank == 3:
    return atom14_data * exists[:, :, None].astype(atom14_data.dtype)
  return atom14_data
|
113 |
+
|
114 |
+
|
115 |
+
def atom37_to_frames(
    aatype: jnp.ndarray,  # (...)
    all_atom_positions: jnp.ndarray,  # (..., 37, 3)
    all_atom_mask: jnp.ndarray,  # (..., 37)
) -> Dict[str, jnp.ndarray]:
  """Computes the frames for the up to 8 rigid groups for each residue.

  Atoms are grouped into "rigid groups" according to the torsion angles they
  depend on. E.g., the position of atoms in the chi2-group depend on chi1 and
  chi2, but do not depend on chi3 or chi4.
  Jumper et al. (2021) Suppl. Table 2 and corresponding text.

  Args:
    aatype: Amino acid type, given as array with integers.
    all_atom_positions: atom37 representation of all atom coordinates.
    all_atom_mask: atom37 representation of mask on all atom coordinates.

  Returns:
    Dictionary containing:
      * 'rigidgroups_gt_frames': 8 Frames corresponding to 'all_atom_positions'
           represented as flat 12 dimensional array.
      * 'rigidgroups_gt_exists': Mask denoting whether the atom positions for
          the given frame are available in the ground truth, e.g. if they were
          resolved in the experiment.
      * 'rigidgroups_group_exists': Mask denoting whether given group is in
          principle present for given amino acid type.
      * 'rigidgroups_group_is_ambiguous': Mask denoting whether frame is
          affected by naming ambiguity.
      * 'rigidgroups_alt_gt_frames': 8 Frames with alternative atom renaming
          corresponding to 'all_atom_positions' represented as flat
          12 dimensional array.
  """
  # Rigid group slots:
  #   0: backbone group
  #   1: pre-omega group (empty)
  #   2: phi group (currently empty, because it defines only hydrogens)
  #   3: psi group
  #   4-7: chi1..chi4 groups
  leading_shape = aatype.shape

  def restore(flat_array, trailing_shape):
    # Undo the flattening of the leading (batch) axes done below.
    return jnp.reshape(flat_array, leading_shape + trailing_shape)

  # Collapse any leading axes into a single residue axis.
  aatype = jnp.reshape(aatype, [-1])
  all_atom_positions = jnp.reshape(all_atom_positions, [-1, 37, 3])
  all_atom_mask = jnp.reshape(all_atom_mask, [-1, 37])

  # Names of the 3 base atoms per residue type and rigid group: (21, 8, 3).
  base_atom_names = np.full([21, 8, 3], '', dtype=object)
  base_atom_names[:, 0, :] = ['C', 'CA', 'N']  # backbone frame
  base_atom_names[:, 3, :] = ['CA', 'C', 'O']  # psi group
  for restype, restype_letter in enumerate(residue_constants.restypes):
    resname = residue_constants.restype_1to3[restype_letter]
    for chi_idx in range(4):
      if not residue_constants.chi_angles_mask[restype][chi_idx]:
        continue
      chi_atoms = residue_constants.chi_angles_atoms[resname][chi_idx]
      # The last three atoms of the chi angle serve as base atoms.
      base_atom_names[restype, chi_idx + 4, :] = chi_atoms[1:]

  # Which rigid groups exist for which residue type: (21, 8).
  group_mask_table = np.zeros([21, 8], dtype=np.float32)
  group_mask_table[:, 0] = 1
  group_mask_table[:, 3] = 1
  group_mask_table[:20, 4:] = residue_constants.chi_angles_mask

  # Translate atom names into atom37 indices; unused slots map to index 0.
  lookuptable = residue_constants.atom_order.copy()
  lookuptable[''] = 0
  base_atom37_idx_table = np.vectorize(lambda x: lookuptable[x])(
      base_atom_names)

  # Per-residue gather indices, shape (N, 8, 3).
  base_atom37_idx = utils.batched_gather(base_atom37_idx_table, aatype)

  # Positions of the base atoms of every rigid group.
  base_atom_pos = utils.batched_gather(
      all_atom_positions,
      base_atom37_idx,
      batch_dims=1)

  # Compute the Rigids.
  gt_frames = r3.rigids_from_3_points(
      point_on_neg_x_axis=r3.vecs_from_tensor(base_atom_pos[:, :, 0, :]),
      origin=r3.vecs_from_tensor(base_atom_pos[:, :, 1, :]),
      point_on_xy_plane=r3.vecs_from_tensor(base_atom_pos[:, :, 2, :])
  )

  # Mask of groups that exist for the residue type, shape (N, 8).
  group_exists = utils.batched_gather(group_mask_table, aatype)

  # A frame has ground truth only if all 3 of its base atoms are present and
  # the group exists at all.
  base_atoms_present = utils.batched_gather(  # shape (N, 8, 3)
      all_atom_mask.astype(jnp.float32),
      base_atom37_idx,
      batch_dims=1)
  gt_exists = jnp.min(base_atoms_present, axis=-1) * group_exists  # (N, 8)

  # Adapt backbone frame to old convention (mirror x-axis and z-axis).
  backbone_fix = np.tile(np.eye(3, dtype=np.float32), [8, 1, 1])
  backbone_fix[0, 0, 0] = -1
  backbone_fix[0, 2, 2] = -1
  gt_frames = r3.rigids_mul_rots(
      gt_frames, r3.rots_from_tensor3x3(backbone_fix))

  # The frames for ambiguous rigid groups are just rotated by 180 degree around
  # the x-axis. The ambiguous group is always the last chi-group.
  is_ambiguous_table = np.zeros([21, 8], dtype=np.float32)
  ambiguity_rot_table = np.tile(np.eye(3, dtype=np.float32), [21, 8, 1, 1])
  for resname in residue_constants.residue_atom_renaming_swaps:
    restype = residue_constants.restype_order[
        residue_constants.restype_3to1[resname]]
    last_chi = int(sum(residue_constants.chi_angles_mask[restype]) - 1)
    group_idx = last_chi + 4
    is_ambiguous_table[restype, group_idx] = 1
    ambiguity_rot_table[restype, group_idx, 1, 1] = -1
    ambiguity_rot_table[restype, group_idx, 2, 2] = -1

  # Gather the ambiguity information for each residue.
  is_ambiguous = utils.batched_gather(is_ambiguous_table, aatype)
  ambiguity_rot = utils.batched_gather(ambiguity_rot_table, aatype)

  # Create the alternative ground truth frames.
  alt_gt_frames = r3.rigids_mul_rots(
      gt_frames, r3.rots_from_tensor3x3(ambiguity_rot))

  gt_frames_flat12 = r3.rigids_to_tensor_flat12(gt_frames)
  alt_gt_frames_flat12 = r3.rigids_to_tensor_flat12(alt_gt_frames)

  # Reshape everything back to the original residue layout.
  return {
      'rigidgroups_gt_frames': restore(gt_frames_flat12, (8, 12)),
      'rigidgroups_gt_exists': restore(gt_exists, (8,)),
      'rigidgroups_group_exists': restore(group_exists, (8,)),
      'rigidgroups_group_is_ambiguous': restore(is_ambiguous, (8,)),
      'rigidgroups_alt_gt_frames': restore(alt_gt_frames_flat12, (8, 12,)),
  }
|
270 |
+
|
271 |
+
|
272 |
+
def atom37_to_torsion_angles(
    aatype: jnp.ndarray,  # (B, N)
    all_atom_pos: jnp.ndarray,  # (B, N, 37, 3)
    all_atom_mask: jnp.ndarray,  # (B, N, 37)
    placeholder_for_undefined=False,
) -> Dict[str, jnp.ndarray]:
  """Derives the 7 per-residue torsion angles as (sin, cos) pairs.

  Torsions are ordered as
  '[pre_omega, phi, psi, chi_1, chi_2, chi_3, chi_4]', where pre_omega is the
  omega torsion between a residue and the residue preceding it.

  Args:
    aatype: Integer amino acid types; values above 20 are clipped to 20
      ('Unknown').
    all_atom_pos: Atom coordinates in atom37 representation.
    all_atom_mask: Mask over the atom37 coordinates.
    placeholder_for_undefined: If True, torsions whose mask is 0 are replaced
      by the placeholder value (sin, cos) = (1, 0) in both returned angle
      arrays.

  Returns:
    Dict containing:
      * 'torsion_angles_sin_cos': Array of shape (B, N, 7, 2); the last axis
        holds sin and cos in that order.
      * 'alt_torsion_angles_sin_cos': Like 'torsion_angles_sin_cos', but with
        sin and cos negated (angle shifted by pi) for every chi angle flagged
        in residue_constants.chi_pi_periodic.
      * 'torsion_angles_mask': Array of shape (B, N, 7) marking which
        torsions are defined.
  """
  # Residue types above 20 are all treated as 'Unknown' (index 20).
  aatype = jnp.minimum(aatype, 20)
  num_batch, num_res = aatype.shape

  # Shift positions and masks by one residue so that entry i holds the data
  # of residue i - 1; the first residue gets an all-zero predecessor.
  prev_all_atom_pos = jnp.concatenate(
      [jnp.zeros([num_batch, 1, 37, 3], jnp.float32),
       all_atom_pos[:, :-1, :, :]],
      axis=1)
  prev_all_atom_mask = jnp.concatenate(
      [jnp.zeros([num_batch, 1, 37], jnp.float32),
       all_atom_mask[:, :-1, :]],
      axis=1)

  # The four atoms defining each backbone torsion.
  # Shape (B, N, torsions=3, atoms=4, xyz=3).
  backbone_atom_pos = jnp.stack([
      # pre_omega: prev CA, prev C, this N, this CA
      jnp.concatenate([prev_all_atom_pos[:, :, 1:3, :],
                       all_atom_pos[:, :, 0:2, :]], axis=-2),
      # phi: prev C, this N, this CA, this C
      jnp.concatenate([prev_all_atom_pos[:, :, 2:3, :],
                       all_atom_pos[:, :, 0:3, :]], axis=-2),
      # psi (via the oxygen): this N, this CA, this C, this O
      jnp.concatenate([all_atom_pos[:, :, 0:3, :],
                       all_atom_pos[:, :, 4:5, :]], axis=-2),
  ], axis=2)

  # A backbone torsion is defined only if all four of its atoms are present.
  # Shape (B, N, torsions=3).
  backbone_mask = jnp.stack([
      (jnp.prod(prev_all_atom_mask[:, :, 1:3], axis=-1)
       * jnp.prod(all_atom_mask[:, :, 0:2], axis=-1)),
      (prev_all_atom_mask[:, :, 2]
       * jnp.prod(all_atom_mask[:, :, 0:3], axis=-1)),
      (jnp.prod(all_atom_mask[:, :, 0:3], axis=-1)
       * all_atom_mask[:, :, 4]),
  ], axis=2)

  # Look up, per residue, the atom37 indices of the atoms defining each chi.
  # Table shape: [restypes, chis=4, atoms=4].
  chi_atom_table = get_chi_atom_indices()
  # Shape: [batch, num_res, chis=4, atoms=4].
  chi_atom_idx = utils.batched_gather(
      params=chi_atom_table, indices=aatype, axis=0, batch_dims=0)
  # Shape: [batch, num_res, chis=4, atoms=4, xyz=3].
  chis_atom_pos = utils.batched_gather(
      params=all_atom_pos, indices=chi_atom_idx, axis=-2, batch_dims=2)

  # Per-restype chi existence table, with an all-zero row appended for the
  # UNKNOWN residue type. Shape: [restypes, 4].
  chi_exists_table = jnp.asarray(
      list(residue_constants.chi_angles_mask) + [[0.0, 0.0, 0.0, 0.0]])
  # Which chi angles exist for each residue type. Shape [batch, num_res, 4].
  chis_mask = utils.batched_gather(
      params=chi_exists_table, indices=aatype, axis=0, batch_dims=0)

  # Additionally require that all four defining atoms are actually present.
  # Shape: [batch, num_res, chis=4, atoms=4] -> [batch, num_res, chis=4].
  chi_atoms_present = utils.batched_gather(
      params=all_atom_mask, indices=chi_atom_idx, axis=-1, batch_dims=2)
  chi_atoms_present = jnp.prod(chi_atoms_present, axis=-1)
  chis_mask = chis_mask * chi_atoms_present.astype(jnp.float32)

  # Join backbone and side-chain torsions.
  # Shape (B, N, torsions=7, atoms=4, xyz=3).
  torsions_atom_pos = jnp.concatenate(
      [backbone_atom_pos, chis_atom_pos], axis=2)
  # Shape (B, N, torsions=7).
  torsion_angles_mask = jnp.concatenate([backbone_mask, chis_mask], axis=2)

  # Build a frame from the first three atoms of each torsion: atom 0 lies on
  # the x-y plane, atom 1 on the negative x-axis, atom 2 is the origin.
  # r3.Rigids (B, N, torsions=7)
  first_atom = r3.vecs_from_tensor(torsions_atom_pos[:, :, :, 0, :])
  second_atom = r3.vecs_from_tensor(torsions_atom_pos[:, :, :, 1, :])
  third_atom = r3.vecs_from_tensor(torsions_atom_pos[:, :, :, 2, :])
  fourth_atom = r3.vecs_from_tensor(torsions_atom_pos[:, :, :, 3, :])
  torsion_frames = r3.rigids_from_3_points(
      point_on_neg_x_axis=second_atom,
      origin=third_atom,
      point_on_xy_plane=first_atom)

  # Express the fourth atom in that frame; its y and z coordinates define the
  # torsion angle. r3.Vecs (B, N, torsions=7)
  fourth_atom_local = r3.rigids_mul_vecs(
      r3.invert_rigids(torsion_frames), fourth_atom)

  # Normalise (z, y) to obtain sin and cos of the torsion angle.
  # jnp.ndarray (B, N, torsions=7, sincos=2)
  torsion_angles_sin_cos = jnp.stack(
      [fourth_atom_local.z, fourth_atom_local.y], axis=-1)
  norm = jnp.sqrt(
      jnp.sum(jnp.square(torsion_angles_sin_cos), axis=-1, keepdims=True)
      + 1e-8)
  torsion_angles_sin_cos = torsion_angles_sin_cos / norm

  # psi was measured using the oxygen atom, so it has to be mirrored.
  psi_mirror = jnp.asarray([1., 1., -1., 1., 1., 1., 1.])
  torsion_angles_sin_cos = (
      torsion_angles_sin_cos * psi_mirror[None, None, :, None])

  # Alternative angles for ambiguous atom names: flip the sign of sin and cos
  # for pi-periodic chi angles, leave the three backbone torsions untouched.
  chi_is_ambiguous = utils.batched_gather(
      jnp.asarray(residue_constants.chi_pi_periodic), aatype)
  mirror_torsion_angles = jnp.concatenate(
      [jnp.ones([num_batch, num_res, 3]),
       1.0 - 2.0 * chi_is_ambiguous],
      axis=-1)
  alt_torsion_angles_sin_cos = (
      torsion_angles_sin_cos * mirror_torsion_angles[:, :, :, None])

  if placeholder_for_undefined:
    # Substitute a fixed placeholder for undefined torsions
    # (e.g. the pre-omega of the N-terminus).
    placeholder_torsions = jnp.stack([
        jnp.ones(torsion_angles_sin_cos.shape[:-1]),
        jnp.zeros(torsion_angles_sin_cos.shape[:-1])
    ], axis=-1)
    defined = torsion_angles_mask[..., None]
    torsion_angles_sin_cos = (
        torsion_angles_sin_cos * defined
        + placeholder_torsions * (1 - defined))
    alt_torsion_angles_sin_cos = (
        alt_torsion_angles_sin_cos * defined
        + placeholder_torsions * (1 - defined))

  return {
      'torsion_angles_sin_cos': torsion_angles_sin_cos,  # (B, N, 7, 2)
      'alt_torsion_angles_sin_cos': alt_torsion_angles_sin_cos,  # (B, N, 7, 2)
      'torsion_angles_mask': torsion_angles_mask  # (B, N, 7)
  }
|
444 |
+
|
445 |
+
|
446 |
+
def torsion_angles_to_frames(
    aatype: jnp.ndarray,  # (N)
    backb_to_global: r3.Rigids,  # (N)
    torsion_angles_sin_cos: jnp.ndarray  # (N, 7, 2)
) -> r3.Rigids:  # (N, 8)
  """Compute rigid group frames from torsion angles.

  Jumper et al. (2021) Suppl. Alg. 24 "computeAllAtomCoordinates" lines 2-10
  Jumper et al. (2021) Suppl. Alg. 25 "makeRotX"

  Args:
    aatype: aatype for each residue. With an integer dtype this is a rank-1
      array of residue type indices. With any other dtype it must be rank-2,
      and its last axis is contracted against the residue-type axis of the
      default-frame table (i.e. per-residue weights over residue types).
    backb_to_global: Rigid transformations describing transformation from
      backbone frame to global frame.
    torsion_angles_sin_cos: sin and cosine of the 7 torsion angles
  Returns:
    Frames corresponding to all the Sidechain Rigid Transforms
  """
  # jax.tree_util.tree_map is used instead of the top-level `jax.tree_map`
  # alias: the alias is deprecated/removed in newer JAX releases, whereas the
  # tree_util function is available in every version.
  tree_map = jax.tree_util.tree_map

  # Integer aatype -> table lookup; otherwise -> weighted sum over restypes.
  aatype_is_index = jnp.issubdtype(aatype.dtype, jnp.integer)

  if aatype_is_index:
    assert len(aatype.shape) == 1
  else:
    assert len(aatype.shape) == 2
  assert len(backb_to_global.rot.xx.shape) == 1
  assert len(torsion_angles_sin_cos.shape) == 3
  assert torsion_angles_sin_cos.shape[1] == 7
  assert torsion_angles_sin_cos.shape[2] == 2

  # Gather the default frames for all rigid groups.
  # r3.Rigids with shape (N, 8)
  if aatype_is_index:
    m = utils.batched_gather(
        residue_constants.restype_rigid_group_default_frame, aatype)
  else:
    m = jnp.einsum(
        "...a,abcd->...bcd",
        aatype,
        residue_constants.restype_rigid_group_default_frame)
  default_frames = r3.rigids_from_tensor4x4(m)

  # Create the rotation matrices according to the given angles (each frame is
  # defined such that its rotation is around the x-axis).
  sin_angles = torsion_angles_sin_cos[..., 0]
  cos_angles = torsion_angles_sin_cos[..., 1]

  # Insert zero rotation (sin=0, cos=1) for the backbone group.
  num_residues = aatype.shape[0]
  sin_angles = jnp.concatenate(
      [jnp.zeros([num_residues, 1]), sin_angles], axis=-1)
  cos_angles = jnp.concatenate(
      [jnp.ones([num_residues, 1]), cos_angles], axis=-1)
  zeros = jnp.zeros_like(sin_angles)
  ones = jnp.ones_like(sin_angles)

  # all_rots are r3.Rots with shape (N, 8)
  all_rots = r3.Rots(ones, zeros, zeros,
                     zeros, cos_angles, -sin_angles,
                     zeros, sin_angles, cos_angles)

  # Apply rotations to the frames.
  all_frames = r3.rigids_mul_rots(default_frames, all_rots)

  # chi2, chi3, and chi4 frames do not transform to the backbone frame but to
  # the previous frame. So chain them up accordingly.
  chi2_frame_to_frame = tree_map(lambda x: x[:, 5], all_frames)
  chi3_frame_to_frame = tree_map(lambda x: x[:, 6], all_frames)
  chi4_frame_to_frame = tree_map(lambda x: x[:, 7], all_frames)

  chi1_frame_to_backb = tree_map(lambda x: x[:, 4], all_frames)
  chi2_frame_to_backb = r3.rigids_mul_rigids(chi1_frame_to_backb,
                                             chi2_frame_to_frame)
  chi3_frame_to_backb = r3.rigids_mul_rigids(chi2_frame_to_backb,
                                             chi3_frame_to_frame)
  chi4_frame_to_backb = r3.rigids_mul_rigids(chi3_frame_to_backb,
                                             chi4_frame_to_frame)

  # Recombine them to a r3.Rigids with shape (N, 8): groups 0-4 are taken
  # unchanged, groups 5-7 are replaced by their chained versions.
  def _concat_frames(xall, x5, x6, x7):
    return jnp.concatenate(
        [xall[:, 0:5], x5[:, None], x6[:, None], x7[:, None]], axis=-1)

  all_frames_to_backb = tree_map(
      _concat_frames,
      all_frames,
      chi2_frame_to_backb,
      chi3_frame_to_backb,
      chi4_frame_to_backb)

  # Create the global frames.
  # shape (N, 8)
  all_frames_to_global = r3.rigids_mul_rigids(
      tree_map(lambda x: x[:, None], backb_to_global),
      all_frames_to_backb)

  return all_frames_to_global
|
539 |
+
|
540 |
+
|
541 |
+
def frames_and_literature_positions_to_atom14_pos(
    aatype: jnp.ndarray,  # (N)
    all_frames_to_global: r3.Rigids  # (N, 8)
) -> r3.Vecs:  # (N, 14)
  """Put atom literature positions (atom14 encoding) in each rigid group.

  Jumper et al. (2021) Suppl. Alg. 24 "computeAllAtomCoordinates" line 11

  Args:
    aatype: aatype for each residue. With an integer dtype the per-restype
      tables are looked up by index; with any other dtype the last axis of
      aatype is contracted against the residue-type axis of each table
      (i.e. per-residue weights over residue types).
    all_frames_to_global: All per residue coordinate frames.
  Returns:
    Positions of all atom coordinates in global frame.
  """
  # jax.tree_util.tree_map is used instead of the top-level `jax.tree_map`
  # alias: the alias is deprecated/removed in newer JAX releases, whereas the
  # tree_util function is available in every version.
  tree_map = jax.tree_util.tree_map

  # Integer aatype -> table lookup; otherwise -> weighted sum over restypes.
  aatype_is_index = jnp.issubdtype(aatype.dtype, jnp.integer)

  # Pick the appropriate transform for every atom.
  if aatype_is_index:
    residx_to_group_idx = utils.batched_gather(
        residue_constants.restype_atom14_to_rigid_group, aatype)
    # shape (N, 14, 8)
    group_mask = jax.nn.one_hot(residx_to_group_idx, num_classes=8)
  else:
    group_mask = jnp.einsum(
        "...a,abc->...bc",
        aatype,
        jax.nn.one_hot(residue_constants.restype_atom14_to_rigid_group, 8))

  # Select, for each atom, the frame of its rigid group by summing the 8
  # group frames weighted with group_mask.
  # r3.Rigids with shape (N, 14)
  map_atoms_to_global = tree_map(
      lambda x: jnp.sum(x[:, None, :] * group_mask, axis=-1),
      all_frames_to_global)

  # Gather the literature atom positions for each residue.
  # r3.Vecs with shape (N, 14)
  if aatype_is_index:
    group_pos = utils.batched_gather(
        residue_constants.restype_atom14_rigid_group_positions, aatype)
  else:
    group_pos = jnp.einsum(
        "...a,abc->...bc",
        aatype,
        residue_constants.restype_atom14_rigid_group_positions)
  lit_positions = r3.vecs_from_tensor(group_pos)

  # Transform each atom from its local frame to the global frame.
  # r3.Vecs with shape (N, 14)
  pred_positions = r3.rigids_mul_vecs(map_atoms_to_global, lit_positions)

  # Mask out non-existing atoms.
  if aatype_is_index:
    mask = utils.batched_gather(residue_constants.restype_atom14_mask, aatype)
  else:
    mask = jnp.einsum(
        "...a,ab->...b", aatype, residue_constants.restype_atom14_mask)
  pred_positions = tree_map(lambda x: x * mask, pred_positions)
  return pred_positions
|
587 |
+
|
588 |
+
|
589 |
+
def extreme_ca_ca_distance_violations(
    pred_atom_positions: jnp.ndarray,  # (N, 37(14), 3)
    pred_atom_mask: jnp.ndarray,  # (N, 37(14))
    residue_index: jnp.ndarray,  # (N)
    max_angstrom_tolerance=1.5
    ) -> jnp.ndarray:
  """Fraction of consecutive residues whose CA atoms are too far apart.

  A consecutive CA-CA pair counts as a violation when its distance exceeds
  residue_constants.ca_ca by more than 'max_angstrom_tolerance'.

  Args:
    pred_atom_positions: Atom positions in atom37/14 representation
    pred_atom_mask: Atom mask in atom37/14 representation
    residue_index: Residue index for given amino acid, this is assumed to be
      monotonically increasing.
    max_angstrom_tolerance: Maximum excess distance that is not yet counted
      as a violation.
  Returns:
    Masked mean of the violation indicator over consecutive CA-CA pairs.
  """
  # CA is atom index 1 in both atom37 and atom14 layouts.
  ca_pos = pred_atom_positions[:, 1, :]  # (N, 3)
  ca_mask = pred_atom_mask[:, 1]  # (N)

  # Only pairs whose residue indices differ by exactly one are neighbours.
  index_step = residue_index[1:] - residue_index[:-1]  # (N - 1)
  has_no_gap_mask = (index_step == 1.0).astype(jnp.float32)

  # Distance between each residue's CA and the next residue's CA; the small
  # constant keeps the sqrt well-behaved at zero distance.
  sq_dist = jnp.sum(squared_difference(ca_pos[:-1], ca_pos[1:]), axis=-1)
  ca_ca_distance = jnp.sqrt(1e-6 + sq_dist)  # (N - 1)

  excess = ca_ca_distance - residue_constants.ca_ca
  violations = excess > max_angstrom_tolerance

  # A pair is considered only if both CA atoms exist and there is no gap.
  pair_mask = ca_mask[:-1] * ca_mask[1:] * has_no_gap_mask
  return utils.mask_mean(mask=pair_mask, value=violations)
|
621 |
+
|
622 |
+
|
623 |
+
def between_residue_bond_loss(
    pred_atom_positions: jnp.ndarray,  # (N, 37(14), 3)
    pred_atom_mask: jnp.ndarray,  # (N, 37(14))
    residue_index: jnp.ndarray,  # (N)
    aatype: jnp.ndarray,  # (N)
    tolerance_factor_soft=12.0,
    tolerance_factor_hard=12.0
) -> Dict[str, jnp.ndarray]:
  """Flat-bottom loss to penalize structural violations between residues.

  This is a loss penalizing any violation of the geometry around the peptide
  bond between consecutive amino acids. This loss corresponds to
  Jumper et al. (2021) Suppl. Sec. 1.9.11, eq 44, 45.

  Args:
    pred_atom_positions: Atom positions in atom37/14 representation
    pred_atom_mask: Atom mask in atom37/14 representation
    residue_index: Residue index for given amino acid, this is assumed to be
      monotonically increasing.
    aatype: Amino acid type of given residue
    tolerance_factor_soft: soft tolerance factor measured in standard deviations
      of pdb distributions
    tolerance_factor_hard: hard tolerance factor measured in standard deviations
      of pdb distributions

  Returns:
    Dict containing:
      * 'c_n_loss_mean': Loss for peptide bond length violations
      * 'ca_c_n_loss_mean': Loss for violations of bond angle around C spanned
          by CA, C, N
      * 'c_n_ca_loss_mean': Loss for violations of bond angle around N spanned
          by C, N, CA
      * 'per_residue_loss_sum': sum of all losses for each residue
      * 'per_residue_violation_mask': mask denoting all residues with violation
          present.
  """
  assert len(pred_atom_positions.shape) == 3
  assert len(pred_atom_mask.shape) == 2
  assert len(residue_index.shape) == 1
  assert len(aatype.shape) == 1

  # Get the positions of the relevant backbone atoms
  # (atom index 0 = N, 1 = CA, 2 = C).
  this_ca_pos = pred_atom_positions[:-1, 1, :]  # (N - 1, 3)
  this_ca_mask = pred_atom_mask[:-1, 1]         # (N - 1)
  this_c_pos = pred_atom_positions[:-1, 2, :]   # (N - 1, 3)
  this_c_mask = pred_atom_mask[:-1, 2]          # (N - 1)
  next_n_pos = pred_atom_positions[1:, 0, :]    # (N - 1, 3)
  next_n_mask = pred_atom_mask[1:, 0]           # (N - 1)
  next_ca_pos = pred_atom_positions[1:, 1, :]   # (N - 1, 3)
  next_ca_mask = pred_atom_mask[1:, 1]          # (N - 1)
  # Only residue pairs whose indices differ by exactly one share a bond.
  has_no_gap_mask = ((residue_index[1:] - residue_index[:-1]) == 1.0).astype(
      jnp.float32)

  # Compute loss for the C--N bond.
  c_n_bond_length = jnp.sqrt(
      1e-6 + jnp.sum(squared_difference(this_c_pos, next_n_pos), axis=-1))

  # The C-N bond to proline has slightly different length because of the ring.
  next_is_proline = (
      aatype[1:] == residue_constants.resname_to_idx['PRO']).astype(jnp.float32)
  gt_length = (
      (1. - next_is_proline) * residue_constants.between_res_bond_length_c_n[0]
      + next_is_proline * residue_constants.between_res_bond_length_c_n[1])
  gt_stddev = (
      (1. - next_is_proline) *
      residue_constants.between_res_bond_length_stddev_c_n[0] +
      next_is_proline * residue_constants.between_res_bond_length_stddev_c_n[1])
  # sqrt(eps + x^2) is a smooth stand-in for |x|.
  c_n_bond_length_error = jnp.sqrt(1e-6 +
                                   jnp.square(c_n_bond_length - gt_length))
  c_n_loss_per_residue = jax.nn.relu(
      c_n_bond_length_error - tolerance_factor_soft * gt_stddev)
  mask = this_c_mask * next_n_mask * has_no_gap_mask
  c_n_loss = jnp.sum(mask * c_n_loss_per_residue) / (jnp.sum(mask) + 1e-6)
  c_n_violation_mask = mask * (
      c_n_bond_length_error > (tolerance_factor_hard * gt_stddev))

  # Compute loss for the angles.
  ca_c_bond_length = jnp.sqrt(1e-6 + jnp.sum(
      squared_difference(this_ca_pos, this_c_pos), axis=-1))
  n_ca_bond_length = jnp.sqrt(1e-6 + jnp.sum(
      squared_difference(next_n_pos, next_ca_pos), axis=-1))

  c_ca_unit_vec = (this_ca_pos - this_c_pos) / ca_c_bond_length[:, None]
  c_n_unit_vec = (next_n_pos - this_c_pos) / c_n_bond_length[:, None]
  n_ca_unit_vec = (next_ca_pos - next_n_pos) / n_ca_bond_length[:, None]

  # Angle at C spanned by CA, C, N (compared in cosine space).
  ca_c_n_cos_angle = jnp.sum(c_ca_unit_vec * c_n_unit_vec, axis=-1)
  gt_angle = residue_constants.between_res_cos_angles_ca_c_n[0]
  # Use the stddev that belongs to this cosine-angle constant, mirroring the
  # C-N-CA term below. The C-N bond-length stddev was previously used here,
  # which mixed a length tolerance into an angle comparison.
  gt_stddev = residue_constants.between_res_cos_angles_ca_c_n[1]
  ca_c_n_cos_angle_error = jnp.sqrt(
      1e-6 + jnp.square(ca_c_n_cos_angle - gt_angle))
  ca_c_n_loss_per_residue = jax.nn.relu(
      ca_c_n_cos_angle_error - tolerance_factor_soft * gt_stddev)
  mask = this_ca_mask * this_c_mask * next_n_mask * has_no_gap_mask
  ca_c_n_loss = jnp.sum(mask * ca_c_n_loss_per_residue) / (jnp.sum(mask) + 1e-6)
  ca_c_n_violation_mask = mask * (ca_c_n_cos_angle_error >
                                  (tolerance_factor_hard * gt_stddev))

  # Angle at N spanned by C, N, CA (compared in cosine space).
  c_n_ca_cos_angle = jnp.sum((-c_n_unit_vec) * n_ca_unit_vec, axis=-1)
  gt_angle = residue_constants.between_res_cos_angles_c_n_ca[0]
  gt_stddev = residue_constants.between_res_cos_angles_c_n_ca[1]
  c_n_ca_cos_angle_error = jnp.sqrt(
      1e-6 + jnp.square(c_n_ca_cos_angle - gt_angle))
  c_n_ca_loss_per_residue = jax.nn.relu(
      c_n_ca_cos_angle_error - tolerance_factor_soft * gt_stddev)
  mask = this_c_mask * next_n_mask * next_ca_mask * has_no_gap_mask
  c_n_ca_loss = jnp.sum(mask * c_n_ca_loss_per_residue) / (jnp.sum(mask) + 1e-6)
  c_n_ca_violation_mask = mask * (
      c_n_ca_cos_angle_error > (tolerance_factor_hard * gt_stddev))

  # Compute a per residue loss (equally distribute the loss to both
  # neighbouring residues).
  per_residue_loss_sum = (c_n_loss_per_residue +
                          ca_c_n_loss_per_residue +
                          c_n_ca_loss_per_residue)
  per_residue_loss_sum = 0.5 * (jnp.pad(per_residue_loss_sum, [[0, 1]]) +
                                jnp.pad(per_residue_loss_sum, [[1, 0]]))

  # Compute hard violations: a residue is flagged if any of the three terms
  # is violated on either of its two peptide bonds.
  violation_mask = jnp.max(
      jnp.stack([c_n_violation_mask,
                 ca_c_n_violation_mask,
                 c_n_ca_violation_mask]), axis=0)
  violation_mask = jnp.maximum(
      jnp.pad(violation_mask, [[0, 1]]),
      jnp.pad(violation_mask, [[1, 0]]))

  return {'c_n_loss_mean': c_n_loss,  # shape ()
          'ca_c_n_loss_mean': ca_c_n_loss,  # shape ()
          'c_n_ca_loss_mean': c_n_ca_loss,  # shape ()
          'per_residue_loss_sum': per_residue_loss_sum,  # shape (N)
          'per_residue_violation_mask': violation_mask  # shape (N)
         }
|
756 |
+
|
757 |
+
|
758 |
+
def between_residue_clash_loss(
    atom14_pred_positions: jnp.ndarray,  # (N, 14, 3)
    atom14_atom_exists: jnp.ndarray,  # (N, 14)
    atom14_atom_radius: jnp.ndarray,  # (N, 14)
    residue_index: jnp.ndarray,  # (N)
    overlap_tolerance_soft=1.5,
    overlap_tolerance_hard=1.5
) -> Dict[str, jnp.ndarray]:
  """Penalizes steric clashes between atoms of different residues.

  Non-bonded atoms belonging to different residues are penalized when they
  come closer than the sum of their radii minus a tolerance. This is the
  between-residue part of Jumper et al. (2021) Suppl. Sec. 1.9.11, eq 46.

  Args:
    atom14_pred_positions: Predicted positions of atoms in
      global prediction frame
    atom14_atom_exists: Mask denoting whether atom at positions exists for given
      amino acid type
    atom14_atom_radius: Van der Waals radius for each atom.
    residue_index: Residue index for given amino acid.
    overlap_tolerance_soft: Overlap tolerated before the loss becomes nonzero.
    overlap_tolerance_hard: Overlap tolerated before a pair is flagged in the
      clash mask.

  Returns:
    Dict containing:
      * 'mean_loss': average clash loss
      * 'per_atom_loss_sum': sum of all clash losses per atom, shape (N, 14)
      * 'per_atom_clash_mask': mask whether atom clashes with any other atom
          shape (N, 14)
  """
  assert len(atom14_pred_positions.shape) == 3
  assert len(atom14_atom_exists.shape) == 2
  assert len(atom14_atom_radius.shape) == 2
  assert len(residue_index.shape) == 1

  # Residue indices broadcast along the "first residue" and "second residue"
  # axes of the (N, N, 14, 14) pair tensors.
  idx_a = residue_index[:, None, None, None]
  idx_b = residue_index[None, :, None, None]

  # All-against-all atom distances, shape (N, N, 14, 14).
  pairwise_sq = squared_difference(
      atom14_pred_positions[:, None, :, None, :],
      atom14_pred_positions[None, :, None, :, :])
  dists = jnp.sqrt(1e-10 + jnp.sum(pairwise_sq, axis=-1))

  # Pairs where both atoms exist, shape (N, N, 14, 14).
  pair_mask = (atom14_atom_exists[:, None, :, None] *
               atom14_atom_exists[None, :, None, :])

  # Keep each residue pair only once (strictly increasing residue index).
  # This also drops same-residue pairs, which are handled separately.
  pair_mask = pair_mask * (idx_a < idx_b)

  # The backbone C--N bond between subsequent residues is not a clash
  # (atom14 index 2 = C of the first residue, index 0 = N of the second).
  c_one_hot = jax.nn.one_hot(2, num_classes=14)
  n_one_hot = jax.nn.one_hot(0, num_classes=14)
  is_next_residue = (idx_a + 1) == idx_b
  c_n_bonds = (is_next_residue
               * c_one_hot[None, None, :, None]
               * n_one_hot[None, None, None, :])
  pair_mask = pair_mask * (1. - c_n_bonds)

  # SG-SG pairs are excluded, since a disulfide bridge between two cysteines
  # is not a clash.
  sg_slot = residue_constants.restype_name_to_atom14_names['CYS'].index('SG')
  sg_one_hot = jax.nn.one_hot(sg_slot, num_classes=14)
  disulfide_bonds = (sg_one_hot[None, None, :, None] *
                     sg_one_hot[None, None, None, :])
  pair_mask = pair_mask * (1. - disulfide_bonds)

  # Minimal allowed distance per pair: sum of both radii.
  # shape (N, N, 14, 14)
  radius_sum = (atom14_atom_radius[:, None, :, None] +
                atom14_atom_radius[None, :, None, :])
  dists_lower_bound = pair_mask * radius_sum

  # How far each pair undershoots the (soft-tolerant) lower bound.
  # shape (N, N, 14, 14)
  shortfall = jax.nn.relu(dists_lower_bound - overlap_tolerance_soft - dists)
  dists_to_low_error = pair_mask * shortfall

  # Mean over all considered pairs, shape ().
  mean_loss = jnp.sum(dists_to_low_error) / (1e-6 + jnp.sum(pair_mask))

  # Attribute each pair's loss to both participating atoms, shape (N, 14).
  loss_as_second = jnp.sum(dists_to_low_error, axis=(0, 2))
  loss_as_first = jnp.sum(dists_to_low_error, axis=(1, 3))
  per_atom_loss_sum = loss_as_second + loss_as_first

  # Pairs undershooting the hard-tolerant bound, shape (N, N, 14, 14).
  clash_mask = pair_mask * (
      dists < (dists_lower_bound - overlap_tolerance_hard))

  # An atom clashes if it takes part in any clashing pair, shape (N, 14).
  per_atom_clash_mask = jnp.maximum(
      jnp.max(clash_mask, axis=(0, 2)),
      jnp.max(clash_mask, axis=(1, 3)))

  return {'mean_loss': mean_loss,  # shape ()
          'per_atom_loss_sum': per_atom_loss_sum,  # shape (N, 14)
          'per_atom_clash_mask': per_atom_clash_mask  # shape (N, 14)
         }
|
865 |
+
|
866 |
+
|
867 |
+
def within_residue_violations(
    atom14_pred_positions: jnp.ndarray,  # (N, 14, 3)
    atom14_atom_exists: jnp.ndarray,  # (N, 14)
    atom14_dists_lower_bound: jnp.ndarray,  # (N, 14, 14)
    atom14_dists_upper_bound: jnp.ndarray,  # (N, 14, 14)
    tighten_bounds_for_loss=0.0,
) -> Dict[str, jnp.ndarray]:
  """Penalizes distance-bound violations between atoms of the same residue.

  Every pair of distinct, existing atoms within a residue is checked against
  the given lower and upper distance bounds. This is the within-residue part
  of Jumper et al. (2021) Suppl. Sec. 1.9.11, eq 46.

  Args:
    atom14_pred_positions: Predicted positions of atoms in
      global prediction frame
    atom14_atom_exists: Mask denoting whether atom at positions exists for given
      amino acid type
    atom14_dists_lower_bound: Lower bound on allowed distances.
    atom14_dists_upper_bound: Upper bound on allowed distances
    tighten_bounds_for_loss: Amount by which both bounds are moved inwards
      when computing the loss (the violation mask uses the untightened
      bounds).

  Returns:
    Dict containing:
      * 'per_atom_loss_sum': sum of all violation losses per atom,
          shape (N, 14)
      * 'per_atom_violations': mask whether atom takes part in any violated
          pair, shape (N, 14)
  """
  assert len(atom14_pred_positions.shape) == 3
  assert len(atom14_atom_exists.shape) == 2
  assert len(atom14_dists_lower_bound.shape) == 3
  assert len(atom14_dists_upper_bound.shape) == 3

  # Valid pairs: off-diagonal entries where both atoms exist.
  # shape (N, 14, 14)
  off_diagonal = 1. - jnp.eye(14, 14)[None]
  both_exist = (atom14_atom_exists[:, :, None] *
                atom14_atom_exists[:, None, :])
  pair_mask = off_diagonal * both_exist

  # Intra-residue distance matrix, shape (N, 14, 14).
  pairwise_sq = squared_difference(
      atom14_pred_positions[:, :, None, :],
      atom14_pred_positions[:, None, :, :])
  dists = jnp.sqrt(1e-10 + jnp.sum(pairwise_sq, axis=-1))

  # Flat-bottom loss against the (tightened) bounds, shape (N, 14, 14).
  too_close = jax.nn.relu(
      atom14_dists_lower_bound + tighten_bounds_for_loss - dists)
  too_far = jax.nn.relu(
      dists - (atom14_dists_upper_bound - tighten_bounds_for_loss))
  loss = pair_mask * (too_close + too_far)

  # Attribute each pair's loss to both participating atoms, shape (N, 14).
  per_atom_loss_sum = jnp.sum(loss, axis=1) + jnp.sum(loss, axis=2)

  # Pairs outside the untightened bounds, shape (N, 14, 14).
  out_of_bounds = ((dists < atom14_dists_lower_bound) |
                   (dists > atom14_dists_upper_bound))
  violations = pair_mask * out_of_bounds

  # An atom is in violation if any pair it belongs to is, shape (N, 14).
  per_atom_violations = jnp.maximum(
      jnp.max(violations, axis=1), jnp.max(violations, axis=2))

  return {'per_atom_loss_sum': per_atom_loss_sum,  # shape (N, 14)
          'per_atom_violations': per_atom_violations  # shape (N, 14)
         }
|
941 |
+
|
942 |
+
|
943 |
+
def find_optimal_renaming(
    atom14_gt_positions: jnp.ndarray,  # (N, 14, 3)
    atom14_alt_gt_positions: jnp.ndarray,  # (N, 14, 3)
    atom14_atom_is_ambiguous: jnp.ndarray,  # (N, 14)
    atom14_gt_exists: jnp.ndarray,  # (N, 14)
    atom14_pred_positions: jnp.ndarray,  # (N, 14, 3)
    atom14_atom_exists: jnp.ndarray,  # (N, 14)
) -> jnp.ndarray:  # (N):
  """Decide per residue whether the alternative ground truth naming fits better.

  Jumper et al. (2021) Suppl. Alg. 26
  "renameSymmetricGroundTruthAtoms" lines 1-5

  For every residue the absolute difference between predicted and ground
  truth inter-atom distances is accumulated over all pairs made of one of its
  ambiguous atoms and any non-ambiguous atom, once for the original naming
  and once for the alternative naming. The naming with the smaller
  accumulated distance error is preferred.

  Args:
    atom14_gt_positions: Ground truth positions in global frame of ground truth.
    atom14_alt_gt_positions: Alternate ground truth positions in global frame of
      ground truth with coordinates of ambiguous atoms swapped relative to
      'atom14_gt_positions'.
    atom14_atom_is_ambiguous: Mask denoting whether atom is among ambiguous
      atoms, see Jumper et al. (2021) Suppl. Table 3
    atom14_gt_exists: Mask denoting whether atom at positions exists in ground
      truth.
    atom14_pred_positions: Predicted positions of atoms in
      global prediction frame
    atom14_atom_exists: Mask denoting whether atom at positions exists for given
      amino acid type. Only its rank is checked here.

  Returns:
    Float array of shape [N] with 1. where atom14_alt_gt_positions is closer to
    prediction and 0. otherwise
  """
  # Rank checks, in the order of the arguments.
  for array, expected_rank in (
      (atom14_gt_positions, 3),
      (atom14_alt_gt_positions, 3),
      (atom14_atom_is_ambiguous, 2),
      (atom14_gt_exists, 2),
      (atom14_pred_positions, 3),
      (atom14_atom_exists, 2)):
    assert len(array.shape) == expected_rank

  def _all_pairs_dists(coords):
    """Distances between every atom of every residue pair, (N, N, 14, 14)."""
    # The 1e-10 offset keeps the sqrt differentiable at zero distance.
    return jnp.sqrt(1e-10 + jnp.sum(
        squared_difference(
            coords[:, None, :, None, :],
            coords[None, :, None, :, :]),
        axis=-1))

  # Distance matrices for the prediction and for both ground truth namings.
  # shape (N, N, 14, 14)
  pred_dists = _all_pairs_dists(atom14_pred_positions)
  gt_dists = _all_pairs_dists(atom14_gt_positions)
  alt_gt_dists = _all_pairs_dists(atom14_alt_gt_positions)

  # Smoothed absolute distance errors against each naming.
  # shape (N, N, 14, 14)
  dist_error = jnp.sqrt(1e-10 + squared_difference(pred_dists, gt_dists))
  alt_dist_error = jnp.sqrt(
      1e-10 + squared_difference(pred_dists, alt_gt_dists))

  # Select pairs of (existing, ambiguous) row atoms with (existing,
  # non-ambiguous) column atoms.
  # shape (N, N, 14, 14)
  row_mask = (atom14_gt_exists[:, None, :, None] *
              atom14_atom_is_ambiguous[:, None, :, None])
  col_exists = atom14_gt_exists[None, :, None, :]
  col_unambiguous = 1. - atom14_atom_is_ambiguous[None, :, None, :]
  pair_mask = row_mask * col_exists * col_unambiguous

  # Accumulate the errors of each residue to the non-ambiguous atoms.
  # shape (N)
  per_res_error = jnp.sum(pair_mask * dist_error, axis=(1, 2, 3))
  alt_per_res_error = jnp.sum(pair_mask * alt_dist_error, axis=(1, 2, 3))

  # The alternative naming wins where its accumulated error is strictly lower.
  # shape (N)
  return (alt_per_res_error < per_res_error).astype(jnp.float32)
|
1025 |
+
|
1026 |
+
|
1027 |
+
def frame_aligned_point_error(
    pred_frames: r3.Rigids,  # shape (num_frames)
    target_frames: r3.Rigids,  # shape (num_frames)
    frames_mask: jnp.ndarray,  # shape (num_frames)
    pred_positions: r3.Vecs,  # shape (num_positions)
    target_positions: r3.Vecs,  # shape (num_positions)
    positions_mask: jnp.ndarray,  # shape (num_positions)
    length_scale: float,
    l1_clamp_distance: Optional[float] = None,
    epsilon=1e-4) -> jnp.ndarray:  # shape ()
  """Measure point error under different alignments.

  Jumper et al. (2021) Suppl. Alg. 28 "computeFAPE"

  Computes error between two structures with B points under A alignments derived
  from the given pairs of frames.

  Args:
    pred_frames: num_frames reference frames for 'pred_positions'.
    target_frames: num_frames reference frames for 'target_positions'.
    frames_mask: Mask for frame pairs to use.
    pred_positions: num_positions predicted positions of the structure.
    target_positions: num_positions target positions of the structure.
    positions_mask: Mask on which positions to score.
    length_scale: length scale to divide loss by.
    l1_clamp_distance: Distance cutoff on error beyond which gradients will
      be zero. Any falsy value (None or 0) disables clamping.
    epsilon: small value used to regularize the square root of the squared
      distance and the denominator of the masked average.

  Returns:
    Masked Frame Aligned Point Error.
  """
  assert pred_frames.rot.xx.ndim == 1
  assert target_frames.rot.xx.ndim == 1
  assert frames_mask.ndim == 1, frames_mask.ndim
  assert pred_positions.x.ndim == 1
  assert target_positions.x.ndim == 1
  assert positions_mask.ndim == 1

  # NOTE: jax.tree_util.tree_map is used instead of the top-level alias
  # jax.tree_map, which is deprecated and absent from newer JAX releases.
  tree_map = jax.tree_util.tree_map

  # Compute array of predicted positions in the predicted frames.
  # r3.Vecs (num_frames, num_positions)
  local_pred_pos = r3.rigids_mul_vecs(
      tree_map(lambda r: r[:, None], r3.invert_rigids(pred_frames)),
      tree_map(lambda x: x[None, :], pred_positions))

  # Compute array of target positions in the target frames.
  # r3.Vecs (num_frames, num_positions)
  local_target_pos = r3.rigids_mul_vecs(
      tree_map(lambda r: r[:, None], r3.invert_rigids(target_frames)),
      tree_map(lambda x: x[None, :], target_positions))

  # Compute errors between the structures.
  # jnp.ndarray (num_frames, num_positions)
  error_dist = jnp.sqrt(
      r3.vecs_squared_distance(local_pred_pos, local_target_pos)
      + epsilon)

  if l1_clamp_distance:
    error_dist = jnp.clip(error_dist, 0, l1_clamp_distance)

  # Scale, then zero out every masked frame (rows) and position (columns).
  normed_error = error_dist / length_scale
  normed_error *= jnp.expand_dims(frames_mask, axis=-1)
  normed_error *= jnp.expand_dims(positions_mask, axis=-2)

  # Number of (frame, position) pairs that contribute to the sum.
  normalization_factor = (
      jnp.sum(frames_mask, axis=-1) *
      jnp.sum(positions_mask, axis=-1))
  return (jnp.sum(normed_error, axis=(-2, -1)) /
          (epsilon + normalization_factor))
|
1094 |
+
|
1095 |
+
|
1096 |
+
def _make_renaming_matrices():
  """Build one 14x14 atom renaming matrix per residue type.

  Residue types without an entry in
  residue_constants.residue_atom_renaming_swaps get the identity matrix. For
  the others, row i holds a single 1 in the column of the atom that atom i is
  swapped with (or in column i when atom i is not swapped).

  Returns:
    float32 array with one (14, 14) matrix per entry of
    residue_constants.restypes, followed by one for 'UNK'.
  """
  # As the atom naming is ambiguous for some amino acids, alternative
  # ground truth coordinates with swapped naming are needed for them.
  residue_names = [
      residue_constants.restype_1to3[letter]
      for letter in residue_constants.restypes
  ]
  residue_names += ['UNK']

  identity = np.eye(14, dtype=np.float32)
  matrices_by_name = {
      name: np.eye(14, dtype=np.float32) for name in residue_names}

  for resname, swap in residue_constants.residue_atom_renaming_swaps.items():
    atom_names = residue_constants.restype_name_to_atom14_names[resname]
    # permutation[i] is the index of the atom that atom i is renamed to.
    permutation = np.arange(14)
    for source_atom_swap, target_atom_swap in swap.items():
      source_index = atom_names.index(source_atom_swap)
      target_index = atom_names.index(target_atom_swap)
      permutation[source_index] = target_index
      permutation[target_index] = source_index
    # Row i of the result is row permutation[i] of the identity, i.e. a
    # single 1. at column permutation[i].
    matrices_by_name[resname] = identity[permutation]

  return np.stack([matrices_by_name[name] for name in residue_names])


# Computed once at import time; indexed by residue type in get_alt_atom14.
RENAMING_MATRICES = _make_renaming_matrices()
|
1124 |
+
|
1125 |
+
|
1126 |
+
def get_alt_atom14(aatype, positions, mask):
  """Get alternative atom14 positions.

  Constructs renamed atom positions for ambiguous residues.

  Jumper et al. (2021) Suppl. Table 3 "Ambiguous atom names due to 180 degree-
  rotation-symmetry"

  Args:
    aatype: Amino acid at given position
    positions: Atom positions as r3.Vecs in atom14 representation, (N, 14)
    mask: Atom masks in atom14 representation, (N, 14)
  Returns:
    renamed atom positions, renamed atom mask
  """
  # NOTE: jax.tree_util.tree_map is used instead of the top-level alias
  # jax.tree_map, which is deprecated and absent from newer JAX releases.
  tree_map = jax.tree_util.tree_map

  # pick the transformation matrices for the given residue sequence
  # shape (num_res, 14, 14)
  renaming_transform = utils.batched_gather(
      jnp.asarray(RENAMING_MATRICES), aatype)

  # Add a trailing axis to every coordinate component so it broadcasts
  # against the (num_res, 14, 14) matrices, then contract over axis 1.
  positions = tree_map(lambda x: x[:, :, None], positions)
  alternative_positions = tree_map(
      lambda x: jnp.sum(x, axis=1), positions * renaming_transform)

  # Create the mask for the alternative ground truth (differs from the
  # ground truth mask, if only one of the atoms in an ambiguous pair has a
  # ground truth position)
  alternative_mask = jnp.sum(mask[..., None] * renaming_transform, axis=1)

  return alternative_positions, alternative_mask
|
af_backprop/alphafold/model/common_modules.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""A collection of common Haiku modules for use in protein folding."""
|
16 |
+
import haiku as hk
|
17 |
+
import jax.numpy as jnp
|
18 |
+
|
19 |
+
|
20 |
+
class Linear(hk.Module):
  """Protein folding specific Linear Module.

  This differs from the standard Haiku Linear in a few ways:
    * It supports inputs of higher rank (the implementation swaps the last
      two axes, so inputs need at least two dimensions)
    * Initializers are specified by strings
  """

  def __init__(self,
               num_output: int,
               initializer: str = 'linear',
               use_bias: bool = True,
               bias_init: float = 0.,
               name: str = 'linear'):
    """Constructs Linear Module.

    Args:
      num_output: number of output channels.
      initializer: What initializer to use, should be one of {'linear', 'relu',
        'zeros'}
      use_bias: Whether to include trainable bias
      bias_init: Value used to initialize bias.
      name: name of module, used for name scopes.
    """

    super().__init__(name=name)
    self.num_output = num_output
    self.initializer = initializer
    self.use_bias = use_bias
    self.bias_init = bias_init

  def __call__(self, inputs: jnp.ndarray) -> jnp.ndarray:
    """Connects Module.

    Args:
      inputs: Tensor of shape [..., num_channel]

    Returns:
      output of shape [..., num_output]

    Raises:
      ValueError: if the configured initializer name is not recognised.
    """
    n_channels = int(inputs.shape[-1])

    weight_shape = [n_channels, self.num_output]
    if self.initializer == 'linear':
      weight_init = hk.initializers.VarianceScaling(mode='fan_in', scale=1.)
    elif self.initializer == 'relu':
      weight_init = hk.initializers.VarianceScaling(mode='fan_in', scale=2.)
    elif self.initializer == 'zeros':
      weight_init = hk.initializers.Constant(0.0)
    else:
      # Without this branch an unknown name would only surface later as an
      # UnboundLocalError on weight_init.
      raise ValueError(
          f'Unknown initializer {self.initializer!r}; expected one of '
          "'linear', 'relu', 'zeros'.")

    weights = hk.get_parameter('weights', weight_shape, inputs.dtype,
                               weight_init)

    # this is equivalent to einsum('...c,cd->...d', inputs, weights)
    # but turns out to be slightly faster
    inputs = jnp.swapaxes(inputs, -1, -2)
    output = jnp.einsum('...cb,cd->...db', inputs, weights)
    output = jnp.swapaxes(output, -1, -2)

    if self.use_bias:
      bias = hk.get_parameter('bias', [self.num_output], inputs.dtype,
                              hk.initializers.Constant(self.bias_init))
      output += bias

    return output
|
af_backprop/alphafold/model/config.py
ADDED
@@ -0,0 +1,412 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
"""Model config."""
|
15 |
+
|
16 |
+
import copy
|
17 |
+
from alphafold.model.tf import shape_placeholders
|
18 |
+
import ml_collections
|
19 |
+
|
20 |
+
|
21 |
+
# Local aliases for the symbolic dimension placeholders used in the
# 'feat' shape table of CONFIG below.
NUM_RES = shape_placeholders.NUM_RES
NUM_MSA_SEQ = shape_placeholders.NUM_MSA_SEQ
NUM_EXTRA_SEQ = shape_placeholders.NUM_EXTRA_SEQ
NUM_TEMPLATES = shape_placeholders.NUM_TEMPLATES
|
25 |
+
|
26 |
+
|
27 |
+
def model_config(name: str) -> ml_collections.ConfigDict:
  """Get the ConfigDict of a CASP14 model.

  Args:
    name: key of CONFIG_DIFFS selecting the model variant.

  Returns:
    A deep copy of CONFIG with the flattened overrides of the selected
    variant applied.

  Raises:
    ValueError: if `name` is not a key of CONFIG_DIFFS.
  """
  if name in CONFIG_DIFFS:
    # Copy first so the shared base CONFIG is never mutated.
    variant_cfg = copy.deepcopy(CONFIG)
    variant_cfg.update_from_flattened_dict(CONFIG_DIFFS[name])
    return variant_cfg
  raise ValueError(f'Invalid model name {name}.')
|
35 |
+
|
36 |
+
|
37 |
+
# Per-model overrides, keyed by model name. Each value maps a flattened
# (dot-separated) CONFIG path to the value that replaces the base setting;
# model_config() applies them with update_from_flattened_dict.
CONFIG_DIFFS = {
    'model_1': {
        # Jumper et al. (2021) Suppl. Table 5, Model 1.1.1
        'data.common.max_extra_msa': 5120,
        'data.common.reduce_msa_clusters_by_max_templates': True,
        'data.common.use_templates': True,
        'model.embeddings_and_evoformer.template.embed_torsion_angles': True,
        'model.embeddings_and_evoformer.template.enabled': True
    },
    'model_2': {
        # Jumper et al. (2021) Suppl. Table 5, Model 1.1.2
        'data.common.reduce_msa_clusters_by_max_templates': True,
        'data.common.use_templates': True,
        'model.embeddings_and_evoformer.template.embed_torsion_angles': True,
        'model.embeddings_and_evoformer.template.enabled': True
    },
    'model_3': {
        # Jumper et al. (2021) Suppl. Table 5, Model 1.2.1
        'data.common.max_extra_msa': 5120,
    },
    'model_4': {
        # Jumper et al. (2021) Suppl. Table 5, Model 1.2.2
        'data.common.max_extra_msa': 5120,
    },
    'model_5': {
        # Jumper et al. (2021) Suppl. Table 5, Model 1.2.3
        # (no overrides: uses the base CONFIG unchanged)
    },

    # The following models are fine-tuned from the corresponding models above
    # with an additional predicted_aligned_error head that can produce
    # predicted TM-score (pTM) and predicted aligned errors.
    'model_1_ptm': {
        'data.common.max_extra_msa': 5120,
        'data.common.reduce_msa_clusters_by_max_templates': True,
        'data.common.use_templates': True,
        'model.embeddings_and_evoformer.template.embed_torsion_angles': True,
        'model.embeddings_and_evoformer.template.enabled': True,
        'model.heads.predicted_aligned_error.weight': 0.1
    },
    'model_2_ptm': {
        'data.common.reduce_msa_clusters_by_max_templates': True,
        'data.common.use_templates': True,
        'model.embeddings_and_evoformer.template.embed_torsion_angles': True,
        'model.embeddings_and_evoformer.template.enabled': True,
        'model.heads.predicted_aligned_error.weight': 0.1
    },
    'model_3_ptm': {
        'data.common.max_extra_msa': 5120,
        'model.heads.predicted_aligned_error.weight': 0.1
    },
    'model_4_ptm': {
        'data.common.max_extra_msa': 5120,
        'model.heads.predicted_aligned_error.weight': 0.1
    },
    'model_5_ptm': {
        'model.heads.predicted_aligned_error.weight': 0.1
    }
}
|
95 |
+
|
96 |
+
# Base configuration shared by all model variants; model_config() deep-copies
# it and applies the matching CONFIG_DIFFS entry on top.
CONFIG = ml_collections.ConfigDict({
    # ---- Input pipeline settings. ----
    'data': {
        'common': {
            'masked_msa': {
                'profile_prob': 0.1,
                'same_prob': 0.1,
                'uniform_prob': 0.1
            },
            'max_extra_msa': 1024,
            'msa_cluster_features': True,
            'num_recycle': 3,
            'reduce_msa_clusters_by_max_templates': False,
            'resample_msa_in_recycling': True,
            'template_features': [
                'template_all_atom_positions', 'template_sum_probs',
                'template_aatype', 'template_all_atom_masks',
                'template_domain_names'
            ],
            'unsupervised_features': [
                'aatype', 'residue_index', 'sequence', 'msa', 'domain_name',
                'num_alignments', 'seq_length', 'between_segment_residues',
                'deletion_matrix'
            ],
            'use_templates': False,
        },
        'eval': {
            # Feature name -> list of dimension placeholders (None where no
            # placeholder applies).
            'feat': {
                'aatype': [NUM_RES],
                'all_atom_mask': [NUM_RES, None],
                'all_atom_positions': [NUM_RES, None, None],
                'alt_chi_angles': [NUM_RES, None],
                'atom14_alt_gt_exists': [NUM_RES, None],
                'atom14_alt_gt_positions': [NUM_RES, None, None],
                'atom14_atom_exists': [NUM_RES, None],
                'atom14_atom_is_ambiguous': [NUM_RES, None],
                'atom14_gt_exists': [NUM_RES, None],
                'atom14_gt_positions': [NUM_RES, None, None],
                'atom37_atom_exists': [NUM_RES, None],
                'backbone_affine_mask': [NUM_RES],
                'backbone_affine_tensor': [NUM_RES, None],
                'bert_mask': [NUM_MSA_SEQ, NUM_RES],
                'chi_angles': [NUM_RES, None],
                'chi_mask': [NUM_RES, None],
                'extra_deletion_value': [NUM_EXTRA_SEQ, NUM_RES],
                'extra_has_deletion': [NUM_EXTRA_SEQ, NUM_RES],
                'extra_msa': [NUM_EXTRA_SEQ, NUM_RES],
                'extra_msa_mask': [NUM_EXTRA_SEQ, NUM_RES],
                'extra_msa_row_mask': [NUM_EXTRA_SEQ],
                'is_distillation': [],
                'msa_feat': [NUM_MSA_SEQ, NUM_RES, None],
                'msa_mask': [NUM_MSA_SEQ, NUM_RES],
                'msa_row_mask': [NUM_MSA_SEQ],
                'pseudo_beta': [NUM_RES, None],
                'pseudo_beta_mask': [NUM_RES],
                'random_crop_to_size_seed': [None],
                'residue_index': [NUM_RES],
                'residx_atom14_to_atom37': [NUM_RES, None],
                'residx_atom37_to_atom14': [NUM_RES, None],
                'resolution': [],
                'rigidgroups_alt_gt_frames': [NUM_RES, None, None],
                'rigidgroups_group_exists': [NUM_RES, None],
                'rigidgroups_group_is_ambiguous': [NUM_RES, None],
                'rigidgroups_gt_exists': [NUM_RES, None],
                'rigidgroups_gt_frames': [NUM_RES, None, None],
                'seq_length': [],
                'seq_mask': [NUM_RES],
                'target_feat': [NUM_RES, None],
                'template_aatype': [NUM_TEMPLATES, NUM_RES],
                'template_all_atom_masks': [NUM_TEMPLATES, NUM_RES, None],
                'template_all_atom_positions': [
                    NUM_TEMPLATES, NUM_RES, None, None],
                'template_backbone_affine_mask': [NUM_TEMPLATES, NUM_RES],
                'template_backbone_affine_tensor': [
                    NUM_TEMPLATES, NUM_RES, None],
                'template_mask': [NUM_TEMPLATES],
                'template_pseudo_beta': [NUM_TEMPLATES, NUM_RES, None],
                'template_pseudo_beta_mask': [NUM_TEMPLATES, NUM_RES],
                'template_sum_probs': [NUM_TEMPLATES, None],
                'true_msa': [NUM_MSA_SEQ, NUM_RES]
            },
            'fixed_size': True,
            'subsample_templates': False,  # We want top templates.
            'masked_msa_replace_fraction': 0.15,
            'max_msa_clusters': 512,
            'max_templates': 4,
            'num_ensemble': 1,
        },
    },
    # ---- Network settings. ----
    'model': {
        'embeddings_and_evoformer': {
            'evoformer_num_block': 48,
            'evoformer': {
                'msa_row_attention_with_pair_bias': {
                    'dropout_rate': 0.15,
                    'gating': True,
                    'num_head': 8,
                    'orientation': 'per_row',
                    'shared_dropout': True
                },
                'msa_column_attention': {
                    'dropout_rate': 0.0,
                    'gating': True,
                    'num_head': 8,
                    'orientation': 'per_column',
                    'shared_dropout': True
                },
                'msa_transition': {
                    'dropout_rate': 0.0,
                    'num_intermediate_factor': 4,
                    'orientation': 'per_row',
                    'shared_dropout': True
                },
                'outer_product_mean': {
                    'chunk_size': 128,
                    'dropout_rate': 0.0,
                    'num_outer_channel': 32,
                    'orientation': 'per_row',
                    'shared_dropout': True
                },
                'triangle_attention_starting_node': {
                    'dropout_rate': 0.25,
                    'gating': True,
                    'num_head': 4,
                    'orientation': 'per_row',
                    'shared_dropout': True
                },
                'triangle_attention_ending_node': {
                    'dropout_rate': 0.25,
                    'gating': True,
                    'num_head': 4,
                    'orientation': 'per_column',
                    'shared_dropout': True
                },
                'triangle_multiplication_outgoing': {
                    'dropout_rate': 0.25,
                    'equation': 'ikc,jkc->ijc',
                    'num_intermediate_channel': 128,
                    'orientation': 'per_row',
                    'shared_dropout': True
                },
                'triangle_multiplication_incoming': {
                    'dropout_rate': 0.25,
                    'equation': 'kjc,kic->ijc',
                    'num_intermediate_channel': 128,
                    'orientation': 'per_row',
                    'shared_dropout': True
                },
                'pair_transition': {
                    'dropout_rate': 0.0,
                    'num_intermediate_factor': 4,
                    'orientation': 'per_row',
                    'shared_dropout': True
                }
            },
            'extra_msa_channel': 64,
            'extra_msa_stack_num_block': 4,
            'max_relative_feature': 32,
            'custom_relative_features': False,
            'msa_channel': 256,
            'pair_channel': 128,
            'prev_pos': {
                'min_bin': 3.25,
                'max_bin': 20.75,
                'num_bins': 15
            },
            'recycle_features': True,
            'recycle_pos': True,
            'recycle_dgram': False,
            'backprop_dgram': False,
            'backprop_dgram_temp': 1.0,
            'seq_channel': 384,
            'template': {
                'attention': {
                    'gating': False,
                    'key_dim': 64,
                    'num_head': 4,
                    'value_dim': 64
                },
                'dgram_features': {
                    'min_bin': 3.25,
                    'max_bin': 50.75,
                    'num_bins': 39
                },
                'backprop_dgram': False,
                'backprop_dgram_temp': 1.0,
                'embed_torsion_angles': False,
                'enabled': False,
                'template_pair_stack': {
                    'num_block': 2,
                    'triangle_attention_starting_node': {
                        'dropout_rate': 0.25,
                        'gating': True,
                        'key_dim': 64,
                        'num_head': 4,
                        'orientation': 'per_row',
                        'shared_dropout': True,
                        'value_dim': 64
                    },
                    'triangle_attention_ending_node': {
                        'dropout_rate': 0.25,
                        'gating': True,
                        'key_dim': 64,
                        'num_head': 4,
                        'orientation': 'per_column',
                        'shared_dropout': True,
                        'value_dim': 64
                    },
                    'triangle_multiplication_outgoing': {
                        'dropout_rate': 0.25,
                        'equation': 'ikc,jkc->ijc',
                        'num_intermediate_channel': 64,
                        'orientation': 'per_row',
                        'shared_dropout': True
                    },
                    'triangle_multiplication_incoming': {
                        'dropout_rate': 0.25,
                        'equation': 'kjc,kic->ijc',
                        'num_intermediate_channel': 64,
                        'orientation': 'per_row',
                        'shared_dropout': True
                    },
                    'pair_transition': {
                        'dropout_rate': 0.0,
                        'num_intermediate_factor': 2,
                        'orientation': 'per_row',
                        'shared_dropout': True
                    }
                },
                'max_templates': 4,
                'subbatch_size': 128,
                'use_template_unit_vector': False,
            }
        },
        'global_config': {
            'mixed_precision': False,
            'deterministic': False,
            'subbatch_size': 4,
            'use_remat': False,
            'zero_init': True
        },
        # ---- Output heads and their loss weights. ----
        'heads': {
            'distogram': {
                'first_break': 2.3125,
                'last_break': 21.6875,
                'num_bins': 64,
                'weight': 0.3
            },
            'predicted_aligned_error': {
                # `num_bins - 1` bins uniformly space the
                # [0, max_error_bin A] range.
                # The final bin covers [max_error_bin A, +infty]
                # 31A gives bins with 0.5A width.
                'max_error_bin': 31.,
                'num_bins': 64,
                'num_channels': 128,
                'filter_by_resolution': True,
                'min_resolution': 0.1,
                'max_resolution': 3.0,
                'weight': 0.0,
            },
            'experimentally_resolved': {
                'filter_by_resolution': True,
                'max_resolution': 3.0,
                'min_resolution': 0.1,
                'weight': 0.01
            },
            'structure_module': {
                'num_layer': 8,
                'fape': {
                    'clamp_distance': 10.0,
                    'clamp_type': 'relu',
                    'loss_unit_distance': 10.0
                },
                'angle_norm_weight': 0.01,
                'chi_weight': 0.5,
                'clash_overlap_tolerance': 1.5,
                'compute_in_graph_metrics': True,
                'dropout': 0.1,
                'num_channel': 384,
                'num_head': 12,
                'num_layer_in_transition': 3,
                'num_point_qk': 4,
                'num_point_v': 8,
                'num_scalar_qk': 16,
                'num_scalar_v': 16,
                'position_scale': 10.0,
                'sidechain': {
                    'atom_clamp_distance': 10.0,
                    'num_channel': 128,
                    'num_residual_block': 2,
                    'weight_frac': 0.5,
                    'length_scale': 10.,
                },
                'structural_violation_loss_weight': 1.0,
                'violation_tolerance_factor': 12.0,
                'weight': 1.0
            },
            'predicted_lddt': {
                'filter_by_resolution': True,
                'max_resolution': 3.0,
                'min_resolution': 0.1,
                'num_bins': 50,
                'num_channels': 128,
                'weight': 0.01
            },
            'masked_msa': {
                'num_output': 23,
                'weight': 2.0
            },
        },
        'num_recycle': 3,
        'backprop_recycle': False,
        'resample_msa_in_recycling': True,
        'add_prev': False,
        'use_struct': True,
    },
})
|
af_backprop/alphafold/model/data.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Convenience functions for reading data."""
|
16 |
+
|
17 |
+
import io
|
18 |
+
import os
|
19 |
+
from typing import List
|
20 |
+
from alphafold.model import utils
|
21 |
+
import haiku as hk
|
22 |
+
import numpy as np
|
23 |
+
# Internal import (7716).
|
24 |
+
|
25 |
+
|
26 |
+
def casp_model_names(data_dir: str) -> List[str]:
  """List the parameter files found in `<data_dir>/params`.

  Args:
    data_dir: directory containing a 'params' subdirectory.

  Returns:
    The name of every entry in the 'params' directory with its last file
    extension removed, in the order reported by os.listdir.
  """
  params_dir = os.path.join(data_dir, 'params')
  names = []
  for filename in os.listdir(params_dir):
    stem, _ = os.path.splitext(filename)
    names.append(stem)
  return names
|
29 |
+
|
30 |
+
|
31 |
+
def get_model_haiku_params(model_name: str, data_dir: str) -> hk.Params:
  """Get the Haiku parameters from a model name.

  Args:
    model_name: model identifier; the file read is
      `<data_dir>/params/params_<model_name>.npz`.
    data_dir: directory containing the 'params' subdirectory.

  Returns:
    The result of utils.flat_params_to_haiku applied to the loaded archive.
  """
  npz_path = os.path.join(data_dir, 'params', f'params_{model_name}.npz')

  # Read the whole file into memory so the archive stays readable after the
  # file handle has been closed.
  with open(npz_path, 'rb') as handle:
    raw_bytes = handle.read()

  # allow_pickle=False: refuse pickled object arrays in the archive.
  flat_params = np.load(io.BytesIO(raw_bytes), allow_pickle=False)

  return utils.flat_params_to_haiku(flat_params)
|
af_backprop/alphafold/model/features.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Code to generate processed features."""
|
16 |
+
import copy
|
17 |
+
from typing import List, Mapping, Tuple
|
18 |
+
from alphafold.model.tf import input_pipeline
|
19 |
+
from alphafold.model.tf import proteins_dataset
|
20 |
+
import ml_collections
|
21 |
+
import numpy as np
|
22 |
+
import tensorflow.compat.v1 as tf
|
23 |
+
|
24 |
+
FeatureDict = Mapping[str, np.ndarray]
|
25 |
+
|
26 |
+
|
27 |
+
def make_data_config(
    config: ml_collections.ConfigDict,
    num_res: int,
    ) -> Tuple[ml_collections.ConfigDict, List[str]]:
  """Builds the data config and feature list for the input pipeline.

  Args:
    config: Model config; only its `data` sub-config is used, and it is
      deep-copied so the caller's config is left untouched.
    num_res: Number of residues; written to `eval.crop_size` of the copy.

  Returns:
    A `(data_config, feature_names)` pair, where `feature_names` is
    `common.unsupervised_features`, followed by `common.template_features`
    when `common.use_templates` is set.
  """
  data_cfg = copy.deepcopy(config.data)
  common_cfg = data_cfg.common

  feature_names = common_cfg.unsupervised_features
  if common_cfg.use_templates:
    # NOTE(review): if the stored value is a mutable list, `+=` extends the
    # copied config's `unsupervised_features` in place as well.
    feature_names += common_cfg.template_features

  # crop_size is assigned inside `unlocked()` so a locked config accepts it.
  with data_cfg.unlocked():
    data_cfg.eval.crop_size = num_res

  return data_cfg, feature_names
|
42 |
+
|
43 |
+
|
44 |
+
def tf_example_to_features(tf_example: tf.train.Example,
                           config: ml_collections.ConfigDict,
                           random_seed: int = 0) -> FeatureDict:
  """Runs the TF input pipeline on a `tf.train.Example`.

  Note that `tf_example` is modified in place: an integer
  `deletion_matrix_int` feature, if present, is replaced by a float
  `deletion_matrix` feature.

  Args:
    tf_example: Example holding the raw features, including `seq_length`.
    config: Model config forwarded to `make_data_config`.
    random_seed: Graph-level TensorFlow random seed.

  Returns:
    The processed features as a dict of NumPy arrays, without any entries
    whose dtype is object.
  """
  example_feats = tf_example.features.feature
  num_res = int(example_feats['seq_length'].int64_list.value[0])
  cfg, feature_names = make_data_config(config, num_res=num_res)

  if 'deletion_matrix_int' in set(example_feats):
    int_values = example_feats['deletion_matrix_int'].int64_list.value
    float_list = tf.train.FloatList(value=map(float, int_values))
    example_feats['deletion_matrix'].CopyFrom(
        tf.train.Feature(float_list=float_list))
    del example_feats['deletion_matrix_int']

  # Build the preprocessing graph on the CPU, then freeze it before running.
  graph = tf.Graph()
  with graph.as_default(), tf.device('/device:CPU:0'):
    tf.compat.v1.set_random_seed(random_seed)
    tensors = proteins_dataset.create_tensor_dict(
        raw_data=tf_example.SerializeToString(),
        features=feature_names)
    processed_batch = input_pipeline.process_tensors_from_config(tensors, cfg)

  graph.finalize()

  with tf.Session(graph=graph) as session:
    features = session.run(processed_batch)

  # Object-dtype arrays are dropped from the result.
  return {name: value for name, value in features.items()
          if value.dtype != 'O'}
|
74 |
+
|
75 |
+
|
76 |
+
def np_example_to_features(np_example: FeatureDict,
                           config: ml_collections.ConfigDict,
                           random_seed: int = 0) -> FeatureDict:
  """Runs the TF input pipeline on a dict of NumPy features.

  Args:
    np_example: Raw features, including `seq_length`. The mapping is
      shallow-copied first, so the caller's mapping keeps its keys.
    config: Model config forwarded to `make_data_config`.
    random_seed: Graph-level TensorFlow random seed.

  Returns:
    The processed features as a dict of NumPy arrays, without any entries
    whose dtype is object.
  """
  example = dict(np_example)
  num_res = int(example['seq_length'][0])
  cfg, feature_names = make_data_config(config, num_res=num_res)

  # The pipeline consumes a float `deletion_matrix`; convert the int variant.
  if 'deletion_matrix_int' in example:
    int_matrix = example.pop('deletion_matrix_int')
    example['deletion_matrix'] = int_matrix.astype(np.float32)

  # Build the preprocessing graph on the CPU, then freeze it before running.
  graph = tf.Graph()
  with graph.as_default(), tf.device('/device:CPU:0'):
    tf.compat.v1.set_random_seed(random_seed)
    tensors = proteins_dataset.np_to_tensor_dict(
        np_example=example, features=feature_names)

    processed_batch = input_pipeline.process_tensors_from_config(tensors, cfg)

  graph.finalize()

  with tf.Session(graph=graph) as session:
    features = session.run(processed_batch)

  # Object-dtype arrays are dropped from the result.
  return {name: value for name, value in features.items()
          if value.dtype != 'O'}
|
af_backprop/alphafold/model/folding.py
ADDED
@@ -0,0 +1,1016 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Modules and utilities for the structure module."""
|
16 |
+
|
17 |
+
import functools
|
18 |
+
from typing import Dict
|
19 |
+
from alphafold.common import residue_constants
|
20 |
+
from alphafold.model import all_atom
|
21 |
+
from alphafold.model import common_modules
|
22 |
+
from alphafold.model import prng
|
23 |
+
from alphafold.model import quat_affine
|
24 |
+
from alphafold.model import r3
|
25 |
+
from alphafold.model import utils
|
26 |
+
import haiku as hk
|
27 |
+
import jax
|
28 |
+
import jax.numpy as jnp
|
29 |
+
import ml_collections
|
30 |
+
import numpy as np
|
31 |
+
|
32 |
+
|
33 |
+
def squared_difference(x, y):
  """Returns the element-wise square of `x - y`."""
  delta = x - y
  return jnp.square(delta)
|
35 |
+
|
36 |
+
|
37 |
+
class InvariantPointAttention(hk.Module):
  """Invariant Point attention module.

  The high-level idea is that this attention module works over a set of points
  and associated orientations in 3D space (e.g. protein residues).

  Each residue outputs a set of queries and keys as points in their local
  reference frame.  The attention is then defined as the euclidean distance
  between the queries and keys in the global frame.

  Jumper et al. (2021) Suppl. Alg. 22 "InvariantPointAttention"
  """

  def __init__(self,
               config,
               global_config,
               dist_epsilon=1e-8,
               name='invariant_point_attention'):
    """Initialize.

    Args:
      config: Structure Module Config
      global_config: Global Config of Model.
      dist_epsilon: Small value to avoid NaN in distance calculation.
      name: Haiku Module name.
    """
    super().__init__(name=name)

    self._dist_epsilon = dist_epsilon
    # Controls whether the output projection starts from all-zero weights.
    self._zero_initialize_last = global_config.zero_init

    self.config = config

    self.global_config = global_config

  def __call__(self, inputs_1d, inputs_2d, mask, affine):
    """Compute geometry-aware attention.

    Given a set of query residues (defined by affines and associated scalar
    features), this function computes geometry-aware attention between the
    query residues and target residues.

    The residues produce points in their local reference frame, which
    are converted into the global frame in order to compute attention via
    euclidean distance.

    Equivalently, the target residues produce points in their local frame to be
    used as attention values, which are converted into the query residues'
    local frames.

    Args:
      inputs_1d: (N, C) 1D input embedding that is the basis for the
        scalar queries.
      inputs_2d: (N, M, C') 2D input embedding, used for biases and values.
      mask: (N, 1) mask to indicate which elements of inputs_1d participate
        in the attention.
      affine: QuatAffine object describing the position and orientation of
        every element in inputs_1d.

    Returns:
      Transformation of the input embedding.
    """
    num_residues, _ = inputs_1d.shape

    # Improve readability by removing a large number of 'self's.
    num_head = self.config.num_head
    num_scalar_qk = self.config.num_scalar_qk
    num_point_qk = self.config.num_point_qk
    num_scalar_v = self.config.num_scalar_v
    num_point_v = self.config.num_point_v
    num_output = self.config.num_channel

    assert num_scalar_qk > 0
    assert num_point_qk > 0
    assert num_point_v > 0

    # Construct scalar queries of shape:
    # [num_query_residues, num_head, num_scalar_qk]
    q_scalar = common_modules.Linear(
        num_head * num_scalar_qk, name='q_scalar')(
            inputs_1d)
    q_scalar = jnp.reshape(
        q_scalar, [num_residues, num_head, num_scalar_qk])

    # Construct scalar keys/values from one joint projection of shape:
    # [num_target_residues, num_head, num_scalar_v + num_scalar_qk]
    # The first num_scalar_qk channels are the keys, the rest the values.
    kv_scalar = common_modules.Linear(
        num_head * (num_scalar_v + num_scalar_qk), name='kv_scalar')(
            inputs_1d)
    kv_scalar = jnp.reshape(kv_scalar,
                            [num_residues, num_head,
                             num_scalar_v + num_scalar_qk])
    k_scalar, v_scalar = jnp.split(kv_scalar, [num_scalar_qk], axis=-1)

    # Construct query points of shape:
    # [num_residues, num_head, num_point_qk]

    # First construct query points in local frame.
    q_point_local = common_modules.Linear(
        num_head * 3 * num_point_qk, name='q_point_local')(
            inputs_1d)
    # Split the last axis into three equal parts, one per coordinate.
    q_point_local = jnp.split(q_point_local, 3, axis=-1)
    # Project query points into global frame.
    q_point_global = affine.apply_to_point(q_point_local, extra_dims=1)
    # Reshape query point for later use.
    q_point = [
        jnp.reshape(x, [num_residues, num_head, num_point_qk])
        for x in q_point_global]

    # Construct key and value points.
    # Key points have shape [num_residues, num_head, num_point_qk]
    # Value points have shape [num_residues, num_head, num_point_v]

    # Construct key and value points in local frame.
    kv_point_local = common_modules.Linear(
        num_head * 3 * (num_point_qk + num_point_v), name='kv_point_local')(
            inputs_1d)
    kv_point_local = jnp.split(kv_point_local, 3, axis=-1)
    # Project key and value points into global frame.
    kv_point_global = affine.apply_to_point(kv_point_local, extra_dims=1)
    kv_point_global = [
        jnp.reshape(x, [num_residues,
                        num_head, (num_point_qk + num_point_v)])
        for x in kv_point_global]
    # Split key and value points.
    k_point, v_point = list(
        zip(*[
            jnp.split(x, [num_point_qk,], axis=-1)
            for x in kv_point_global
        ]))

    # We assume that all queries and keys come iid from N(0, 1) distribution
    # and compute the variances of the attention logits.
    # Each scalar pair (q, k) contributes Var q*k = 1
    scalar_variance = max(num_scalar_qk, 1) * 1.
    # Each point pair (q, k) contributes Var [0.5 ||q||^2 - <q, k>] = 9 / 2
    point_variance = max(num_point_qk, 1) * 9. / 2

    # Allocate equal variance to scalar, point and attention 2d parts so that
    # the sum is 1.

    num_logit_terms = 3

    scalar_weights = np.sqrt(1.0 / (num_logit_terms * scalar_variance))
    point_weights = np.sqrt(1.0 / (num_logit_terms * point_variance))
    attention_2d_weights = np.sqrt(1.0 / (num_logit_terms))

    # Trainable per-head weights for points.
    trainable_point_weights = jax.nn.softplus(hk.get_parameter(
        'trainable_point_weights', shape=[num_head],
        # softplus^{-1} (1)
        init=hk.initializers.Constant(np.log(np.exp(1.) - 1.))))
    # point_weights becomes [num_head, 1] after this multiplication.
    point_weights *= jnp.expand_dims(trainable_point_weights, axis=1)

    # Move the head axis to the front: [num_head, num_residues, num_points].
    v_point = [jnp.swapaxes(x, -2, -3) for x in v_point]

    q_point = [jnp.swapaxes(x, -2, -3) for x in q_point]
    k_point = [jnp.swapaxes(x, -2, -3) for x in k_point]
    # Per-coordinate squared differences between every query/key pair, each
    # of shape [num_head, num_query_residues, num_target_residues, num_point_qk].
    dist2 = [
        squared_difference(qx[:, :, None, :], kx[:, None, :, :])
        for qx, kx in zip(q_point, k_point)
    ]
    # Sum over the three coordinates to get squared euclidean distances.
    dist2 = sum(dist2)
    attn_qk_point = -0.5 * jnp.sum(
        point_weights[:, None, None, :] * dist2, axis=-1)

    # Scalar attention, also with the head axis first.
    v = jnp.swapaxes(v_scalar, -2, -3)
    q = jnp.swapaxes(scalar_weights * q_scalar, -2, -3)
    k = jnp.swapaxes(k_scalar, -2, -3)
    attn_qk_scalar = jnp.matmul(q, jnp.swapaxes(k, -2, -1))
    attn_logits = attn_qk_scalar + attn_qk_point

    # Per-head bias computed from the 2D embedding.
    attention_2d = common_modules.Linear(
        num_head, name='attention_2d')(
            inputs_2d)

    # Move the head channel first so it lines up with attn_logits.
    attention_2d = jnp.transpose(attention_2d, [2, 0, 1])
    attention_2d = attention_2d_weights * attention_2d
    attn_logits += attention_2d

    # Outer product of the (N, 1) mask with its transpose; pairs with a masked
    # residue get a large negative logit.
    mask_2d = mask * jnp.swapaxes(mask, -1, -2)
    attn_logits -= 1e5 * (1. - mask_2d)

    # [num_head, num_query_residues, num_target_residues]
    attn = jax.nn.softmax(attn_logits)

    # [num_head, num_query_residues, num_scalar_v]
    result_scalar = jnp.matmul(attn, v)

    # For point result, implement matmul manually so that it will be a float32
    # on TPU.  This is equivalent to
    # result_point_global = [jnp.einsum('hqk,hkc->hqc', attn, vx)
    #                        for vx in v_point]
    # but on the TPU, doing the multiply and reduce_sum ensures the
    # computation happens in float32 instead of bfloat16.
    result_point_global = [jnp.sum(
        attn[:, :, :, None] * vx[:, None, :, :],
        axis=-2) for vx in v_point]

    # [num_query_residues, num_head, num_(scalar|point)_v]
    result_scalar = jnp.swapaxes(result_scalar, -2, -3)
    result_point_global = [
        jnp.swapaxes(x, -2, -3)
        for x in result_point_global]

    # Features used in the linear output projection. Should have the size
    # [num_query_residues, ?]
    output_features = []

    result_scalar = jnp.reshape(
        result_scalar, [num_residues, num_head * num_scalar_v])
    output_features.append(result_scalar)

    result_point_global = [
        jnp.reshape(r, [num_residues, num_head * num_point_v])
        for r in result_point_global]
    # Express the attended points in each query residue's local frame.
    result_point_local = affine.invert_point(result_point_global, extra_dims=1)
    output_features.extend(result_point_local)

    # Norm of each local result point; the epsilon keeps the sqrt away from 0.
    output_features.append(jnp.sqrt(self._dist_epsilon +
                                    jnp.square(result_point_local[0]) +
                                    jnp.square(result_point_local[1]) +
                                    jnp.square(result_point_local[2])))

    # Dimensions: h = heads, i and j = residues,
    # c = inputs_2d channels
    # Contraction happens over the second residue dimension, similarly to how
    # the usual attention is performed.
    result_attention_over_2d = jnp.einsum('hij, ijc->ihc', attn, inputs_2d)
    num_out = num_head * result_attention_over_2d.shape[-1]
    output_features.append(
        jnp.reshape(result_attention_over_2d,
                    [num_residues, num_out]))

    final_init = 'zeros' if self._zero_initialize_last else 'linear'

    final_act = jnp.concatenate(output_features, axis=-1)

    return common_modules.Linear(
        num_output,
        initializer=final_init,
        name='output_projection')(final_act)
|
279 |
+
|
280 |
+
|
281 |
+
class FoldIteration(hk.Module):
  """A single iteration of the main structure module loop.

  Jumper et al. (2021) Suppl. Alg. 20 "StructureModule" lines 6-21

  First, each residue attends to all residues using InvariantPointAttention.
  Then, we apply transition layers to update the hidden representations.
  Finally, we use the hidden representations to produce an update to the
  affine of each residue.
  """

  def __init__(self, config, global_config,
               name='fold_iteration'):
    """Stores the structure-module config and the global model config."""
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self,
               activations,
               sequence_mask,
               update_affine,
               is_training,
               initial_act,
               safe_key=None,
               static_feat_2d=None,
               aatype=None,
               scale_rate=1.0):
    """Runs one attention / transition / backbone-update step.

    Args:
      activations: Dict with 'act' (per-residue activations) and 'affine'
        (tensor form of the current QuatAffine).
      sequence_mask: Mask passed to InvariantPointAttention.
      update_affine: Whether to predict and compose a backbone update.
      is_training: Forwarded to the dropout helper.
      initial_act: Activations passed to the sidechain module alongside 'act'.
      safe_key: Optional prng.SafeKey; a fresh one is drawn when None.
      static_feat_2d: 2D embedding passed to InvariantPointAttention.
      aatype: Amino-acid types passed to the sidechain module.
      scale_rate: Multiplier applied to the configured dropout rate.

    Returns:
      A `(new_activations, outputs)` pair: `new_activations` holds the updated
      'act' and 'affine'; `outputs` holds 'affine' and the sidechain result
      'sc'.
    """
    c = self.config

    if safe_key is None:
      safe_key = prng.SafeKey(hk.next_rng_key())

    def safe_dropout_fn(tensor, safe_key):
      # Dropout whose rate is the configured rate scaled by `scale_rate`.
      return prng.safe_dropout(
          tensor=tensor,
          safe_key=safe_key,
          rate=c.dropout * scale_rate,
          is_deterministic=self.global_config.deterministic,
          is_training=is_training)

    affine = quat_affine.QuatAffine.from_tensor(activations['affine'])

    act = activations['act']
    attention_module = InvariantPointAttention(self.config, self.global_config)
    # Attention
    attn = attention_module(
        inputs_1d=act,
        inputs_2d=static_feat_2d,
        mask=sequence_mask,
        affine=affine)
    act += attn
    # Two sub-keys: one for each of the two dropout calls below.
    safe_key, *sub_keys = safe_key.split(3)
    sub_keys = iter(sub_keys)
    act = safe_dropout_fn(act, next(sub_keys))
    act = hk.LayerNorm(
        axis=[-1],
        create_scale=True,
        create_offset=True,
        name='attention_layer_norm')(
            act)

    final_init = 'zeros' if self.global_config.zero_init else 'linear'

    # Transition
    input_act = act
    for i in range(c.num_layer_in_transition):
      # Every layer but the last uses 'relu' init and is followed by a ReLU.
      init = 'relu' if i < c.num_layer_in_transition - 1 else final_init
      act = common_modules.Linear(
          c.num_channel,
          initializer=init,
          name='transition')(
              act)
      if i < c.num_layer_in_transition - 1:
        act = jax.nn.relu(act)
    # Residual connection around the transition stack.
    act += input_act
    act = safe_dropout_fn(act, next(sub_keys))
    act = hk.LayerNorm(
        axis=[-1],
        create_scale=True,
        create_offset=True,
        name='transition_layer_norm')(act)

    if update_affine:
      # This block corresponds to
      # Jumper et al. (2021) Alg. 23 "Backbone update"
      affine_update_size = 6

      # Affine update
      affine_update = common_modules.Linear(
          affine_update_size,
          initializer=final_init,
          name='affine_update')(
              act)

      affine = affine.pre_compose(affine_update)

    # Sidechains are predicted from the affine with its translation rescaled
    # by `position_scale`.
    sc = MultiRigidSidechain(c.sidechain, self.global_config)(
        affine.scale_translation(c.position_scale), [act, initial_act], aatype)

    outputs = {'affine': affine.to_tensor(), 'sc': sc}

    # NOTE(review): the rotation stop-gradient below is disabled in this file,
    # so gradients flow through the rotation between iterations.
    # affine = affine.apply_rotation_tensor_fn(jax.lax.stop_gradient)

    new_activations = {
        'act': act,
        'affine': affine.to_tensor()
    }
    return new_activations, outputs
|
389 |
+
|
390 |
+
|
391 |
+
def generate_affines(representations, batch, config, global_config,
                     is_training, safe_key):
  """Generate predicted affines for a single chain.

  Jumper et al. (2021) Suppl. Alg. 20 "StructureModule"

  This is the main part of the structure module - it iteratively applies
  folding to produce a set of predicted residue positions.

  Args:
    representations: Representations dictionary; 'single' and 'pair' are read.
    batch: Batch dictionary; 'seq_mask', 'aatype' and 'scale_rate' are read.
    config: Config for the structure module.
    global_config: Global config.
    is_training: Whether the model is being trained.
    safe_key: A prng.SafeKey object that wraps a PRNG key.

  Returns:
    A dictionary containing residue affines and sidechain positions, plus the
    final activations under 'act'.
  """
  c = config
  sequence_mask = batch['seq_mask'][:, None]

  act = hk.LayerNorm(
      axis=[-1],
      create_scale=True,
      create_offset=True,
      name='single_layer_norm')(
          representations['single'])

  # Keep the normalised single representation for the sidechain module.
  initial_act = act
  act = common_modules.Linear(
      c.num_channel, name='initial_projection')(
          act)

  affine = generate_new_affine(sequence_mask)

  fold_iteration = FoldIteration(
      c, global_config, name='fold_iteration')

  assert len(batch['seq_mask'].shape) == 1

  activations = {'act': act,
                 'affine': affine.to_tensor(),
                 }

  act_2d = hk.LayerNorm(
      axis=[-1],
      create_scale=True,
      create_offset=True,
      name='pair_layer_norm')(
          representations['pair'])

  def fold_iter(x,_):
    # Scan body: the carry `x` holds the activations dict and the PRNG key.
    # A fresh sub-key is split off for every iteration.
    x["key"], key = x["key"].split()
    x["act"], out = fold_iteration(
        x["act"],
        initial_act=initial_act,
        static_feat_2d=act_2d,
        safe_key=key,
        sequence_mask=sequence_mask,
        update_affine=True,
        is_training=is_training,
        aatype=batch['aatype'],
        scale_rate=batch["scale_rate"])
    return x, out
  x = {"act":activations,"key":safe_key}
  # The same FoldIteration module is applied c.num_layer times; presumably
  # hk.scan stacks the per-iteration outputs along a new leading axis, which
  # is how StructureModule indexes them with [-1] -- confirm against Haiku.
  x, output = hk.scan(fold_iter, x, None, c.num_layer)
  activations = x["act"]

  # Include the activations in the output dict for use by the LDDT-Head.
  output['act'] = activations['act']

  return output
|
465 |
+
|
466 |
+
|
467 |
+
class dummy(hk.Module):
  """Placeholder head with the StructureModule call signature.

  Its `__call__` produces an empty dict.
  """

  def __init__(self, config, global_config, compute_loss=True):
    """Registers the module as "dummy"; all arguments are ignored."""
    super().__init__(name="dummy")
  def __call__(self, representations, batch, is_training, safe_key=None):
    """Returns an empty dict; the inputs are not used."""
    if safe_key is None:
      # The key itself is unused, but a Haiku RNG key is still drawn here.
      safe_key = prng.SafeKey(hk.next_rng_key())
    return {}
|
474 |
+
|
475 |
+
class StructureModule(hk.Module):
  """StructureModule as a network head.

  Jumper et al. (2021) Suppl. Alg. 20 "StructureModule"
  """

  def __init__(self, config, global_config, compute_loss=True,
               name='structure_module'):
    """Stores the configs and the `compute_loss` flag."""
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config
    self.compute_loss = compute_loss

  def __call__(self, representations, batch, is_training,
               safe_key=None):
    """Predicts backbone affines and atom positions.

    Args:
      representations: Representations dictionary passed to generate_affines.
      batch: Batch dictionary; 'atom14_atom_exists' and 'atom37_atom_exists'
        are read here, further keys inside generate_affines.
      is_training: Whether the model is being trained.
      safe_key: Optional prng.SafeKey; a fresh one is drawn when None.

    Returns:
      Dict with the structure-module activations, the affine trajectory, the
      sidechain outputs, and the final atom14 / atom37 positions and masks.
    """
    c = self.config
    ret = {}

    if safe_key is None:
      safe_key = prng.SafeKey(hk.next_rng_key())

    output = generate_affines(
        representations=representations,
        batch=batch,
        config=self.config,
        global_config=self.global_config,
        is_training=is_training,
        safe_key=safe_key)

    ret['representations'] = {'structure_module': output['act']}

    # Rescale the last three affine components by position_scale and leave the
    # first four unchanged.
    ret['traj'] = output['affine'] * jnp.array([1.] * 4 + [c.position_scale] * 3)
    ret['sidechains'] = output['sc']
    # [-1] selects the last entry along the leading axis (the final iteration).
    atom14_pred_positions = r3.vecs_to_tensor(output['sc']['atom_pos'])[-1]
    ret['final_atom14_positions'] = atom14_pred_positions  # (N, 14, 3)
    ret['final_atom14_mask'] = batch['atom14_atom_exists']  # (N, 14)

    atom37_pred_positions = all_atom.atom14_to_atom37(atom14_pred_positions, batch)
    # Zero out positions of atoms that do not exist for the residue type.
    atom37_pred_positions *= batch['atom37_atom_exists'][:, :, None]
    ret['final_atom_positions'] = atom37_pred_positions  # (N, 37, 3)
    ret['final_atom_mask'] = batch['atom37_atom_exists']  # (N, 37)
    ret['final_affines'] = ret['traj'][-1]

    return ret

  def loss(self, value, batch):
    """Computes the structure-module loss and optional metrics.

    `value` is updated in place with the renamed ground truth and, when
    computed, the structural violations.

    Args:
      value: Output dict of `__call__`.
      batch: Batch dictionary with the ground-truth features.

    Returns:
      Dict with 'loss', 'metrics' and 'sidechain_fape'; the helper loss
      functions receive this dict and presumably add further entries to it.
    """
    ret = {'loss': 0.}

    ret['metrics'] = {}
    # If requested, compute in-graph metrics.
    if self.config.compute_in_graph_metrics:
      atom14_pred_positions = value['final_atom14_positions']
      # Compute renaming and violations.
      value.update(compute_renamed_ground_truth(batch, atom14_pred_positions))
      value['violations'] = find_structural_violations(
          batch, atom14_pred_positions, self.config)

      # Several violation metrics:
      violation_metrics = compute_violation_metrics(
          batch=batch,
          atom14_pred_positions=atom14_pred_positions,
          violations=value['violations'])
      ret['metrics'].update(violation_metrics)

    # The return value is ignored; presumably this updates `ret` in place.
    backbone_loss(ret, batch, value, self.config)

    # Skip the renaming if the metrics branch above already computed it.
    if 'renamed_atom14_gt_positions' not in value:
      value.update(compute_renamed_ground_truth(
          batch, value['final_atom14_positions']))
    sc_loss = sidechain_loss(batch, value, self.config)

    # Blend the loss accumulated so far with the sidechain loss.
    ret['loss'] = ((1 - self.config.sidechain.weight_frac) * ret['loss'] +
                   self.config.sidechain.weight_frac * sc_loss['loss'])
    ret['sidechain_fape'] = sc_loss['fape']

    supervised_chi_loss(ret, batch, value, self.config)

    if self.config.structural_violation_loss_weight:
      if 'violations' not in value:
        value['violations'] = find_structural_violations(
            batch, value['final_atom14_positions'], self.config)
      structural_violation_loss(ret, batch, value, self.config)

    return ret
|
559 |
+
|
560 |
+
|
561 |
+
def compute_renamed_ground_truth(
    batch: Dict[str, jnp.ndarray],
    atom14_pred_positions: jnp.ndarray,
    ) -> Dict[str, jnp.ndarray]:
  """Pick, per residue, the ground-truth atom naming closest to the prediction.

  Jumper et al. (2021) Suppl. Alg. 26 "renameSymmetricGroundTruthAtoms"

  The selected (possibly swapped) ground truth is meant to be shared by all
  losses so that every loss pulls the atoms in the same direction.

  Args:
    batch: Dictionary containing:
      * atom14_gt_positions: Ground truth positions.
      * atom14_alt_gt_positions: Ground truth positions with renaming swaps.
      * atom14_atom_is_ambiguous: 1.0 for atoms that are affected by
        renaming swaps.
      * atom14_gt_exists: Mask for which atoms exist in ground truth.
      * atom14_alt_gt_exists: Mask for which atoms exist in ground truth
        after renaming.
      * atom14_atom_exists: Mask for whether each atom is part of the given
        amino acid type.
    atom14_pred_positions: Array of atom positions in global frame with shape
      (N, 14, 3).

  Returns:
    Dictionary containing:
      alt_naming_is_better: Array (N) with 1.0 where the alternative naming
        is better.
      renamed_atom14_gt_positions: Array (N, 14, 3) of ground truth positions
        after the chosen renaming swaps.
      renamed_atom14_gt_exists: Mask (N, 14) after the chosen renaming swaps.
  """
  gt_positions = batch['atom14_gt_positions']
  alt_gt_positions = batch['atom14_alt_gt_positions']
  gt_exists = batch['atom14_gt_exists']

  alt_naming_is_better = all_atom.find_optimal_renaming(
      atom14_gt_positions=gt_positions,
      atom14_alt_gt_positions=alt_gt_positions,
      atom14_atom_is_ambiguous=batch['atom14_atom_is_ambiguous'],
      atom14_gt_exists=gt_exists,
      atom14_pred_positions=atom14_pred_positions,
      atom14_atom_exists=batch['atom14_atom_exists'])

  # Per-residue selector broadcast over (atom, xyz) for the positions ...
  use_alt_xyz = alt_naming_is_better[:, None, None]
  renamed_positions = (
      (1. - use_alt_xyz) * gt_positions + use_alt_xyz * alt_gt_positions)

  # ... and over (atom) for the existence mask.
  use_alt_atom = alt_naming_is_better[:, None]
  renamed_mask = (
      (1. - use_alt_atom) * gt_exists
      + use_alt_atom * batch['atom14_alt_gt_exists'])

  return {
      'alt_naming_is_better': alt_naming_is_better,  # (N)
      'renamed_atom14_gt_positions': renamed_positions,  # (N, 14, 3)
      'renamed_atom14_gt_exists': renamed_mask,  # (N, 14)
  }
|
616 |
+
|
617 |
+
|
618 |
+
def backbone_loss(ret, batch, value, config):
  """Backbone FAPE Loss.

  Jumper et al. (2021) Suppl. Alg. 20 "StructureModule" line 17

  Writes the last trajectory entry's FAPE to ret['fape'] and adds the mean
  FAPE over the whole trajectory to ret['loss'].

  Args:
    ret: Dictionary to write outputs into, needs to contain 'loss'.
    batch: Batch, needs to contain either 'backbone_affine_tensor' and
      'backbone_affine_mask', or 'all_atom_positions' and 'all_atom_mask'
      (from which the ground-truth frames are then derived). May contain
      'use_clamped_fape'.
    value: Dictionary containing structure module output, needs to contain
      'traj', a trajectory of rigids.
    config: Configuration of loss, should contain 'fape.clamp_distance' and
      'fape.loss_unit_distance'.
  """
  pred_rigids = r3.rigids_from_quataffine(
      quat_affine.QuatAffine.from_tensor(value['traj']))

  if 'backbone_affine_tensor' not in batch:
    # No precomputed ground-truth affine: build it from atoms 0, 1 and 2 of
    # 'all_atom_positions' (used as N, CA and C respectively).
    atom_xyz = batch['all_atom_positions']
    rot, trans = quat_affine.make_transform_from_reference(
        atom_xyz[..., 0, :], atom_xyz[..., 1, :], atom_xyz[..., 2, :])
    gt_affine = quat_affine.QuatAffine(quaternion=None,
                                       translation=trans,
                                       rotation=rot,
                                       unstack_inputs=True)
    backbone_mask = batch['all_atom_mask'][..., 0]
  else:
    gt_affine = quat_affine.QuatAffine.from_tensor(
        batch['backbone_affine_tensor'])
    backbone_mask = batch['backbone_affine_mask']

  gt_rigid = r3.rigids_from_quataffine(gt_affine)

  def _trajectory_fape(clamp_distance):
    """FAPE of every trajectory entry against the single ground truth."""
    per_layer_fape = jax.vmap(
        functools.partial(
            all_atom.frame_aligned_point_error,
            l1_clamp_distance=clamp_distance,
            length_scale=config.fape.loss_unit_distance),
        # Map over the leading axis of the predictions only.
        (0, None, None, 0, None, None))
    return per_layer_fape(pred_rigids, gt_rigid, backbone_mask,
                          pred_rigids.trans, gt_rigid.trans,
                          backbone_mask)

  fape_loss = _trajectory_fape(config.fape.clamp_distance)

  if 'use_clamped_fape' in batch:
    # Jumper et al. (2021) Suppl. Sec. 1.11.5 "Loss clamping details"
    use_clamped_fape = jnp.asarray(batch['use_clamped_fape'], jnp.float32)
    fape_loss_unclamped = _trajectory_fape(None)
    fape_loss = (fape_loss * use_clamped_fape +
                 fape_loss_unclamped * (1 - use_clamped_fape))

  ret['fape'] = fape_loss[-1]
  ret['loss'] += jnp.mean(fape_loss)
|
680 |
+
|
681 |
+
|
682 |
+
def sidechain_loss(batch, value, config):
  """All Atom FAPE Loss using renamed rigids.

  Args:
    batch: Batch, needs to contain 'rigidgroups_gt_frames',
      'rigidgroups_alt_gt_frames' and 'rigidgroups_gt_exists'.
    value: Dictionary containing structure module output, needs to contain
      'alt_naming_is_better', 'renamed_atom14_gt_positions',
      'renamed_atom14_gt_exists', and value['sidechains'] with 'frames' and
      'atom_pos'.
    config: Configuration of loss, should contain
      'sidechain.atom_clamp_distance' and 'sidechain.length_scale'.

  Returns:
    Dictionary with keys 'fape' and 'loss', both holding the same FAPE value.
  """
  # Rename Frames
  # Jumper et al. (2021) Suppl. Alg. 26 "renameSymmetricGroundTruthAtoms" line 7
  alt_naming_is_better = value['alt_naming_is_better']
  renamed_gt_frames = (
      (1. - alt_naming_is_better[:, None, None])
      * batch['rigidgroups_gt_frames']
      + alt_naming_is_better[:, None, None]
      * batch['rigidgroups_alt_gt_frames'])

  flat_gt_frames = r3.rigids_from_tensor_flat12(
      jnp.reshape(renamed_gt_frames, [-1, 12]))
  flat_frames_mask = jnp.reshape(batch['rigidgroups_gt_exists'], [-1])

  flat_gt_positions = r3.vecs_from_tensor(
      jnp.reshape(value['renamed_atom14_gt_positions'], [-1, 3]))
  flat_positions_mask = jnp.reshape(value['renamed_atom14_gt_exists'], [-1])

  # Compute frame_aligned_point_error score for the final layer.
  pred_frames = value['sidechains']['frames']
  pred_positions = value['sidechains']['atom_pos']

  def _slice_last_layer_and_flatten(x):
    return jnp.reshape(x[-1], [-1])

  # Use jax.tree_util.tree_map rather than the top-level `jax.tree_map` alias:
  # the alias is deprecated (and removed) in newer JAX releases, while the
  # tree_util spelling works across versions.
  flat_pred_frames = jax.tree_util.tree_map(
      _slice_last_layer_and_flatten, pred_frames)
  flat_pred_positions = jax.tree_util.tree_map(
      _slice_last_layer_and_flatten, pred_positions)
  # FAPE Loss on sidechains
  fape = all_atom.frame_aligned_point_error(
      pred_frames=flat_pred_frames,
      target_frames=flat_gt_frames,
      frames_mask=flat_frames_mask,
      pred_positions=flat_pred_positions,
      target_positions=flat_gt_positions,
      positions_mask=flat_positions_mask,
      l1_clamp_distance=config.sidechain.atom_clamp_distance,
      length_scale=config.sidechain.length_scale)

  return {
      'fape': fape,
      'loss': fape}
|
722 |
+
|
723 |
+
|
724 |
+
def structural_violation_loss(ret, batch, value, config):
  """Adds the weighted structural-violation term to ret['loss'].

  Args:
    ret: Dictionary to write outputs into, needs to contain 'loss'.
    batch: Batch, needs to contain 'atom14_atom_exists'.
    value: Dictionary that needs to contain 'violations' with the
      'between_residues' and 'within_residues' entries read below.
    config: Configuration, should contain 'structural_violation_loss_weight'
      and a truthy 'sidechain.weight_frac'.
  """
  assert config.sidechain.weight_frac

  between = value['violations']['between_residues']
  within = value['violations']['within_residues']
  num_atoms = jnp.sum(batch['atom14_atom_exists']).astype(jnp.float32)

  # Backbone connectivity terms.
  connection_term = (
      between['bonds_c_n_loss_mean'] +
      between['angles_ca_c_n_loss_mean'] +
      between['angles_c_n_ca_loss_mean'])

  # Summed per-atom terms, divided by the atom count; the small constant
  # keeps the division finite when the count is zero.
  per_atom_term = jnp.sum(
      between['clashes_per_atom_loss_sum'] +
      within['per_atom_loss_sum']) / (1e-6 + num_atoms)

  ret['loss'] += (config.structural_violation_loss_weight * (
      connection_term + per_atom_term))
|
739 |
+
|
740 |
+
|
741 |
+
def find_structural_violations(
    batch: Dict[str, jnp.ndarray],
    atom14_pred_positions: jnp.ndarray,  # (N, 14, 3)
    config: ml_collections.ConfigDict
    ):
  """Runs the backbone-connection, clash and within-residue violation checks.

  Args:
    batch: Dictionary that needs to contain 'atom14_atom_exists',
      'residue_index', 'aatype' and 'residx_atom14_to_atom37'.
    atom14_pred_positions: Predicted atom positions, (N, 14, 3).
    config: Configuration, should contain 'violation_tolerance_factor' and
      'clash_overlap_tolerance'.

  Returns:
    Nested dictionary with 'between_residues', 'within_residues' and
    'total_per_residue_violations_mask' entries.
  """
  atom_exists = batch['atom14_atom_exists']

  # Bond/angle violations of the backbone links between residues. The same
  # tolerance factor is used for the soft and the hard threshold.
  bond_tolerance = config.violation_tolerance_factor
  backbone_links = all_atom.between_residue_bond_loss(
      pred_atom_positions=atom14_pred_positions,
      pred_atom_mask=atom_exists.astype(jnp.float32),
      residue_index=batch['residue_index'].astype(jnp.float32),
      aatype=batch['aatype'],
      tolerance_factor_soft=bond_tolerance,
      tolerance_factor_hard=bond_tolerance)

  # Van der Waals radius of every atom; the element is taken to be the first
  # letter of the atom name. Result shape: (N, 14).
  vdw_radius_per_atomtype = [
      residue_constants.van_der_waals_radius[atom_name[0]]
      for atom_name in residue_constants.atom_types
  ]
  atom14_atom_radius = atom_exists * utils.batched_gather(
      vdw_radius_per_atomtype, batch['residx_atom14_to_atom37'])

  # Clashes between atoms of different residues. Again one tolerance serves
  # as both the soft and the hard value.
  clash_tolerance = config.clash_overlap_tolerance
  inter_residue_clashes = all_atom.between_residue_clash_loss(
      atom14_pred_positions=atom14_pred_positions,
      atom14_atom_exists=atom_exists,
      atom14_atom_radius=atom14_atom_radius,
      residue_index=batch['residue_index'],
      overlap_tolerance_soft=clash_tolerance,
      overlap_tolerance_hard=clash_tolerance)

  # Violations inside each residue (clashes, bond lengths and angles),
  # checked against per-residue-type distance bounds.
  dist_bounds = residue_constants.make_atom14_dists_bounds(
      overlap_tolerance=config.clash_overlap_tolerance,
      bond_length_tolerance_factor=config.violation_tolerance_factor)
  lower_bound = utils.batched_gather(
      dist_bounds['lower_bound'], batch['aatype'])
  upper_bound = utils.batched_gather(
      dist_bounds['upper_bound'], batch['aatype'])
  intra_residue = all_atom.within_residue_violations(
      atom14_pred_positions=atom14_pred_positions,
      atom14_atom_exists=atom_exists,
      atom14_dists_lower_bound=lower_bound,
      atom14_dists_upper_bound=upper_bound,
      tighten_bounds_for_loss=0.0)

  # A residue counts as violating if any of the three checks flags it
  # (this combined mask is used later for LDDT).
  any_violation_per_residue = jnp.max(
      jnp.stack([
          backbone_links['per_residue_violation_mask'],
          jnp.max(inter_residue_clashes['per_atom_clash_mask'], axis=-1),
          jnp.max(intra_residue['per_atom_violations'], axis=-1),
      ]),
      axis=0)

  between_residues = {
      'bonds_c_n_loss_mean':
          backbone_links['c_n_loss_mean'],  # ()
      'angles_ca_c_n_loss_mean':
          backbone_links['ca_c_n_loss_mean'],  # ()
      'angles_c_n_ca_loss_mean':
          backbone_links['c_n_ca_loss_mean'],  # ()
      'connections_per_residue_loss_sum':
          backbone_links['per_residue_loss_sum'],  # (N)
      'connections_per_residue_violation_mask':
          backbone_links['per_residue_violation_mask'],  # (N)
      'clashes_mean_loss':
          inter_residue_clashes['mean_loss'],  # ()
      'clashes_per_atom_loss_sum':
          inter_residue_clashes['per_atom_loss_sum'],  # (N, 14)
      'clashes_per_atom_clash_mask':
          inter_residue_clashes['per_atom_clash_mask'],  # (N, 14)
  }
  within_residues = {
      'per_atom_loss_sum':
          intra_residue['per_atom_loss_sum'],  # (N, 14)
      'per_atom_violations':
          intra_residue['per_atom_violations'],  # (N, 14)
  }
  return {
      'between_residues': between_residues,
      'within_residues': within_residues,
      'total_per_residue_violations_mask':
          any_violation_per_residue,  # (N)
  }
|
827 |
+
|
828 |
+
|
829 |
+
def compute_violation_metrics(
    batch: Dict[str, jnp.ndarray],
    atom14_pred_positions: jnp.ndarray,  # (N, 14, 3)
    violations: Dict[str, jnp.ndarray],
    ) -> Dict[str, jnp.ndarray]:
  """Summarise structural violations as a handful of metrics.

  Args:
    batch: Dictionary that needs to contain 'atom14_atom_exists',
      'residue_index' and 'seq_mask'.
    atom14_pred_positions: Predicted atom positions, (N, 14, 3).
    violations: Nested dictionary with the 'between_residues',
      'within_residues' and 'total_per_residue_violations_mask' entries read
      below (the structure `find_structural_violations` returns).

  Returns:
    Dictionary of metrics keyed by 'violations_*' names.
  """

  def _seq_masked_mean(per_residue_values):
    return utils.mask_mean(mask=batch['seq_mask'], value=per_residue_values)

  metrics = {}
  metrics['violations_extreme_ca_ca_distance'] = (
      all_atom.extreme_ca_ca_distance_violations(
          pred_atom_positions=atom14_pred_positions,
          pred_atom_mask=batch['atom14_atom_exists'].astype(jnp.float32),
          residue_index=batch['residue_index'].astype(jnp.float32)))

  between = violations['between_residues']
  metrics['violations_between_residue_bond'] = _seq_masked_mean(
      between['connections_per_residue_violation_mask'])
  # Per-atom masks are reduced over the atom axis before averaging.
  metrics['violations_between_residue_clash'] = _seq_masked_mean(
      jnp.max(between['clashes_per_atom_clash_mask'], axis=-1))
  metrics['violations_within_residue'] = _seq_masked_mean(
      jnp.max(violations['within_residues']['per_atom_violations'], axis=-1))
  metrics['violations_per_residue'] = _seq_masked_mean(
      violations['total_per_residue_violations_mask'])
  return metrics
|
859 |
+
|
860 |
+
|
861 |
+
def supervised_chi_loss(ret, batch, value, config):
  """Adds the chi-angle and angle-norm loss terms to ret['loss'].

  Jumper et al. (2021) Suppl. Alg. 27 "torsionAngleLoss"

  Also stores the two unweighted terms as ret['chi_loss'] and
  ret['angle_norm_loss'].

  Args:
    ret: Dictionary to write outputs into, needs to contain 'loss'.
    batch: Batch, needs to contain 'seq_mask', 'chi_mask', 'chi_angles' and
      'aatype'.
    value: Dictionary containing structure module output, needs to contain
      value['sidechains']['angles_sin_cos'] for angles and
      value['sidechains']['unnormalized_angles_sin_cos'] for unnormalized
      angles.
    config: Configuration of loss, should contain 'chi_weight' (scales the
      torsion term) and 'angle_norm_weight' (scales the angle norm term).
  """
  eps = 1e-6
  sidechains = value['sidechains']

  sequence_mask = batch['seq_mask']
  num_res = sequence_mask.shape[0]
  chi_mask = batch['chi_mask'].astype(jnp.float32)

  # Of the 7 predicted angles per residue only entries 3: are compared
  # against the chi angles.
  pred_chi = jnp.reshape(
      sidechains['angles_sin_cos'], [-1, num_res, 7, 2])[:, :, 3:]

  # Look up, per residue, which chi angles are pi-periodic.
  aatype_one_hot = jax.nn.one_hot(
      batch['aatype'], residue_constants.restype_num + 1,
      dtype=jnp.float32)[None]
  chi_pi_periodic = jnp.einsum('ijk, kl->ijl', aatype_one_hot,
                               jnp.asarray(residue_constants.chi_pi_periodic))

  true_chi = batch['chi_angles'][None]
  true_sin_cos = jnp.stack([jnp.sin(true_chi), jnp.cos(true_chi)], axis=-1)

  # This is -1 if chi is pi-periodic and +1 if it's 2pi-periodic
  periodicity_sign = (1 - 2 * chi_pi_periodic)[..., None]
  true_sin_cos_shifted = periodicity_sign * true_sin_cos

  def _sq_error_to_prediction(target_sin_cos):
    return jnp.sum(squared_difference(target_sin_cos, pred_chi), -1)

  # Take whichever of the two targets is closer to the prediction.
  sq_chi_error = jnp.minimum(
      _sq_error_to_prediction(true_sin_cos),
      _sq_error_to_prediction(true_sin_cos_shifted))

  sq_chi_loss = utils.mask_mean(mask=chi_mask[None], value=sq_chi_error)
  ret['chi_loss'] = sq_chi_loss
  ret['loss'] += config.chi_weight * sq_chi_loss

  # Penalise raw (sin, cos) pairs whose norm deviates from 1.
  raw_angles = jnp.reshape(
      sidechains['unnormalized_angles_sin_cos'], [-1, num_res, 7, 2])
  raw_norm = jnp.sqrt(jnp.sum(jnp.square(raw_angles), axis=-1) + eps)
  angle_norm_loss = utils.mask_mean(mask=sequence_mask[None, :, None],
                                    value=jnp.abs(raw_norm - 1.))

  ret['angle_norm_loss'] = angle_norm_loss
  ret['loss'] += config.angle_norm_weight * angle_norm_loss
|
919 |
+
|
920 |
+
|
921 |
+
def generate_new_affine(sequence_mask):
  """Builds one identity affine per residue.

  Args:
    sequence_mask: Two-dimensional array; only its first dimension (the
      number of residues) is used.

  Returns:
    quat_affine.QuatAffine with quaternion (1, 0, 0, 0) and zero translation
    for every residue.
  """
  num_residues, _ = sequence_mask.shape
  identity_quaternion = jnp.reshape(jnp.asarray([1., 0., 0., 0.]), [1, 4])
  return quat_affine.QuatAffine(
      jnp.tile(identity_quaternion, [num_residues, 1]),
      jnp.zeros([num_residues, 3]),
      unstack_inputs=True)
|
929 |
+
|
930 |
+
|
931 |
+
def l2_normalize(x, axis=-1, epsilon=1e-12):
  """Scales `x` to unit L2 norm along `axis`.

  The squared norm is floored at `epsilon` before the square root, so an
  all-zero slice is returned as zeros instead of producing NaNs.
  """
  squared_norm = jnp.sum(x**2, axis=axis, keepdims=True)
  return x / jnp.sqrt(jnp.maximum(squared_norm, epsilon))
|
934 |
+
|
935 |
+
|
936 |
+
class MultiRigidSidechain(hk.Module):
  """Class to make side chain atoms.

  Predicts 7 (sin, cos) torsion-angle pairs per residue from the given
  activations and turns them, together with the backbone affines, into
  per-residue frames and atom14 positions.
  """

  def __init__(self, config, global_config, name='rigid_sidechain'):
    """Stores the configs.

    Args:
      config: Module config; 'num_channel' and 'num_residual_block' are read
        in `__call__`.
      global_config: Global config; 'zero_init' is read in `__call__`.
      name: Haiku module name.
    """
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, affine, representations_list, aatype):
    """Predict side chains using multi-rigid representations.

    Args:
      affine: The affines for each residue (translations in angstroms).
      representations_list: A list of activations to predict side chains from.
      aatype: Amino acid types.

    Returns:
      Dict containing atom positions and frames (in angstroms), plus the
      normalized and unnormalized angle (sin, cos) pairs.
    """
    # One Linear per input representation, each applied after a ReLU.
    # NOTE(review): all are created with the same name 'input_projection';
    # presumably Haiku disambiguates them by creation order - confirm before
    # reordering.
    act = [
        common_modules.Linear(  # pylint: disable=g-complex-comprehension
            self.config.num_channel,
            name='input_projection')(jax.nn.relu(x))
        for x in representations_list
    ]
    # Sum the activation list (equivalent to concat then Linear).
    act = sum(act)

    # Initializer for the second Linear of every residual block.
    final_init = 'zeros' if self.global_config.zero_init else 'linear'

    # Mapping with some residual blocks.
    for _ in range(self.config.num_residual_block):
      old_act = act
      act = common_modules.Linear(
          self.config.num_channel,
          initializer='relu',
          name='resblock1')(
              jax.nn.relu(act))
      act = common_modules.Linear(
          self.config.num_channel,
          initializer=final_init,
          name='resblock2')(
              jax.nn.relu(act))
      act += old_act

    # Map activations to torsion angles. Shape: (num_res, 14).
    num_res = act.shape[0]
    unnormalized_angles = common_modules.Linear(
        14, name='unnormalized_angles')(
            jax.nn.relu(act))
    # View the 14 outputs as 7 two-component pairs and normalize each pair.
    unnormalized_angles = jnp.reshape(
        unnormalized_angles, [num_res, 7, 2])
    angles = l2_normalize(unnormalized_angles, axis=-1)

    outputs = {
        'angles_sin_cos': angles,  # jnp.ndarray (N, 7, 2)
        'unnormalized_angles_sin_cos':
            unnormalized_angles,  # jnp.ndarray (N, 7, 2)
    }

    # Map torsion angles to frames.
    backb_to_global = r3.rigids_from_quataffine(affine)

    # Jumper et al. (2021) Suppl. Alg. 24 "computeAllAtomCoordinates"

    # r3.Rigids with shape (N, 8).
    all_frames_to_global = all_atom.torsion_angles_to_frames(
        aatype,
        backb_to_global,
        angles)

    # Use frames and literature positions to create the final atom coordinates.
    # r3.Vecs with shape (N, 14).
    pred_positions = all_atom.frames_and_literature_positions_to_atom14_pos(
        aatype, all_frames_to_global)

    outputs.update({
        'atom_pos': pred_positions,  # r3.Vecs (N, 14)
        'frames': all_frames_to_global,  # r3.Rigids (N, 8)
    })
    return outputs
|
af_backprop/alphafold/model/layer_stack.py
ADDED
@@ -0,0 +1,274 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Function to stack repeats of a layer function without shared parameters."""
|
16 |
+
|
17 |
+
import collections
|
18 |
+
import contextlib
|
19 |
+
import functools
|
20 |
+
import inspect
|
21 |
+
from typing import Any, Callable, Optional, Tuple, Union
|
22 |
+
|
23 |
+
import haiku as hk
|
24 |
+
import jax
|
25 |
+
import jax.numpy as jnp
|
26 |
+
|
27 |
+
LayerStackCarry = collections.namedtuple('LayerStackCarry', ['x', 'rng'])
|
28 |
+
LayerStackScanned = collections.namedtuple('LayerStackScanned',
|
29 |
+
['i', 'args_ys'])
|
30 |
+
|
31 |
+
# WrappedFn should take in arbitrarily nested `jnp.ndarray`, and return the
|
32 |
+
# exact same type. We cannot express this with `typing`. So we just use it
|
33 |
+
# to inform the user. In reality, the typing below will accept anything.
|
34 |
+
NestedArray = Any
|
35 |
+
WrappedFn = Callable[..., Union[NestedArray, Tuple[NestedArray]]]
|
36 |
+
|
37 |
+
|
38 |
+
def _check_no_varargs(f):
|
39 |
+
if list(inspect.signature(
|
40 |
+
f).parameters.values())[0].kind == inspect.Parameter.VAR_POSITIONAL:
|
41 |
+
raise ValueError(
|
42 |
+
'The function `f` should not have any `varargs` (that is *args) '
|
43 |
+
'argument. Instead, it should only use explicit positional'
|
44 |
+
'arguments.')
|
45 |
+
|
46 |
+
|
47 |
+
@contextlib.contextmanager
def nullcontext():
  """Context manager that does nothing on entry or exit and binds None."""
  yield None
|
50 |
+
|
51 |
+
|
52 |
+
def maybe_with_rng(key):
  """Returns `hk.with_rng(key)`, or a do-nothing context when key is None."""
  if key is None:
    return nullcontext()
  return hk.with_rng(key)
|
57 |
+
|
58 |
+
|
59 |
+
def maybe_fold_in(key, data):
  """Folds `data` into `key`; a None key is passed through as None."""
  return None if key is None else jax.random.fold_in(key, data)
|
64 |
+
|
65 |
+
|
66 |
+
class _LayerStack(hk.Module):
  """Module to compose parameterized functions, implemented as a scan.

  Subclasses provide `_call_wrapped`, the per-layer function. At init time a
  single layer is run while every created parameter gets an extra leading
  dimension of size `count`; at apply time the layers are run with `hk.scan`,
  each step slicing its own parameters out of that stack.
  """

  def __init__(self,
               count: int,
               unroll: int,
               name: Optional[str] = None):
    """Iterate a function `f` `count` times, with non-shared parameters."""
    super().__init__(name=name)
    self._count = count
    self._unroll = unroll

  def __call__(self, x, *args_ys):
    """Applies the stacked layers to `x`.

    Args:
      x: The value carried from layer to layer.
      *args_ys: Per-layer inputs; they are scanned over, so layer i receives
        the i-th slice of each.

    Returns:
      Tuple `(x, zs)` of the final carried value and the per-layer outputs
      (`zs` is None at init time if the wrapped function returned None as its
      second output).
    """
    count = self._count
    if hk.running_init():
      # At initialization time, we run just one layer but add an extra first
      # dimension to every initialized tensor, making sure to use different
      # random keys for different slices.
      def creator(next_creator, shape, dtype, init, context):
        del context

        def multi_init(shape, dtype):
          assert shape[0] == count
          key = hk.maybe_next_rng_key()

          def rng_context_init(slice_idx):
            # Derive a distinct key per slice (when a key is available).
            slice_key = maybe_fold_in(key, slice_idx)
            with maybe_with_rng(slice_key):
              return init(shape[1:], dtype)

          return jax.vmap(rng_context_init)(jnp.arange(count))

        # Request the parameter with the extra leading `count` dimension.
        return next_creator((count,) + tuple(shape), dtype, multi_init)

      def getter(next_getter, value, context):
        # During init the wrapped function sees slice 0 of the stack.
        trailing_dims = len(context.original_shape) + 1
        sliced_value = jax.lax.index_in_dim(
            value, index=0, axis=value.ndim - trailing_dims, keepdims=False)
        return next_getter(sliced_value)

      with hk.experimental.custom_creator(
          creator), hk.experimental.custom_getter(getter):
        # A single `None` argument is passed through as-is; otherwise take
        # the first slice of each per-layer input.
        if len(args_ys) == 1 and args_ys[0] is None:
          args0 = (None,)
        else:
          args0 = [
              jax.lax.dynamic_index_in_dim(ys, 0, keepdims=False)
              for ys in args_ys
          ]
        x, z = self._call_wrapped(x, *args0)
        if z is None:
          return x, z

        # Broadcast state to hold each layer state.
        def broadcast_state(layer_state):
          return jnp.broadcast_to(
              layer_state, [count,] + list(layer_state.shape))
        zs = jax.tree_util.tree_map(broadcast_state, z)
        return x, zs
    else:
      # Use scan during apply, threading through random seed so that it's
      # unique for each layer.
      def layer(carry: LayerStackCarry, scanned: LayerStackScanned):
        rng = carry.rng

        def getter(next_getter, value, context):
          # Getter slices the full param at the current loop index.
          trailing_dims = len(context.original_shape) + 1
          assert value.shape[value.ndim - trailing_dims] == count, (
              f'Attempting to use a parameter stack of size '
              f'{value.shape[value.ndim - trailing_dims]} for a LayerStack of '
              f'size {count}.')

          sliced_value = jax.lax.dynamic_index_in_dim(
              value, scanned.i, axis=value.ndim - trailing_dims, keepdims=False)
          return next_getter(sliced_value)

        with hk.experimental.custom_getter(getter):
          if rng is None:
            out_x, z = self._call_wrapped(carry.x, *scanned.args_ys)
          else:
            # Split so this layer gets its own key and the remainder is
            # carried on to the next layer.
            rng, rng_ = jax.random.split(rng)
            with hk.with_rng(rng_):
              out_x, z = self._call_wrapped(carry.x, *scanned.args_ys)
        return LayerStackCarry(x=out_x, rng=rng), z

      carry = LayerStackCarry(x=x, rng=hk.maybe_next_rng_key())
      scanned = LayerStackScanned(i=jnp.arange(count, dtype=jnp.int32),
                                  args_ys=args_ys)

      carry, zs = hk.scan(
          layer, carry, scanned, length=count, unroll=self._unroll)
      return carry.x, zs

  def _call_wrapped(self,
                    x: jnp.ndarray,
                    *args,
                    ) -> Tuple[jnp.ndarray, Optional[jnp.ndarray]]:
    """Runs one layer; must be overridden by subclasses."""
    raise NotImplementedError()
|
165 |
+
|
166 |
+
|
167 |
+
class _LayerStackNoState(_LayerStack):
  """_LayerStack variant whose function receives no per-layer state."""

  def __init__(self,
               f: WrappedFn,
               count: int,
               unroll: int,
               name: Optional[str] = None):
    """Validates and stores the wrapped function `f`."""
    super().__init__(count=count, unroll=unroll, name=name)
    _check_no_varargs(f)
    self._f = f

  @hk.transparent
  def _call_wrapped(self, args, y):
    """Calls `f(*args)`; returns its output and None as the layer state."""
    del y  # There is no per-layer input in this variant.
    outputs = self._f(*args)
    # With a single argument the carried value is a tuple of length 1, so the
    # (single) return value has to be wrapped into a tuple of length 1 too.
    carried = (outputs,) if len(args) == 1 else outputs
    return carried, None
|
188 |
+
|
189 |
+
|
190 |
+
class _LayerStackWithState(_LayerStack):
  """_LayerStack variant whose function also receives per-layer state."""

  def __init__(self,
               f: WrappedFn,
               count: int,
               unroll: int,
               name: Optional[str] = None):
    """Stores the wrapped function `f` (no signature validation here)."""
    super().__init__(count=count, unroll=unroll, name=name)
    self._f = f

  @hk.transparent
  def _call_wrapped(self, x, *args):
    """Forwards the carried value and per-layer inputs straight to `f`."""
    layer_result = self._f(x, *args)
    return layer_result
|
204 |
+
|
205 |
+
|
206 |
+
def layer_stack(num_layers: int,
                with_state=False,
                unroll: int = 1,
                name: Optional[str] = None):
  """Builds a decorator that applies a Haiku function `num_layers` times.

  The decorated function must use only explicit positional parameters (no
  kwargs, no `*args`), and its return type must match its input type. The
  parameters may be arbitrarily nested structures with `jnp.ndarray` leaves.

  With `with_state=False` the wrapped function behaves like:
  ```
  for i in range(num_layers):
    x = f(x)
  return x
  ```

  With `with_state=True`, assuming `f` takes two arguments on top of `x`:
  ```
  for i in range(num_layers):
    x, zs[i] = f(x, ys_0[i], ys_1[i])
  return x, zs
  ```
  which would be written with `layer_stack` as:
  ```
  def f(x, y_0, y_1):
    ...
    return new_x, z
  x, zs = layer_stack.layer_stack(num_layers,
                                  with_state=True)(f)(x, ys_0, ys_1)
  ```

  Crucially, any parameters created inside `f` will not be shared across
  iterations.

  Args:
    num_layers: The number of times to iterate the wrapped function.
    with_state: Whether or not to pass per-layer state to the wrapped function.
    unroll: the unroll used by `scan`.
    name: Name of the Haiku context.

  Returns:
    Callable that will produce a layer stack when called with a valid function.
  """
  def iterate(f):
    if not with_state:
      # Checked once, when the function is wrapped.
      _check_no_varargs(f)

      @functools.wraps(f)
      def wrapped(*args):
        stack = _LayerStackNoState(
            f, num_layers, unroll=unroll, name=name)
        carried = stack(args, None)[0]
        # If the function takes a single argument, we must also return a
        # single value, and not a tuple of length 1.
        return carried[0] if len(args) == 1 else carried

      return wrapped

    @functools.wraps(f)
    def wrapped(x, *args):
      # Every per-layer input must provide one slice per layer.
      for ys in args:
        assert ys.shape[0] == num_layers
      stack = _LayerStackWithState(
          f, num_layers, unroll=unroll, name=name)
      return stack(x, *args)

    return wrapped
  return iterate
|
af_backprop/alphafold/model/lddt.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""lDDT protein distance score."""
|
16 |
+
import jax.numpy as jnp
|
17 |
+
|
18 |
+
|
19 |
+
def lddt(predicted_points,
         true_points,
         true_points_mask,
         cutoff=15.,
         per_residue=False):
  """Compute an (approximate) lDDT score for a batch of point sets.

  lDDT reference:
  Mariani, V., Biasini, M., Barbato, A. & Schwede, T. lDDT: A local
  superposition-free score for comparing protein structures and models using
  distance difference tests. Bioinformatics 29, 2722–2728 (2013).

  The score compares the pairwise distance matrix of the predicted points with
  that of the true points. Only pairs that are closer than `cutoff` *in the
  true structure*, that are present according to the mask, and that are not
  self-pairs contribute to the score.

  The value is approximate: unlike the lDDT of the original paper, no terms
  for physical feasibility (e.g. bond length violations) are included.

  Args:
    predicted_points: (batch, length, 3) array of predicted 3D points
    true_points: (batch, length, 3) array of true 3D points
    true_points_mask: (batch, length, 1) binary-valued float array. This mask
      should be 1 for points that exist in the true points.
    cutoff: Maximum true distance for a pair of points to be included
    per_residue: If true, return a score for each residue (reduce over the last
      axis only) instead of one score per batch element. Note that the overall
      lDDT is not exactly the mean of the per_residue lDDT's because some
      residues have more contacts than others.

  Returns:
    An (approximate, see above) lDDT score in the range 0-1.
  """
  # Input layout checks: rank-3 coordinates and a rank-3 mask with unit last
  # dimension.
  assert len(predicted_points.shape) == 3
  assert predicted_points.shape[-1] == 3
  assert true_points_mask.shape[-1] == 1
  assert len(true_points_mask.shape) == 3

  def _pairwise_distances(points):
    # The small epsilon keeps the square root away from exactly zero.
    offsets = points[:, :, None] - points[:, None, :]
    return jnp.sqrt(1e-10 + jnp.sum(offsets**2, axis=-1))

  dmat_true = _pairwise_distances(true_points)
  dmat_predicted = _pairwise_distances(predicted_points)

  # Build the 0/1 weights selecting which pairs are scored.
  num_points = dmat_true.shape[1]
  dists_to_score = (dmat_true < cutoff).astype(jnp.float32)
  dists_to_score = dists_to_score * true_points_mask
  dists_to_score = dists_to_score * jnp.transpose(true_points_mask, [0, 2, 1])
  # Exclude self-interaction.
  dists_to_score = dists_to_score * (1. - jnp.eye(num_points))

  # Absolute deviation between true and predicted pair distances.
  dist_l1 = jnp.abs(dmat_true - dmat_predicted)

  # True lDDT uses a number of fixed tolerance bins; each pair earns a quarter
  # point per bin it falls inside. The physical plausibility correction of
  # lDDT is ignored.
  thresholds = (0.5, 1.0, 2.0, 4.0)
  hits = [(dist_l1 < t).astype(jnp.float32) for t in thresholds]
  score = 0.25 * sum(hits[1:], hits[0])

  # Normalize by the number of scored pairs over the appropriate axes.
  reduce_axes = (-2, -1)
  if per_residue:
    reduce_axes = (-1,)
  scored_pairs = jnp.sum(dists_to_score, axis=reduce_axes)
  norm = 1. / (1e-10 + scored_pairs)
  return norm * (1e-10 + jnp.sum(dists_to_score * score, axis=reduce_axes))
|
af_backprop/alphafold/model/mapping.py
ADDED
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Specialized mapping functions."""
|
16 |
+
|
17 |
+
import functools
|
18 |
+
|
19 |
+
from typing import Any, Callable, Optional, Sequence, Union
|
20 |
+
|
21 |
+
import haiku as hk
|
22 |
+
import jax
|
23 |
+
import jax.numpy as jnp
|
24 |
+
|
25 |
+
|
26 |
+
PYTREE = Any
|
27 |
+
PYTREE_JAX_ARRAY = Any
|
28 |
+
|
29 |
+
partial = functools.partial
|
30 |
+
PROXY = object()
|
31 |
+
|
32 |
+
|
33 |
+
def _maybe_slice(array, i, slice_size, axis):
  """Slice `array` along `axis`, unless `axis` is the PROXY sentinel.

  Args:
    array: Array to take the slice from.
    i: Start index of the slice along `axis`.
    slice_size: Number of elements to take along `axis`.
    axis: Axis to slice, or `PROXY` to return `array` unchanged.

  Returns:
    `array` itself when `axis` is `PROXY`, otherwise the result of
    `jax.lax.dynamic_slice_in_dim` starting at `i` with size `slice_size`.
  """
  if axis is not PROXY:
    return jax.lax.dynamic_slice_in_dim(
        array, i, slice_size=slice_size, axis=axis)
  # PROXY-marked inputs are passed through whole.
  return array
|
39 |
+
|
40 |
+
|
41 |
+
def _maybe_get_size(array, axis):
  """Return the size of `array` along `axis`, or -1 for the PROXY sentinel.

  Args:
    array: Object exposing a `.shape` sequence.
    axis: Axis whose size is requested, or `PROXY`.

  Returns:
    -1 when `axis` is `PROXY`, otherwise `array.shape[axis]`.
  """
  # PROXY is a unique sentinel object, so compare by identity (as
  # `_maybe_slice` does) instead of relying on `==` dispatch.
  if axis is PROXY:
    return -1
  else:
    return array.shape[axis]
|
46 |
+
|
47 |
+
|
48 |
+
def _expand_axes(axes, values, name='sharded_apply'):
  """Broadcast an axes specification to the full tree structure of `values`.

  Args:
    axes: An axis specification (a single value or a pytree prefix of
      `values`) as accepted by `jax.api_util.flatten_axes`.
    values: Pytree whose structure the expanded axes should match.
    name: Name forwarded to `jax.api_util.flatten_axes`.

  Returns:
    A pytree with the same structure as `values` whose leaves are the axis for
    the corresponding leaf, with every `None` replaced by the `PROXY` sentinel.
  """
  # Use the `jax.tree_util` namespace; the top-level `jax.tree_flatten` /
  # `jax.tree_unflatten` aliases are deprecated.
  values_tree_def = jax.tree_util.tree_flatten(values)[1]
  flat_axes = jax.api_util.flatten_axes(name, values_tree_def, axes)
  # Replace None's with PROXY: None would be treated as an empty subtree by
  # the tree utilities, whereas PROXY is an ordinary leaf.
  flat_axes = [PROXY if x is None else x for x in flat_axes]
  return jax.tree_util.tree_unflatten(values_tree_def, flat_axes)
|
54 |
+
|
55 |
+
|
56 |
+
def sharded_map(
    fun: Callable[..., PYTREE_JAX_ARRAY],
    shard_size: Union[int, None] = 1,
    in_axes: Union[int, PYTREE] = 0,
    out_axes: Union[int, PYTREE] = 0) -> Callable[..., PYTREE_JAX_ARRAY]:
  """Sharded vmap.

  Vectorizes `fun` with `hk.vmap` and then evaluates the vectorized function
  shard by shard through `sharded_apply`. Choosing `shard_size` trades memory
  usage (as in a plain map) against throughput (as in a vmap).

  Args:
    fun: Function to apply smap transform to.
    shard_size: Integer denoting shard size.
    in_axes: Either integer or pytree describing which axis to map over for each
      input to `fun`, None denotes broadcasting.
    out_axes: integer or pytree denoting to what axis in the output the mapped
      over axis maps.

  Returns:
    function with smap applied.
  """
  return sharded_apply(
      hk.vmap(fun, in_axes, out_axes),
      shard_size=shard_size,
      in_axes=in_axes,
      out_axes=out_axes)
|
80 |
+
|
81 |
+
|
82 |
+
def sharded_apply(
    fun: Callable[..., PYTREE_JAX_ARRAY],  # pylint: disable=g-bare-generic
    shard_size: Union[int, None] = 1,
    in_axes: Union[int, PYTREE] = 0,
    out_axes: Union[int, PYTREE] = 0,
    new_out_axes: bool = False) -> Callable[..., PYTREE_JAX_ARRAY]:
  """Sharded apply.

  Applies `fun` over shards to axes, in a way similar to vmap,
  but does so in shards of `shard_size`. Shards are stacked after.
  This allows a smooth trade-off between
  memory usage (as in a plain map) vs higher throughput (as in a vmap).

  Args:
    fun: Function to apply smap transform to.
    shard_size: Integer denoting shard size. `None` disables sharding, in
      which case `fun` is returned unchanged.
    in_axes: Either integer or pytree describing which axis to map over for each
      input to `fun`, None denotes broadcasting.
    out_axes: integer or pytree denoting to what axis in the output the mapped
      over axis maps.
    new_out_axes: whether to stack outputs on new axes. This assumes that the
      output sizes for each shard (including the possible remainder shard) are
      the same.

  Returns:
    function with smap applied.

  Raises:
    NotImplementedError: If `new_out_axes` is true.
  """
  docstr = ('Mapped version of {fun}. Takes similar arguments to {fun} '
            'but with additional array axes over which {fun} is mapped.')
  if new_out_axes:
    raise NotImplementedError('New output axes not yet implemented.')

  # shard size None denotes no sharding
  if shard_size is None:
    return fun

  @jax.util.wraps(fun, docstr=docstr)
  def mapped_fn(*args):
    # Expand in axes and Determine Loop range
    in_axes_ = _expand_axes(in_axes, args)

    # Every mapped input must agree on the size of its mapped axis; inputs
    # marked as broadcast report -1 and are ignored by the check.
    in_sizes = jax.tree_util.tree_map(_maybe_get_size, args, in_axes_)
    flat_sizes = jax.tree_util.tree_flatten(in_sizes)[0]
    in_size = max(flat_sizes)
    assert all(i in {in_size, -1} for i in flat_sizes)

    # Number of full-size shards that precede the final shard.
    num_extra_shards = (in_size - 1) // shard_size

    # Fix Up if necessary: the final shard is a full shard when `in_size` is a
    # multiple of `shard_size`, otherwise it holds the remainder.
    last_shard_size = in_size % shard_size
    last_shard_size = shard_size if last_shard_size == 0 else last_shard_size

    def apply_fun_to_slice(slice_start, slice_size):
      input_slice = jax.tree_util.tree_map(
          lambda array, axis: _maybe_slice(array, slice_start, slice_size,
                                           axis),
          args, in_axes_)
      return fun(*input_slice)

    # Abstractly evaluate the final shard to learn output dtypes and shapes.
    remainder_shape_dtype = hk.eval_shape(
        partial(apply_fun_to_slice, 0, last_shard_size))
    out_dtypes = jax.tree_util.tree_map(lambda x: x.dtype,
                                        remainder_shape_dtype)
    out_shapes = jax.tree_util.tree_map(lambda x: x.shape,
                                        remainder_shape_dtype)
    out_axes_ = _expand_axes(out_axes, remainder_shape_dtype)

    if num_extra_shards > 0:
      regular_shard_shape_dtype = hk.eval_shape(
          partial(apply_fun_to_slice, 0, shard_size))
      shard_shapes = jax.tree_util.tree_map(lambda x: x.shape,
                                            regular_shard_shape_dtype)

      def make_output_shape(axis, shard_shape, remainder_shape):
        # Full output extent along `axis`: all regular shards plus the final
        # shard.
        return shard_shape[:axis] + (
            shard_shape[axis] * num_extra_shards +
            remainder_shape[axis],) + shard_shape[axis + 1:]

      out_shapes = jax.tree_util.tree_map(make_output_shape, out_axes_,
                                          shard_shapes, out_shapes)

    # Calls dynamic Update slice with different argument order
    # This is here since tree_map only passes the mapped leaves positionally,
    # so the slice start has to be bound by keyword instead.
    def dynamic_update_slice_in_dim(full_array, update, axis, i):
      return jax.lax.dynamic_update_slice_in_dim(full_array, update, i, axis)

    def compute_shard(outputs, slice_start, slice_size):
      # Evaluate one shard and write its result into the output buffers.
      slice_out = apply_fun_to_slice(slice_start, slice_size)
      update_slice = partial(
          dynamic_update_slice_in_dim, i=slice_start)
      return jax.tree_util.tree_map(update_slice, outputs, slice_out,
                                    out_axes_)

    def scan_iteration(outputs, i):
      new_outputs = compute_shard(outputs, i, shard_size)
      return new_outputs, ()

    # Start offsets of all full-size shards.
    slice_starts = jnp.arange(0, in_size - shard_size + 1, shard_size)

    def allocate_buffer(dtype, shape):
      return jnp.zeros(shape, dtype=dtype)

    outputs = jax.tree_util.tree_map(allocate_buffer, out_dtypes, out_shapes)

    if slice_starts.shape[0] > 0:
      outputs, _ = hk.scan(scan_iteration, outputs, slice_starts)

    # A trailing partial shard has a different size, so it is computed outside
    # of the scan.
    if last_shard_size != shard_size:
      remainder_start = in_size - last_shard_size
      outputs = compute_shard(outputs, remainder_start, last_shard_size)

    return outputs

  return mapped_fn
|
191 |
+
|
192 |
+
|
193 |
+
def inference_subbatch(
    module: Callable[..., PYTREE_JAX_ARRAY],
    subbatch_size: int,
    batched_args: Sequence[PYTREE_JAX_ARRAY],
    nonbatched_args: Sequence[PYTREE_JAX_ARRAY],
    low_memory: bool = True,
    input_subbatch_dim: int = 0,
    output_subbatch_dim: Optional[int] = None) -> PYTREE_JAX_ARRAY:
  """Run through subbatches (like batch apply but with split and concat).

  Args:
    module: Callable invoked with the batched arguments followed by the
      non-batched arguments.
    subbatch_size: Shard size handed to `sharded_apply`.
    batched_args: Non-empty sequence of arguments that are split into
      subbatches along `input_subbatch_dim`.
    nonbatched_args: Arguments passed unchanged to every call of `module`.
    low_memory: If false, `module` is called once on the full inputs and no
      subbatching takes place.
    input_subbatch_dim: Axis of the batched arguments to split over.
    output_subbatch_dim: Axis of the outputs along which the subbatch results
      are placed; defaults to `input_subbatch_dim`.

  Returns:
    The output of `module`, either computed directly or assembled from
    subbatches.
  """
  assert len(batched_args) > 0  # pylint: disable=g-explicit-length-test

  # Fast path: evaluate everything in a single call.
  if not low_memory:
    return module(*batched_args, *nonbatched_args)

  out_dim = (
      input_subbatch_dim if output_subbatch_dim is None
      else output_subbatch_dim)

  def run_module(*shard_args):
    # Re-attach the non-batched arguments to every shard call.
    return module(*shard_args, *nonbatched_args)

  sharded_module = sharded_apply(
      run_module,
      shard_size=subbatch_size,
      in_axes=input_subbatch_dim,
      out_axes=out_dim)
  return sharded_module(*batched_args)
|
af_backprop/alphafold/model/model.py
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Code for constructing the model."""
|
16 |
+
from typing import Any, Mapping, Optional, Union
|
17 |
+
|
18 |
+
from absl import logging
|
19 |
+
from alphafold.common import confidence
|
20 |
+
from alphafold.model import features
|
21 |
+
from alphafold.model import modules
|
22 |
+
import haiku as hk
|
23 |
+
import jax
|
24 |
+
import ml_collections
|
25 |
+
import numpy as np
|
26 |
+
import tensorflow.compat.v1 as tf
|
27 |
+
import tree
|
28 |
+
|
29 |
+
|
30 |
+
def get_confidence_metrics(
    prediction_result: Mapping[str, Any]) -> Mapping[str, Any]:
  """Derive confidence metrics from a model prediction.

  Args:
    prediction_result: Model output. Must contain
      `['predicted_lddt']['logits']`; may contain a
      `'predicted_aligned_error'` entry with `'logits'` and `'breaks'`.

  Returns:
    A dict with a `'plddt'` entry. When `'predicted_aligned_error'` is present
    in `prediction_result`, the dict also holds everything returned by
    `confidence.compute_predicted_aligned_error` and a `'ptm'` entry.
  """
  plddt_logits = prediction_result['predicted_lddt']['logits']
  confidence_metrics = {'plddt': confidence.compute_plddt(plddt_logits)}

  # Without an aligned-error head there is nothing more to compute.
  if 'predicted_aligned_error' not in prediction_result:
    return confidence_metrics

  aligned_error = prediction_result['predicted_aligned_error']
  pae_logits = aligned_error['logits']
  pae_breaks = aligned_error['breaks']
  confidence_metrics.update(
      confidence.compute_predicted_aligned_error(pae_logits, pae_breaks))
  confidence_metrics['ptm'] = confidence.predicted_tm_score(
      pae_logits, pae_breaks)
  return confidence_metrics
|
46 |
+
|
47 |
+
|
48 |
+
class RunModel:
  """Container for JAX model.

  Wraps the Haiku-transformed AlphaFold forward function and exposes jitted
  `init` and `apply` callables together with helpers for feature processing
  and prediction.
  """

  def __init__(self,
               config: ml_collections.ConfigDict,
               params: Optional[Mapping[str, Mapping[str, np.ndarray]]] = None,
               is_training=True,
               return_representations=True):
    """Builds the jitted forward functions.

    Args:
      config: Model configuration; `config.model` is passed to
        `modules.AlphaFold`.
      params: Optional model parameters. If not given they are initialized
        randomly on first use (see `init_params`).
      is_training: Forwarded to the model call as `is_training`.
      return_representations: Forwarded to the model call as
        `return_representations`.
    """
    self.config = config
    self.params = params

    def _forward_fn(batch):
      model = modules.AlphaFold(self.config.model)
      return model(
          batch,
          is_training=is_training,
          compute_loss=False,
          ensemble_representations=False,
          return_representations=return_representations)

    # Transform once and reuse the resulting init/apply pair.
    transformed = hk.transform(_forward_fn)
    self.apply = jax.jit(transformed.apply)
    self.init = jax.jit(transformed.init)

  def init_params(self, feat: features.FeatureDict, random_seed: int = 0):
    """Initializes the model parameters.

    If none were provided when this class was instantiated then the parameters
    are randomly initialized.

    Args:
      feat: A dictionary of NumPy feature arrays as output by
        RunModel.process_features.
      random_seed: A random seed to use to initialize the parameters if none
        were set when this class was initialized.
    """
    if not self.params:
      # Init params randomly.
      rng = jax.random.PRNGKey(random_seed)
      self.params = hk.data_structures.to_mutable_dict(
          self.init(rng, feat))
      logging.warning('Initialized parameters randomly')

  def process_features(
      self,
      raw_features: Union[tf.train.Example, features.FeatureDict],
      random_seed: int) -> features.FeatureDict:
    """Processes features to prepare for feeding them into the model.

    Args:
      raw_features: The output of the data pipeline either as a dict of NumPy
        arrays or as a tf.train.Example.
      random_seed: The random seed to use when processing the features.

    Returns:
      A dict of NumPy feature arrays suitable for feeding into the model.
    """
    if isinstance(raw_features, dict):
      return features.np_example_to_features(
          np_example=raw_features,
          config=self.config,
          random_seed=random_seed)
    else:
      return features.tf_example_to_features(
          tf_example=raw_features,
          config=self.config,
          random_seed=random_seed)

  def eval_shape(self, feat: features.FeatureDict) -> jax.ShapeDtypeStruct:
    """Evaluates the output shapes of the model without running it.

    Parameters are initialized first if none are set.

    Args:
      feat: A dictionary of NumPy feature arrays as output by
        RunModel.process_features.

    Returns:
      The result of `jax.eval_shape` on the forward function.
    """
    self.init_params(feat)
    logging.info('Running eval_shape with shape(feat) = %s',
                 tree.map_structure(lambda x: x.shape, feat))
    shape = jax.eval_shape(self.apply, self.params, jax.random.PRNGKey(0), feat)
    logging.info('Output shape was %s', shape)
    return shape

  def predict(self, feat: features.FeatureDict) -> Mapping[str, Any]:
    """Makes a prediction by inferencing the model on the provided features.

    Parameters are initialized first if none are set. The forward pass always
    uses `jax.random.PRNGKey(0)`.

    Args:
      feat: A dictionary of NumPy feature arrays as output by
        RunModel.process_features.

    Returns:
      A dictionary of model outputs. If `config.use_struct` is set, the
      confidence metrics from `get_confidence_metrics` are merged in.
    """
    self.init_params(feat)
    logging.info('Running predict with shape(feat) = %s',
                 tree.map_structure(lambda x: x.shape, feat))
    result = self.apply(self.params, jax.random.PRNGKey(0), feat)
    # This block is to ensure benchmark timings are accurate. Some blocking is
    # already happening when computing get_confidence_metrics, and this ensures
    # all outputs are blocked on.
    # `jax.tree_util.tree_map` replaces the deprecated top-level
    # `jax.tree_map` alias.
    jax.tree_util.tree_map(lambda x: x.block_until_ready(), result)
    if self.config.use_struct:
      result.update(get_confidence_metrics(result))
    logging.info('Output shape was %s',
                 tree.map_structure(lambda x: x.shape, result))
    return result
|
af_backprop/alphafold/model/modules.py
ADDED
@@ -0,0 +1,2164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Modules and code used in the core part of AlphaFold.
|
16 |
+
|
17 |
+
The structure generation code is in 'folding.py'.
|
18 |
+
"""
|
19 |
+
import functools
|
20 |
+
from alphafold.common import residue_constants
|
21 |
+
from alphafold.model import all_atom
|
22 |
+
from alphafold.model import common_modules
|
23 |
+
from alphafold.model import folding
|
24 |
+
from alphafold.model import layer_stack
|
25 |
+
from alphafold.model import lddt
|
26 |
+
from alphafold.model import mapping
|
27 |
+
from alphafold.model import prng
|
28 |
+
from alphafold.model import quat_affine
|
29 |
+
from alphafold.model import utils
|
30 |
+
import haiku as hk
|
31 |
+
import jax
|
32 |
+
import jax.numpy as jnp
|
33 |
+
|
34 |
+
from alphafold.model.r3 import Rigids, Rots, Vecs
|
35 |
+
|
36 |
+
|
37 |
+
def softmax_cross_entropy(logits, labels):
  """Returns the softmax cross entropy of `logits` against one-hot `labels`.

  The log-softmax is taken over the last axis of `logits`, and that axis is
  summed out of the result.
  """
  log_probs = jax.nn.log_softmax(logits)
  neg_log_likelihood = -jnp.sum(labels * log_probs, axis=-1)
  return jnp.asarray(neg_log_likelihood)
|
41 |
+
|
42 |
+
|
43 |
+
def sigmoid_cross_entropy(logits, labels):
  """Returns the element-wise sigmoid cross entropy for multi-label targets.

  The result has the broadcast shape of `logits` and `labels`; no axis is
  reduced.
  """
  positive_term = labels * jax.nn.log_sigmoid(logits)
  # log(1 - sigmoid(x)) is evaluated as log_sigmoid(-x), which is the more
  # numerically stable of the two forms.
  negative_term = (1. - labels) * jax.nn.log_sigmoid(-logits)
  return jnp.asarray(-positive_term - negative_term)
|
50 |
+
|
51 |
+
|
52 |
+
def apply_dropout(*, tensor, safe_key, rate, is_training, broadcast_dim=None):
  """Applies (inverted) dropout to a tensor.

  Arguments:
    tensor: Array to apply dropout to.
    safe_key: Object whose `get()` returns the PRNG key used for the mask.
    rate: Drop probability; kept entries are rescaled by `1 / (1 - rate)`.
    is_training: When false the tensor is returned untouched.
    broadcast_dim: Optional axis along which a single mask value is shared
      (the mask has size 1 on that axis and is broadcast against `tensor`).

  Returns:
    `tensor` itself when not training, otherwise the masked, rescaled tensor.
  """
  # Note: there is deliberately no `rate != 0.0` shortcut here; the mask is
  # sampled whenever is_training is set.
  if not is_training:
    return tensor

  mask_shape = list(tensor.shape)
  if broadcast_dim is not None:
    mask_shape[broadcast_dim] = 1

  keep_rate = 1.0 - rate
  keep_mask = jax.random.bernoulli(
      safe_key.get(), keep_rate, shape=mask_shape)
  return keep_mask * tensor / keep_rate
|
63 |
+
|
64 |
+
|
65 |
+
def dropout_wrapper(module,
                    input_act,
                    mask,
                    safe_key,
                    global_config,
                    output_act=None,
                    is_training=True,
                    scale_rate=1.0,
                    **kwargs):
  """Runs `module`, applies dropout to its output and adds it as a residual.

  Arguments:
    module: Callable invoked as `module(input_act, mask, is_training=...,
      **kwargs)`; its `config` supplies `dropout_rate`, `shared_dropout` and
      (when dropout is shared) `orientation`.
    input_act: Activations passed to the module.
    mask: Mask passed to the module.
    safe_key: Random key wrapper handed to `apply_dropout`.
    global_config: Config whose `deterministic` flag forces a zero drop rate.
    output_act: Activations the residual is added to; defaults to `input_act`.
    is_training: Forwarded to the module and to `apply_dropout`.
    scale_rate: Multiplier applied to the drop rate.
    **kwargs: Extra keyword arguments forwarded to the module.

  Returns:
    `output_act + dropout(module(...))`.
  """
  residual_base = input_act if output_act is None else output_act

  update = module(input_act, mask, is_training=is_training, **kwargs)

  if global_config.deterministic:
    base_rate = 0.0
  else:
    base_rate = module.config.dropout_rate

  # With shared dropout a single mask is broadcast along one axis: axis 0 for
  # 'per_row' modules, axis 1 for every other orientation.
  broadcast_dim = None
  if module.config.shared_dropout:
    broadcast_dim = 0 if module.config.orientation == 'per_row' else 1

  update = apply_dropout(
      tensor=update,
      safe_key=safe_key,
      rate=base_rate * scale_rate,
      is_training=is_training,
      broadcast_dim=broadcast_dim)

  return residual_base + update
|
99 |
+
|
100 |
+
|
101 |
+
def create_extra_msa_feature(batch):
  """Builds the extra-MSA feature tensor from raw extra-MSA inputs.

  The one-hot expansion is done here, as late as possible, because the
  one-hot extra MSA can be very large.

  Arguments:
    batch: a dictionary with the following keys:
    * 'extra_msa': [N_extra_seq, N_res] MSA that wasn't selected as a cluster
      centre. Note, that this is not one-hot encoded.
    * 'extra_has_deletion': [N_extra_seq, N_res] Whether there is a deletion to
      the left of each position in the extra MSA.
    * 'extra_deletion_value': [N_extra_seq, N_res] The number of deletions to
      the left of each position in the extra MSA.

  Returns:
    Concatenation along the last axis of the 23-way one-hot MSA and the two
    deletion features (each given a trailing axis of size 1).
  """
  # 23 = 20 amino acids + 'X' for unknown + gap + bert mask
  num_msa_classes = 23
  one_hot_msa = jax.nn.one_hot(batch['extra_msa'], num_msa_classes)
  deletion_feats = [
      jnp.expand_dims(batch[key], axis=-1)
      for key in ('extra_has_deletion', 'extra_deletion_value')
  ]
  return jnp.concatenate([one_hot_msa, *deletion_feats], axis=-1)
|
124 |
+
|
125 |
+
|
126 |
+
class AlphaFoldIteration(hk.Module):
  """A single recycling iteration of the AlphaFold architecture.

  Runs the Evoformer on the input features (optionally averaging the resulting
  representations over the ensemble elements) and then feeds the
  representations to every head that has a non-zero weight in the config.
  When losses are requested, each head's loss is scaled by its configured
  weight and the results are summed into the total loss.

  Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 3-22
  """

  def __init__(self, config, global_config, name='alphafold_iteration'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self,
               ensembled_batch,
               non_ensembled_batch,
               is_training,
               compute_loss=False,
               ensemble_representations=False,
               return_representations=False):
    """Runs the Evoformer, optional ensembling, and the configured heads.

    Arguments:
      ensembled_batch: Dictionary of features; every value is indexed along
        its leading axis to select one ensemble element.
      non_ensembled_batch: Dictionary of features merged into each selected
        ensemble element (its keys win on a name clash).
      is_training: Forwarded to the Evoformer and to every head.
      compute_loss: Whether to evaluate and accumulate the weighted losses.
      ensemble_representations: Whether to average the representations over
        all ensemble elements. When false, exactly one element is required.
      return_representations: Accepted for interface compatibility; it is not
        read by this method.

    Returns:
      `ret` when compute_loss is false, otherwise the tuple
      `(ret, total_loss)`. `ret` holds 'representations' plus one entry per
      evaluated head.
    """
    ensemble_size = ensembled_batch['seq_length'].shape[0]
    num_ensemble = jnp.asarray(ensemble_size)

    if not ensemble_representations:
      assert ensemble_size == 1

    def slice_batch(i):
      """Selects ensemble element `i` and merges in the shared features."""
      sliced = {key: value[i] for key, value in ensembled_batch.items()}
      sliced.update(non_ensembled_batch)
      return sliced

    # Compute representations for each batch element and average.
    evoformer_module = EmbeddingsAndEvoformer(
        self.config.embeddings_and_evoformer, self.global_config)
    batch0 = slice_batch(0)
    representations = evoformer_module(batch0, is_training)

    # The MSA representation is not ensembled, so it is taken out before the
    # loop and put back afterwards.
    msa_representation = representations.pop('msa')

    # Average the representations (except MSA) over the batch dimension.
    if ensemble_representations:
      def body(x):
        """Add one element to the representations ensemble."""
        i, accumulated = x
        update = evoformer_module(slice_batch(i), is_training)
        summed = {key: accumulated[key] + update[key] for key in accumulated}
        return i+1, summed

      if hk.running_init():
        # When initializing the Haiku module, run one iteration of the
        # while_loop to initialize the Haiku modules used in `body`.
        _, representations = body((1, representations))
      else:
        _, representations = hk.while_loop(
            lambda x: x[0] < num_ensemble,
            body,
            (1, representations))

      for key in representations:
        if key != 'msa':
          representations[key] /= num_ensemble.astype(
              representations[key].dtype)

    representations['msa'] = msa_representation
    batch = batch0  # We are not ensembled from here on.

    # The unpacking doubles as a rank check on 'aatype': rank 2 for integer
    # inputs, rank 3 otherwise.
    if jnp.issubdtype(ensembled_batch['aatype'].dtype, jnp.integer):
      _, num_residues = ensembled_batch['aatype'].shape
    else:
      _, num_residues, _ = ensembled_batch['aatype'].shape

    if self.config.use_struct:
      struct_module = folding.StructureModule
    else:
      struct_module = folding.dummy

    head_factories = {
        'masked_msa': MaskedMsaHead,
        'distogram': DistogramHead,
        'structure_module': functools.partial(
            struct_module, compute_loss=compute_loss),
        'predicted_lddt': PredictedLDDTHead,
        'predicted_aligned_error': PredictedAlignedErrorHead,
        'experimentally_resolved': ExperimentallyResolvedHead,
    }

    heads = {}
    for head_name, head_config in sorted(self.config.heads.items()):
      if not head_config.weight:
        continue  # Do not instantiate zero-weight heads.
      factory = head_factories[head_name]
      heads[head_name] = (head_config, factory(head_config, self.global_config))

    total_loss = 0.
    ret = {'representations': representations}

    def loss(module, head_config, ret, name, filter_ret=True):
      """Stores the head's loss outputs in `ret[name]`; returns weighted loss."""
      value = ret[name] if filter_ret else ret
      ret[name].update(module.loss(value, batch))
      return head_config.weight * ret[name]['loss']

    # PredictedLDDTHead and PredictedAlignedErrorHead are skipped here and
    # evaluated only after the StructureModule has been executed.
    deferred_heads = ('predicted_lddt', 'predicted_aligned_error')
    for name, (head_config, module) in heads.items():
      if name in deferred_heads:
        continue
      ret[name] = module(representations, batch, is_training)
      if 'representations' in ret[name]:
        # Extra representations from the head. Used by the structure module
        # to provide activations for the PredictedLDDTHead.
        representations.update(ret[name].pop('representations'))
      if compute_loss:
        total_loss += loss(module, head_config, ret, name)

    if self.config.use_struct:
      if self.config.heads.get('predicted_lddt.weight', 0.0):
        # Add PredictedLDDTHead after StructureModule executes.
        name = 'predicted_lddt'
        # Feed all previous results to give access to structure_module result.
        head_config, module = heads[name]
        ret[name] = module(representations, batch, is_training)
        if compute_loss:
          total_loss += loss(module, head_config, ret, name, filter_ret=False)

      if ('predicted_aligned_error' in self.config.heads
          and self.config.heads.get('predicted_aligned_error.weight', 0.0)):
        # Add PredictedAlignedErrorHead after StructureModule executes.
        name = 'predicted_aligned_error'
        # Feed all previous results to give access to structure_module result.
        head_config, module = heads[name]
        ret[name] = module(representations, batch, is_training)
        if compute_loss:
          total_loss += loss(module, head_config, ret, name, filter_ret=False)

    if compute_loss:
      return ret, total_loss
    return ret
|
280 |
+
|
281 |
+
class AlphaFold(hk.Module):
  """AlphaFold model with recycling.

  Jumper et al. (2021) Suppl. Alg. 2 "Inference"
  """

  def __init__(self, config, name='alphafold'):
    super().__init__(name=name)
    self.config = config
    self.global_config = config.global_config

  def __call__(
      self,
      batch,
      is_training,
      compute_loss=False,
      ensemble_representations=False,
      return_representations=False):
    """Run the AlphaFold model.

    Arguments:
      batch: Dictionary with inputs to the AlphaFold model. A 'scale_rate'
        entry is added in place when it is missing. Optional 'init_pos',
        'init_msa_first_row', 'init_pair' and 'init_dgram' entries seed the
        corresponding recycled features.
      is_training: Whether the system is in training or inference mode.
      compute_loss: Whether to compute losses (requires extra features
        to be present in the batch and knowing the true structure).
      ensemble_representations: Whether to use ensembling of representations.
      return_representations: Whether to also return the intermediate
        representations.

    Returns:
      When compute_loss is True:
        a tuple `(output, [total_loss])` of the AlphaFoldIteration output and
        a one-element list holding its total loss.
      When compute_loss is False:
        just output of AlphaFoldIteration.

      The output of AlphaFoldIteration is a nested dictionary containing
      predictions from the various heads.
    """
    if "scale_rate" not in batch:
      batch["scale_rate"] = jnp.ones((1,))
    impl = AlphaFoldIteration(self.config, self.global_config)
    if jnp.issubdtype(batch['aatype'].dtype, jnp.integer):
      batch_size, num_residues = batch['aatype'].shape
    else:
      batch_size, num_residues, _ = batch['aatype'].shape

    def get_prev(ret):
      """Extracts the features recycled into the next iteration."""
      new_prev = {
          'prev_msa_first_row': ret['representations']['msa_first_row'],
          'prev_pair': ret['representations']['pair'],
          'prev_dgram': ret["distogram"]["logits"],
      }
      if self.config.use_struct:
        new_prev.update({
            'prev_pos': ret['structure_module']['final_atom_positions'],
            'prev_plddt': ret["predicted_lddt"]["logits"]})

      if "predicted_aligned_error" in ret:
        new_prev["prev_pae"] = ret["predicted_aligned_error"]["logits"]

      if not self.config.backprop_recycle:
        # Block gradients through the recycled inputs.
        for k in ["prev_pos","prev_msa_first_row","prev_pair"]:
          if k in new_prev:
            new_prev[k] = jax.lax.stop_gradient(new_prev[k])

      return new_prev

    def do_call(prev,
                recycle_idx,
                compute_loss=compute_loss):
      """Runs one AlphaFoldIteration on the features for `recycle_idx`."""
      if self.config.resample_msa_in_recycling:
        num_ensemble = batch_size // (self.config.num_recycle + 1)
        def slice_recycle_idx(x):
          start = recycle_idx * num_ensemble
          size = num_ensemble
          return jax.lax.dynamic_slice_in_dim(x, start, size, axis=0)
        # jax.tree_util.tree_map is used rather than the top-level
        # jax.tree_map alias, which newer JAX releases no longer provide.
        ensembled_batch = jax.tree_util.tree_map(slice_recycle_idx, batch)
      else:
        num_ensemble = batch_size
        ensembled_batch = batch
      non_ensembled_batch = jax.tree_util.tree_map(lambda x: x, prev)

      return impl(ensembled_batch=ensembled_batch,
                  non_ensembled_batch=non_ensembled_batch,
                  is_training=is_training,
                  compute_loss=compute_loss,
                  ensemble_representations=ensemble_representations)

    def add_prev(p, p_):
      """Accumulates the recycled logits of `p` into `p_` (in place)."""
      p_["prev_dgram"] += p["prev_dgram"]
      if self.config.use_struct:
        p_["prev_plddt"] += p["prev_plddt"]
        p_["prev_pae"] += p["prev_pae"]
      return p_

    emb_config = self.config.embeddings_and_evoformer
    prev = {
        'prev_msa_first_row': jnp.zeros([num_residues, emb_config.msa_channel]),
        'prev_pair': jnp.zeros(
            [num_residues, num_residues, emb_config.pair_channel]),
        'prev_dgram': jnp.zeros([num_residues, num_residues, 64]),
    }
    if self.config.use_struct:
      prev.update({
          'prev_pos': jnp.zeros(
              [num_residues, residue_constants.atom_type_num, 3]),
          'prev_plddt': jnp.zeros([num_residues, 50]),
          'prev_pae': jnp.zeros([num_residues, num_residues, 64])})

    # Optional caller-provided initial values override the zero defaults.
    for k in ["pos","msa_first_row","pair","dgram"]:
      if f"init_{k}" in batch:
        prev[f"prev_{k}"] = batch[f"init_{k}"][0]

    if self.config.num_recycle:
      if 'num_iter_recycling' in batch:
        # Training time: num_iter_recycling is in batch.
        # The value for each ensemble batch is the same, so arbitrarily taking
        # 0-th.
        num_iter = batch['num_iter_recycling'][0]

        # Add insurance that we will not run more
        # recyclings than the model is configured to run.
        num_iter = jnp.minimum(num_iter, self.config.num_recycle)
      else:
        # Eval mode or tests: use the maximum number of iterations.
        num_iter = self.config.num_recycle

      def body(p, i):
        """One recycling step; the carry is the recycled feature dict."""
        p_ = get_prev(do_call(p, recycle_idx=i, compute_loss=False))
        if self.config.add_prev:
          p_ = add_prev(p, p_)
        return p_, None

      if hk.running_init():
        # A single direct call is enough to create the parameters.
        prev, _ = body(prev, 0)
      else:
        prev, _ = hk.scan(body, prev, jnp.arange(num_iter))
    else:
      num_iter = 0

    ret = do_call(prev=prev, recycle_idx=num_iter)

    # With compute_loss the iteration returns (outputs, total_loss). Every
    # dictionary access below has to go through the outputs dict; indexing the
    # tuple with string keys would raise a TypeError.
    outputs = ret[0] if compute_loss else ret

    if self.config.add_prev:
      prev_ = get_prev(outputs)
    if compute_loss:
      ret = outputs, [ret[1]]

    if not return_representations:
      del outputs['representations']

    if self.config.add_prev and num_iter > 0:
      # Report the logits averaged over all (num_iter + 1) iterations.
      prev_ = add_prev(prev, prev_)
      outputs["distogram"]["logits"] = prev_["prev_dgram"]/(num_iter+1)
      if self.config.use_struct:
        outputs["predicted_lddt"]["logits"] = prev_["prev_plddt"]/(num_iter+1)
        if "predicted_aligned_error" in outputs:
          outputs["predicted_aligned_error"]["logits"] = (
              prev_["prev_pae"]/(num_iter+1))

    return ret
|
437 |
+
|
438 |
+
class TemplatePairStack(hk.Module):
  """Pair stack for the templates.

  Jumper et al. (2021) Suppl. Alg. 16 "TemplatePairStack"
  """

  def __init__(self, config, global_config, name='template_pair_stack'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, pair_act, pair_mask, is_training, safe_key=None, scale_rate=1.0):
    """Builds TemplatePairStack module.

    Arguments:
      pair_act: Pair activations for single template, shape [N_res, N_res, c_t].
      pair_mask: Pair mask, shape [N_res, N_res].
      is_training: Whether the module is in training mode.
      safe_key: Safe key object encapsulating the random number generation key.
        A fresh key is drawn from Haiku when omitted.
      scale_rate: Forwarded to `dropout_wrapper` as its `scale_rate`.

    Returns:
      Updated pair_act, shape [N_res, N_res, c_t].
    """
    if safe_key is None:
      safe_key = prng.SafeKey(hk.next_rng_key())

    gc = self.global_config
    c = self.config

    # Nothing to do when the stack is configured with zero blocks.
    if not c.num_block:
      return pair_act

    def block(x):
      """One block of the template pair stack."""
      pair_act, safe_key = x

      apply_layer = functools.partial(
          dropout_wrapper,
          is_training=is_training,
          global_config=gc,
          scale_rate=scale_rate)

      # One key is carried on to the next block, one is used per layer.
      safe_key, *sub_keys = safe_key.split(6)
      key_iter = iter(sub_keys)

      # (module class, module config, module name), in application order.
      layer_specs = (
          (TriangleAttention, c.triangle_attention_starting_node,
           'triangle_attention_starting_node'),
          (TriangleAttention, c.triangle_attention_ending_node,
           'triangle_attention_ending_node'),
          (TriangleMultiplication, c.triangle_multiplication_outgoing,
           'triangle_multiplication_outgoing'),
          (TriangleMultiplication, c.triangle_multiplication_incoming,
           'triangle_multiplication_incoming'),
          (Transition, c.pair_transition, 'pair_transition'),
      )
      for layer_cls, layer_config, layer_name in layer_specs:
        pair_act = apply_layer(
            layer_cls(layer_config, gc, name=layer_name),
            pair_act,
            pair_mask,
            next(key_iter))

      return pair_act, safe_key

    if gc.use_remat:
      block = hk.remat(block)

    stacked_blocks = layer_stack.layer_stack(c.num_block)(block)
    pair_act, safe_key = stacked_blocks((pair_act, safe_key))
    return pair_act
|
519 |
+
|
520 |
+
|
521 |
+
class Transition(hk.Module):
  """Transition layer.

  Jumper et al. (2021) Suppl. Alg. 9 "MSATransition"
  Jumper et al. (2021) Suppl. Alg. 15 "PairTransition"
  """

  def __init__(self, config, global_config, name='transition_block'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, act, mask, is_training=True):
    """Builds Transition module.

    Layer-normalises `act` and applies a two-layer MLP (Linear, ReLU, Linear)
    whose hidden width is `num_intermediate_factor` times the channel count.

    Arguments:
      act: A tensor of queries of size [batch_size, N_res, N_channel].
      mask: A tensor denoting the mask of size [batch_size, N_res].
      is_training: Whether the module is in training mode.

    Returns:
      A float32 tensor of size [batch_size, N_res, N_channel].
    """
    _, _, num_channels = act.shape
    hidden_channels = int(num_channels * self.config.num_intermediate_factor)

    # The expanded mask is not used further down in this method.
    mask = jnp.expand_dims(mask, axis=-1)

    input_norm = hk.LayerNorm(
        axis=[-1],
        create_scale=True,
        create_offset=True,
        name='input_layer_norm')
    act = input_norm(act)

    expand = common_modules.Linear(
        hidden_channels, initializer='relu', name='transition1')
    project = common_modules.Linear(
        num_channels,
        initializer=utils.final_init(self.global_config),
        name='transition2')
    transition_module = hk.Sequential([expand, jax.nn.relu, project])

    return mapping.inference_subbatch(
        transition_module,
        self.global_config.subbatch_size,
        batched_args=[act],
        nonbatched_args=[],
        low_memory=not is_training)
|
575 |
+
|
576 |
+
|
577 |
+
def glorot_uniform():
  """Returns a Haiku variance-scaling initializer (fan_avg, uniform, scale 1)."""
  init_kwargs = dict(scale=1.0, mode='fan_avg', distribution='uniform')
  return hk.initializers.VarianceScaling(**init_kwargs)
|
581 |
+
|
582 |
+
|
583 |
+
class Attention(hk.Module):
  """Multihead attention."""

  def __init__(self, config, global_config, output_dim, name='attention'):
    super().__init__(name=name)

    self.config = config
    self.global_config = global_config
    self.output_dim = output_dim

  def __call__(self, q_data, m_data, bias, nonbatched_bias=None):
    """Builds Attention module.

    Arguments:
      q_data: A tensor of queries, shape [batch_size, N_queries, q_channels].
      m_data: A tensor of memories from which the keys and values are
        projected, shape [batch_size, N_keys, m_channels].
      bias: A bias for the attention, shape [batch_size, N_queries, N_keys].
      nonbatched_bias: Shared bias, shape [N_queries, N_keys].

    Returns:
      A float32 tensor of shape [batch_size, N_queries, output_dim].
    """
    q_channels = q_data.shape[-1]
    m_channels = m_data.shape[-1]

    # Sensible default for when the config keys are missing
    total_key_dim = self.config.get('key_dim', int(q_channels))
    total_value_dim = self.config.get('value_dim', int(m_channels))
    num_head = self.config.num_head
    assert total_key_dim % num_head == 0
    assert total_value_dim % num_head == 0
    # From here on the dims are per head.
    key_dim = total_key_dim // num_head
    value_dim = total_value_dim // num_head

    q_weights = hk.get_parameter(
        'query_w', shape=(q_channels, num_head, key_dim),
        init=glorot_uniform())
    k_weights = hk.get_parameter(
        'key_w', shape=(m_channels, num_head, key_dim),
        init=glorot_uniform())
    v_weights = hk.get_parameter(
        'value_w', shape=(m_channels, num_head, value_dim),
        init=glorot_uniform())

    # Queries are pre-scaled by 1/sqrt(per-head key dim).
    queries = jnp.einsum('bqa,ahc->bqhc', q_data, q_weights) * key_dim**(-0.5)
    keys = jnp.einsum('bka,ahc->bkhc', m_data, k_weights)
    values = jnp.einsum('bka,ahc->bkhc', m_data, v_weights)

    logits = jnp.einsum('bqhc,bkhc->bhqk', queries, keys) + bias
    if nonbatched_bias is not None:
      logits += jnp.expand_dims(nonbatched_bias, axis=0)
    attn_weights = jax.nn.softmax(logits)
    weighted_avg = jnp.einsum('bhqk,bkhc->bqhc', attn_weights, values)

    if self.config.gating:
      # Zero weights and unit bias: every gate starts at sigmoid(1).
      gating_weights = hk.get_parameter(
          'gating_w',
          shape=(q_channels, num_head, value_dim),
          init=hk.initializers.Constant(0.0))
      gating_bias = hk.get_parameter(
          'gating_b',
          shape=(num_head, value_dim),
          init=hk.initializers.Constant(1.0))

      gate_logits = jnp.einsum('bqc, chv->bqhv', q_data, gating_weights)
      weighted_avg *= jax.nn.sigmoid(gate_logits + gating_bias)

    if self.global_config.zero_init:
      output_init = hk.initializers.Constant(0.0)
    else:
      output_init = glorot_uniform()

    o_weights = hk.get_parameter(
        'output_w', shape=(num_head, value_dim, self.output_dim),
        init=output_init)
    o_bias = hk.get_parameter('output_b', shape=(self.output_dim,),
                              init=hk.initializers.Constant(0.0))

    return jnp.einsum('bqhc,hco->bqo', weighted_avg, o_weights) + o_bias
|
665 |
+
|
666 |
+
|
667 |
+
class GlobalAttention(hk.Module):
  """Global attention.

  Jumper et al. (2021) Suppl. Alg. 19 "MSAColumnGlobalAttention" lines 2-7
  """

  def __init__(self, config, global_config, output_dim, name='attention'):
    super().__init__(name=name)

    self.config = config
    self.global_config = global_config
    self.output_dim = output_dim

  def __call__(self, q_data, m_data, q_mask, bias):
    """Builds GlobalAttention module.

    Arguments:
      q_data: A tensor of queries with size [batch_size, N_queries,
        q_channels]
      m_data: A tensor of memories from which the keys and values
        projected. Size [batch_size, N_keys, m_channels]
      q_mask: A binary mask for q_data with zeros in the padded sequence
        elements and ones otherwise. Size [batch_size, N_queries, q_channels]
        (or broadcastable to this shape).
      bias: A bias for the attention. The value passed in is discarded; the
        bias actually used is recomputed from `q_mask`.

    Returns:
      A float32 tensor of size [batch_size, N_queries, output_dim].
    """
    q_channels = q_data.shape[-1]
    m_channels = m_data.shape[-1]

    # Sensible default for when the config keys are missing
    total_key_dim = self.config.get('key_dim', int(q_channels))
    total_value_dim = self.config.get('value_dim', int(m_channels))
    num_head = self.config.num_head
    assert total_key_dim % num_head == 0
    assert total_value_dim % num_head == 0
    # From here on the dims are per head.
    key_dim = total_key_dim // num_head
    value_dim = total_value_dim // num_head

    # Only the queries are projected per head; keys and values use a single
    # projection shared by all heads.
    q_weights = hk.get_parameter(
        'query_w', shape=(q_channels, num_head, key_dim),
        init=glorot_uniform())
    k_weights = hk.get_parameter(
        'key_w', shape=(m_channels, key_dim),
        init=glorot_uniform())
    v_weights = hk.get_parameter(
        'value_w', shape=(m_channels, value_dim),
        init=glorot_uniform())

    values = jnp.einsum('bka,ac->bkc', m_data, v_weights)

    # A single query per batch element: the masked mean over the query axis.
    q_avg = utils.mask_mean(q_mask, q_data, axis=1)

    queries = jnp.einsum('ba,ahc->bhc', q_avg, q_weights) * key_dim**(-0.5)
    keys = jnp.einsum('bka,ac->bkc', m_data, k_weights)
    # Large negative bias on masked-out positions.
    bias = (1e9 * (q_mask[:, None, :, 0] - 1.))
    logits = jnp.einsum('bhc,bkc->bhk', queries, keys) + bias
    attn_weights = jax.nn.softmax(logits)
    weighted_avg = jnp.einsum('bhk,bkc->bhc', attn_weights, values)

    if self.global_config.zero_init:
      output_init = hk.initializers.Constant(0.0)
    else:
      output_init = glorot_uniform()

    o_weights = hk.get_parameter(
        'output_w', shape=(num_head, value_dim, self.output_dim),
        init=output_init)
    o_bias = hk.get_parameter('output_b', shape=(self.output_dim,),
                              init=hk.initializers.Constant(0.0))

    if not self.config.gating:
      # Without gating the same output is shared by all queries; a query axis
      # of size 1 is inserted.
      ungated = jnp.einsum('bhc,hco->bo', weighted_avg, o_weights) + o_bias
      return ungated[:, None]

    gating_weights = hk.get_parameter(
        'gating_w',
        shape=(q_channels, num_head, value_dim),
        init=hk.initializers.Constant(0.0))
    gating_bias = hk.get_parameter(
        'gating_b',
        shape=(num_head, value_dim),
        init=hk.initializers.Constant(1.0))

    gate_logits = jnp.einsum('bqc, chv->bqhv', q_data, gating_weights)
    gate_values = jax.nn.sigmoid(gate_logits + gating_bias)
    # The per-query gates broadcast against the shared weighted average.
    gated_avg = weighted_avg[:, None] * gate_values
    return jnp.einsum('bqhc,hco->bqo', gated_avg, o_weights) + o_bias
|
755 |
+
|
756 |
+
|
757 |
+
class MSARowAttentionWithPairBias(hk.Module):
  """MSA per-row attention biased by the pair representation.

  Jumper et al. (2021) Suppl. Alg. 7 "MSARowAttentionWithPairBias"
  """

  def __init__(self, config, global_config,
               name='msa_row_attention_with_pair_bias'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self,
               msa_act,
               msa_mask,
               pair_act,
               is_training=False):
    """Builds MSARowAttentionWithPairBias module.

    Arguments:
      msa_act: [N_seq, N_res, c_m] MSA representation.
      msa_mask: [N_seq, N_res] mask of non-padded regions.
      pair_act: [N_res, N_res, c_z] pair representation.
      is_training: Whether the module is in training mode.

    Returns:
      Update to msa_act, shape [N_seq, N_res, c_m].
    """
    c = self.config

    assert len(msa_act.shape) == 3
    assert len(msa_mask.shape) == 2
    assert c.orientation == 'per_row'

    # Large negative bias on masked-out key positions.
    mask_bias = 1e9 * (msa_mask - 1.)
    bias = mask_bias[:, None, None, :]
    assert len(bias.shape) == 4

    query_norm = hk.LayerNorm(
        axis=[-1], create_scale=True, create_offset=True, name='query_norm')
    msa_act = query_norm(msa_act)

    pair_norm = hk.LayerNorm(
        axis=[-1], create_scale=True, create_offset=True, name='feat_2d_norm')
    pair_act = pair_norm(pair_act)

    # Project the pair representation to one bias value per head.
    pair_channels = pair_act.shape[-1]
    init_factor = 1. / jnp.sqrt(int(pair_channels))
    weights = hk.get_parameter(
        'feat_2d_weights',
        shape=(pair_channels, c.num_head),
        init=hk.initializers.RandomNormal(stddev=init_factor))
    nonbatched_bias = jnp.einsum('qkc,ch->hqk', pair_act, weights)

    attn_mod = Attention(c, self.global_config, msa_act.shape[-1])
    return mapping.inference_subbatch(
        attn_mod,
        self.global_config.subbatch_size,
        batched_args=[msa_act, msa_act, bias],
        nonbatched_args=[nonbatched_bias],
        low_memory=not is_training)
|
822 |
+
|
823 |
+
|
824 |
+
class MSAColumnAttention(hk.Module):
  """MSA per-column attention.

  Jumper et al. (2021) Suppl. Alg. 8 "MSAColumnAttention"
  """

  def __init__(self, config, global_config, name='msa_column_attention'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self,
               msa_act,
               msa_mask,
               is_training=False):
    """Builds MSAColumnAttention module.

    Arguments:
      msa_act: [N_seq, N_res, c_m] MSA representation.
      msa_mask: [N_seq, N_res] mask of non-padded regions.
      is_training: Whether the module is in training mode.

    Returns:
      Update to msa_act, shape [N_seq, N_res, c_m]
    """
    c = self.config

    assert len(msa_act.shape) == 3
    assert len(msa_mask.shape) == 2
    assert c.orientation == 'per_column'

    # Swap the sequence and residue axes so that attention runs along columns.
    msa_act = jnp.swapaxes(msa_act, -2, -3)
    msa_mask = jnp.swapaxes(msa_mask, -1, -2)

    # Large negative bias on masked-out key positions.
    mask_bias = 1e9 * (msa_mask - 1.)
    bias = mask_bias[:, None, None, :]
    assert len(bias.shape) == 4

    query_norm = hk.LayerNorm(
        axis=[-1], create_scale=True, create_offset=True, name='query_norm')
    msa_act = query_norm(msa_act)

    attn_mod = Attention(c, self.global_config, msa_act.shape[-1])
    attended = mapping.inference_subbatch(
        attn_mod,
        self.global_config.subbatch_size,
        batched_args=[msa_act, msa_act, bias],
        nonbatched_args=[],
        low_memory=not is_training)

    # Undo the axis swap before returning.
    return jnp.swapaxes(attended, -2, -3)
|
877 |
+
|
878 |
+
|
879 |
+
class MSAColumnGlobalAttention(hk.Module):
  """Global attention applied along the columns of the MSA representation.

  Jumper et al. (2021) Suppl. Alg. 19 "MSAColumnGlobalAttention"
  """

  def __init__(self, config, global_config, name='msa_column_global_attention'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self,
               msa_act,
               msa_mask,
               is_training=False):
    """Runs column-wise global attention over the MSA activations.

    Arguments:
      msa_act: [N_seq, N_res, c_m] MSA representation.
      msa_mask: [N_seq, N_res] mask of non-padded regions.
      is_training: Whether the module is in training mode. When False the
        attention is evaluated through the low-memory sub-batched path.

    Returns:
      Update to msa_act, shape [N_seq, N_res, c_m].
    """
    cfg = self.config

    assert len(msa_act.shape) == 3
    assert len(msa_mask.shape) == 2
    assert cfg.orientation == 'per_column'

    # Exchange the two leading axes so that the residue axis comes first.
    transposed_act = jnp.swapaxes(msa_act, -2, -3)
    transposed_mask = jnp.swapaxes(msa_mask, -1, -2)

    # Mask value 1 -> bias 0, mask value 0 -> bias -1e9.
    bias = (1e9 * (transposed_mask - 1.))[:, None, None, :]
    assert len(bias.shape) == 4

    query_norm = hk.LayerNorm(
        axis=[-1], create_scale=True, create_offset=True, name='query_norm')
    normed_act = query_norm(transposed_act)

    global_attention = GlobalAttention(
        cfg, self.global_config, normed_act.shape[-1],
        name='attention')

    # The mask was transposed above, so this is [N_res, N_seq, 1].
    expanded_mask = jnp.expand_dims(transposed_mask, axis=-1)

    attended = mapping.inference_subbatch(
        global_attention,
        self.global_config.subbatch_size,
        batched_args=[normed_act, normed_act, expanded_mask, bias],
        nonbatched_args=[],
        low_memory=not is_training)

    # Undo the initial transpose.
    return jnp.swapaxes(attended, -2, -3)
|
935 |
+
|
936 |
+
|
937 |
+
class TriangleAttention(hk.Module):
  """Attention over the pair representation, per row or per column.

  Jumper et al. (2021) Suppl. Alg. 13 "TriangleAttentionStartingNode"
  Jumper et al. (2021) Suppl. Alg. 14 "TriangleAttentionEndingNode"
  """

  def __init__(self, config, global_config, name='triangle_attention'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, pair_act, pair_mask, is_training=False):
    """Runs triangle attention on the pair activations.

    Arguments:
      pair_act: [N_res, N_res, c_z] pair activations tensor
      pair_mask: [N_res, N_res] mask of non-padded regions in the tensor.
      is_training: Whether the module is in training mode. When False the
        attention is evaluated through the low-memory sub-batched path.

    Returns:
      Update to pair_act, shape [N_res, N_res, c_z].
    """
    cfg = self.config

    assert len(pair_act.shape) == 3
    assert len(pair_mask.shape) == 2
    assert cfg.orientation in ['per_row', 'per_column']

    transpose_needed = cfg.orientation == 'per_column'
    if transpose_needed:
      # Work on the transposed tensors; the result is transposed back below.
      pair_act = jnp.swapaxes(pair_act, -2, -3)
      pair_mask = jnp.swapaxes(pair_mask, -1, -2)

    # Mask value 1 -> bias 0, mask value 0 -> bias -1e9.
    bias = (1e9 * (pair_mask - 1.))[:, None, None, :]
    assert len(bias.shape) == 4

    query_norm = hk.LayerNorm(
        axis=[-1], create_scale=True, create_offset=True, name='query_norm')
    pair_act = query_norm(pair_act)

    num_channel = pair_act.shape[-1]
    init_factor = 1. / jnp.sqrt(int(num_channel))
    weights = hk.get_parameter(
        'feat_2d_weights',
        shape=(num_channel, cfg.num_head),
        init=hk.initializers.RandomNormal(stddev=init_factor))
    # Per-head bias derived from the pair activations; it is shared by every
    # sub-batch, hence passed as a non-batched argument.
    nonbatched_bias = jnp.einsum('qkc,ch->hqk', pair_act, weights)

    attention = Attention(cfg, self.global_config, num_channel)
    pair_act = mapping.inference_subbatch(
        attention,
        self.global_config.subbatch_size,
        batched_args=[pair_act, pair_act, bias],
        nonbatched_args=[nonbatched_bias],
        low_memory=not is_training)

    if transpose_needed:
      pair_act = jnp.swapaxes(pair_act, -2, -3)

    return pair_act
|
997 |
+
|
998 |
+
|
999 |
+
class MaskedMsaHead(hk.Module):
  """Head to predict MSA at the masked locations.

  A single linear projection of the MSA representation is trained with a
  BERT-style objective to reconstruct the masked entries of the MSA.
  Jumper et al. (2021) Suppl. Sec. 1.9.9 "Masked MSA prediction"
  """

  def __init__(self, config, global_config, name='masked_msa_head'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, representations, batch, is_training):
    """Projects the MSA representation to per-position logits.

    Arguments:
      representations: Dictionary of representations, must contain:
        * 'msa': MSA representation, shape [N_seq, N_res, c_m].
      batch: Batch, unused.
      is_training: Whether the module is in training mode (not read here).

    Returns:
      Dictionary containing:
        * 'logits': logits of shape [N_seq, N_res, N_aatype] with
            (unnormalized) log probabilities of predicted aatype at position.
    """
    del batch
    projection = common_modules.Linear(
        self.config.num_output,
        initializer=utils.final_init(self.global_config),
        name='logits')
    return {'logits': projection(representations['msa'])}

  def loss(self, value, batch):
    """Masked cross-entropy between predicted logits and the true MSA.

    Arguments:
      value: Dictionary with the 'logits' produced by `__call__`.
      batch: Dictionary providing 'true_msa' (class indices, one-hot encoded
        here with 23 classes) and 'bert_mask' (weights of the positions that
        contribute to the loss).

    Returns:
      Dictionary with a single entry 'loss': the mask-weighted mean of the
      cross-entropy over the last two axes.
    """
    labels = jax.nn.one_hot(batch['true_msa'], num_classes=23)
    errors = softmax_cross_entropy(labels=labels, logits=value['logits'])

    bert_mask = batch['bert_mask']
    weighted_error = jnp.sum(errors * bert_mask, axis=(-2, -1))
    # The small constant guards against an all-zero mask.
    normaliser = 1e-8 + jnp.sum(bert_mask, axis=(-2, -1))
    return {'loss': weighted_error / normaliser}
|
1042 |
+
|
1043 |
+
|
1044 |
+
class PredictedLDDTHead(hk.Module):
  """Head to predict the per-residue LDDT to be used as a confidence measure.

  Jumper et al. (2021) Suppl. Sec. 1.9.6 "Model confidence prediction (pLDDT)"
  Jumper et al. (2021) Suppl. Alg. 29 "predictPerResidueLDDT_Ca"
  """

  def __init__(self, config, global_config, name='predicted_lddt_head'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, representations, batch, is_training):
    """Builds PredictedLDDTHead module.

    Arguments:
      representations: Dictionary of representations, must contain:
        * 'structure_module': Single representation from the structure module,
             shape [N_res, c_s].
      batch: Batch, unused.
      is_training: Whether the module is in training mode (not read here).

    Returns:
      Dictionary containing :
        * 'logits': logits of shape [N_res, N_bins] with
            (unnormalized) log probabilities of binned predicted lDDT.
    """
    act = representations['structure_module']

    act = hk.LayerNorm(
        axis=[-1],
        create_scale=True,
        create_offset=True,
        name='input_layer_norm')(
            act)

    act = common_modules.Linear(
        self.config.num_channels,
        initializer='relu',
        name='act_0')(
            act)
    act = jax.nn.relu(act)

    act = common_modules.Linear(
        self.config.num_channels,
        initializer='relu',
        name='act_1')(
            act)
    act = jax.nn.relu(act)

    logits = common_modules.Linear(
        self.config.num_bins,
        initializer=utils.final_init(self.global_config),
        name='logits')(
            act)
    # Shape (num_res, num_bins)
    return dict(logits=logits)

  def loss(self, value, batch):
    """Cross-entropy between predicted lDDT bins and the true CA lDDT.

    Arguments:
      value: Dictionary of model outputs, must contain:
        * value['structure_module']['final_atom_positions']
        * value['predicted_lddt']['logits']
      batch: Dictionary of ground-truth features, must contain
        'all_atom_positions' and 'all_atom_mask', plus 'resolution' when
        `config.filter_by_resolution` is set.

    Returns:
      Dictionary with a single entry 'loss'.
    """
    # Use one CA index for positions and masks alike instead of mixing a
    # literal index with the residue_constants lookup.
    ca_idx = residue_constants.atom_order['CA']

    # Shape (num_res, 37, 3)
    pred_all_atom_pos = value['structure_module']['final_atom_positions']
    # Shape (num_res, 37, 3)
    true_all_atom_pos = batch['all_atom_positions']
    # Shape (num_res, 37)
    all_atom_mask = batch['all_atom_mask']

    # Shape (num_res,)
    lddt_ca = lddt.lddt(
        # Shape (batch_size, num_res, 3)
        predicted_points=pred_all_atom_pos[None, :, ca_idx, :],
        # Shape (batch_size, num_res, 3)
        true_points=true_all_atom_pos[None, :, ca_idx, :],
        # Shape (batch_size, num_res, 1)
        true_points_mask=all_atom_mask[None, :, ca_idx:ca_idx + 1].astype(
            jnp.float32),
        cutoff=15.,
        per_residue=True)[0]
    # The lDDT value is a regression target only; no gradient flows through it.
    lddt_ca = jax.lax.stop_gradient(lddt_ca)

    num_bins = self.config.num_bins
    bin_index = jnp.floor(lddt_ca * num_bins).astype(jnp.int32)

    # protect against out of range for lddt_ca == 1
    bin_index = jnp.minimum(bin_index, num_bins - 1)
    lddt_ca_one_hot = jax.nn.one_hot(bin_index, num_classes=num_bins)

    # Shape (num_res, num_bins)
    logits = value['predicted_lddt']['logits']
    errors = softmax_cross_entropy(labels=lddt_ca_one_hot, logits=logits)

    # Shape (num_res,)
    mask_ca = all_atom_mask[:, ca_idx]
    mask_ca = mask_ca.astype(jnp.float32)
    loss = jnp.sum(errors * mask_ca) / (jnp.sum(mask_ca) + 1e-8)

    if self.config.filter_by_resolution:
      # NMR & distillation have resolution = 0
      loss *= ((batch['resolution'] >= self.config.min_resolution)
               & (batch['resolution'] <= self.config.max_resolution)).astype(
                   jnp.float32)

    output = {'loss': loss}
    return output
|
1146 |
+
|
1147 |
+
|
1148 |
+
class PredictedAlignedErrorHead(hk.Module):
  """Head to predict the distance errors in the backbone alignment frames.

  Can be used to compute predicted TM-Score.
  Jumper et al. (2021) Suppl. Sec. 1.9.7 "TM-score prediction"
  """

  def __init__(self, config, global_config,
               name='predicted_aligned_error_head'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, representations, batch, is_training):
    """Builds PredictedAlignedErrorHead module.

    Arguments:
      representations: Dictionary of representations, must contain:
        * 'pair': pair representation, shape [N_res, N_res, c_z].
      batch: Batch, unused.
      is_training: Whether the module is in training mode (not read here).

    Returns:
      Dictionary containing:
        * 'logits': logits for aligned error, shape [N_res, N_res, N_bins].
        * 'breaks': array containing bin breaks, shape [N_bins - 1].
    """

    act = representations['pair']

    # Shape (num_res, num_res, num_bins)
    logits = common_modules.Linear(
        self.config.num_bins,
        initializer=utils.final_init(self.global_config),
        name='logits')(act)
    # Shape (num_bins - 1,)
    breaks = jnp.linspace(
        0., self.config.max_error_bin, self.config.num_bins - 1)
    return dict(logits=logits, breaks=breaks)

  def loss(self, value, batch):
    """Cross-entropy between predicted and true binned aligned errors.

    Arguments:
      value: Dictionary of model outputs, must contain:
        * value['structure_module']['final_affines']
        * value['predicted_aligned_error']['breaks']
        * value['predicted_aligned_error']['logits']
      batch: Dictionary of ground-truth features, must contain
        'backbone_affine_tensor' and 'backbone_affine_mask', plus
        'resolution' when `config.filter_by_resolution` is set.

    Returns:
      Dictionary with a single entry 'loss'.
    """
    # Shape (num_res, 7)
    predicted_affine = quat_affine.QuatAffine.from_tensor(
        value['structure_module']['final_affines'])
    # Shape (num_res, 7)
    true_affine = quat_affine.QuatAffine.from_tensor(
        batch['backbone_affine_tensor'])
    # Shape (num_res)
    mask = batch['backbone_affine_mask']
    # Shape (num_res, num_res)
    square_mask = mask[:, None] * mask[None, :]
    num_bins = self.config.num_bins
    # Bin breaks as returned by __call__: num_bins - 1 values.
    breaks = value['predicted_aligned_error']['breaks']
    # Logits as returned by __call__: last axis has num_bins entries.
    logits = value['predicted_aligned_error']['logits']

    # Compute the squared error for each alignment.
    def _local_frame_points(affine):
      # Express every residue's translation in every residue's local frame.
      points = [jnp.expand_dims(x, axis=-2) for x in affine.translation]
      return affine.invert_point(points, extra_dims=1)
    error_dist2_xyz = [
        jnp.square(a - b)
        for a, b in zip(_local_frame_points(predicted_affine),
                        _local_frame_points(true_affine))]
    error_dist2 = sum(error_dist2_xyz)
    # Shape (num_res, num_res)
    # First num_res are alignment frames, second num_res are the residues.
    error_dist2 = jax.lax.stop_gradient(error_dist2)

    # Compare squared errors against squared breaks to avoid a square root;
    # the bin index is the number of breaks the error exceeds.
    sq_breaks = jnp.square(breaks)
    true_bins = jnp.sum((
        error_dist2[..., None] > sq_breaks).astype(jnp.int32), axis=-1)

    errors = softmax_cross_entropy(
        labels=jax.nn.one_hot(true_bins, num_bins, axis=-1), logits=logits)

    loss = (jnp.sum(errors * square_mask, axis=(-2, -1)) /
            (1e-8 + jnp.sum(square_mask, axis=(-2, -1))))

    if self.config.filter_by_resolution:
      # NMR & distillation have resolution = 0
      loss *= ((batch['resolution'] >= self.config.min_resolution)
               & (batch['resolution'] <= self.config.max_resolution)).astype(
                   jnp.float32)

    output = {'loss': loss}
    return output
|
1236 |
+
|
1237 |
+
|
1238 |
+
class ExperimentallyResolvedHead(hk.Module):
  """Predicts if an atom is experimentally resolved in a high-res structure.

  Only trained on high-resolution X-ray crystals & cryo-EM.
  Jumper et al. (2021) Suppl. Sec. 1.9.10 '"Experimentally resolved" prediction'
  """

  def __init__(self, config, global_config,
               name='experimentally_resolved_head'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, representations, batch, is_training):
    """Projects the single representation to per-atom logits.

    Arguments:
      representations: Dictionary of representations, must contain:
        * 'single': Single representation, shape [N_res, c_s].
      batch: Batch, unused.
      is_training: Whether the module is in training mode (not read here).

    Returns:
      Dictionary containing:
        * 'logits': logits of shape [N_res, 37],
            log probability that an atom is resolved in atom37 representation,
            can be converted to probability by applying sigmoid.
    """
    projection = common_modules.Linear(
        37,  # atom_exists.shape[-1]
        initializer=utils.final_init(self.global_config),
        name='logits')
    return {'logits': projection(representations['single'])}

  def loss(self, value, batch):
    """Sigmoid cross-entropy against the experimental atom mask.

    Arguments:
      value: Dictionary with the rank-2 'logits' produced by `__call__`.
      batch: Dictionary providing 'atom37_atom_exists' and 'all_atom_mask',
        plus 'resolution' when `config.filter_by_resolution` is set.

    Returns:
      Dictionary with a single entry 'loss'.
    """
    logits = value['logits']
    assert len(logits.shape) == 2

    # Does the atom appear in the amino acid?
    atom_exists = batch['atom37_atom_exists']
    # Is the atom resolved in the experiment? Subset of atom_exists,
    # *except for OXT*
    resolved = batch['all_atom_mask'].astype(jnp.float32)

    xent = sigmoid_cross_entropy(labels=resolved, logits=logits)
    # Average only over atoms that exist in the residue type.
    loss = jnp.sum(xent * atom_exists) / (1e-8 + jnp.sum(atom_exists))

    if self.config.filter_by_resolution:
      # NMR & distillation examples have resolution = 0.
      resolution = batch['resolution']
      in_range = ((resolution >= self.config.min_resolution)
                  & (resolution <= self.config.max_resolution))
      loss *= in_range.astype(jnp.float32)

    return {'loss': loss}
|
1293 |
+
|
1294 |
+
|
1295 |
+
class TriangleMultiplication(hk.Module):
  """Triangle multiplication layer ("outgoing" or "incoming").

  Jumper et al. (2021) Suppl. Alg. 11 "TriangleMultiplicationOutgoing"
  Jumper et al. (2021) Suppl. Alg. 12 "TriangleMultiplicationIncoming"
  """

  def __init__(self, config, global_config, name='triangle_multiplication'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, act, mask, is_training=True):
    """Applies the gated triangle multiplication to the pair activations.

    Arguments:
      act: Pair activations, shape [N_res, N_res, c_z]
      mask: Pair mask, shape [N_res, N_res].
      is_training: Whether the module is in training mode (discarded).

    Returns:
      Outputs, same shape/type as act.
    """
    del is_training
    cfg = self.config
    gc = self.global_config

    def _sigmoid_gate(num_channel, name, inputs):
      # Linear layer with bias initialised to 1, squashed through a sigmoid.
      linear = common_modules.Linear(
          num_channel,
          bias_init=1.,
          initializer=utils.final_init(gc),
          name=name)
      return jax.nn.sigmoid(linear(inputs))

    mask = mask[..., None]

    act = hk.LayerNorm(axis=[-1], create_scale=True, create_offset=True,
                       name='layer_norm_input')(act)
    # Kept for the output gate and to size the output projection.
    input_act = act

    left_projection = common_modules.Linear(
        cfg.num_intermediate_channel,
        name='left_projection')
    left_proj_act = mask * left_projection(act)

    right_projection = common_modules.Linear(
        cfg.num_intermediate_channel,
        name='right_projection')
    right_proj_act = mask * right_projection(act)

    left_gate_values = _sigmoid_gate(
        cfg.num_intermediate_channel, 'left_gate', act)
    right_gate_values = _sigmoid_gate(
        cfg.num_intermediate_channel, 'right_gate', act)

    left_proj_act *= left_gate_values
    right_proj_act *= right_gate_values

    # "Outgoing" edges equation: 'ikc,jkc->ijc'
    # "Incoming" edges equation: 'kjc,kic->ijc'
    # Note on the Suppl. Alg. 11 & 12 notation:
    # For the "outgoing" edges, a = left_proj_act and b = right_proj_act
    # For the "incoming" edges, it's swapped:
    #   b = left_proj_act and a = right_proj_act
    act = jnp.einsum(cfg.equation, left_proj_act, right_proj_act)

    center_norm = hk.LayerNorm(
        axis=[-1],
        create_scale=True,
        create_offset=True,
        name='center_layer_norm')
    act = center_norm(act)

    output_channel = int(input_act.shape[-1])

    output_projection = common_modules.Linear(
        output_channel,
        initializer=utils.final_init(gc),
        name='output_projection')
    act = output_projection(act)

    gate_values = _sigmoid_gate(output_channel, 'gating_linear', input_act)
    act *= gate_values

    return act
|
1383 |
+
|
1384 |
+
|
1385 |
+
class DistogramHead(hk.Module):
  """Head to predict a distogram.

  Jumper et al. (2021) Suppl. Sec. 1.9.8 "Distogram prediction"
  """

  def __init__(self, config, global_config, name='distogram_head'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, representations, batch, is_training):
    """Builds DistogramHead module.

    Arguments:
      representations: Dictionary of representations, must contain:
        * 'pair': pair representation, shape [N_res, N_res, c_z].
      batch: Batch, unused.
      is_training: Whether the module is in training mode (not read here).

    Returns:
      Dictionary containing:
        * 'logits': logits for distogram, shape [N_res, N_res, N_bins].
        * 'bin_edges': array containing bin breaks, shape [N_bins - 1,].
    """
    half_logits = common_modules.Linear(
        self.config.num_bins,
        initializer=utils.final_init(self.global_config),
        name='half_logits')(
            representations['pair'])

    # Adding the transpose makes the logits symmetric in the two residue axes.
    logits = half_logits + jnp.swapaxes(half_logits, -2, -3)
    breaks = jnp.linspace(self.config.first_break, self.config.last_break,
                          self.config.num_bins - 1)

    return dict(logits=logits, bin_edges=breaks)

  def loss(self, value, batch):
    """Distogram log loss; delegates to `_distogram_log_loss`.

    Arguments:
      value: Dictionary with the 'logits' and 'bin_edges' from `__call__`.
      batch: Ground-truth feature dictionary forwarded unchanged.

    Returns:
      Whatever `_distogram_log_loss` returns for these inputs.
    """
    return _distogram_log_loss(value['logits'], value['bin_edges'],
                               batch, self.config.num_bins)
|
1425 |
+
|
1426 |
+
|
1427 |
+
def _distogram_log_loss(logits, bin_edges, batch, num_bins):
  """Log loss of a distogram.

  Arguments:
    logits: Rank-3 distogram logits.
    bin_edges: Bin breaks, in the same distance units as the positions.
    batch: Dictionary providing 'pseudo_beta' (last axis of size 3) and
      'pseudo_beta_mask'.
    num_bins: Number of distogram bins used for the one-hot labels.

  Returns:
    Dictionary with 'loss' (mask-weighted mean cross-entropy) and
    'true_dist' (pairwise distances between the pseudo-beta positions).
  """

  assert len(logits.shape) == 3
  positions = batch['pseudo_beta']
  mask = batch['pseudo_beta_mask']

  assert positions.shape[-1] == 3

  # Squared breaks are compared with squared distances.
  sq_breaks = jnp.square(bin_edges)

  pairwise_delta = (jnp.expand_dims(positions, axis=-2) -
                    jnp.expand_dims(positions, axis=-3))
  dist2 = jnp.sum(jnp.square(pairwise_delta), axis=-1, keepdims=True)

  # The bin index is the number of breaks the squared distance exceeds.
  true_bins = jnp.sum(dist2 > sq_breaks, axis=-1)

  labels = jax.nn.one_hot(true_bins, num_bins)
  errors = softmax_cross_entropy(labels=labels, logits=logits)

  square_mask = jnp.expand_dims(mask, axis=-2) * jnp.expand_dims(mask, axis=-1)

  masked_error_sum = jnp.sum(errors * square_mask, axis=(-2, -1))
  mask_total = 1e-6 + jnp.sum(square_mask, axis=(-2, -1))
  avg_error = masked_error_sum / mask_total

  # Drop the trailing singleton axis kept for the comparison above.
  true_dist = jnp.sqrt(1e-6 + dist2[..., 0])
  return dict(loss=avg_error, true_dist=true_dist)
|
1457 |
+
|
1458 |
+
|
1459 |
+
class OuterProductMean(hk.Module):
  """Computes mean outer product.

  Jumper et al. (2021) Suppl. Alg. 10 "OuterProductMean"
  """

  def __init__(self,
               config,
               global_config,
               num_output_channel,
               name='outer_product_mean'):
    super().__init__(name=name)
    self.global_config = global_config
    self.config = config
    self.num_output_channel = num_output_channel

  def __call__(self, act, mask, is_training=True):
    """Turns the MSA representation into a pair-representation update.

    Arguments:
      act: MSA representation, shape [N_seq, N_res, c_m].
      mask: MSA mask, shape [N_seq, N_res].
      is_training: Whether the module is in training mode (not read here).

    Returns:
      Update to pair representation, shape [N_res, N_res, c_z].
    """
    gc = self.global_config
    cfg = self.config

    mask = mask[..., None]
    act = hk.LayerNorm(
        axis=[-1], create_scale=True, create_offset=True,
        name='layer_norm_input')(act)

    left_projection = common_modules.Linear(
        cfg.num_outer_channel,
        initializer='linear',
        name='left_projection')
    left_act = mask * left_projection(act)

    right_projection = common_modules.Linear(
        cfg.num_outer_channel,
        initializer='linear',
        name='right_projection')
    right_act = mask * right_projection(act)

    if gc.zero_init:
      weight_init = hk.initializers.Constant(0.0)
    else:
      weight_init = hk.initializers.VarianceScaling(scale=2., mode='fan_in')

    output_w = hk.get_parameter(
        'output_w',
        shape=(cfg.num_outer_channel, cfg.num_outer_channel,
               self.num_output_channel),
        init=weight_init)
    output_b = hk.get_parameter(
        'output_b', shape=(self.num_output_channel,),
        init=hk.initializers.Constant(0.0))

    def compute_chunk(left_chunk):
      # This is equivalent to
      #
      # act = jnp.einsum('abc,ade->dceb', left_act, right_act)
      # act = jnp.einsum('dceb,cef->bdf', act, output_w) + output_b
      #
      # but faster.
      left_chunk = jnp.transpose(left_chunk, [0, 2, 1])
      outer = jnp.einsum('acb,ade->dceb', left_chunk, right_act)
      projected = jnp.einsum('dceb,cef->dbf', outer, output_w) + output_b
      return jnp.transpose(projected, [1, 0, 2])

    # Only the left activations are chunked (along axis 1); the right
    # activations are captured whole by the closure above.
    act = mapping.inference_subbatch(
        compute_chunk,
        cfg.chunk_size,
        batched_args=[left_act],
        nonbatched_args=[],
        low_memory=True,
        input_subbatch_dim=1,
        output_subbatch_dim=0)

    # Divide by the per-pair mask overlap summed over the first axis to turn
    # the sum into a mean; epsilon avoids division by zero.
    epsilon = 1e-3
    norm = jnp.einsum('abc,adc->bdc', mask, mask)
    act /= epsilon + norm

    return act
|
1544 |
+
|
1545 |
+
def dgram_from_positions(positions, num_bins, min_bin, max_bin):
  """Compute distogram from amino acid positions.

  Arguments:
    positions: [N_res, 3] Position coordinates.
    num_bins: The number of bins in the distogram.
    min_bin: The left edge of the first bin.
    max_bin: The left edge of the final bin. The final bin catches
      everything larger than `max_bin` (up to a squared distance of 1e8).

  Returns:
    Float32 distogram with `num_bins` entries on a new trailing axis. An
    entry is 1 where the squared distance lies strictly between the squared
    lower and upper edge of that bin and 0 otherwise.
  """
  # Bin edges are squared so they can be compared with squared distances.
  sq_lower = jnp.square(jnp.linspace(min_bin, max_bin, num_bins))
  open_end = jnp.array([1e8], dtype=jnp.float32)
  sq_upper = jnp.concatenate([sq_lower[1:], open_end], axis=-1)

  pairwise_delta = (jnp.expand_dims(positions, axis=-2) -
                    jnp.expand_dims(positions, axis=-3))
  dist2 = jnp.sum(jnp.square(pairwise_delta), axis=-1, keepdims=True)

  above_lower = (dist2 > sq_lower).astype(jnp.float32)
  below_upper = (dist2 < sq_upper).astype(jnp.float32)
  return above_lower * below_upper
|
1569 |
+
|
1570 |
+
def dgram_from_positions_soft(positions, num_bins, min_bin, max_bin, temp=2.0):
  """Soft (sigmoid-smoothed) positions-to-distogram converter.

  Arguments:
    positions: Position coordinates; the last axis holds the coordinates and
      the second-to-last axis indexes the points.
    num_bins: The number of bins in the returned distogram.
    min_bin: The first finite bin edge.
    max_bin: The last finite bin edge.
    temp: Temperature dividing the sigmoid arguments; larger values give
      softer bin assignments.

  Returns:
    Array with `num_bins` entries on a new trailing axis. An extra catch-all
    bin below `min_bin` takes part in the normalisation and is then dropped,
    so the returned entries may sum to less than 1.
  """
  lower_breaks = jnp.append(-1e8, jnp.linspace(min_bin, max_bin, num_bins))
  upper_breaks = jnp.append(lower_breaks[1:], 1e8)

  pairwise_delta = positions[..., :, None, :] - positions[..., None, :, :]
  sq_dist = jnp.square(pairwise_delta).sum(-1, keepdims=True)
  # The small constant is added under the square root.
  dist = jnp.sqrt(sq_dist + 1e-8)

  above_lower = jax.nn.sigmoid((dist - lower_breaks) / temp)
  below_upper = jax.nn.sigmoid((upper_breaks - dist) / temp)
  soft_bins = above_lower * below_upper
  soft_bins = soft_bins / (soft_bins.sum(-1, keepdims=True) + 1e-8)

  # Discard the catch-all bin that sits below min_bin.
  return soft_bins[..., 1:]
|
1578 |
+
|
1579 |
+
def pseudo_beta_fn(aatype, all_atom_positions, all_atom_masks):
  """Create pseudo beta features.

  The pseudo-beta position is the CA atom for glycine and the CB atom for
  every other residue type.

  Arguments:
    aatype: Either an integer array of residue type indices, or a
      non-integer array whose last axis is indexed by residue type (the
      glycine entry is then used as a blending weight).
    all_atom_positions: Atom positions; the second-to-last axis is indexed
      with the `residue_constants.atom_order` indices.
    all_atom_masks: Optional atom mask whose last axis is indexed the same
      way, or None.

  Returns:
    `pseudo_beta` alone when `all_atom_masks` is None, otherwise the tuple
    `(pseudo_beta, pseudo_beta_mask)`. The mask is cast to float32 only in
    the integer-`aatype` case.
  """

  ca_idx = residue_constants.atom_order['CA']
  cb_idx = residue_constants.atom_order['CB']

  discrete_aatype = jnp.issubdtype(aatype.dtype, jnp.integer)
  gly_idx = residue_constants.restype_order['G']

  ca_pos = all_atom_positions[..., ca_idx, :]
  cb_pos = all_atom_positions[..., cb_idx, :]

  if discrete_aatype:
    # Hard selection between CA and CB.
    is_gly = jnp.equal(aatype, gly_idx)
    is_gly_tile = jnp.tile(is_gly[..., None], [1] * len(is_gly.shape) + [3])
    pseudo_beta = jnp.where(is_gly_tile, ca_pos, cb_pos)
  else:
    # Soft selection: blend CA and CB with the glycine weight.
    is_gly = aatype[..., gly_idx]
    gly_weight = is_gly[..., None]
    pseudo_beta = gly_weight * ca_pos + (1 - gly_weight) * cb_pos

  if all_atom_masks is None:
    return pseudo_beta

  ca_mask = all_atom_masks[..., ca_idx]
  cb_mask = all_atom_masks[..., cb_idx]
  if discrete_aatype:
    pseudo_beta_mask = jnp.where(is_gly, ca_mask, cb_mask)
    pseudo_beta_mask = pseudo_beta_mask.astype(jnp.float32)
  else:
    pseudo_beta_mask = is_gly * ca_mask + (1 - is_gly) * cb_mask
  return pseudo_beta, pseudo_beta_mask
|
1608 |
+
|
1609 |
+
class EvoformerIteration(hk.Module):
  """Single iteration (block) of Evoformer stack.

  Jumper et al. (2021) Suppl. Alg. 6 "EvoformerStack" lines 2-10
  """

  def __init__(self, config, global_config, is_extra_msa,
               name='evoformer_iteration'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config
    self.is_extra_msa = is_extra_msa

  def __call__(self, activations, masks, is_training=True, safe_key=None, scale_rate=1.0):
    """Applies one Evoformer block to the MSA and pair activations.

    Arguments:
      activations: Dictionary containing activations:
        * 'msa': MSA activations, shape [N_seq, N_res, c_m].
        * 'pair': pair activations, shape [N_res, N_res, c_z].
      masks: Dictionary of masks:
        * 'msa': MSA mask, shape [N_seq, N_res].
        * 'pair': pair mask, shape [N_res, N_res].
      is_training: Whether the module is in training mode.
      safe_key: prng.SafeKey encapsulating rng key. A fresh one is drawn from
        Haiku when None.
      scale_rate: Forwarded unchanged to `dropout_wrapper` for every layer.

    Returns:
      Dictionary with the updated 'msa' and 'pair' activations.
    """
    cfg = self.config
    gc = self.global_config

    msa_act, pair_act = activations['msa'], activations['pair']

    if safe_key is None:
      safe_key = prng.SafeKey(hk.next_rng_key())

    msa_mask, pair_mask = masks['msa'], masks['pair']

    # Every layer below goes through the same dropout wrapper.
    apply_layer = functools.partial(
        dropout_wrapper,
        is_training=is_training,
        global_config=gc,
        scale_rate=scale_rate)

    # One sub-key per layer (nine layers), consumed in order.
    safe_key, *layer_keys = safe_key.split(10)
    layer_keys = iter(layer_keys)

    # --- MSA stack -------------------------------------------------------
    row_attention = MSARowAttentionWithPairBias(
        cfg.msa_row_attention_with_pair_bias, gc,
        name='msa_row_attention_with_pair_bias')
    msa_act = apply_layer(
        row_attention,
        msa_act,
        msa_mask,
        safe_key=next(layer_keys),
        pair_act=pair_act)

    # The extra-MSA stack uses global column attention instead.
    if self.is_extra_msa:
      column_attention = MSAColumnGlobalAttention(
          cfg.msa_column_attention, gc, name='msa_column_global_attention')
    else:
      column_attention = MSAColumnAttention(
          cfg.msa_column_attention, gc, name='msa_column_attention')
    msa_act = apply_layer(
        column_attention,
        msa_act,
        msa_mask,
        safe_key=next(layer_keys))

    msa_transition = Transition(cfg.msa_transition, gc, name='msa_transition')
    msa_act = apply_layer(
        msa_transition,
        msa_act,
        msa_mask,
        safe_key=next(layer_keys))

    # --- MSA -> pair communication ---------------------------------------
    outer_product_mean = OuterProductMean(
        config=cfg.outer_product_mean,
        global_config=gc,
        num_output_channel=int(pair_act.shape[-1]),
        name='outer_product_mean')
    pair_act = apply_layer(
        outer_product_mean,
        msa_act,
        msa_mask,
        safe_key=next(layer_keys),
        output_act=pair_act)

    # --- Pair stack ------------------------------------------------------
    triangle_mult_outgoing = TriangleMultiplication(
        cfg.triangle_multiplication_outgoing, gc,
        name='triangle_multiplication_outgoing')
    pair_act = apply_layer(
        triangle_mult_outgoing,
        pair_act,
        pair_mask,
        safe_key=next(layer_keys))

    triangle_mult_incoming = TriangleMultiplication(
        cfg.triangle_multiplication_incoming, gc,
        name='triangle_multiplication_incoming')
    pair_act = apply_layer(
        triangle_mult_incoming,
        pair_act,
        pair_mask,
        safe_key=next(layer_keys))

    triangle_attn_start = TriangleAttention(
        cfg.triangle_attention_starting_node, gc,
        name='triangle_attention_starting_node')
    pair_act = apply_layer(
        triangle_attn_start,
        pair_act,
        pair_mask,
        safe_key=next(layer_keys))

    triangle_attn_end = TriangleAttention(
        cfg.triangle_attention_ending_node, gc,
        name='triangle_attention_ending_node')
    pair_act = apply_layer(
        triangle_attn_end,
        pair_act,
        pair_mask,
        safe_key=next(layer_keys))

    pair_transition = Transition(
        cfg.pair_transition, gc, name='pair_transition')
    pair_act = apply_layer(
        pair_transition,
        pair_act,
        pair_mask,
        safe_key=next(layer_keys))

    return {'msa': msa_act, 'pair': pair_act}
|
1727 |
+
|
1728 |
+
|
1729 |
+
class EmbeddingsAndEvoformer(hk.Module):
  """Embeds the input data and runs Evoformer.

  Produces the MSA, single and pair representations.
  Jumper et al. (2021) Suppl. Alg. 2 "Inference" line 5-18
  """

  def __init__(self, config, global_config, name='evoformer'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, batch, is_training, safe_key=None):
    """Embed the inputs, then run the extra-MSA stack and the main Evoformer.

    Arguments:
      batch: Dictionary of input features. Keys read directly here are
        'target_feat', 'msa_feat', 'seq_mask', 'msa_mask', 'extra_msa_mask'
        and 'scale_rate'. Optional recycling inputs are 'prev_pos' (used
        together with 'aatype') or 'prev_dgram', plus 'prev_msa_first_row'
        and 'prev_pair'. Relative positions come from 'rel_pos', 'offset' or
        'residue_index' (in that order of preference). All 'template_*'
        entries are used when templates are enabled.
      is_training: Whether the module is in training mode.
      safe_key: Optional prng.SafeKey. When None a new key is drawn with
        hk.next_rng_key().

    Returns:
      Dictionary with the 'single', 'pair', 'msa' and 'msa_first_row'
      activations.
    """

    c = self.config
    gc = self.global_config

    if safe_key is None:
      safe_key = prng.SafeKey(hk.next_rng_key())

    # Embed clustered MSA.
    # Jumper et al. (2021) Suppl. Alg. 2 "Inference" line 5
    # Jumper et al. (2021) Suppl. Alg. 3 "InputEmbedder"
    preprocess_1d = common_modules.Linear(
        c.msa_channel, name='preprocess_1d')(
            batch['target_feat'])

    preprocess_msa = common_modules.Linear(
        c.msa_channel, name='preprocess_msa')(
            batch['msa_feat'])

    msa_activations = jnp.expand_dims(preprocess_1d, axis=0) + preprocess_msa

    left_single = common_modules.Linear(
        c.pair_channel, name='left_single')(
            batch['target_feat'])
    right_single = common_modules.Linear(
        c.pair_channel, name='right_single')(
            batch['target_feat'])
    pair_activations = left_single[:, None] + right_single[None]
    mask_2d = batch['seq_mask'][:, None] * batch['seq_mask'][None, :]

    # Inject previous outputs for recycling.
    # Jumper et al. (2021) Suppl. Alg. 2 "Inference" line 6
    # Jumper et al. (2021) Suppl. Alg. 32 "RecyclingEmbedder"

    # `dgram` stays None when the batch carries neither 'prev_pos' nor
    # 'prev_dgram'; previously that case hit an unbound local below.
    dgram = None
    if "prev_pos" in batch:
      # use predicted position input
      prev_pseudo_beta = pseudo_beta_fn(batch['aatype'], batch['prev_pos'], None)
      if c.backprop_dgram:
        dgram = dgram_from_positions_soft(prev_pseudo_beta, temp=c.backprop_dgram_temp, **c.prev_pos)
      else:
        dgram = dgram_from_positions(prev_pseudo_beta, **c.prev_pos)

    elif 'prev_dgram' in batch:
      # use predicted distogram input (from Sergey)
      dgram = jax.nn.softmax(batch["prev_dgram"])
      # (64, 15) projection: input bins are merged in groups of four, and
      # everything that would land in output bin 0 is zeroed out.
      dgram_map = jax.nn.one_hot(jnp.repeat(jnp.append(0,jnp.arange(15)),4),15).at[:,0].set(0)
      dgram = dgram @ dgram_map

    if dgram is not None:
      pair_activations += common_modules.Linear(c.pair_channel, name='prev_pos_linear')(dgram)

    if c.recycle_features:
      if 'prev_msa_first_row' in batch:
        prev_msa_first_row = hk.LayerNorm([-1],
                                          True,
                                          True,
                                          name='prev_msa_first_row_norm')(
                                              batch['prev_msa_first_row'])
        msa_activations = msa_activations.at[0].add(prev_msa_first_row)

      if 'prev_pair' in batch:
        pair_activations += hk.LayerNorm([-1],
                                         True,
                                         True,
                                         name='prev_pair_norm')(
                                             batch['prev_pair'])

    # Relative position encoding.
    # Jumper et al. (2021) Suppl. Alg. 4 "relpos"
    # Jumper et al. (2021) Suppl. Alg. 5 "one_hot"
    if c.max_relative_feature:
      # Add one-hot-encoded clipped residue distances to the pair activations.
      if "rel_pos" in batch:
        rel_pos = batch['rel_pos']
      else:
        if "offset" in batch:
          offset = batch['offset']
        else:
          pos = batch['residue_index']
          offset = pos[:, None] - pos[None, :]
        rel_pos = jax.nn.one_hot(
            jnp.clip(
                offset + c.max_relative_feature,
                a_min=0,
                a_max=2 * c.max_relative_feature),
            2 * c.max_relative_feature + 1)
      # NOTE: the misspelt module name is kept on purpose; it is part of the
      # parameter path.
      pair_activations += common_modules.Linear(c.pair_channel, name='pair_activiations')(rel_pos)

    # Embed templates into the pair activations.
    # Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 9-13

    if c.template.enabled:
      template_batch = {k: batch[k] for k in batch if k.startswith('template_')}
      template_pair_representation = TemplateEmbedding(c.template, gc)(
          pair_activations,
          template_batch,
          mask_2d,
          is_training=is_training,
          scale_rate=batch["scale_rate"])

      pair_activations += template_pair_representation

    # Embed extra MSA features.
    # Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 14-16
    extra_msa_feat = create_extra_msa_feature(batch)
    extra_msa_activations = common_modules.Linear(
        c.extra_msa_channel,
        name='extra_msa_activations')(
            extra_msa_feat)

    # Extra MSA Stack.
    # Jumper et al. (2021) Suppl. Alg. 18 "ExtraMsaStack"
    extra_msa_stack_input = {
        'msa': extra_msa_activations,
        'pair': pair_activations,
    }

    extra_msa_stack_iteration = EvoformerIteration(
        c.evoformer, gc, is_extra_msa=True, name='extra_msa_stack')

    def extra_msa_stack_fn(x):
      # One layer of the extra-MSA stack; the PRNG key is threaded through
      # the carried state so every layer gets its own sub-key.
      act, safe_key = x
      safe_key, safe_subkey = safe_key.split()
      extra_evoformer_output = extra_msa_stack_iteration(
          activations=act,
          masks={
              'msa': batch['extra_msa_mask'],
              'pair': mask_2d
          },
          is_training=is_training,
          safe_key=safe_subkey, scale_rate=batch["scale_rate"])
      return (extra_evoformer_output, safe_key)

    if gc.use_remat:
      extra_msa_stack_fn = hk.remat(extra_msa_stack_fn)

    extra_msa_stack = layer_stack.layer_stack(
        c.extra_msa_stack_num_block)(
            extra_msa_stack_fn)
    extra_msa_output, safe_key = extra_msa_stack(
        (extra_msa_stack_input, safe_key))

    # Only the pair output of the extra-MSA stack is carried forward.
    pair_activations = extra_msa_output['pair']

    evoformer_input = {
        'msa': msa_activations,
        'pair': pair_activations,
    }

    evoformer_masks = {'msa': batch['msa_mask'], 'pair': mask_2d}

    ####################################################################
    ####################################################################

    # Append num_templ rows to msa_activations with template embeddings.
    # Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 7-8
    if c.template.enabled and c.template.embed_torsion_angles:
      # 'template_aatype' may hold integer class ids or an already
      # one-hot/soft encoding with a trailing class axis.
      if jnp.issubdtype(batch['template_aatype'].dtype, jnp.integer):
        num_templ, num_res = batch['template_aatype'].shape
        # Embed the templates aatypes.
        aatype = batch['template_aatype']
        aatype_one_hot = jax.nn.one_hot(batch['template_aatype'], 22, axis=-1)
      else:
        num_templ, num_res, _ = batch['template_aatype'].shape
        aatype = batch['template_aatype'].argmax(-1)
        aatype_one_hot = batch['template_aatype']

      # Embed the templates aatype, torsion angles and masks.
      # Shape (templates, residues, msa_channels)
      ret = all_atom.atom37_to_torsion_angles(
          aatype=aatype,
          all_atom_pos=batch['template_all_atom_positions'],
          all_atom_mask=batch['template_all_atom_masks'],
          # Ensure consistent behaviour during testing:
          placeholder_for_undefined=not gc.zero_init)

      template_features = jnp.concatenate([
          aatype_one_hot,
          jnp.reshape(ret['torsion_angles_sin_cos'], [num_templ, num_res, 14]),
          jnp.reshape(ret['alt_torsion_angles_sin_cos'], [num_templ, num_res, 14]),
          ret['torsion_angles_mask']], axis=-1)

      template_activations = common_modules.Linear(
          c.msa_channel,
          initializer='relu',
          name='template_single_embedding')(template_features)
      template_activations = jax.nn.relu(template_activations)
      template_activations = common_modules.Linear(
          c.msa_channel,
          initializer='relu',
          name='template_projection')(template_activations)

      # Concatenate the templates to the msa.
      evoformer_input['msa'] = jnp.concatenate([evoformer_input['msa'], template_activations], axis=0)

      # Concatenate templates masks to the msa masks.
      # Use mask from the psi angle, as it only depends on the backbone atoms
      # from a single residue.
      torsion_angle_mask = ret['torsion_angles_mask'][:, :, 2]
      torsion_angle_mask = torsion_angle_mask.astype(evoformer_masks['msa'].dtype)
      evoformer_masks['msa'] = jnp.concatenate([evoformer_masks['msa'], torsion_angle_mask], axis=0)

    ####################################################################
    ####################################################################

    # Main trunk of the network
    # Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 17-18
    evoformer_iteration = EvoformerIteration(
        c.evoformer, gc, is_extra_msa=False, name='evoformer_iteration')

    def evoformer_fn(x):
      # One Evoformer block; same key-threading scheme as the extra stack.
      act, safe_key = x
      safe_key, safe_subkey = safe_key.split()
      evoformer_output = evoformer_iteration(
          activations=act,
          masks=evoformer_masks,
          is_training=is_training,
          safe_key=safe_subkey, scale_rate=batch["scale_rate"])
      return (evoformer_output, safe_key)

    if gc.use_remat:
      evoformer_fn = hk.remat(evoformer_fn)

    evoformer_stack = layer_stack.layer_stack(c.evoformer_num_block)(evoformer_fn)
    evoformer_output, safe_key = evoformer_stack((evoformer_input, safe_key))

    msa_activations = evoformer_output['msa']
    pair_activations = evoformer_output['pair']

    single_activations = common_modules.Linear(
        c.seq_channel, name='single_activations')(msa_activations[0])

    num_sequences = batch['msa_feat'].shape[0]
    output = {
        'single': single_activations,
        'pair': pair_activations,
        # Crop away template rows such that they are not used in MaskedMsaHead.
        'msa': msa_activations[:num_sequences, :, :],
        'msa_first_row': msa_activations[0],
    }

    return output
|
1982 |
+
|
1983 |
+
####################################################################
|
1984 |
+
####################################################################
|
1985 |
+
class SingleTemplateEmbedding(hk.Module):
  """Embeds a single template.

  Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 9+11
  """

  def __init__(self, config, global_config, name='single_template_embedding'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, query_embedding, batch, mask_2d, is_training, scale_rate=1.0):
    """Build the single template embedding.

    Arguments:
      query_embedding: Query pair representation, shape [N_res, N_res, c_z].
      batch: A batch of template features (note the template dimension has been
        stripped out as this module only runs over a single template).
      mask_2d: Padding mask (Note: this doesn't care if a template exists,
        unlike the template_pseudo_beta_mask).
      is_training: Whether the module is in training mode.
      scale_rate: Passed through unchanged to the TemplatePairStack call.

    Returns:
      A template embedding [N_res, N_res, c_z].
    """
    assert mask_2d.dtype == query_embedding.dtype
    cfg = self.config
    dtype = query_embedding.dtype
    num_res = batch['template_aatype'].shape[0]
    num_channels = (
        cfg.template_pair_stack.triangle_attention_ending_node.value_dim)

    # Pairwise mask built from the pseudo-beta mask.
    pseudo_beta_mask = batch['template_pseudo_beta_mask']
    pseudo_beta_mask_2d = pseudo_beta_mask[:, None] * pseudo_beta_mask[None, :]
    pseudo_beta_mask_2d = pseudo_beta_mask_2d.astype(dtype)

    # Distogram: taken from the batch when supplied, otherwise computed from
    # the pseudo-beta positions (soft or hard binning depending on config).
    if "template_dgram" in batch:
      template_dgram = batch["template_dgram"]
    elif cfg.backprop_dgram:
      template_dgram = dgram_from_positions_soft(
          batch['template_pseudo_beta'],
          temp=cfg.backprop_dgram_temp,
          **cfg.dgram_features)
    else:
      template_dgram = dgram_from_positions(
          batch['template_pseudo_beta'], **cfg.dgram_features)
    template_dgram = template_dgram.astype(dtype)

    # Residue types: integer ids get one-hot encoded, anything else is used
    # as given.
    raw_aatype = batch['template_aatype']
    if jnp.issubdtype(raw_aatype.dtype, jnp.integer):
      aatype = jax.nn.one_hot(raw_aatype, 22, axis=-1, dtype=dtype)
    else:
      aatype = raw_aatype

    features = [
        template_dgram,
        pseudo_beta_mask_2d[:, :, None],
        jnp.tile(aatype[None, :, :], [num_res, 1, 1]),
        jnp.tile(aatype[:, None, :], [1, num_res, 1]),
    ]

    # Backbone affine mask: whether the residue has C, CA, N
    # (the pseudo-beta mask above only considers pseudo CB).
    n, ca, c = [residue_constants.atom_order[a] for a in ('N', 'CA', 'C')]
    atom_masks = batch['template_all_atom_masks']
    backbone_mask = atom_masks[..., n] * atom_masks[..., ca] * atom_masks[..., c]
    backbone_mask_2d = backbone_mask[:, None] * backbone_mask[None, :]

    # compute unit_vector (not used by default)
    if cfg.use_template_unit_vector:
      atom_positions = batch['template_all_atom_positions']
      rot, trans = quat_affine.make_transform_from_reference(
          n_xyz=atom_positions[:, n],
          ca_xyz=atom_positions[:, ca],
          c_xyz=atom_positions[:, c])
      affines = quat_affine.QuatAffine(
          quaternion=quat_affine.rot_to_quat(rot, unstack_inputs=True),
          translation=trans,
          rotation=rot,
          unstack_inputs=True)
      points = [jnp.expand_dims(x, axis=-2) for x in affines.translation]
      affine_vec = affines.invert_point(points, extra_dims=1)
      squared_norm = sum([jnp.square(x) for x in affine_vec])
      inv_distance_scalar = jax.lax.rsqrt(1e-6 + squared_norm)
      inv_distance_scalar *= backbone_mask_2d.astype(inv_distance_scalar.dtype)
      unit_vector = [(x * inv_distance_scalar)[..., None] for x in affine_vec]
    else:
      unit_vector = [jnp.zeros((num_res,num_res,1))] * 3

    features.extend([x.astype(dtype) for x in unit_vector])

    backbone_mask_2d = backbone_mask_2d.astype(dtype)
    features.append(backbone_mask_2d[..., None])

    act = jnp.concatenate(features, axis=-1)

    # Mask out non-template regions so we don't get arbitrary values in the
    # distogram for these regions.
    act *= backbone_mask_2d[..., None]

    # Jumper et al. (2021) Suppl. Alg. 2 "Inference" line 9
    embed_2d = common_modules.Linear(
        num_channels, initializer='relu', name='embedding2d')
    act = embed_2d(act)

    # Jumper et al. (2021) Suppl. Alg. 2 "Inference" line 11
    pair_stack = TemplatePairStack(cfg.template_pair_stack, self.global_config)
    act = pair_stack(act, mask_2d, is_training, scale_rate=scale_rate)

    return hk.LayerNorm([-1], True, True, name='output_layer_norm')(act)
|
2090 |
+
|
2091 |
+
|
2092 |
+
class TemplateEmbedding(hk.Module):
  """Embeds a set of templates.

  Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 9-12
  Jumper et al. (2021) Suppl. Alg. 17 "TemplatePointwiseAttention"
  """

  def __init__(self, config, global_config, name='template_embedding'):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(self, query_embedding, template_batch, mask_2d, is_training, scale_rate=1.0):
    """Build TemplateEmbedding module.

    Arguments:
      query_embedding: Query pair representation, shape [N_res, N_res, c_z].
      template_batch: A batch of template features.
      mask_2d: Padding mask (Note: this doesn't care if a template exists,
        unlike the template_pseudo_beta_mask).
      is_training: Whether the module is in training mode.
      scale_rate: Passed through unchanged to every SingleTemplateEmbedding
        call.

    Returns:
      A template embedding [N_res, N_res, c_z].
    """
    cfg = self.config
    gc = self.global_config

    num_templates = template_batch['template_mask'].shape[0]
    num_channels = (
        cfg.template_pair_stack.triangle_attention_ending_node.value_dim)
    num_res = query_embedding.shape[0]
    query_num_channels = query_embedding.shape[-1]

    dtype = query_embedding.dtype
    template_mask = template_batch['template_mask'].astype(dtype)

    # Make sure the weights are shared across templates by constructing the
    # embedder here.
    # Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 9-12
    template_embedder = SingleTemplateEmbedding(cfg, gc)

    def embed_single_template(single_batch):
      return template_embedder(
          query_embedding, single_batch, mask_2d, is_training,
          scale_rate=scale_rate)

    per_template = mapping.sharded_map(
        embed_single_template, in_axes=0)(template_batch)

    # Cross attend from the query to the templates along the residue
    # dimension by flattening everything else into the batch dimension.
    # Jumper et al. (2021) Suppl. Alg. 17 "TemplatePointwiseAttention"
    flat_query = jnp.reshape(
        query_embedding, [num_res * num_res, 1, query_num_channels])

    templates_last = jnp.transpose(per_template, [1, 2, 0, 3])
    flat_templates = jnp.reshape(
        templates_last, [num_res * num_res, num_templates, num_channels])

    # Zero where template_mask is 1, -1e9 where it is 0.
    bias = (1e9 * (template_mask[None, None, None, :] - 1.))

    template_pointwise_attention_module = Attention(
        cfg.attention, gc, query_num_channels)

    embedding = mapping.inference_subbatch(
        template_pointwise_attention_module,
        cfg.subbatch_size,
        batched_args=[flat_query, flat_templates],
        nonbatched_args=[bias],
        low_memory=not is_training)
    embedding = jnp.reshape(embedding, [num_res, num_res, query_num_channels])

    # No gradients if no templates.
    has_template = (jnp.sum(template_mask) > 0.).astype(embedding.dtype)
    embedding *= has_template

    return embedding
|
2164 |
+
####################################################################
|
af_backprop/alphafold/model/prng.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""A collection of utilities surrounding PRNG usage in protein folding."""
|
16 |
+
|
17 |
+
import haiku as hk
|
18 |
+
import jax
|
19 |
+
|
20 |
+
def safe_dropout(*, tensor, safe_key, rate, is_deterministic, is_training):
  """Applies dropout to a tensor.

  The tensor is returned untouched unless `is_training` is true and
  `is_deterministic` is false. Otherwise each element is kept with
  probability `1 - rate` and the result is rescaled by `1 / (1 - rate)`.
  The random mask is drawn from `safe_key.get()`.
  """
  if not is_training or is_deterministic:
    return tensor

  keep_rate = 1.0 - rate
  mask = jax.random.bernoulli(safe_key.get(), keep_rate, shape=tensor.shape)
  return mask * tensor / keep_rate
|
28 |
+
|
29 |
+
class SafeKey:
  """Safety wrapper for PRNG keys.

  A SafeKey may be consumed exactly once, through `get`, `split` or
  `duplicate`. Any later use raises RuntimeError, which guards against
  accidentally reusing the same random key.
  """

  def __init__(self, key):
    self._key = key
    self._used = False

  def _assert_not_used(self):
    """Raise RuntimeError if this key has already been consumed."""
    if self._used:
      raise RuntimeError('Random key has been used previously.')

  def get(self):
    """Consume this wrapper and return the raw key."""
    self._assert_not_used()
    self._used = True
    return self._key

  def split(self, num_keys=2):
    """Consume this wrapper and return a tuple of `num_keys` new SafeKeys."""
    self._assert_not_used()
    self._used = True
    new_keys = jax.random.split(self._key, num_keys)
    # Use jax.tree_util.tree_map: the top-level `jax.tree_map` alias is
    # deprecated and absent from newer JAX releases.
    return jax.tree_util.tree_map(SafeKey, tuple(new_keys))

  def duplicate(self, num_keys=2):
    """Consume this wrapper and return `num_keys` SafeKeys sharing its key."""
    self._assert_not_used()
    self._used = True
    return tuple(SafeKey(self._key) for _ in range(num_keys))
|
55 |
+
|
56 |
+
|
57 |
+
def _safe_key_flatten(safe_key):
|
58 |
+
# Flatten transfers "ownership" to the tree
|
59 |
+
return (safe_key._key,), safe_key._used # pylint: disable=protected-access
|
60 |
+
|
61 |
+
|
62 |
+
def _safe_key_unflatten(aux_data, children):
  """Pytree unflatten: rebuild a SafeKey and restore its `_used` flag."""
  (raw_key,) = children[:1]
  rebuilt = SafeKey(raw_key)
  rebuilt._used = aux_data  # pylint: disable=protected-access
  return rebuilt
|
66 |
+
|
67 |
+
|
68 |
+
# Register SafeKey as a JAX pytree node, with the raw key as its single child
# and the `_used` flag as auxiliary data (see the two helpers above).
jax.tree_util.register_pytree_node(
    SafeKey, _safe_key_flatten, _safe_key_unflatten)
|
70 |
+
|
af_backprop/alphafold/model/quat_affine.py
ADDED
@@ -0,0 +1,459 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Quaternion geometry modules.
|
16 |
+
|
17 |
+
This introduces a representation of coordinate frames that is based around a
|
18 |
+
‘QuatAffine’ object. This object describes an array of coordinate frames.
|
19 |
+
It consists of vectors corresponding to the
|
20 |
+
origin of the frames as well as orientations which are stored in two
|
21 |
+
ways, as unit quaternions as well as rotation matrices.
|
22 |
+
The rotation matrices are derived from the unit quaternions and the two are kept
|
23 |
+
in sync.
|
24 |
+
For an explanation of the relation between unit quaternions and rotations see
|
25 |
+
https://en.wikipedia.org/wiki/Quaternions_and_spatial_rotation
|
26 |
+
|
27 |
+
This representation is used in the model for the backbone frames.
|
28 |
+
|
29 |
+
One important thing to note here, is that while we update both representations
|
30 |
+
the jit compiler is going to ensure that only the parts that are
|
31 |
+
actually used are executed.
|
32 |
+
"""
|
33 |
+
|
34 |
+
|
35 |
+
import functools
|
36 |
+
from typing import Tuple
|
37 |
+
|
38 |
+
import jax
|
39 |
+
import jax.numpy as jnp
|
40 |
+
import numpy as np
|
41 |
+
|
42 |
+
# pylint: disable=bad-whitespace
|
43 |
+
# QUAT_TO_ROT[a, b] is the 3x3 coefficient matrix applied to the product
# q[a] * q[b] when quat_to_rot() assembles a rotation matrix. Only entries
# with a <= b are filled in; the rest stay zero.
QUAT_TO_ROT = np.zeros((4, 4, 3, 3), dtype=np.float32)

QUAT_TO_ROT[0, 0] = [[ 1, 0, 0], [ 0, 1, 0], [ 0, 0, 1]]  # rr
QUAT_TO_ROT[1, 1] = [[ 1, 0, 0], [ 0,-1, 0], [ 0, 0,-1]]  # ii
QUAT_TO_ROT[2, 2] = [[-1, 0, 0], [ 0, 1, 0], [ 0, 0,-1]]  # jj
QUAT_TO_ROT[3, 3] = [[-1, 0, 0], [ 0,-1, 0], [ 0, 0, 1]]  # kk

QUAT_TO_ROT[1, 2] = [[ 0, 2, 0], [ 2, 0, 0], [ 0, 0, 0]]  # ij
QUAT_TO_ROT[1, 3] = [[ 0, 0, 2], [ 0, 0, 0], [ 2, 0, 0]]  # ik
QUAT_TO_ROT[2, 3] = [[ 0, 0, 0], [ 0, 0, 2], [ 0, 2, 0]]  # jk

QUAT_TO_ROT[0, 1] = [[ 0, 0, 0], [ 0, 0,-2], [ 0, 2, 0]]  # ir
QUAT_TO_ROT[0, 2] = [[ 0, 0, 2], [ 0, 0, 0], [-2, 0, 0]]  # jr
QUAT_TO_ROT[0, 3] = [[ 0,-2, 0], [ 2, 0, 0], [ 0, 0, 0]]  # kr

# QUAT_MULTIPLY[a, b, c] is the coefficient of quat1[a] * quat2[b] in
# component c of the product computed by quat_multiply().
QUAT_MULTIPLY = np.zeros((4, 4, 4), dtype=np.float32)
QUAT_MULTIPLY[:, :, 0] = [[ 1, 0, 0, 0],
                          [ 0,-1, 0, 0],
                          [ 0, 0,-1, 0],
                          [ 0, 0, 0,-1]]

QUAT_MULTIPLY[:, :, 1] = [[ 0, 1, 0, 0],
                          [ 1, 0, 0, 0],
                          [ 0, 0, 0, 1],
                          [ 0, 0,-1, 0]]

QUAT_MULTIPLY[:, :, 2] = [[ 0, 0, 1, 0],
                          [ 0, 0, 0,-1],
                          [ 1, 0, 0, 0],
                          [ 0, 1, 0, 0]]

QUAT_MULTIPLY[:, :, 3] = [[ 0, 0, 0, 1],
                          [ 0, 0, 1, 0],
                          [ 0,-1, 0, 0],
                          [ 1, 0, 0, 0]]

# Same table with index 0 of the second operand dropped, so the second operand
# can be a 3-component vector (see quat_multiply_by_vec()).
QUAT_MULTIPLY_BY_VEC = QUAT_MULTIPLY[:, 1:, :]
|
80 |
+
# pylint: enable=bad-whitespace
|
81 |
+
|
82 |
+
|
83 |
+
def rot_to_quat(rot, unstack_inputs=False):
  """Convert rotation matrix to quaternion.

  The quaternion is obtained as the eigenvector belonging to the largest
  eigenvalue of a symmetric 4x4 matrix built from the rotation entries. The
  eigendecomposition (jnp.linalg.eigh) is extremely expensive on the GPU. If
  at all possible, this function should run on the CPU.

  Args:
    rot: rotation matrix (see below for format).
    unstack_inputs: If true, rotation matrix should be shape (..., 3, 3)
      otherwise the rotation matrix should be a list of lists of tensors.

  Returns:
    Quaternion as (..., 4) tensor.
  """
  if unstack_inputs:
    row_major = jnp.moveaxis(rot, -2, 0)
    rot = [jnp.moveaxis(row, -1, 0) for row in row_major]

  (xx, xy, xz), (yx, yy, yz), (zx, zy, zz) = rot

  # pylint: disable=bad-whitespace
  sym_rows = (
      (xx + yy + zz, zy - yz,      xz - zx,      yx - xy),
      (zy - yz,      xx - yy - zz, xy + yx,      xz + zx),
      (xz - zx,      xy + yx,      yy - xx - zz, yz + zy),
      (yx - xy,      xz + zx,      yz + zy,      zz - xx - yy),
  )
  # pylint: enable=bad-whitespace

  stacked_rows = [jnp.stack(row, axis=-1) for row in sym_rows]
  sym = (1./3.) * jnp.stack(stacked_rows, axis=-2)

  # eigh returns eigenvalues in non-decreasing order, so the last eigenvector
  # is the one associated with the largest eigenvalue.
  _, eigenvectors = jnp.linalg.eigh(sym)
  return eigenvectors[..., -1]
|
115 |
+
|
116 |
+
|
117 |
+
def rot_list_to_tensor(rot_list):
  """Stack a 3x3 list of lists of tensors into a (..., 3, 3) tensor."""
  stacked_rows = [jnp.stack(rot_list[i], axis=-1) for i in range(3)]
  return jnp.stack(stacked_rows, axis=-2)
|
124 |
+
|
125 |
+
|
126 |
+
def vec_list_to_tensor(vec_list):
  """Stack a list of component tensors along a new trailing axis."""
  stacked = jnp.stack(vec_list, axis=-1)
  return stacked
|
129 |
+
|
130 |
+
|
131 |
+
def quat_to_rot(normalized_quat):
  """Convert a normalized quaternion to a rotation matrix.

  Returns the rotation as a 3x3 list of lists of tensors.
  """
  coeffs = np.reshape(QUAT_TO_ROT, (4, 4, 9))
  # Same multiplication order as a single chained product:
  # (coeffs * q_a) * q_b, then sum over both quaternion indices.
  weighted = coeffs * normalized_quat[..., :, None, None]
  weighted = weighted * normalized_quat[..., None, :, None]
  rot_tensor = jnp.sum(weighted, axis=(-3, -2))

  flat = jnp.moveaxis(rot_tensor, -1, 0)  # Unstack.
  return [[flat[3 * row + col] for col in range(3)] for row in range(3)]
|
142 |
+
|
143 |
+
|
144 |
+
def quat_multiply_by_vec(quat, vec):
  """Multiply a quaternion by a pure-vector quaternion."""
  # Keep the original association: (table * quat) * vec.
  terms = QUAT_MULTIPLY_BY_VEC * quat[..., :, None, None]
  terms = terms * vec[..., None, :, None]
  return jnp.sum(terms, axis=(-3, -2))
|
151 |
+
|
152 |
+
|
153 |
+
def quat_multiply(quat1, quat2):
  """Multiply a quaternion by another quaternion."""
  # Keep the original association: (table * quat1) * quat2.
  terms = QUAT_MULTIPLY * quat1[..., :, None, None]
  terms = terms * quat2[..., None, :, None]
  return jnp.sum(terms, axis=(-3, -2))
|
160 |
+
|
161 |
+
|
162 |
+
def apply_rot_to_vec(rot, vec, unstack=False):
  """Multiply rotation matrix by a vector.

  `rot` is indexed as rot[row][col]. `vec` is either a sequence of three
  components or, when `unstack` is true, an array whose components are read
  with vec[:, i]. The result is a list of three components.
  """
  if unstack:
    x, y, z = (vec[:, 0], vec[:, 1], vec[:, 2])
  else:
    x, y, z = vec
  return [rot[i][0] * x + rot[i][1] * y + rot[i][2] * z for i in range(3)]
|
171 |
+
|
172 |
+
|
173 |
+
def apply_inverse_rot_to_vec(rot, vec):
  """Multiply the inverse of a rotation matrix by a vector."""
  # Inverse rotation is just transpose, so column j of `rot` acts as row j.
  return [
      rot[0][j] * vec[0] + rot[1][j] * vec[1] + rot[2][j] * vec[2]
      for j in range(3)
  ]
|
179 |
+
|
180 |
+
|
181 |
+
class QuatAffine(object):
  """Affine transformation represented by quaternion and vector.

  Instances store three attributes: `quaternion` (array or None), `rotation`
  (3x3 nested list of arrays) and `translation` (list of 3 arrays).
  """

  def __init__(self, quaternion, translation, rotation=None, normalize=True,
               unstack_inputs=False):
    """Initialize from quaternion and translation.

    Args:
      quaternion: Rotation represented by a quaternion, to be applied
        before translation.  Must be a unit quaternion unless normalize==True.
        May be None when `rotation` is supplied.
      translation: Translation represented as a vector.
      rotation: Same rotation as the quaternion, represented as a (..., 3, 3)
        tensor.  If None, rotation will be calculated from the quaternion.
      normalize: If True, l2 normalize the quaternion on input.
      unstack_inputs: If True, translation is a vector with last component 3
    """
    if quaternion is not None:
      assert quaternion.shape[-1] == 4

    if unstack_inputs:
      if rotation is not None:
        # Split the trailing (3, 3) axes into a nested list of arrays.
        rotation = [jnp.moveaxis(x, -1, 0)
                    for x in jnp.moveaxis(rotation, -2, 0)]
      translation = jnp.moveaxis(translation, -1, 0)  # Unstack.

    if normalize and quaternion is not None:
      quaternion = quaternion / jnp.linalg.norm(quaternion, axis=-1,
                                                keepdims=True)

    if rotation is None:
      rotation = quat_to_rot(quaternion)

    self.quaternion = quaternion
    self.rotation = [list(row) for row in rotation]
    self.translation = list(translation)

    assert all(len(row) == 3 for row in self.rotation)
    assert len(self.translation) == 3

  def to_tensor(self):
    """Concatenate quaternion and translation along the last axis."""
    return jnp.concatenate(
        [self.quaternion] +
        [jnp.expand_dims(x, axis=-1) for x in self.translation],
        axis=-1)

  def apply_tensor_fn(self, tensor_fn):
    """Return a new QuatAffine with tensor_fn applied (e.g. stop_gradient)."""
    return QuatAffine(
        tensor_fn(self.quaternion),
        [tensor_fn(x) for x in self.translation],
        rotation=[[tensor_fn(x) for x in row] for row in self.rotation],
        normalize=False)

  def apply_rotation_tensor_fn(self, tensor_fn):
    """Return a new QuatAffine with tensor_fn applied to the rotation part."""
    return QuatAffine(
        tensor_fn(self.quaternion),
        [x for x in self.translation],
        rotation=[[tensor_fn(x) for x in row] for row in self.rotation],
        normalize=False)

  def scale_translation(self, position_scale):
    """Return a new quat affine with a different scale for translation."""
    return QuatAffine(
        self.quaternion,
        [x * position_scale for x in self.translation],
        rotation=[[x for x in row] for row in self.rotation],
        normalize=False)

  @classmethod
  def from_tensor(cls, tensor, normalize=False):
    """Build a QuatAffine from a tensor whose last axis is (quat[4], t[3])."""
    quaternion, tx, ty, tz = jnp.split(tensor, [4, 5, 6], axis=-1)
    return cls(quaternion,
               [tx[..., 0], ty[..., 0], tz[..., 0]],
               normalize=normalize)

  def pre_compose(self, update):
    """Return a new QuatAffine which applies the transformation update first.

    Args:
      update: Length-6 vector. 3-vector of x, y, and z such that the quaternion
        update is (1, x, y, z) and zero for the 3-vector is the identity
        quaternion. 3-vector for translation concatenated.

    Returns:
      New QuatAffine object.
    """
    vector_quaternion_update, x, y, z = jnp.split(update, [3, 4, 5], axis=-1)
    trans_update = [jnp.squeeze(x, axis=-1),
                    jnp.squeeze(y, axis=-1),
                    jnp.squeeze(z, axis=-1)]

    new_quaternion = (self.quaternion +
                      quat_multiply_by_vec(self.quaternion,
                                           vector_quaternion_update))

    # The translation update is rotated by the current rotation before it is
    # added to the current translation.
    trans_update = apply_rot_to_vec(self.rotation, trans_update)
    new_translation = [
        self.translation[0] + trans_update[0],
        self.translation[1] + trans_update[1],
        self.translation[2] + trans_update[2]]

    return QuatAffine(new_quaternion, new_translation)

  def _expanded_rotation_and_translation(self, extra_dims):
    """Return rotation and translation with `extra_dims` trailing unit axes."""
    # jax.tree_util.tree_map is used instead of the top-level alias
    # jax.tree_map, which is deprecated and missing from recent JAX releases.
    expand_fn = functools.partial(jnp.expand_dims, axis=-1)
    rotation = self.rotation
    translation = self.translation
    for _ in range(extra_dims):
      rotation = jax.tree_util.tree_map(expand_fn, rotation)
      translation = jax.tree_util.tree_map(expand_fn, translation)
    return rotation, translation

  def apply_to_point(self, point, extra_dims=0):
    """Apply affine to a point.

    Args:
      point: List of 3 tensors to apply affine.
      extra_dims:  Number of dimensions at the end of the transformed_point
        shape that are not present in the rotation and translation.  The most
        common use is rotation N points at once with extra_dims=1 for use in a
        network.

    Returns:
      Transformed point after applying affine.
    """
    rotation, translation = self._expanded_rotation_and_translation(extra_dims)

    rot_point = apply_rot_to_vec(rotation, point)
    return [
        rot_point[0] + translation[0],
        rot_point[1] + translation[1],
        rot_point[2] + translation[2]]

  def invert_point(self, transformed_point, extra_dims=0):
    """Apply inverse of transformation to a point.

    Args:
      transformed_point: List of 3 tensors to apply affine
      extra_dims:  Number of dimensions at the end of the transformed_point
        shape that are not present in the rotation and translation.  The most
        common use is rotation N points at once with extra_dims=1 for use in a
        network.

    Returns:
      Point obtained by undoing the translation and then the rotation.
    """
    rotation, translation = self._expanded_rotation_and_translation(extra_dims)

    rot_point = [
        transformed_point[0] - translation[0],
        transformed_point[1] - translation[1],
        transformed_point[2] - translation[2]]

    return apply_inverse_rot_to_vec(rotation, rot_point)

  def __repr__(self):
    return 'QuatAffine(%r, %r)' % (self.quaternion, self.translation)
|
342 |
+
|
343 |
+
|
344 |
+
def _multiply(a, b):
  """Multiply two 3x3 matrices given as [row][col]-indexable structures.

  Args:
    a: Left 3x3 matrix.
    b: Right 3x3 matrix.

  Returns:
    Stacked array whose first two axes index the row and column of a @ b.
  """

  def entry(i, j):
    # Dot product of row i of `a` with column j of `b`.
    return a[i][0]*b[0][j] + a[i][1]*b[1][j] + a[i][2]*b[2][j]

  return jnp.stack([
      jnp.array([entry(i, 0), entry(i, 1), entry(i, 2)])
      for i in range(3)])
|
357 |
+
|
358 |
+
|
359 |
+
def make_canonical_transform(
    n_xyz: jnp.ndarray,
    ca_xyz: jnp.ndarray,
    c_xyz: jnp.ndarray) -> Tuple[jnp.ndarray, jnp.ndarray]:
  """Returns translation and rotation matrices to canonicalize residue atoms.

  Note that this method does not take care of symmetries. If you provide the
  atom positions in the non-standard way, the N atom will end up not at
  [-0.527250, 1.359329, 0.0] but instead at [-0.527250, -1.359329, 0.0]. You
  need to take care of such cases in your code.

  Args:
    n_xyz: An array of shape [batch, 3] of nitrogen xyz coordinates.
    ca_xyz: An array of shape [batch, 3] of carbon alpha xyz coordinates.
    c_xyz: An array of shape [batch, 3] of carbon xyz coordinates.

  Returns:
    A tuple (translation, rotation) where:
      translation is an array of shape [batch, 3] defining the translation.
      rotation is an array of shape [batch, 3, 3] defining the rotation.
    After applying the translation and rotation to all atoms in a residue:
      * All atoms will be shifted so that CA is at the origin,
      * All atoms will be rotated so that C is at the x-axis,
      * All atoms will be rotated so that N is in the xy plane.
  """
  assert len(n_xyz.shape) == 2, n_xyz.shape
  assert n_xyz.shape[-1] == 3, n_xyz.shape
  assert n_xyz.shape == ca_xyz.shape == c_xyz.shape, (
      n_xyz.shape, ca_xyz.shape, c_xyz.shape)

  # Step 1: translate so that CA sits at the origin.
  translation = -ca_xyz
  n_xyz = n_xyz + translation
  c_xyz = c_xyz + translation

  # Step 2: rotate C onto the x-axis, first around z, then around y.
  c_x, c_y, c_z = c_xyz[:, 0], c_xyz[:, 1], c_xyz[:, 2]

  # Rotation by angle c1 in the x-y plane (around the z-axis).  The 1e-20
  # term keeps the denominator non-zero.
  norm_xy = jnp.sqrt(1e-20 + c_x**2 + c_y**2)
  sin_c1 = -c_y / norm_xy
  cos_c1 = c_x / norm_xy
  zeros = jnp.zeros_like(sin_c1)
  ones = jnp.ones_like(sin_c1)
  c1_rot_matrix = jnp.stack([jnp.array([cos_c1, -sin_c1, zeros]),
                             jnp.array([sin_c1, cos_c1, zeros]),
                             jnp.array([zeros, zeros, ones])])

  # Rotation by angle c2 in the x-z plane (around the y-axis).
  norm_xyz = jnp.sqrt(1e-20 + c_x**2 + c_y**2 + c_z**2)
  sin_c2 = c_z / norm_xyz
  cos_c2 = jnp.sqrt(c_x**2 + c_y**2) / norm_xyz
  c2_rot_matrix = jnp.stack([jnp.array([cos_c2, zeros, sin_c2]),
                             jnp.array([zeros, ones, zeros]),
                             jnp.array([-sin_c2, zeros, cos_c2])])

  c_rot_matrix = _multiply(c2_rot_matrix, c1_rot_matrix)
  n_xyz = jnp.stack(apply_rot_to_vec(c_rot_matrix, n_xyz, unstack=True)).T

  # Step 3: rotate around the x-axis so that N lands in the x-y plane.
  n_y, n_z = n_xyz[:, 1], n_xyz[:, 2]
  norm_yz = jnp.sqrt(1e-20 + n_y**2 + n_z**2)
  sin_n = -n_z / norm_yz
  cos_n = n_y / norm_yz
  n_rot_matrix = jnp.stack([jnp.array([ones, zeros, zeros]),
                            jnp.array([zeros, cos_n, -sin_n]),
                            jnp.array([zeros, sin_n, cos_n])])

  # The composed matrix carries the batch axis last; move it to the front.
  full_rotation = _multiply(n_rot_matrix, c_rot_matrix)
  return (translation, jnp.transpose(full_rotation, [2, 0, 1]))
|
429 |
+
|
430 |
+
|
431 |
+
def make_transform_from_reference(
    n_xyz: jnp.ndarray,
    ca_xyz: jnp.ndarray,
    c_xyz: jnp.ndarray) -> Tuple[jnp.ndarray, jnp.ndarray]:
  """Returns rotation and translation matrices to convert from reference.

  Note that this method does not take care of symmetries. If you provide the
  atom positions in the non-standard way, the N atom will end up not at
  [-0.527250, 1.359329, 0.0] but instead at [-0.527250, -1.359329, 0.0]. You
  need to take care of such cases in your code.

  Args:
    n_xyz: An array of shape [batch, 3] of nitrogen xyz coordinates.
    ca_xyz: An array of shape [batch, 3] of carbon alpha xyz coordinates.
    c_xyz: An array of shape [batch, 3] of carbon xyz coordinates.

  Returns:
    A tuple (rotation, translation) where:
      rotation is an array of shape [batch, 3, 3] defining the rotation.
      translation is an array of shape [batch, 3] defining the translation.
    After applying the translation and rotation to the reference backbone,
    the coordinates will be approximately equal to the input coordinates.

    The order of translation and rotation differs from make_canonical_transform
    because the rotation from this function should be applied before the
    translation, unlike make_canonical_transform.
  """
  translation, rotation = make_canonical_transform(n_xyz, ca_xyz, c_xyz)
  # Invert the canonical transform: transpose each rotation matrix and negate
  # the translation.  jnp.transpose is used so the result stays a JAX array
  # without relying on NumPy dispatching to the array's own method.
  return jnp.transpose(rotation, (0, 2, 1)), -translation
|
af_backprop/alphafold/model/r3.py
ADDED
@@ -0,0 +1,320 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Transformations for 3D coordinates.
|
16 |
+
|
17 |
+
This Module contains objects for representing Vectors (Vecs), Rotation Matrices
|
18 |
+
(Rots) and proper Rigid transformation (Rigids). These are represented as
|
19 |
+
named tuples with arrays for each entry, for example a set of
|
20 |
+
[N, M] points would be represented as a Vecs object with arrays of shape [N, M]
|
21 |
+
for x, y and z.
|
22 |
+
|
23 |
+
This is being done to improve readability by making it very clear what objects
|
24 |
+
are geometric objects rather than relying on comments and array shapes.
|
25 |
+
Another reason for this is to avoid using matrix
|
26 |
+
multiplication primitives like matmul or einsum, on modern accelerator hardware
|
27 |
+
these can end up on specialized cores such as tensor cores on GPU or the MXU on
|
28 |
+
cloud TPUs, this often involves lower computational precision which can be
|
29 |
+
problematic for coordinate geometry. Also these cores are typically optimized
|
30 |
+
for larger matrices than 3 dimensional, this code is written to avoid any
|
31 |
+
unintended use of these cores on both GPUs and TPUs.
|
32 |
+
"""
|
33 |
+
|
34 |
+
import collections
|
35 |
+
from typing import List
|
36 |
+
from alphafold.model import quat_affine
|
37 |
+
import jax.numpy as jnp
|
38 |
+
import tree
|
39 |
+
|
40 |
+
# Array of 3-component vectors, stored as individual array for
|
41 |
+
# each component.
|
42 |
+
Vecs = collections.namedtuple('Vecs', ['x', 'y', 'z'])
|
43 |
+
|
44 |
+
# Array of 3x3 rotation matrices, stored as individual array for
|
45 |
+
# each component.
|
46 |
+
Rots = collections.namedtuple('Rots', ['xx', 'xy', 'xz',
|
47 |
+
'yx', 'yy', 'yz',
|
48 |
+
'zx', 'zy', 'zz'])
|
49 |
+
# Array of rigid 3D transformations, stored as array of rotations and
|
50 |
+
# array of translations.
|
51 |
+
Rigids = collections.namedtuple('Rigids', ['rot', 'trans'])
|
52 |
+
|
53 |
+
|
54 |
+
def squared_difference(x, y):
  """Return the element-wise square of (x - y)."""
  delta = x - y
  return jnp.square(delta)
|
56 |
+
|
57 |
+
|
58 |
+
def invert_rigids(r: Rigids) -> Rigids:
  """Computes group inverse of rigid transformations 'r'."""
  rot_inverse = invert_rots(r.rot)
  # The inverse translation is the negated, inversely rotated translation.
  rotated = rots_mul_vecs(rot_inverse, r.trans)
  trans_inverse = Vecs(-rotated.x, -rotated.y, -rotated.z)
  return Rigids(rot_inverse, trans_inverse)
|
64 |
+
|
65 |
+
|
66 |
+
def invert_rots(m: Rots) -> Rots:
  """Computes inverse of rotations 'm' by transposing each matrix."""
  return Rots(xx=m.xx, xy=m.yx, xz=m.zx,
              yx=m.xy, yy=m.yy, yz=m.zy,
              zx=m.xz, zy=m.yz, zz=m.zz)
|
71 |
+
|
72 |
+
|
73 |
+
def rigids_from_3_points(
    point_on_neg_x_axis: Vecs,  # shape (...)
    origin: Vecs,  # shape (...)
    point_on_xy_plane: Vecs,  # shape (...)
) -> Rigids:  # shape (...)
  """Create Rigids from 3 points.

  Jumper et al. (2021) Suppl. Alg. 21 "rigidFrom3Points"
  This creates a set of rigid transformations from 3 points by Gram Schmidt
  orthogonalization.

  Args:
    point_on_neg_x_axis: Vecs corresponding to points on the negative x axis
    origin: Origin of resulting rigid transformations
    point_on_xy_plane: Vecs corresponding to points in the xy plane
  Returns:
    Rigid transformations from global frame to local frames derived from
    the input points.
  """
  x_axis_direction = vecs_sub(origin, point_on_neg_x_axis)
  xy_plane_direction = vecs_sub(point_on_xy_plane, origin)
  rotation = rots_from_two_vecs(
      e0_unnormalized=x_axis_direction,
      e1_unnormalized=xy_plane_direction)
  return Rigids(rot=rotation, trans=origin)
|
97 |
+
|
98 |
+
|
99 |
+
def rigids_from_list(l: List[jnp.ndarray]) -> Rigids:
  """Converts flat list of arrays to rigid transformations.

  The first 9 entries become the rotation, the last 3 the translation.
  """
  assert len(l) == 12
  rot_components, trans_components = l[:9], l[9:]
  return Rigids(Rots(*rot_components), Vecs(*trans_components))
|
103 |
+
|
104 |
+
|
105 |
+
def rigids_from_quataffine(a: quat_affine.QuatAffine) -> Rigids:
  """Converts QuatAffine object to the corresponding Rigids object."""
  # Flatten the nested rotation into the 9 components expected by Rots.
  flat_rotation = tree.flatten(a.rotation)
  return Rigids(Rots(*flat_rotation), Vecs(*a.translation))
|
109 |
+
|
110 |
+
|
111 |
+
def rigids_from_tensor4x4(
    m: jnp.ndarray  # shape (..., 4, 4)
) -> Rigids:  # shape (...)
  """Construct Rigids object from an 4x4 array.

  Here the 4x4 is representing the transformation in homogeneous coordinates.

  Args:
    m: Array representing transformations in homogeneous coordinates.
  Returns:
    Rigids object corresponding to transformations m
  """
  assert m.shape[-1] == 4
  assert m.shape[-2] == 4
  # Upper-left 3x3 block is the rotation; the last column is the translation.
  rot = Rots(*[m[..., row, col] for row in range(3) for col in range(3)])
  trans = Vecs(*[m[..., row, 3] for row in range(3)])
  return Rigids(rot, trans)
|
130 |
+
|
131 |
+
|
132 |
+
def rigids_from_tensor_flat9(
    m: jnp.ndarray  # shape (..., 9)
) -> Rigids:  # shape (...)
  """Flat9 encoding: first two columns of rotation matrix + translation."""
  assert m.shape[-1] == 9
  parts = [m[..., i] for i in range(9)]
  e0 = Vecs(*parts[0:3])
  e1 = Vecs(*parts[3:6])
  trans = Vecs(*parts[6:9])
  # The rotation is rebuilt from the two stored vectors.
  return Rigids(rot=rots_from_two_vecs(e0, e1), trans=trans)
|
142 |
+
|
143 |
+
|
144 |
+
def rigids_from_tensor_flat12(
    m: jnp.ndarray  # shape (..., 12)
) -> Rigids:  # shape (...)
  """Flat12 encoding: rotation matrix (9 floats) + translation (3 floats)."""
  assert m.shape[-1] == 12
  # Bring the 12 components to the leading axis so they can be unpacked.
  components = jnp.moveaxis(m, -1, 0)
  rot_components = components[:9]
  trans_components = components[9:]
  return Rigids(Rots(*rot_components), Vecs(*trans_components))
|
151 |
+
|
152 |
+
|
153 |
+
def rigids_mul_rigids(a: Rigids, b: Rigids) -> Rigids:
  """Group composition of Rigids 'a' and 'b'."""
  composed_rot = rots_mul_rots(a.rot, b.rot)
  # b's translation is rotated by a's rotation and then offset by a's
  # translation.
  rotated_b_trans = rots_mul_vecs(a.rot, b.trans)
  composed_trans = vecs_add(a.trans, rotated_b_trans)
  return Rigids(composed_rot, composed_trans)
|
158 |
+
|
159 |
+
|
160 |
+
def rigids_mul_rots(r: Rigids, m: Rots) -> Rigids:
  """Compose rigid transformations 'r' with rotations 'm'.

  Only the rotation part changes; the translation of 'r' is kept as is.
  """
  composed_rot = rots_mul_rots(r.rot, m)
  return Rigids(composed_rot, r.trans)
|
163 |
+
|
164 |
+
|
165 |
+
def rigids_mul_vecs(r: Rigids, v: Vecs) -> Vecs:
  """Apply rigid transforms 'r' to points 'v' (rotate, then translate)."""
  rotated = rots_mul_vecs(r.rot, v)
  return vecs_add(rotated, r.trans)
|
168 |
+
|
169 |
+
|
170 |
+
def rigids_to_list(r: Rigids) -> List[jnp.ndarray]:
  """Turn Rigids into flat list, inverse of 'rigids_from_list'.

  Rotation components come first, followed by the translation components.
  """
  return [*r.rot, *r.trans]
|
173 |
+
|
174 |
+
|
175 |
+
def rigids_to_quataffine(r: Rigids) -> quat_affine.QuatAffine:
  """Convert Rigids r into QuatAffine, inverse of 'rigids_from_quataffine'.

  No quaternion is computed: the QuatAffine is built with quaternion=None and
  the rotation matrix passed explicitly.
  """
  rot, trans = r.rot, r.trans
  rotation_rows = [[rot.xx, rot.xy, rot.xz],
                   [rot.yx, rot.yy, rot.yz],
                   [rot.zx, rot.zy, rot.zz]]
  return quat_affine.QuatAffine(
      quaternion=None,
      rotation=rotation_rows,
      translation=[trans.x, trans.y, trans.z])
|
183 |
+
|
184 |
+
|
185 |
+
def rigids_to_tensor_flat9(
    r: Rigids  # shape (...)
) -> jnp.ndarray:  # shape (..., 9)
  """Flat9 encoding: first two columns of rotation matrix + translation."""
  rot = r.rot
  first_two_columns = [rot.xx, rot.yx, rot.zx, rot.xy, rot.yy, rot.zy]
  return jnp.stack(first_two_columns + list(r.trans), axis=-1)
|
192 |
+
|
193 |
+
|
194 |
+
def rigids_to_tensor_flat12(
    r: Rigids  # shape (...)
) -> jnp.ndarray:  # shape (..., 12)
  """Flat12 encoding: rotation matrix (9 floats) + translation (3 floats)."""
  components = [*r.rot, *r.trans]
  return jnp.stack(components, axis=-1)
|
199 |
+
|
200 |
+
|
201 |
+
def rots_from_tensor3x3(
    m: jnp.ndarray,  # shape (..., 3, 3)
) -> Rots:  # shape (...)
  """Convert rotations represented as (3, 3) array to Rots."""
  assert m.shape[-1] == 3
  assert m.shape[-2] == 3
  # Components are read row by row, matching the field order of Rots.
  return Rots(*[m[..., row, col] for row in range(3) for col in range(3)])
|
210 |
+
|
211 |
+
|
212 |
+
def rots_from_two_vecs(e0_unnormalized: Vecs, e1_unnormalized: Vecs) -> Rots:
  """Create rotation matrices from unnormalized vectors for the x and y-axes.

  This creates a rotation matrix from two vectors using Gram-Schmidt
  orthogonalization.

  Args:
    e0_unnormalized: vectors lying along x-axis of resulting rotation
    e1_unnormalized: vectors lying in xy-plane of resulting rotation
  Returns:
    Rotations resulting from Gram-Schmidt procedure.
  """
  # First basis vector: normalized e0.
  e0 = vecs_robust_normalize(e0_unnormalized)

  # Second basis vector: remove the component of e1 along e0, then normalize.
  overlap = vecs_dot_vecs(e1_unnormalized, e0)
  e1_orthogonal = Vecs(e1_unnormalized.x - overlap * e0.x,
                       e1_unnormalized.y - overlap * e0.y,
                       e1_unnormalized.z - overlap * e0.z)
  e1 = vecs_robust_normalize(e1_orthogonal)

  # Third basis vector: cross product of the first two.
  e2 = vecs_cross_vecs(e0, e1)

  # The basis vectors form the columns of the rotation matrix.
  return Rots(xx=e0.x, xy=e1.x, xz=e2.x,
              yx=e0.y, yy=e1.y, yz=e2.y,
              zx=e0.z, zy=e1.z, zz=e2.z)
|
238 |
+
|
239 |
+
|
240 |
+
def rots_mul_rots(a: Rots, b: Rots) -> Rots:
  """Composition of rotations 'a' and 'b'."""
  # Rotate each column of 'b' by 'a'; the results are the columns of a * b.
  columns_of_b = (Vecs(b.xx, b.yx, b.zx),
                  Vecs(b.xy, b.yy, b.zy),
                  Vecs(b.xz, b.yz, b.zz))
  c0, c1, c2 = [rots_mul_vecs(a, column) for column in columns_of_b]
  return Rots(xx=c0.x, xy=c1.x, xz=c2.x,
              yx=c0.y, yy=c1.y, yz=c2.y,
              zx=c0.z, zy=c1.z, zz=c2.z)
|
246 |
+
|
247 |
+
|
248 |
+
def rots_mul_vecs(m: Rots, v: Vecs) -> Vecs:
  """Apply rotations 'm' to vectors 'v'."""
  x, y, z = v.x, v.y, v.z
  rotated_x = m.xx * x + m.xy * y + m.xz * z
  rotated_y = m.yx * x + m.yy * y + m.yz * z
  rotated_z = m.zx * x + m.zy * y + m.zz * z
  return Vecs(rotated_x, rotated_y, rotated_z)
|
253 |
+
|
254 |
+
|
255 |
+
def vecs_add(v1: Vecs, v2: Vecs) -> Vecs:
  """Add two vectors 'v1' and 'v2' component-wise."""
  return Vecs(x=v1.x + v2.x,
              y=v1.y + v2.y,
              z=v1.z + v2.z)
|
258 |
+
|
259 |
+
|
260 |
+
def vecs_dot_vecs(v1: Vecs, v2: Vecs) -> jnp.ndarray:
  """Dot product of vectors 'v1' and 'v2'."""
  x_term = v1.x * v2.x
  y_term = v1.y * v2.y
  z_term = v1.z * v2.z
  return x_term + y_term + z_term
|
263 |
+
|
264 |
+
|
265 |
+
def vecs_cross_vecs(v1: Vecs, v2: Vecs) -> Vecs:
  """Cross product of vectors 'v1' and 'v2'."""
  cross_x = v1.y * v2.z - v1.z * v2.y
  cross_y = v1.z * v2.x - v1.x * v2.z
  cross_z = v1.x * v2.y - v1.y * v2.x
  return Vecs(cross_x, cross_y, cross_z)
|
270 |
+
|
271 |
+
|
272 |
+
def vecs_from_tensor(x: jnp.ndarray  # shape (..., 3)
                    ) -> Vecs:  # shape (...)
  """Converts from tensor of shape (..., 3) to Vecs.

  The last axis is split into the x, y and z components.
  """
  assert x.shape[-1] == 3
  return Vecs(*[x[..., axis] for axis in range(3)])
|
278 |
+
|
279 |
+
|
280 |
+
def vecs_robust_normalize(v: Vecs, epsilon: float = 1e-8) -> Vecs:
  """Normalizes vectors 'v'.

  Args:
    v: vectors to be normalized.
    epsilon: small regularizer added to squared norm before taking square root.
  Returns:
    normalized vectors
  """
  norms = vecs_robust_norm(v, epsilon)
  return Vecs(*[component / norms for component in (v.x, v.y, v.z)])
|
291 |
+
|
292 |
+
|
293 |
+
def vecs_robust_norm(v: Vecs, epsilon: float = 1e-8) -> jnp.ndarray:
  """Computes norm of vectors 'v'.

  Args:
    v: vectors whose norm is computed.
    epsilon: small regularizer added to squared norm before taking square root.
  Returns:
    norm of 'v'
  """
  squared_norm = jnp.square(v.x) + jnp.square(v.y) + jnp.square(v.z)
  return jnp.sqrt(squared_norm + epsilon)
|
303 |
+
|
304 |
+
|
305 |
+
def vecs_sub(v1: Vecs, v2: Vecs) -> Vecs:
  """Computes v1 - v2 component-wise."""
  return Vecs(x=v1.x - v2.x,
              y=v1.y - v2.y,
              z=v1.z - v2.z)
|
308 |
+
|
309 |
+
|
310 |
+
def vecs_squared_distance(v1: Vecs, v2: Vecs) -> jnp.ndarray:
  """Computes squared euclidean distance between 'v1' and 'v2'."""
  dx_squared = squared_difference(v1.x, v2.x)
  dy_squared = squared_difference(v1.y, v2.y)
  dz_squared = squared_difference(v1.z, v2.z)
  return dx_squared + dy_squared + dz_squared
|
315 |
+
|
316 |
+
|
317 |
+
def vecs_to_tensor(v: Vecs  # shape (...)
                  ) -> jnp.ndarray:  # shape(..., 3)
  """Converts 'v' to tensor with shape 3, inverse of 'vecs_from_tensor'."""
  components = [v.x, v.y, v.z]
  # Stack the components along a new trailing axis.
  return jnp.stack(components, axis=-1)
|
af_backprop/alphafold/model/tf/__init__.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
"""Alphafold model TensorFlow code."""
|
af_backprop/alphafold/model/tf/data_transforms.py
ADDED
@@ -0,0 +1,625 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Data for AlphaFold."""
|
16 |
+
|
17 |
+
from alphafold.common import residue_constants
|
18 |
+
from alphafold.model.tf import shape_helpers
|
19 |
+
from alphafold.model.tf import shape_placeholders
|
20 |
+
from alphafold.model.tf import utils
|
21 |
+
import numpy as np
|
22 |
+
import tensorflow.compat.v1 as tf
|
23 |
+
|
24 |
+
# Pylint gets confused by the curry1 decorator because it changes the number
|
25 |
+
# of arguments to the function.
|
26 |
+
# pylint:disable=no-value-for-parameter
|
27 |
+
|
28 |
+
|
29 |
+
NUM_RES = shape_placeholders.NUM_RES
|
30 |
+
NUM_MSA_SEQ = shape_placeholders.NUM_MSA_SEQ
|
31 |
+
NUM_EXTRA_SEQ = shape_placeholders.NUM_EXTRA_SEQ
|
32 |
+
NUM_TEMPLATES = shape_placeholders.NUM_TEMPLATES
|
33 |
+
|
34 |
+
|
35 |
+
def cast_64bit_ints(protein):
  """Replace every int64 feature of `protein` with an int32 cast, in place.

  Args:
    protein: dict mapping feature names to values that expose a `dtype`.

  Returns:
    The same `protein` dict; entries whose dtype equals tf.int64 have been
    replaced by `tf.cast(value, tf.int32)`, all other entries are untouched.
  """
  for name, feature in protein.items():
    is_int64 = feature.dtype == tf.int64
    if is_int64:
      # Only existing keys are reassigned, so the dict size never changes
      # while it is being iterated.
      protein[name] = tf.cast(feature, tf.int32)
  return protein
|
41 |
+
|
42 |
+
|
43 |
+
_MSA_FEATURE_NAMES = [
|
44 |
+
'msa', 'deletion_matrix', 'msa_mask', 'msa_row_mask', 'bert_mask',
|
45 |
+
'true_msa'
|
46 |
+
]
|
47 |
+
|
48 |
+
|
49 |
+
def make_seq_mask(protein):
  """Add `seq_mask`: float32 ones with the same shape as `protein['aatype']`.

  Returns:
    The same `protein` dict, with `seq_mask` set.
  """
  aatype_shape = shape_helpers.shape_list(protein['aatype'])
  protein['seq_mask'] = tf.ones(aatype_shape, dtype=tf.float32)
  return protein
|
53 |
+
|
54 |
+
|
55 |
+
def make_template_mask(protein):
  """Add `template_mask`: float32 ones shaped like `template_domain_names`.

  Returns:
    The same `protein` dict, with `template_mask` set.
  """
  names_shape = shape_helpers.shape_list(protein['template_domain_names'])
  protein['template_mask'] = tf.ones(names_shape, dtype=tf.float32)
  return protein
|
60 |
+
|
61 |
+
|
62 |
+
def curry1(f):
  """Supply all arguments but the first.

  Decorating `f(x, *args, **kwargs)` yields a function that is called with
  `(*args, **kwargs)` and returns a one-argument callable; calling that with
  `x` finally invokes `f(x, *args, **kwargs)`.
  """

  def fc(*args, **kwargs):
    def apply_to_first(x):
      return f(x, *args, **kwargs)

    return apply_to_first

  return fc
|
69 |
+
|
70 |
+
|
71 |
+
@curry1
def add_distillation_flag(protein, distillation):
  """Store `distillation` as the scalar float32 feature `is_distillation`.

  Args:
    protein: feature dict to update in place.
    distillation: value converted with `float()` before being stored.

  Returns:
    The same `protein` dict.
  """
  flag_value = float(distillation)
  protein['is_distillation'] = tf.constant(
      flag_value, shape=[], dtype=tf.float32)
  return protein
|
77 |
+
|
78 |
+
|
79 |
+
def make_all_atom_aatype(protein):
  """Expose `protein['aatype']` under the extra key `all_atom_aatype`.

  Returns:
    The same `protein` dict; both keys refer to the same object.
  """
  aatype = protein['aatype']
  protein['all_atom_aatype'] = aatype
  return protein
|
82 |
+
|
83 |
+
|
84 |
+
def fix_templates_aatype(protein):
  """Fixes aatype encoding of templates.

  `template_aatype` is first reduced along its last axis with argmax (int32
  indices) and the indices are then remapped through
  `residue_constants.MAP_HHBLITS_AATYPE_TO_OUR_AATYPE`.

  Returns:
    The same `protein` dict with `template_aatype` replaced.
  """
  # Map one-hot to indices.
  template_indices = tf.argmax(
      protein['template_aatype'], output_type=tf.int32, axis=-1)
  protein['template_aatype'] = template_indices

  # Map hhsearch-aatype to our aatype.
  hhblits_to_ours = tf.constant(
      residue_constants.MAP_HHBLITS_AATYPE_TO_OUR_AATYPE, dtype=tf.int32)
  protein['template_aatype'] = tf.gather(
      params=hhblits_to_ours, indices=protein['template_aatype'])
  return protein
|
95 |
+
|
96 |
+
|
97 |
+
def correct_msa_restypes(protein):
  """Correct MSA restype to have the same order as residue_constants.

  `msa` entries are remapped through
  `residue_constants.MAP_HHBLITS_AATYPE_TO_OUR_AATYPE`, and every feature
  whose key contains 'profile' has its last axis permuted accordingly.

  Returns:
    The same `protein` dict, updated in place.
  """
  mapping = residue_constants.MAP_HHBLITS_AATYPE_TO_OUR_AATYPE
  mapping_tensor = tf.constant(mapping, dtype=protein['msa'].dtype)
  protein['msa'] = tf.gather(mapping_tensor, protein['msa'], axis=0)

  # Permutation matrix with a 1 at (old index, new index).
  permutation = np.zeros((22, 22), dtype=np.float32)
  permutation[range(len(mapping)), mapping] = 1.

  for key in protein:
    # Include both hhblits and psiblast profiles
    if 'profile' not in key:
      continue
    num_dim = protein[key].shape.as_list()[-1]
    assert num_dim in [20, 21, 22], (
        'num_dim for %s out of expected range: %s' % (key, num_dim))
    protein[key] = tf.tensordot(
        protein[key], permutation[:num_dim, :num_dim], 1)
  return protein
|
113 |
+
|
114 |
+
|
115 |
+
def squeeze_features(protein):
  """Remove singleton and repeated dimensions in protein features.

  `aatype` is reduced along its last axis with argmax (int32). A fixed list of
  features then loses its last axis when that axis is statically 1, and
  `seq_length` / `num_alignments` are replaced by their first element.

  Returns:
    The same `protein` dict, updated in place.
  """
  protein['aatype'] = tf.argmax(
      protein['aatype'], axis=-1, output_type=tf.int32)

  squeezable = [
      'domain_name', 'msa', 'num_alignments', 'seq_length', 'sequence',
      'superfamily', 'deletion_matrix', 'resolution',
      'between_segment_residues', 'residue_index', 'template_all_atom_masks']
  for name in squeezable:
    if name not in protein:
      continue
    last_dim = shape_helpers.shape_list(protein[name])[-1]
    # Only squeeze when the trailing size is a Python int equal to 1.
    if isinstance(last_dim, int) and last_dim == 1:
      protein[name] = tf.squeeze(protein[name], axis=-1)

  for name in ['seq_length', 'num_alignments']:
    if name in protein:
      # Remove fake sequence dimension
      protein[name] = protein[name][0]
  return protein
|
132 |
+
|
133 |
+
|
134 |
+
def make_random_crop_to_size_seed(protein):
  """Random seed for cropping residues and templates.

  Stores the result of `utils.make_random_seed()` under
  `random_crop_to_size_seed` and returns the same `protein` dict.
  """
  crop_seed = utils.make_random_seed()
  protein['random_crop_to_size_seed'] = crop_seed
  return protein
|
138 |
+
|
139 |
+
|
140 |
+
@curry1
def randomly_replace_msa_with_unknown(protein, replace_proportion):
  """Replace a proportion of the MSA with 'X'.

  Each `msa` entry that is not the gap index, and each `aatype` entry, is
  independently replaced by the 'X' index when a uniform random draw is below
  `replace_proportion`.

  Returns:
    The same `protein` dict, updated in place.
  """
  x_idx = 20
  gap_idx = 21

  msa_draw = tf.random.uniform(shape_helpers.shape_list(protein['msa']))
  msa_mask = msa_draw < replace_proportion
  # Gap positions are never replaced.
  not_gap = protein['msa'] != gap_idx
  msa_mask = tf.logical_and(msa_mask, not_gap)
  unknown_msa = tf.ones_like(protein['msa']) * x_idx
  protein['msa'] = tf.where(msa_mask, unknown_msa, protein['msa'])

  aatype_draw = tf.random.uniform(shape_helpers.shape_list(protein['aatype']))
  aatype_mask = aatype_draw < replace_proportion
  unknown_aatype = tf.ones_like(protein['aatype']) * x_idx
  protein['aatype'] = tf.where(aatype_mask, unknown_aatype, protein['aatype'])
  return protein
|
159 |
+
|
160 |
+
|
161 |
+
@curry1
def sample_msa(protein, max_seq, keep_extra):
  """Sample MSA randomly, remaining sequences are stored as `extra_*`.

  Row 0 is always kept as the first selected row; the other rows are shuffled
  before the split.

  Args:
    protein: batch to sample msa from.
    max_seq: number of sequences to sample.
    keep_extra: When True sequences not sampled are put into fields starting
      with 'extra_*'.

  Returns:
    Protein with sampled msa.
  """
  num_seq = tf.shape(protein['msa'])[0]
  shuffled_rest = tf.random_shuffle(tf.range(1, num_seq))
  row_order = tf.concat([[0], shuffled_rest], axis=0)
  num_sel = tf.minimum(max_seq, num_seq)
  num_not_sel = num_seq - num_sel

  sel_seq, not_sel_seq = tf.split(row_order, [num_sel, num_not_sel])

  for name in _MSA_FEATURE_NAMES:
    if name not in protein:
      continue
    if keep_extra:
      protein['extra_' + name] = tf.gather(protein[name], not_sel_seq)
    protein[name] = tf.gather(protein[name], sel_seq)

  return protein
|
188 |
+
|
189 |
+
|
190 |
+
@curry1
def crop_extra_msa(protein, max_extra_msa):
  """MSA features are cropped so only `max_extra_msa` sequences are kept.

  A random subset of at most `max_extra_msa` rows of `extra_msa` is chosen and
  the same rows are gathered from every present `extra_*` MSA feature.

  Returns:
    The same `protein` dict, updated in place.
  """
  num_seq = tf.shape(protein['extra_msa'])[0]
  num_sel = tf.minimum(max_extra_msa, num_seq)
  permutation = tf.random_shuffle(tf.range(0, num_seq))
  select_indices = permutation[:num_sel]

  for name in _MSA_FEATURE_NAMES:
    extra_key = 'extra_' + name
    if extra_key in protein:
      protein[extra_key] = tf.gather(protein[extra_key], select_indices)

  return protein
|
201 |
+
|
202 |
+
|
203 |
+
def delete_extra_msa(protein):
  """Remove every present `extra_*` MSA feature from `protein`, in place.

  Returns:
    The same `protein` dict.
  """
  for name in _MSA_FEATURE_NAMES:
    extra_key = 'extra_' + name
    if extra_key not in protein:
      continue
    del protein[extra_key]
  return protein
|
208 |
+
|
209 |
+
|
210 |
+
@curry1
def block_delete_msa(protein, config):
  """Sample MSA by deleting contiguous blocks.

  Jumper et al. (2021) Suppl. Alg. 1 "MSABlockDeletion"

  Arguments:
    protein: batch dict containing the msa
    config: ConfigDict with parameters; this function reads
      `msa_fraction_per_block`, `randomize_num_blocks` and `num_blocks`.

  Returns:
    updated protein
  """
  num_seq = shape_helpers.shape_list(protein['msa'])[0]
  rows_per_block = tf.floor(
      tf.cast(num_seq, tf.float32) * config.msa_fraction_per_block)
  block_num_seq = tf.cast(rows_per_block, tf.int32)

  nb = config.num_blocks
  if config.randomize_num_blocks:
    nb = tf.random.uniform([], 0, config.num_blocks + 1, dtype=tf.int32)

  del_block_starts = tf.random.uniform([nb], 0, num_seq, dtype=tf.int32)
  start_column = del_block_starts[:, None]
  block_offsets = tf.range(block_num_seq)
  del_blocks = start_column + block_offsets
  # Blocks running past the end are clamped to the last row.
  last_row = num_seq - 1
  del_blocks = tf.clip_by_value(del_blocks, 0, last_row)
  flat_blocks = tf.reshape(del_blocks, [-1])
  del_indices = tf.unique(tf.sort(flat_blocks))[0]

  # Make sure we keep the original sequence
  candidate_rows = tf.range(1, num_seq)[None]
  sparse_diff = tf.sets.difference(candidate_rows, del_indices[None])
  keep_indices = tf.squeeze(tf.sparse.to_dense(sparse_diff), 0)
  keep_indices = tf.concat([[0], keep_indices], axis=0)

  for name in _MSA_FEATURE_NAMES:
    if name in protein:
      protein[name] = tf.gather(protein[name], keep_indices)

  return protein
|
249 |
+
|
250 |
+
|
251 |
+
@curry1
def nearest_neighbor_clusters(protein, gap_agreement_weight=0.):
  """Assign each extra MSA sequence to its nearest neighbor in sampled MSA.

  Args:
    protein: feature dict providing `msa`, `msa_mask`, `extra_msa` and
      `extra_msa_mask`.
    gap_agreement_weight: weight given to two sequences agreeing on a gap.

  Returns:
    The same `protein` dict with `extra_cluster_assignment` (int32) added.
  """

  # Determine how much weight we assign to each agreement. In theory, we could
  # use a full blosum matrix here, but right now let's just down-weight gap
  # agreement because it could be spurious.
  # Never put weight on agreeing on BERT mask
  residue_weights = tf.ones(21)
  gap_weight = gap_agreement_weight * tf.ones(1)
  bert_mask_weight = np.zeros(1)
  weights = tf.concat([residue_weights, gap_weight, bert_mask_weight], 0)

  # Make agreement score as weighted Hamming distance
  sample_mask = protein['msa_mask'][:, :, None]
  sample_one_hot = sample_mask * tf.one_hot(protein['msa'], 23)
  extra_mask = protein['extra_msa_mask'][:, :, None]
  extra_one_hot = extra_mask * tf.one_hot(protein['extra_msa'], 23)

  num_seq, num_res, _ = shape_helpers.shape_list(sample_one_hot)
  extra_num_seq, _, _ = shape_helpers.shape_list(extra_one_hot)

  # Compute tf.einsum('mrc,nrc,c->mn', sample_one_hot, extra_one_hot, weights)
  # in an optimized fashion to avoid possible memory or computation blowup.
  flat_extra = tf.reshape(extra_one_hot, [extra_num_seq, num_res * 23])
  weighted_sample = sample_one_hot * weights
  flat_sample = tf.reshape(weighted_sample, [num_seq, num_res * 23])
  agreement = tf.matmul(flat_extra, flat_sample, transpose_b=True)

  # Assign each sequence in the extra sequences to the closest MSA sample
  protein['extra_cluster_assignment'] = tf.argmax(
      agreement, axis=1, output_type=tf.int32)

  return protein
|
285 |
+
|
286 |
+
|
287 |
+
@curry1
def summarize_clusters(protein):
  """Produce profile and deletion_matrix_mean within each cluster.

  Uses `extra_cluster_assignment` to sum the extra-MSA rows into their
  clusters, adds the cluster centre itself, and divides by the masked counts.

  Returns:
    The same `protein` dict with `cluster_profile` and
    `cluster_deletion_mean` added.
  """
  num_seq = shape_helpers.shape_list(protein['msa'])[0]
  assignment = protein['extra_cluster_assignment']

  def csum(x):
    # Sum the rows of x that are assigned to the same cluster.
    return tf.math.unsorted_segment_sum(x, assignment, num_seq)

  mask = protein['extra_msa_mask']
  # Include center
  center_counts = 1e-6 + protein['msa_mask']
  mask_counts = center_counts + csum(mask)

  extra_one_hot = mask[:, :, None] * tf.one_hot(protein['extra_msa'], 23)
  msa_sum = csum(extra_one_hot)
  # Original sequence
  msa_sum += tf.one_hot(protein['msa'], 23)
  protein['cluster_profile'] = msa_sum / mask_counts[:, :, None]

  del msa_sum

  del_sum = csum(mask * protein['extra_deletion_matrix'])
  # Original sequence
  del_sum += protein['deletion_matrix']
  protein['cluster_deletion_mean'] = del_sum / mask_counts
  del del_sum

  return protein
|
310 |
+
|
311 |
+
|
312 |
+
def make_msa_mask(protein):
  """Mask features are all ones, but will later be zero-padded.

  Adds `msa_mask` (shaped like `msa`) and `msa_row_mask` (one entry per `msa`
  row), both float32, and returns the same `protein` dict.
  """
  msa_shape = shape_helpers.shape_list(protein['msa'])
  protein['msa_mask'] = tf.ones(msa_shape, dtype=tf.float32)

  num_rows = shape_helpers.shape_list(protein['msa'])[0]
  protein['msa_row_mask'] = tf.ones(num_rows, dtype=tf.float32)
  return protein
|
319 |
+
|
320 |
+
|
321 |
+
def pseudo_beta_fn(aatype, all_atom_positions, all_atom_masks):
  """Create pseudo beta features.

  The pseudo-beta position is the CA atom for glycine residues and the CB atom
  for every other residue.

  Args:
    aatype: residue type indices.
    all_atom_positions: atom positions, indexed by atom on the second-to-last
      axis.
    all_atom_masks: atom masks, indexed by atom on the last axis, or None.

  Returns:
    `pseudo_beta` alone when `all_atom_masks` is None, otherwise the pair
    `(pseudo_beta, pseudo_beta_mask)` with the mask cast to float32.
  """
  glycine_index = residue_constants.restype_order['G']
  is_gly = tf.equal(aatype, glycine_index)
  ca_idx = residue_constants.atom_order['CA']
  cb_idx = residue_constants.atom_order['CB']

  # Broadcast the glycine flag over the three coordinates.
  tile_multiples = [1] * len(is_gly.shape) + [3]
  is_gly_xyz = tf.tile(is_gly[..., None], tile_multiples)
  ca_positions = all_atom_positions[..., ca_idx, :]
  cb_positions = all_atom_positions[..., cb_idx, :]
  pseudo_beta = tf.where(is_gly_xyz, ca_positions, cb_positions)

  if all_atom_masks is None:
    return pseudo_beta

  ca_mask = all_atom_masks[..., ca_idx]
  cb_mask = all_atom_masks[..., cb_idx]
  pseudo_beta_mask = tf.where(is_gly, ca_mask, cb_mask)
  pseudo_beta_mask = tf.cast(pseudo_beta_mask, tf.float32)
  return pseudo_beta, pseudo_beta_mask
|
338 |
+
|
339 |
+
|
340 |
+
@curry1
def make_pseudo_beta(protein, prefix=''):
  """Create pseudo-beta (alpha for glycine) position and mask.

  Args:
    protein: feature dict to update in place.
    prefix: '' to use the target features, or 'template_' to use the template
      features; any other value fails the assertion.

  Returns:
    The same `protein` dict with `<prefix>pseudo_beta` and
    `<prefix>pseudo_beta_mask` added.
  """
  assert prefix in ['', 'template_']
  aatype_key = 'template_aatype' if prefix else 'all_atom_aatype'
  mask_key = 'template_all_atom_masks' if prefix else 'all_atom_mask'

  positions, mask = pseudo_beta_fn(
      protein[aatype_key],
      protein[prefix + 'all_atom_positions'],
      protein[mask_key])
  protein[prefix + 'pseudo_beta'] = positions
  protein[prefix + 'pseudo_beta_mask'] = mask
  return protein
|
350 |
+
|
351 |
+
|
352 |
+
@curry1
def add_constant_field(protein, key, value):
  """Store `tf.convert_to_tensor(value)` under `key` and return `protein`."""
  constant = tf.convert_to_tensor(value)
  protein[key] = constant
  return protein
|
356 |
+
|
357 |
+
|
358 |
+
def shaped_categorical(probs, epsilon=1e-10):
  """Draw one int32 class index per distribution along the last axis of probs.

  Args:
    probs: probabilities whose last axis enumerates the classes.
    epsilon: added to `probs` before taking the log.

  Returns:
    Sampled indices with the shape of `probs` minus its last axis.
  """
  full_shape = shape_helpers.shape_list(probs)
  num_classes = full_shape[-1]
  logits = tf.reshape(tf.log(probs + epsilon), [-1, num_classes])
  samples = tf.random.categorical(logits, 1, dtype=tf.int32)
  return tf.reshape(samples, full_shape[:-1])
|
366 |
+
|
367 |
+
|
368 |
+
def make_hhblits_profile(protein):
  """Compute the HHblits MSA profile if not already present.

  Returns:
    The same `protein` dict; when `hhblits_profile` was missing it is set to
    the mean over axis 0 of the depth-22 one-hot encoding of `msa`.
  """
  if 'hhblits_profile' not in protein:
    # Compute the profile for every residue (over all MSA sequences).
    msa_one_hot = tf.one_hot(protein['msa'], 22)
    protein['hhblits_profile'] = tf.reduce_mean(msa_one_hot, axis=0)
  return protein
|
377 |
+
|
378 |
+
|
379 |
+
@curry1
def make_masked_msa(protein, config, replace_fraction):
  """Create data for BERT on raw MSA.

  Args:
    protein: feature dict providing `msa` and `hhblits_profile`.
    config: object whose `uniform_prob`, `profile_prob` and `same_prob` weight
      the three replacement distributions; the remaining probability goes to
      the extra [MASK] class.
    replace_fraction: probability with which each MSA position is replaced.

  Returns:
    The same `protein` dict with `bert_mask` and `true_msa` added and `msa`
    replaced by the corrupted MSA.
  """
  # Add a random amino acid uniformly
  random_aa = tf.constant([0.05] * 20 + [0., 0.], dtype=tf.float32)

  uniform_term = config.uniform_prob * random_aa
  profile_term = config.profile_prob * protein['hhblits_profile']
  categorical_probs = uniform_term + profile_term
  same_term = config.same_prob * tf.one_hot(protein['msa'], 22)
  categorical_probs = categorical_probs + same_term

  # Put all remaining probability on [MASK] which is a new column
  rank = len(categorical_probs.shape)
  pad_shapes = [[0, 0] for _ in range(rank)]
  pad_shapes[-1][1] = 1
  mask_prob = 1. - config.profile_prob - config.same_prob - config.uniform_prob
  assert mask_prob >= 0.
  categorical_probs = tf.pad(
      categorical_probs, pad_shapes, constant_values=mask_prob)

  msa_shape = shape_helpers.shape_list(protein['msa'])
  mask_position = tf.random.uniform(msa_shape) < replace_fraction

  sampled_msa = shaped_categorical(categorical_probs)
  bert_msa = tf.where(mask_position, sampled_msa, protein['msa'])

  # Mix real and masked MSA
  protein['bert_mask'] = tf.cast(mask_position, tf.float32)
  protein['true_msa'] = protein['msa']
  protein['msa'] = bert_msa

  return protein
|
410 |
+
|
411 |
+
|
412 |
+
@curry1
def make_fixed_size(protein, shape_schema, msa_cluster_size, extra_msa_size,
                    num_res, num_templates=0):
  """Guess at the MSA and sequence dimensions to make fixed size.

  Every feature except `extra_cluster_assignment` is padded at the end of each
  axis up to the size chosen for that axis's placeholder in `shape_schema`.

  Args:
    protein: feature dict to pad in place.
    shape_schema: maps each feature name to one entry per axis.
    msa_cluster_size: target size for NUM_MSA_SEQ axes.
    extra_msa_size: target size for NUM_EXTRA_SEQ axes.
    num_res: target size for NUM_RES axes.
    num_templates: target size for NUM_TEMPLATES axes.

  Returns:
    The same `protein` dict.
  """
  target_sizes = {
      NUM_RES: num_res,
      NUM_MSA_SEQ: msa_cluster_size,
      NUM_EXTRA_SEQ: extra_msa_size,
      NUM_TEMPLATES: num_templates,
  }

  for name, tensor in protein.items():
    # Don't transfer this to the accelerator.
    if name == 'extra_cluster_assignment':
      continue
    static_shape = tensor.shape.as_list()
    schema = shape_schema[name]
    assert len(static_shape) == len(schema), (
        f'Rank mismatch between shape and shape schema for {name}: '
        f'{static_shape} vs {schema}')

    padded_shape = []
    for current, placeholder in zip(static_shape, schema):
      # A missing or falsy target (None, 0) keeps the current static size.
      padded_shape.append(target_sizes.get(placeholder, None) or current)

    paddings = [
        (0, target - tf.shape(tensor)[axis])
        for axis, target in enumerate(padded_shape)
    ]
    # Rank-0 features yield an empty padding list and are left untouched.
    if paddings:
      protein[name] = tf.pad(
          tensor, paddings, name=f'pad_to_fixed_{name}')
      protein[name].set_shape(padded_shape)

  return protein
|
443 |
+
|
444 |
+
|
445 |
+
@curry1
def make_msa_feat(protein):
  """Create and concatenate MSA features.

  Returns:
    The same `protein` dict with `msa_feat` and `target_feat` added, plus
    `extra_has_deletion` / `extra_deletion_value` when
    `extra_deletion_matrix` is present.
  """

  def squash_deletions(deletions):
    # Same atan-based transform the features use for every deletion count.
    return tf.atan(deletions / 3.) * (2. / np.pi)

  # Whether there is a domain break. Always zero for chains, but keeping
  # for compatibility with domain datasets.
  break_flags = tf.cast(protein['between_segment_residues'], tf.float32)
  has_break = tf.clip_by_value(break_flags, 0, 1)
  aatype_1hot = tf.one_hot(protein['aatype'], 21, axis=-1)

  target_feat = [
      tf.expand_dims(has_break, axis=-1),
      aatype_1hot,  # Everyone gets the original sequence.
  ]

  msa_1hot = tf.one_hot(protein['msa'], 23, axis=-1)
  has_deletion = tf.clip_by_value(protein['deletion_matrix'], 0., 1.)
  deletion_value = squash_deletions(protein['deletion_matrix'])

  msa_feat = [
      msa_1hot,
      tf.expand_dims(has_deletion, axis=-1),
      tf.expand_dims(deletion_value, axis=-1),
  ]

  if 'cluster_profile' in protein:
    deletion_mean_value = squash_deletions(protein['cluster_deletion_mean'])
    msa_feat.extend([
        protein['cluster_profile'],
        tf.expand_dims(deletion_mean_value, axis=-1),
    ])

  if 'extra_deletion_matrix' in protein:
    protein['extra_has_deletion'] = tf.clip_by_value(
        protein['extra_deletion_matrix'], 0., 1.)
    protein['extra_deletion_value'] = squash_deletions(
        protein['extra_deletion_matrix'])

  protein['msa_feat'] = tf.concat(msa_feat, axis=-1)
  protein['target_feat'] = tf.concat(target_feat, axis=-1)
  return protein
|
487 |
+
|
488 |
+
|
489 |
+
@curry1
def select_feat(protein, feature_list):
  """Return a new dict holding only the entries whose key is in feature_list.

  The input `protein` dict is not modified.
  """
  selected = {}
  for name, value in protein.items():
    if name in feature_list:
      selected[name] = value
  return selected
|
492 |
+
|
493 |
+
|
494 |
+
@curry1
def crop_templates(protein, max_templates):
  """Keep only the first `max_templates` entries of each `template_*` feature.

  Returns:
    The same `protein` dict, updated in place.
  """
  for name, value in protein.items():
    if not name.startswith('template_'):
      continue
    protein[name] = value[:max_templates]
  return protein
|
500 |
+
|
501 |
+
|
502 |
+
@curry1
def random_crop_to_size(protein, crop_size, max_templates, shape_schema,
                        subsample_templates=False):
  """Crop randomly to `crop_size`, or keep as is if shorter than that.

  Args:
    protein: feature dict; must contain `seq_length` and
      `random_crop_to_size_seed`.
    crop_size: maximum number of residues to keep.
    max_templates: maximum number of templates to keep.
    shape_schema: maps feature names to one placeholder entry per axis.
    subsample_templates: when True the templates are randomly permuted and the
      template crop start is drawn at random; otherwise it is 0.

  Returns:
    The same `protein` dict with cropped features and `seq_length` set to the
    cropped residue count.
  """
  seq_length = protein['seq_length']
  if 'template_mask' not in protein:
    num_templates = tf.constant(0, dtype=tf.int32)
  else:
    num_templates = tf.cast(
        shape_helpers.shape_list(protein['template_mask'])[0], tf.int32)
  num_res_crop_size = tf.math.minimum(seq_length, crop_size)

  # Ensures that the cropping of residues and templates happens in the same way
  # across ensembling iterations.
  # Do not use for randomness that should vary in ensembling.
  seed_maker = utils.SeedMaker(initial_seed=protein['random_crop_to_size_seed'])

  templates_crop_start = 0
  if subsample_templates:
    templates_crop_start = tf.random.stateless_uniform(
        shape=(), minval=0, maxval=num_templates + 1, dtype=tf.int32,
        seed=seed_maker())

  num_templates_crop_size = tf.math.minimum(
      num_templates - templates_crop_start, max_templates)

  max_res_start = seq_length - num_res_crop_size + 1
  num_res_crop_start = tf.random.stateless_uniform(
      shape=(), minval=0, maxval=max_res_start, dtype=tf.int32,
      seed=seed_maker())

  # Drawn even when the permutation ends up unused.
  template_scores = tf.random.stateless_uniform(
      [num_templates], seed=seed_maker())
  templates_select_indices = tf.argsort(template_scores)

  for name, tensor in protein.items():
    if name not in shape_schema:
      continue
    schema = shape_schema[name]
    if 'template' not in name and NUM_RES not in schema:
      continue

    is_template = name.startswith('template')
    # randomly permute the templates before cropping them.
    if is_template and subsample_templates:
      tensor = tf.gather(tensor, templates_select_indices)

    starts = []
    sizes = []
    dims = shape_helpers.shape_list(tensor)
    for axis, (placeholder, dim) in enumerate(zip(schema, dims)):
      is_num_res = (placeholder == NUM_RES)
      if axis == 0 and is_template:
        start = templates_crop_start
        size = num_templates_crop_size
      elif is_num_res:
        start = num_res_crop_start
        size = num_res_crop_size
      else:
        # Keep the whole axis; -1 stands for "everything" when unknown.
        start = 0
        size = -1 if dim is None else dim
      sizes.append(size)
      starts.append(start)
    protein[name] = tf.slice(tensor, starts, sizes)

  protein['seq_length'] = num_res_crop_size
  return protein
|
563 |
+
|
564 |
+
|
565 |
+
def make_atom14_masks(protein):
  """Construct denser atom positions (14 dimensions instead of 37).

  Per-residue-type lookup tables are built from `residue_constants` and then
  gathered with `protein['aatype']`.

  Returns:
    The same `protein` dict with `atom14_atom_exists`,
    `residx_atom14_to_atom37`, `residx_atom37_to_atom14` and
    `atom37_atom_exists` added.
  """
  atom14_to_atom37_rows = []  # mapping (restype, atom14) --> atom37
  atom37_to_atom14_rows = []  # mapping (restype, atom37) --> atom14
  atom14_mask_rows = []

  for letter in residue_constants.restypes:
    resname = residue_constants.restype_1to3[letter]
    atom14_names = residue_constants.restype_name_to_atom14_names[resname]

    # Empty names are padding slots and map to atom37 index 0.
    atom14_to_atom37_rows.append([
        residue_constants.atom_order[name] if name else 0
        for name in atom14_names
    ])

    idx14_by_name = {name: i for i, name in enumerate(atom14_names)}
    atom37_to_atom14_rows.append([
        idx14_by_name.get(name, 0) for name in residue_constants.atom_types
    ])

    atom14_mask_rows.append([1. if name else 0. for name in atom14_names])

  # Add dummy mapping for restype 'UNK'
  atom14_to_atom37_rows.append([0] * 14)
  atom37_to_atom14_rows.append([0] * 37)
  atom14_mask_rows.append([0.] * 14)

  restype_atom14_to_atom37 = np.array(atom14_to_atom37_rows, dtype=np.int32)
  restype_atom37_to_atom14 = np.array(atom37_to_atom14_rows, dtype=np.int32)
  restype_atom14_mask = np.array(atom14_mask_rows, dtype=np.float32)

  # create the mapping for (residx, atom14) --> atom37, i.e. an array
  # with shape (num_res, 14) containing the atom37 indices for this protein
  aatype = protein['aatype']
  residx_atom14_to_atom37 = tf.gather(restype_atom14_to_atom37, aatype)
  residx_atom14_mask = tf.gather(restype_atom14_mask, aatype)

  protein['atom14_atom_exists'] = residx_atom14_mask
  protein['residx_atom14_to_atom37'] = residx_atom14_to_atom37

  # create the gather indices for mapping back
  protein['residx_atom37_to_atom14'] = tf.gather(
      restype_atom37_to_atom14, protein['aatype'])

  # create the corresponding mask
  restype_atom37_mask = np.zeros([21, 37], dtype=np.float32)
  for restype, letter in enumerate(residue_constants.restypes):
    resname = residue_constants.restype_1to3[letter]
    for atom_name in residue_constants.residue_atoms[resname]:
      atom_type = residue_constants.atom_order[atom_name]
      restype_atom37_mask[restype, atom_type] = 1

  protein['atom37_atom_exists'] = tf.gather(
      restype_atom37_mask, protein['aatype'])

  return protein
|
af_backprop/alphafold/model/tf/input_pipeline.py
ADDED
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Feature pre-processing input pipeline for AlphaFold."""
|
16 |
+
|
17 |
+
from alphafold.model.tf import data_transforms
|
18 |
+
from alphafold.model.tf import shape_placeholders
|
19 |
+
import tensorflow.compat.v1 as tf
|
20 |
+
import tree
|
21 |
+
|
22 |
+
# Pylint gets confused by the curry1 decorator because it changes the number
|
23 |
+
# of arguments to the function.
|
24 |
+
# pylint:disable=no-value-for-parameter
|
25 |
+
|
26 |
+
|
27 |
+
# Module-level alias for the residue-dimension placeholder defined in
# shape_placeholders.
NUM_RES = shape_placeholders.NUM_RES
|
28 |
+
NUM_MSA_SEQ = shape_placeholders.NUM_MSA_SEQ
|
29 |
+
NUM_EXTRA_SEQ = shape_placeholders.NUM_EXTRA_SEQ
|
30 |
+
NUM_TEMPLATES = shape_placeholders.NUM_TEMPLATES
|
31 |
+
|
32 |
+
|
33 |
+
def nonensembled_map_fns(data_config):
  """Input pipeline functions which are not ensembled.

  Args:
    data_config: config whose `common.use_templates` flag decides whether the
      template transforms are included.

  Returns:
    A list of one-argument transforms, in application order.
  """
  common_cfg = data_config.common

  base_fns = [
      data_transforms.correct_msa_restypes,
      data_transforms.add_distillation_flag(False),
      data_transforms.cast_64bit_ints,
      data_transforms.squeeze_features,
      # Keep to not disrupt RNG.
      data_transforms.randomly_replace_msa_with_unknown(0.0),
      data_transforms.make_seq_mask,
      data_transforms.make_msa_mask,
      # Compute the HHblits profile if it's not set. This has to be run before
      # sampling the MSA.
      data_transforms.make_hhblits_profile,
      data_transforms.make_random_crop_to_size_seed,
  ]

  template_fns = []
  if common_cfg.use_templates:
    template_fns = [
        data_transforms.fix_templates_aatype,
        data_transforms.make_template_mask,
        data_transforms.make_pseudo_beta('template_'),
    ]

  return base_fns + template_fns + [data_transforms.make_atom14_masks]
|
62 |
+
|
63 |
+
|
64 |
+
def ensembled_map_fns(data_config):
  """Input pipeline functions that can be ensembled and averaged.

  Args:
    data_config: config providing the `common` and `eval` sections read below.

  Returns:
    A list of one-argument transforms, in application order.
  """
  common_cfg = data_config.common
  eval_cfg = data_config.eval

  pad_msa_clusters = (
      eval_cfg.max_msa_clusters - eval_cfg.max_templates
      if common_cfg.reduce_msa_clusters_by_max_templates
      else eval_cfg.max_msa_clusters)
  max_extra_msa = common_cfg.max_extra_msa

  map_fns = [data_transforms.sample_msa(pad_msa_clusters, keep_extra=True)]

  if 'masked_msa' in common_cfg:
    # Masked MSA should come *before* MSA clustering so that
    # the clustering and full MSA profile do not leak information about
    # the masked locations and secret corrupted locations.
    map_fns.append(
        data_transforms.make_masked_msa(common_cfg.masked_msa,
                                        eval_cfg.masked_msa_replace_fraction))

  if common_cfg.msa_cluster_features:
    map_fns.extend([
        data_transforms.nearest_neighbor_clusters(),
        data_transforms.summarize_clusters(),
    ])

  # Crop after creating the cluster profiles.
  map_fns.append(
      data_transforms.crop_extra_msa(max_extra_msa)
      if max_extra_msa else data_transforms.delete_extra_msa)

  map_fns.append(data_transforms.make_msa_feat())

  crop_feats = dict(eval_cfg.feat)

  if not eval_cfg.fixed_size:
    map_fns.append(data_transforms.crop_templates(eval_cfg.max_templates))
    return map_fns

  map_fns.append(data_transforms.select_feat(list(crop_feats)))
  map_fns.append(data_transforms.random_crop_to_size(
      eval_cfg.crop_size,
      eval_cfg.max_templates,
      crop_feats,
      eval_cfg.subsample_templates))
  map_fns.append(data_transforms.make_fixed_size(
      crop_feats,
      pad_msa_clusters,
      common_cfg.max_extra_msa,
      eval_cfg.crop_size,
      eval_cfg.max_templates))
  return map_fns
|
123 |
+
|
124 |
+
|
125 |
+
def process_tensors_from_config(tensors, data_config):
  """Apply filters and maps to an existing dataset, based on the config.

  The non-ensembled transforms run once; the ensembled transforms then run
  once per ensemble index and the results gain a leading ensemble axis.

  Args:
    tensors: dict of input features.
    data_config: config passed to nonensembled_map_fns / ensembled_map_fns;
      `eval.num_ensemble`, `common.resample_msa_in_recycling` and
      `common.num_recycle` determine the number of ensemble entries.

  Returns:
    The processed features with a leading ensemble dimension.
  """

  def wrap_ensemble_fn(data, i):
    """Function to be mapped over the ensemble dimension."""
    features = data.copy()
    pipeline = compose(ensembled_map_fns(data_config))
    features['ensemble_index'] = i
    return pipeline(features)

  eval_cfg = data_config.eval
  nonensembled_pipeline = compose(nonensembled_map_fns(data_config))
  tensors = nonensembled_pipeline(tensors)

  tensors_0 = wrap_ensemble_fn(tensors, tf.constant(0))
  num_ensemble = eval_cfg.num_ensemble
  if data_config.common.resample_msa_in_recycling:
    # Separate batch per ensembling & recycling step.
    num_ensemble *= data_config.common.num_recycle + 1

  needs_map = isinstance(num_ensemble, tf.Tensor) or num_ensemble > 1
  if not needs_map:
    # Single entry: reuse the first result and just add the leading axis.
    return tree.map_structure(lambda x: x[None], tensors_0)

  fn_output_signature = tree.map_structure(
      tf.TensorSpec.from_tensor, tensors_0)
  return tf.map_fn(
      lambda x: wrap_ensemble_fn(tensors, x),
      tf.range(num_ensemble),
      parallel_iterations=1,
      fn_output_signature=fn_output_signature)
|
160 |
+
|
161 |
+
|
162 |
+
@data_transforms.curry1
def compose(x, fs):
  """Apply the functions in `fs` to `x` one after another, in order.

  Because of the curry1 decorator this is called as `compose(fs)(x)`.
  """
  result = x
  for transform in fs:
    result = transform(result)
  return result
|
af_backprop/alphafold/model/tf/protein_features.py
ADDED
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Contains descriptions of various protein features."""
|
16 |
+
import enum
|
17 |
+
from typing import Dict, Optional, Sequence, Tuple, Union
|
18 |
+
from alphafold.common import residue_constants
|
19 |
+
import tensorflow.compat.v1 as tf
|
20 |
+
|
21 |
+
# Type aliases.
|
22 |
+
FeaturesMetadata = Dict[str, Tuple[tf.dtypes.DType, Sequence[Union[str, int]]]]
|
23 |
+
|
24 |
+
|
25 |
+
class FeatureType(enum.Enum):
  """Enumerates feature layouts; each member's comment gives its shape."""
  ZERO_DIM = 0  # Shape [x]
  ONE_DIM = 1  # Shape [num_res, x]
  TWO_DIM = 2  # Shape [num_res, num_res, x]
  MSA = 3  # Shape [msa_length, num_res, x]
|
30 |
+
|
31 |
+
|
32 |
+
# Symbolic dimension sizes. They stand in for values only known at runtime
# and are substituted with concrete numbers by `shape()`.
NUM_RES = "num residues placeholder"
NUM_SEQ = "length msa placeholder"
NUM_TEMPLATES = "num templates placeholder"

# Feature name -> (dtype, shape) table. A shape entry is either a fixed int or
# one of the placeholders above (NUM_RES: number of residues, NUM_SEQ: number
# of sequences in the multiple sequence alignment, NUM_TEMPLATES).
FEATURES = {
    #### Static features of a protein sequence ####
    "aatype": (tf.float32, [NUM_RES, 21]),
    "between_segment_residues": (tf.int64, [NUM_RES, 1]),
    "deletion_matrix": (tf.float32, [NUM_SEQ, NUM_RES, 1]),
    "domain_name": (tf.string, [1]),
    "msa": (tf.int64, [NUM_SEQ, NUM_RES, 1]),
    "num_alignments": (tf.int64, [NUM_RES, 1]),
    "residue_index": (tf.int64, [NUM_RES, 1]),
    "seq_length": (tf.int64, [NUM_RES, 1]),
    "sequence": (tf.string, [1]),
    "all_atom_positions": (
        tf.float32, [NUM_RES, residue_constants.atom_type_num, 3]),
    "all_atom_mask": (tf.int64, [NUM_RES, residue_constants.atom_type_num]),
    "resolution": (tf.float32, [1]),
    "template_domain_names": (tf.string, [NUM_TEMPLATES]),
    "template_sum_probs": (tf.float32, [NUM_TEMPLATES, 1]),
    "template_aatype": (tf.float32, [NUM_TEMPLATES, NUM_RES, 22]),
    "template_all_atom_positions": (
        tf.float32,
        [NUM_TEMPLATES, NUM_RES, residue_constants.atom_type_num, 3]),
    "template_all_atom_masks": (
        tf.float32,
        [NUM_TEMPLATES, NUM_RES, residue_constants.atom_type_num, 1]),
}

# Per-feature views of the table above: dtype only, and shape only.
FEATURE_TYPES = {name: spec[0] for name, spec in FEATURES.items()}
FEATURE_SIZES = {name: spec[1] for name, spec in FEATURES.items()}
|
69 |
+
|
70 |
+
|
71 |
+
def register_feature(name: str,
                     type_: tf.dtypes.DType,
                     shape_: Tuple[Union[str, int]]):
  """Add (or overwrite) a feature entry in the module-level lookup tables.

  Args:
    name: Feature name used as the key in all three tables.
    type_: TensorFlow dtype of the feature.
    shape_: Shape of the feature; entries may be ints or placeholder strings.
  """
  # The three tables are independent dicts, so each one is updated explicitly.
  FEATURE_TYPES[name] = type_
  FEATURE_SIZES[name] = shape_
  FEATURES[name] = (type_, shape_)
|
78 |
+
|
79 |
+
|
80 |
+
def shape(feature_name: str,
          num_residues: int,
          msa_length: int,
          num_templates: Optional[int] = None,
          features: Optional[FeaturesMetadata] = None):
  """Resolve the concrete shape of a feature.

  This is near identical to _get_tf_shape_no_placeholders() but with 2
  differences:
  * This method does not calculate a single placeholder from the total number of
    elements (eg given <NUM_RES, 3> and size := 12, this won't deduce NUM_RES
    must be 4)
  * This method will work with tensors

  Args:
    feature_name: String identifier for the feature. A trailing
      "_unnormalized" suffix is stripped before the lookup.
    num_residues: Value substituted for the NUM_RES placeholder.
    msa_length: Value substituted for the NUM_SEQ placeholder. If the number
      of alignments is unknown / not read, please pass None.
    num_templates (optional): Value substituted for the NUM_TEMPLATES
      placeholder; when None that placeholder is left unresolved.
    features: A feature_name to (tf_dtype, shape) lookup; defaults to FEATURES.

  Returns:
    List of ints representing the tensor size.

  Raises:
    ValueError: If a placeholder in the feature's shape has no concrete
      replacement value.
  """
  lookup = features or FEATURES
  suffix = "_unnormalized"
  if feature_name.endswith(suffix):
    feature_name = feature_name[:-len(suffix)]

  _, raw_sizes = lookup[feature_name]

  replacements = {NUM_RES: num_residues, NUM_SEQ: msa_length}
  if num_templates is not None:
    replacements[NUM_TEMPLATES] = num_templates

  sizes = [replacements.get(dimension, dimension) for dimension in raw_sizes]
  # Any string still present is a placeholder that was never substituted.
  if any(isinstance(dimension, str) for dimension in sizes):
    raise ValueError("Could not parse %s (shape: %s) with values: %s" % (
        feature_name, raw_sizes, replacements))
  return sizes
|
af_backprop/alphafold/model/tf/proteins_dataset.py
ADDED
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Datasets consisting of proteins."""
|
16 |
+
from typing import Dict, Mapping, Optional, Sequence
|
17 |
+
from alphafold.model.tf import protein_features
|
18 |
+
import numpy as np
|
19 |
+
import tensorflow.compat.v1 as tf
|
20 |
+
|
21 |
+
TensorDict = Dict[str, tf.Tensor]
|
22 |
+
|
23 |
+
|
24 |
+
def parse_tfexample(
    raw_data: bytes,
    features: protein_features.FeaturesMetadata,
    key: Optional[str] = None) -> Dict[str, tf.train.Feature]:
  """Parse one serialized tf.Example and reshape the requested features.

  Args:
    raw_data: A serialized tf.Example proto.
    features: A dictionary of features, mapping string feature names to a tuple
      (dtype, shape). This dictionary should be a subset of
      protein_features.FEATURES (or the dictionary itself for all features).
    key: Optional string with the SSTable key of that tf.Example. This will be
      added into features as a 'key' but only if requested in features.

  Returns:
    A dictionary of features mapping feature names to features. Only the given
    features are returned, all other ones are filtered out.
  """
  # Every feature is read as a flat, variable-length sequence; the final shape
  # is applied afterwards by parse_reshape_logic.
  feature_map = {}
  for name, spec in features.items():
    feature_map[name] = tf.io.FixedLenSequenceFeature(
        shape=(), dtype=spec[0], allow_missing=True)

  parsed = tf.io.parse_single_example(raw_data, feature_map)
  return parse_reshape_logic(parsed, features, key=key)
|
50 |
+
|
51 |
+
|
52 |
+
def _first(tensor: tf.Tensor) -> tf.Tensor:
  """Return element 0 of the flattened input (works for scalars and tensors)."""
  flattened = tf.reshape(tensor, shape=(-1,))
  return flattened[0]
|
55 |
+
|
56 |
+
|
57 |
+
def parse_reshape_logic(
    parsed_features: TensorDict,
    features: protein_features.FeaturesMetadata,
    key: Optional[str] = None) -> TensorDict:
  """Reshape flat parsed features to the shapes declared in `features`.

  Args:
    parsed_features: Feature name to tensor mapping; updated in place. Must
      contain "seq_length".
    features: Feature name to (dtype, shape) metadata used to resolve shapes.
    key: Optional key stored under "key" (as a one-element list) when "key" is
      one of the requested features.

  Returns:
    `parsed_features`, with every entry replaced by its reshaped tensor.
  """
  # Concrete values for the shape placeholders, read from the example itself.
  num_residues = tf.cast(_first(parsed_features["seq_length"]), dtype=tf.int32)

  num_msa = 0
  if "num_alignments" in parsed_features:
    num_msa = tf.cast(_first(parsed_features["num_alignments"]), dtype=tf.int32)

  num_templates = 0
  if "template_domain_names" in parsed_features:
    num_templates = tf.cast(
        tf.shape(parsed_features["template_domain_names"])[0], dtype=tf.int32)

  if key is not None and "key" in features:
    parsed_features["key"] = [key]  # Expand dims from () to (1,).

  # Only existing keys are reassigned below, so iterating while updating is
  # safe (the dict never changes size).
  for name, tensor in parsed_features.items():
    target_shape = protein_features.shape(
        feature_name=name,
        num_residues=num_residues,
        msa_length=num_msa,
        num_templates=num_templates,
        features=features)

    expected_size = tf.constant(1, dtype=tf.int32)
    for dim in target_shape:
      expected_size *= tf.cast(dim, tf.int32)

    size_matches = tf.assert_equal(
        tf.size(tensor), expected_size,
        name="assert_%s_shape_correct" % name,
        message="The size of feature %s (%s) could not be reshaped "
        "into %s" % (name, tf.size(tensor), target_shape))
    checks = [size_matches]

    if "template" not in name:
      # Non-template features must additionally be present (non-empty).
      non_empty = tf.assert_greater(
          tf.size(tensor), 0, name="assert_%s_non_empty" % name,
          message="The feature %s is not set in the tf.Example. Either do not "
          "request the feature or use a tf.Example that has the "
          "feature set." % name)
      checks.insert(0, non_empty)

    with tf.control_dependencies(checks):
      parsed_features[name] = tf.reshape(
          tensor, target_shape, name="reshape_%s" % name)

  return parsed_features
|
110 |
+
|
111 |
+
|
112 |
+
def _make_features_metadata(
    feature_names: Sequence[str]) -> protein_features.FeaturesMetadata:
  """Build a name -> (dtype, shape) mapping for the requested features.

  "aatype", "sequence" and "seq_length" are always included, whether or not
  they were requested.
  """
  always_read = ["aatype", "sequence", "seq_length"]
  wanted = set(feature_names) | set(always_read)
  return {name: protein_features.FEATURES[name] for name in wanted}
|
122 |
+
|
123 |
+
|
124 |
+
def create_tensor_dict(
    raw_data: bytes,
    features: Sequence[str],
    key: Optional[str] = None,
    ) -> TensorDict:
  """Parse a serialized tf.Example into a dictionary of feature tensors.

  Args:
    raw_data: A serialized tf.Example proto.
    features: A list of strings of feature names to be returned in the dataset.
    key: Optional string with the SSTable key of that tf.Example. This will be
      added into features as a 'key' but only if requested in features.

  Returns:
    A dictionary of features mapping feature names to features. Only the given
    features are returned, all other ones are filtered out.
  """
  metadata = _make_features_metadata(features)
  return parse_tfexample(raw_data, metadata, key)
|
143 |
+
|
144 |
+
|
145 |
+
def np_to_tensor_dict(
    np_example: Mapping[str, np.ndarray],
    features: Sequence[str],
    ) -> TensorDict:
  """Convert a dict of NumPy feature arrays into a dict of reshaped tensors.

  Args:
    np_example: A dict of NumPy feature arrays.
    features: A list of strings of feature names to be returned in the dataset.

  Returns:
    A dictionary of features mapping feature names to features. Only the given
    features are returned, all other ones are filtered out.
  """
  features_metadata = _make_features_metadata(features)

  tensor_dict = {}
  for name, array in np_example.items():
    if name in features_metadata:
      tensor_dict[name] = tf.constant(array)

  # Ensures shapes are as expected. Needed for setting size of empty features
  # e.g. when no template hits were found.
  return parse_reshape_logic(tensor_dict, features_metadata)
|
af_backprop/alphafold/model/tf/shape_helpers.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Utilities for dealing with shapes of TensorFlow tensors."""
|
16 |
+
import tensorflow.compat.v1 as tf
|
17 |
+
|
18 |
+
|
19 |
+
def shape_list(x):
  """Return the dimensions of a tensor, using static values where known.

  Like `x.shape.as_list()` but with tensors instead of `None`s.

  Args:
    x: A tensor (or anything `tf.convert_to_tensor` accepts).

  Returns:
    If the rank is unknown, the dynamic shape tensor `tf.shape(x)`. Otherwise a
    list with one entry per dimension: a Python int where the size is
    statically known, else the matching element of `tf.shape(x)`.
  """
  x = tf.convert_to_tensor(x)

  # Unknown rank: only the dynamic shape is available.
  if x.get_shape().dims is None:
    return tf.shape(x)

  static_dims = x.get_shape().as_list()
  dynamic_dims = tf.shape(x)
  return [
      dynamic_dims[index] if size is None else size
      for index, size in enumerate(static_dims)
  ]
|
47 |
+
|
af_backprop/alphafold/model/tf/shape_placeholders.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Placeholder values for run-time varying dimension sizes."""
|
16 |
+
|
17 |
+
# Each constant is a distinct marker string that stands in for a dimension
# size which is only known at run time.
NUM_RES = 'num residues placeholder'
NUM_MSA_SEQ = 'msa placeholder'
NUM_EXTRA_SEQ = 'extra msa placeholder'
NUM_TEMPLATES = 'num templates placeholder'
|
af_backprop/alphafold/model/tf/utils.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""Shared utilities for various components."""
|
16 |
+
import tensorflow.compat.v1 as tf
|
17 |
+
|
18 |
+
|
19 |
+
def tf_combine_mask(*masks):
  """Intersect float-valued masks by multiplying them together.

  Args:
    *masks: Any number of masks that support `*`.

  Returns:
    The running product of all masks, or the integer 1 if none are given.
  """
  combined = 1
  for mask in masks:
    combined *= mask
  return combined
|
25 |
+
|
26 |
+
|
27 |
+
class SeedMaker(object):
  """Callable counter that hands out a new integer seed on every call."""

  def __init__(self, initial_seed=0):
    # The value the next call will return.
    self.next_seed = initial_seed

  def __call__(self):
    seed = self.next_seed
    self.next_seed = seed + 1
    return seed


# Module-wide seed source.
seed_maker = SeedMaker()
|
39 |
+
|
40 |
+
|
41 |
+
def make_random_seed():
  """Return a shape-[2] int32 tensor of uniformly drawn random values.

  Values are drawn between `tf.int32.min` and `tf.int32.max`; the op-level
  seed is the next value produced by the module's `seed_maker`.
  """
  return tf.random.uniform(
      shape=[2],
      minval=tf.int32.min,
      maxval=tf.int32.max,
      dtype=tf.int32,
      seed=seed_maker())
|
47 |
+
|
af_backprop/alphafold/model/utils.py
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 DeepMind Technologies Limited
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""A collection of JAX utility functions for use in protein folding."""
|
16 |
+
|
17 |
+
import collections
|
18 |
+
import numbers
|
19 |
+
from typing import Mapping
|
20 |
+
|
21 |
+
import haiku as hk
|
22 |
+
import jax
|
23 |
+
import jax.numpy as jnp
|
24 |
+
import numpy as np
|
25 |
+
|
26 |
+
|
27 |
+
def final_init(config):
  """Return the initializer name: 'zeros' if `config.zero_init` else 'linear'."""
  return 'zeros' if config.zero_init else 'linear'
|
32 |
+
|
33 |
+
|
34 |
+
def batched_gather(params, indices, axis=0, batch_dims=0):
  """Implements a JAX equivalent of `tf.gather` with `axis` and `batch_dims`.

  Args:
    params: Array to gather from.
    indices: Indices to take along `axis`.
    axis: Axis passed to `jnp.take` (applied after the batch dimensions have
      been mapped away).
    batch_dims: Number of leading dimensions to map over with `jax.vmap`.

  Returns:
    The gathered array.
  """
  def gather(p, i):
    return jnp.take(p, i, axis=axis)

  # One vmap per batch dimension peels the leading axes off both arguments.
  for _ in range(batch_dims):
    gather = jax.vmap(gather)
  return gather(params, indices)
|
40 |
+
|
41 |
+
|
42 |
+
def mask_mean(mask, value, axis=None, drop_mask_channel=False, eps=1e-10):
  """Masked mean of `value` over `axis`.

  Args:
    mask: Mask array with the same rank as `value` (after the optional channel
      drop). Along each reduced axis its size must either equal that of
      `value` or be 1, in which case it is broadcast.
    value: Array to average.
    axis: Integer, iterable of integers, or None (reduce over all axes).
    drop_mask_channel: If True, the last dimension of `mask` is removed by
      taking index 0 before anything else.
    eps: Small constant added to the denominator to avoid division by zero.

  Returns:
    sum(mask * value) / (sum(mask) * broadcast_factor + eps) over `axis`, where
    broadcast_factor is the product of the `value` sizes along reduced axes on
    which `mask` has size 1.

  Raises:
    AssertionError: If the ranks differ, `axis` is not an integer / iterable /
      None, or a reduced mask dimension is neither 1 nor the value's size.
  """
  # `collections.Iterable` was removed in Python 3.10; the ABC lives in
  # `collections.abc`. Imported locally so the block does not depend on the
  # module's import list.
  from collections import abc as collections_abc  # pylint: disable=g-import-not-at-top

  if drop_mask_channel:
    mask = mask[..., 0]

  mask_shape = mask.shape
  value_shape = value.shape

  assert len(mask_shape) == len(value_shape)

  if isinstance(axis, numbers.Integral):
    axis = [axis]
  elif axis is None:
    axis = list(range(len(mask_shape)))
  assert isinstance(axis, collections_abc.Iterable), (
      'axis needs to be either an iterable, integer or "None"')
  # Materialise once so a one-shot iterable is not exhausted by the loop below
  # before it reaches the reductions.
  axis = tuple(axis)

  # When the mask is broadcast along a reduced axis, sum(mask) undercounts the
  # number of contributing elements by that axis' size in `value`.
  broadcast_factor = 1.
  for axis_ in axis:
    value_size = value_shape[axis_]
    mask_size = mask_shape[axis_]
    if mask_size == 1:
      broadcast_factor *= value_size
    else:
      assert mask_size == value_size

  return (jnp.sum(mask * value, axis=axis) /
          (jnp.sum(mask, axis=axis) * broadcast_factor + eps))
|
70 |
+
|
71 |
+
|
72 |
+
def flat_params_to_haiku(params: Mapping[str, np.ndarray]) -> hk.Params:
  """Convert a flat dictionary of NumPy arrays to nested Haiku parameters.

  Args:
    params: Mapping whose keys have the form 'scope//name'.

  Returns:
    A two-level dict `{scope: {name: jnp.ndarray}}`.
  """
  nested = {}
  for flat_key, weights in params.items():
    # Each key must split into exactly two parts, otherwise unpacking raises.
    scope, name = flat_key.split('//')
    nested.setdefault(scope, {})[name] = jnp.array(weights)
  return nested
|
af_backprop/examples/AlphaFold_single.ipynb
ADDED
@@ -0,0 +1,311 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"nbformat": 4,
|
3 |
+
"nbformat_minor": 0,
|
4 |
+
"metadata": {
|
5 |
+
"colab": {
|
6 |
+
"name": "AlphaFold_single.ipynb",
|
7 |
+
"provenance": [],
|
8 |
+
"include_colab_link": true
|
9 |
+
},
|
10 |
+
"kernelspec": {
|
11 |
+
"name": "python3",
|
12 |
+
"display_name": "Python 3"
|
13 |
+
},
|
14 |
+
"language_info": {
|
15 |
+
"name": "python"
|
16 |
+
},
|
17 |
+
"accelerator": "GPU"
|
18 |
+
},
|
19 |
+
"cells": [
|
20 |
+
{
|
21 |
+
"cell_type": "markdown",
|
22 |
+
"metadata": {
|
23 |
+
"id": "view-in-github",
|
24 |
+
"colab_type": "text"
|
25 |
+
},
|
26 |
+
"source": [
|
27 |
+
"<a href=\"https://colab.research.google.com/github/sokrypton/af_backprop/blob/beta/examples/AlphaFold_single.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
28 |
+
]
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"cell_type": "markdown",
|
32 |
+
"source": [
|
33 |
+
"# AlphaFold - single sequence input\n",
|
34 |
+
"- WARNING - For DEMO and educational purposes only. \n",
|
35 |
+
"- For natural proteins you often need more than a single sequence to accurately predict the structure. See [ColabFold](https://colab.research.google.com/github/sokrypton/ColabFold/blob/main/AlphaFold2.ipynb) notebook if you want to predict the protein structure from a multiple-sequence-alignment. That being said, this notebook could potentially be useful for evaluating *de novo* designed proteins.\n"
|
36 |
+
],
|
37 |
+
"metadata": {
|
38 |
+
"id": "VpfCw7IzVHXv"
|
39 |
+
}
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"cell_type": "code",
|
43 |
+
"source": [
|
44 |
+
"#@title Setup\n",
|
45 |
+
"from IPython.utils import io\n",
|
46 |
+
"import os,sys,re\n",
|
47 |
+
"import tensorflow as tf\n",
|
48 |
+
"import jax\n",
|
49 |
+
"import jax.numpy as jnp\n",
|
50 |
+
"import numpy as np\n",
|
51 |
+
"\n",
|
52 |
+
"with io.capture_output() as captured:\n",
|
53 |
+
" if not os.path.isdir(\"af_backprop\"):\n",
|
54 |
+
" %shell git clone -b beta https://github.com/sokrypton/af_backprop.git\n",
|
55 |
+
" %shell pip -q install biopython dm-haiku ml-collections py3Dmol\n",
|
56 |
+
" %shell wget -qnc https://raw.githubusercontent.com/sokrypton/ColabFold/main/beta/colabfold.py\n",
|
57 |
+
" if not os.path.isdir(\"params\"):\n",
|
58 |
+
" %shell mkdir params\n",
|
59 |
+
" %shell curl -fsSL https://storage.googleapis.com/alphafold/alphafold_params_2021-07-14.tar | tar x -C params\n",
|
60 |
+
"\n",
|
61 |
+
"try:\n",
|
62 |
+
" # check if TPU is available\n",
|
63 |
+
" import jax.tools.colab_tpu\n",
|
64 |
+
" jax.tools.colab_tpu.setup_tpu()\n",
|
65 |
+
" print('Running on TPU')\n",
|
66 |
+
" DEVICE = \"tpu\"\n",
|
67 |
+
"except:\n",
|
68 |
+
" if jax.local_devices()[0].platform == 'cpu':\n",
|
69 |
+
" print(\"WARNING: no GPU detected, will be using CPU\")\n",
|
70 |
+
" DEVICE = \"cpu\"\n",
|
71 |
+
" else:\n",
|
72 |
+
" print('Running on GPU')\n",
|
73 |
+
" DEVICE = \"gpu\"\n",
|
74 |
+
" # disable GPU on tensorflow\n",
|
75 |
+
" tf.config.set_visible_devices([], 'GPU')\n",
|
76 |
+
"\n",
|
77 |
+
"sys.path.append('/content/af_backprop')\n",
|
78 |
+
"# import libraries\n",
|
79 |
+
"from utils import update_seq, update_aatype, get_plddt, get_pae\n",
|
80 |
+
"import colabfold as cf\n",
|
81 |
+
"from alphafold.common import protein\n",
|
82 |
+
"from alphafold.data import pipeline\n",
|
83 |
+
"from alphafold.model import data, config, model\n",
|
84 |
+
"from alphafold.common import residue_constants\n",
|
85 |
+
"\n",
|
86 |
+
"def clear_mem():\n",
|
87 |
+
" backend = jax.lib.xla_bridge.get_backend()\n",
|
88 |
+
" for buf in backend.live_buffers(): buf.delete()\n",
|
89 |
+
"\n",
|
90 |
+
"def setup_model(max_len, model_name=\"model_2_ptm\"):\n",
|
91 |
+
"\n",
|
92 |
+
" clear_mem()\n",
|
93 |
+
"\n",
|
94 |
+
" # setup model\n",
|
95 |
+
" cfg = config.model_config(\"model_5_ptm\")\n",
|
96 |
+
" cfg.model.num_recycle = 0\n",
|
97 |
+
" cfg.data.common.num_recycle = 0\n",
|
98 |
+
" cfg.data.eval.max_msa_clusters = 1\n",
|
99 |
+
" cfg.data.common.max_extra_msa = 1\n",
|
100 |
+
" cfg.data.eval.masked_msa_replace_fraction = 0\n",
|
101 |
+
" cfg.model.global_config.subbatch_size = None\n",
|
102 |
+
" model_params = data.get_model_haiku_params(model_name=model_name, data_dir=\".\")\n",
|
103 |
+
" model_runner = model.RunModel(cfg, model_params, is_training=False)\n",
|
104 |
+
"\n",
|
105 |
+
" seq = \"A\" * max_len\n",
|
106 |
+
" length = len(seq)\n",
|
107 |
+
" feature_dict = {\n",
|
108 |
+
" **pipeline.make_sequence_features(sequence=seq, description=\"none\", num_res=length),\n",
|
109 |
+
" **pipeline.make_msa_features(msas=[[seq]], deletion_matrices=[[[0]*length]])\n",
|
110 |
+
" }\n",
|
111 |
+
" inputs = model_runner.process_features(feature_dict,random_seed=0)\n",
|
112 |
+
"\n",
|
113 |
+
" def runner(seq, opt):\n",
|
114 |
+
" # update sequence\n",
|
115 |
+
" inputs = opt[\"inputs\"]\n",
|
116 |
+
" inputs.update(opt[\"prev\"])\n",
|
117 |
+
" update_seq(seq, inputs)\n",
|
118 |
+
" update_aatype(inputs[\"target_feat\"][...,1:], inputs)\n",
|
119 |
+
"\n",
|
120 |
+
" # mask prediction\n",
|
121 |
+
" mask = seq.sum(-1)\n",
|
122 |
+
" inputs[\"seq_mask\"] = inputs[\"seq_mask\"].at[:].set(mask)\n",
|
123 |
+
" inputs[\"msa_mask\"] = inputs[\"msa_mask\"].at[:].set(mask)\n",
|
124 |
+
" inputs[\"residue_index\"] = jnp.where(mask==1,inputs[\"residue_index\"],0)\n",
|
125 |
+
"\n",
|
126 |
+
" # get prediction\n",
|
127 |
+
" key = jax.random.PRNGKey(0)\n",
|
128 |
+
" outputs = model_runner.apply(opt[\"params\"], key, inputs)\n",
|
129 |
+
"\n",
|
130 |
+
" prev = {\"init_msa_first_row\":outputs['representations']['msa_first_row'][None],\n",
|
131 |
+
" \"init_pair\":outputs['representations']['pair'][None],\n",
|
132 |
+
" \"init_pos\":outputs['structure_module']['final_atom_positions'][None]}\n",
|
133 |
+
" \n",
|
134 |
+
" aux = {\"final_atom_positions\":outputs[\"structure_module\"][\"final_atom_positions\"],\n",
|
135 |
+
" \"final_atom_mask\":outputs[\"structure_module\"][\"final_atom_mask\"],\n",
|
136 |
+
" \"plddt\":get_plddt(outputs),\"pae\":get_pae(outputs),\n",
|
137 |
+
" \"inputs\":inputs, \"prev\":prev}\n",
|
138 |
+
" return aux\n",
|
139 |
+
"\n",
|
140 |
+
" return jax.jit(runner), {\"inputs\":inputs,\"params\":model_params}\n",
|
141 |
+
"\n",
|
142 |
+
"MAX_LEN = 50\n",
|
143 |
+
"RUNNER, OPT = setup_model(MAX_LEN)"
|
144 |
+
],
|
145 |
+
"metadata": {
|
146 |
+
"cellView": "form",
|
147 |
+
"id": "24ybo88aBiSU"
|
148 |
+
},
|
149 |
+
"execution_count": null,
|
150 |
+
"outputs": []
|
151 |
+
},
|
152 |
+
{
|
153 |
+
"cell_type": "code",
|
154 |
+
"source": [
|
155 |
+
"%%time\n",
|
156 |
+
"#@title Enter the amino acid sequence to fold ⬇️\n",
|
157 |
+
"\n",
|
158 |
+
"sequence = 'GGGGGGGGGGGGGGGGGGGG' #@param {type:\"string\"}\n",
|
159 |
+
"recycles = 0 #@param [\"0\", \"1\", \"2\", \"3\", \"6\", \"12\", \"24\"] {type:\"raw\"}\n",
|
160 |
+
"SEQ = re.sub(\"[^A-Z]\", \"\", sequence.upper())\n",
|
161 |
+
"LEN = len(SEQ)\n",
|
162 |
+
"if LEN > MAX_LEN:\n",
|
163 |
+
" print(\"recompiling...\")\n",
|
164 |
+
" MAX_LEN = LEN\n",
|
165 |
+
" RUNNER, OPT = setup_model(MAX_LEN)\n",
|
166 |
+
"\n",
|
167 |
+
"x = np.array([residue_constants.restype_order.get(aa,0) for aa in SEQ])\n",
|
168 |
+
"x = np.pad(x,[0,MAX_LEN-LEN],constant_values=-1)\n",
|
169 |
+
"x = jax.nn.one_hot(x,20)\n",
|
170 |
+
"\n",
|
171 |
+
"OPT[\"prev\"] = {'init_msa_first_row': np.zeros([1, MAX_LEN, 256]),\n",
|
172 |
+
" 'init_pair': np.zeros([1, MAX_LEN, MAX_LEN, 128]),\n",
|
173 |
+
" 'init_pos': np.zeros([1, MAX_LEN, 37, 3])}\n",
|
174 |
+
"\n",
|
175 |
+
"positions = []\n",
|
176 |
+
"plddts = []\n",
|
177 |
+
"for r in range(recycles+1):\n",
|
178 |
+
" outs = RUNNER(x, OPT)\n",
|
179 |
+
" outs = jax.tree_map(lambda x:np.asarray(x), outs)\n",
|
180 |
+
" positions.append(outs[\"prev\"][\"init_pos\"][0,:LEN])\n",
|
181 |
+
" plddts.append(outs[\"plddt\"][:LEN])\n",
|
182 |
+
" OPT[\"prev\"] = outs[\"prev\"]\n",
|
183 |
+
" if recycles > 0:\n",
|
184 |
+
" print(r, plddts[-1].mean())"
|
185 |
+
],
|
186 |
+
"metadata": {
|
187 |
+
"cellView": "form",
|
188 |
+
"id": "cAoC4ar8G7ZH"
|
189 |
+
},
|
190 |
+
"execution_count": null,
|
191 |
+
"outputs": []
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"cell_type": "code",
|
195 |
+
"source": [
|
196 |
+
"#@title Display 3D structure {run: \"auto\"}\n",
|
197 |
+
"color = \"lDDT\" #@param [\"chain\", \"lDDT\", \"rainbow\"]\n",
|
198 |
+
"show_sidechains = True #@param {type:\"boolean\"}\n",
|
199 |
+
"show_mainchains = False #@param {type:\"boolean\"}\n",
|
200 |
+
"#@markdown - TIP - hold mouse over amino acid to get name and position number\n",
|
201 |
+
"\n",
|
202 |
+
"def save_pdb(outs, filename):\n",
|
203 |
+
" '''save pdb coordinates'''\n",
|
204 |
+
" p = {\"residue_index\":outs[\"inputs\"][\"residue_index\"][0][:LEN] + 1,\n",
|
205 |
+
" \"aatype\":outs[\"inputs\"][\"aatype\"].argmax(-1)[0][:LEN],\n",
|
206 |
+
" \"atom_positions\":outs[\"final_atom_positions\"][:LEN],\n",
|
207 |
+
" \"atom_mask\":outs[\"final_atom_mask\"][:LEN]}\n",
|
208 |
+
" b_factors = 100.0 * outs[\"plddt\"][:LEN,None] * p[\"atom_mask\"]\n",
|
209 |
+
" p = protein.Protein(**p,b_factors=b_factors)\n",
|
210 |
+
" pdb_lines = protein.to_pdb(p)\n",
|
211 |
+
" with open(filename, 'w') as f:\n",
|
212 |
+
" f.write(pdb_lines)\n",
|
213 |
+
"\n",
|
214 |
+
"save_pdb(outs,\"out.pdb\")\n",
|
215 |
+
"num_res = int(outs[\"inputs\"][\"aatype\"][0].sum())\n",
|
216 |
+
"\n",
|
217 |
+
"v = cf.show_pdb(\"out.pdb\", show_sidechains, show_mainchains, color,\n",
|
218 |
+
" color_HP=True, size=(800,480)) \n",
|
219 |
+
"v.setHoverable({},\n",
|
220 |
+
" True,\n",
|
221 |
+
" '''function(atom,viewer,event,container){if(!atom.label){atom.label=viewer.addLabel(\" \"+atom.resn+\":\"+atom.resi,{position:atom,backgroundColor:'mintcream',fontColor:'black'});}}''',\n",
|
222 |
+
" '''function(atom,viewer){if(atom.label){viewer.removeLabel(atom.label);delete atom.label;}}''')\n",
|
223 |
+
"v.show() \n",
|
224 |
+
"\n",
|
225 |
+
"if color == \"lDDT\":\n",
|
226 |
+
" cf.plot_plddt_legend().show() \n",
|
227 |
+
"if \"pae\" in outs:\n",
|
228 |
+
" cf.plot_confidence(outs[\"plddt\"][:LEN]*100, outs[\"pae\"][:LEN,:LEN]).show()\n",
|
229 |
+
"else:\n",
|
230 |
+
" cf.plot_confidence(outs[\"plddt\"][:LEN]*100).show()"
|
231 |
+
],
|
232 |
+
"metadata": {
|
233 |
+
"cellView": "form",
|
234 |
+
"id": "-KbUGG4ZOp0J"
|
235 |
+
},
|
236 |
+
"execution_count": null,
|
237 |
+
"outputs": []
|
238 |
+
},
|
239 |
+
{
|
240 |
+
"cell_type": "code",
|
241 |
+
"source": [
|
242 |
+
"#@title Animate\n",
|
243 |
+
"#@markdown - Animate trajectory if more than 0 recycle(s)\n",
|
244 |
+
"import matplotlib\n",
|
245 |
+
"from matplotlib import animation\n",
|
246 |
+
"import matplotlib.pyplot as plt\n",
|
247 |
+
"from IPython.display import HTML\n",
|
248 |
+
"\n",
|
249 |
+
"def make_animation(positions, plddts=None, line_w=2.0):\n",
|
250 |
+
"\n",
|
251 |
+
" def ca_align_to_last(positions):\n",
|
252 |
+
" def align(P, Q):\n",
|
253 |
+
" p = P - P.mean(0,keepdims=True)\n",
|
254 |
+
" q = Q - Q.mean(0,keepdims=True)\n",
|
255 |
+
" return p @ cf.kabsch(p,q)\n",
|
256 |
+
" \n",
|
257 |
+
" pos = positions[-1,:,1,:] - positions[-1,:,1,:].mean(0,keepdims=True)\n",
|
258 |
+
" best_2D_view = pos @ cf.kabsch(pos,pos,return_v=True)\n",
|
259 |
+
"\n",
|
260 |
+
" new_positions = []\n",
|
261 |
+
" for i in range(len(positions)):\n",
|
262 |
+
" new_positions.append(align(positions[i,:,1,:],best_2D_view))\n",
|
263 |
+
" return np.asarray(new_positions)\n",
|
264 |
+
"\n",
|
265 |
+
" # align all to last recycle\n",
|
266 |
+
" pos = ca_align_to_last(positions)\n",
|
267 |
+
"\n",
|
268 |
+
" fig, (ax1, ax2, ax3) = plt.subplots(1,3)\n",
|
269 |
+
" fig.subplots_adjust(top = 0.90, bottom = 0.10, right = 1, left = 0, hspace = 0, wspace = 0)\n",
|
270 |
+
" fig.set_figwidth(13)\n",
|
271 |
+
" fig.set_figheight(5)\n",
|
272 |
+
" fig.set_dpi(100)\n",
|
273 |
+
"\n",
|
274 |
+
" xy_min = pos[...,:2].min() - 1\n",
|
275 |
+
" xy_max = pos[...,:2].max() + 1\n",
|
276 |
+
"\n",
|
277 |
+
" for ax in [ax1,ax3]:\n",
|
278 |
+
" ax.set_xlim(xy_min, xy_max)\n",
|
279 |
+
" ax.set_ylim(xy_min, xy_max)\n",
|
280 |
+
" ax.axis(False)\n",
|
281 |
+
"\n",
|
282 |
+
" ims=[]\n",
|
283 |
+
" for k,(xyz,plddt) in enumerate(zip(pos,plddts)):\n",
|
284 |
+
" ims.append([])\n",
|
285 |
+
" im2 = ax2.plot(plddt, animated=True, color=\"black\")\n",
|
286 |
+
" tt1 = cf.add_text(\"colored by N->C\", ax1)\n",
|
287 |
+
" tt2 = cf.add_text(f\"recycle={k}\", ax2)\n",
|
288 |
+
" tt3 = cf.add_text(f\"pLDDT={plddt.mean():.3f}\", ax3)\n",
|
289 |
+
" ax2.set_xlabel(\"positions\")\n",
|
290 |
+
" ax2.set_ylabel(\"pLDDT\")\n",
|
291 |
+
" ax2.set_ylim(0,100)\n",
|
292 |
+
" ims[-1] += [cf.plot_pseudo_3D(xyz, ax=ax1, line_w=line_w)]\n",
|
293 |
+
" ims[-1] += [im2[0],tt1,tt2,tt3]\n",
|
294 |
+
" ims[-1] += [cf.plot_pseudo_3D(xyz, c=plddt, cmin=50, cmax=90, ax=ax3, line_w=line_w)]\n",
|
295 |
+
" \n",
|
296 |
+
" ani = animation.ArtistAnimation(fig, ims, blit=True, interval=120)\n",
|
297 |
+
" plt.close()\n",
|
298 |
+
" return ani.to_html5_video()\n",
|
299 |
+
"\n",
|
300 |
+
"HTML(make_animation(np.asarray(positions),\n",
|
301 |
+
" np.asarray(plddts) * 100.0))"
|
302 |
+
],
|
303 |
+
"metadata": {
|
304 |
+
"cellView": "form",
|
305 |
+
"id": "tdjdC0KFPjWw"
|
306 |
+
},
|
307 |
+
"execution_count": null,
|
308 |
+
"outputs": []
|
309 |
+
}
|
310 |
+
]
|
311 |
+
}
|
af_backprop/examples/af_design.ipynb
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"nbformat": 4,
|
3 |
+
"nbformat_minor": 0,
|
4 |
+
"metadata": {
|
5 |
+
"accelerator": "GPU",
|
6 |
+
"colab": {
|
7 |
+
"name": "af_design.ipynb",
|
8 |
+
"provenance": [],
|
9 |
+
"include_colab_link": true
|
10 |
+
},
|
11 |
+
"kernelspec": {
|
12 |
+
"display_name": "Python 3",
|
13 |
+
"name": "python3"
|
14 |
+
},
|
15 |
+
"language_info": {
|
16 |
+
"name": "python"
|
17 |
+
}
|
18 |
+
},
|
19 |
+
"cells": [
|
20 |
+
{
|
21 |
+
"cell_type": "markdown",
|
22 |
+
"metadata": {
|
23 |
+
"id": "view-in-github",
|
24 |
+
"colab_type": "text"
|
25 |
+
},
|
26 |
+
"source": [
|
27 |
+
"<a href=\"https://colab.research.google.com/github/sokrypton/af_backprop/blob/main/examples/af_design.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
28 |
+
]
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"cell_type": "markdown",
|
32 |
+
"metadata": {
|
33 |
+
"id": "OA2k3sAYuiXe"
|
34 |
+
},
|
35 |
+
"source": [
|
36 |
+
"#AF Design\n",
|
37 |
+
"NOTE, updated version of this notebook has moved to: [ColabDesign](https://github.com/sokrypton/ColabDesign/tree/main/af)"
|
38 |
+
]
|
39 |
+
}
|
40 |
+
]
|
41 |
+
}
|
af_backprop/examples/fixbb_design.ipynb
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"nbformat": 4,
|
3 |
+
"nbformat_minor": 0,
|
4 |
+
"metadata": {
|
5 |
+
"accelerator": "GPU",
|
6 |
+
"colab": {
|
7 |
+
"name": "fixbb_design.ipynb",
|
8 |
+
"provenance": []
|
9 |
+
},
|
10 |
+
"kernelspec": {
|
11 |
+
"display_name": "Python 3",
|
12 |
+
"name": "python3"
|
13 |
+
},
|
14 |
+
"language_info": {
|
15 |
+
"name": "python"
|
16 |
+
}
|
17 |
+
},
|
18 |
+
"cells": [
|
19 |
+
{
|
20 |
+
"cell_type": "markdown",
|
21 |
+
"metadata": {
|
22 |
+
"id": "uLHIgB5QydoL"
|
23 |
+
},
|
24 |
+
"source": [
|
25 |
+
"This notebook has moved here: https://colab.research.google.com/github/sokrypton/af_backprop/blob/main/examples/af_design.ipynb"
|
26 |
+
]
|
27 |
+
}
|
28 |
+
]
|
29 |
+
}
|
af_backprop/examples/sc_hall/1QJG.pdb
ADDED
@@ -0,0 +1,1156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ATOM 1 N MET A 1 10.694 86.076 -17.884 1.00 84.30 A N
|
2 |
+
ATOM 2 CA MET A 1 9.356 86.709 -18.062 1.00 81.23 A C
|
3 |
+
ATOM 3 C MET A 1 8.548 85.831 -19.012 1.00 74.08 A C
|
4 |
+
ATOM 4 O MET A 1 8.958 85.627 -20.159 1.00 74.53 A O
|
5 |
+
ATOM 5 CB MET A 1 9.510 88.112 -18.660 1.00 88.90 A C
|
6 |
+
ATOM 6 CG MET A 1 8.245 88.955 -18.591 1.00 98.51 A C
|
7 |
+
ATOM 7 SD MET A 1 7.719 89.207 -16.879 1.00111.10 A S
|
8 |
+
ATOM 8 CE MET A 1 6.310 88.074 -16.761 1.00106.45 A C
|
9 |
+
ATOM 9 H1 MET A 1 11.109 85.938 -18.826 1.00 0.00 A H
|
10 |
+
ATOM 10 H2 MET A 1 11.317 86.619 -17.265 1.00 0.00 A H
|
11 |
+
ATOM 11 H3 MET A 1 10.557 85.138 -17.448 1.00 0.00 A H
|
12 |
+
ATOM 12 N ASN A 2 7.410 85.316 -18.548 1.00 62.40 A N
|
13 |
+
ATOM 13 CA ASN A 2 6.596 84.449 -19.389 1.00 47.87 A C
|
14 |
+
ATOM 14 C ASN A 2 5.205 84.954 -19.711 1.00 41.68 A C
|
15 |
+
ATOM 15 O ASN A 2 4.231 84.652 -19.021 1.00 41.06 A O
|
16 |
+
ATOM 16 CB ASN A 2 6.506 83.052 -18.794 1.00 41.73 A C
|
17 |
+
ATOM 17 CG ASN A 2 7.857 82.460 -18.510 1.00 34.33 A C
|
18 |
+
ATOM 18 ND2 ASN A 2 7.874 81.403 -17.729 1.00 31.37 A N
|
19 |
+
ATOM 19 OD1 ASN A 2 8.883 82.969 -18.959 1.00 37.21 A O
|
20 |
+
ATOM 20 H ASN A 2 7.083 85.483 -17.636 1.00 0.00 A H
|
21 |
+
ATOM 21 HD21 ASN A 2 7.016 81.088 -17.384 1.00 0.00 A H
|
22 |
+
ATOM 22 HD22 ASN A 2 8.733 80.991 -17.536 1.00 0.00 A H
|
23 |
+
ATOM 23 N THR A 3 5.127 85.753 -20.761 1.00 36.66 A N
|
24 |
+
ATOM 24 CA THR A 3 3.865 86.272 -21.226 1.00 34.05 A C
|
25 |
+
ATOM 25 C THR A 3 3.507 85.320 -22.359 1.00 34.07 A C
|
26 |
+
ATOM 26 O THR A 3 4.363 84.575 -22.833 1.00 36.58 A O
|
27 |
+
ATOM 27 CB THR A 3 4.029 87.713 -21.764 1.00 33.23 A C
|
28 |
+
ATOM 28 CG2 THR A 3 4.446 88.643 -20.655 1.00 34.82 A C
|
29 |
+
ATOM 29 OG1 THR A 3 5.029 87.746 -22.789 1.00 32.02 A O
|
30 |
+
ATOM 30 H THR A 3 5.922 86.005 -21.270 1.00 0.00 A H
|
31 |
+
ATOM 31 HG1 THR A 3 4.879 88.607 -23.212 1.00 0.00 A H
|
32 |
+
ATOM 32 N PRO A 4 2.236 85.282 -22.773 1.00 33.54 A N
|
33 |
+
ATOM 33 CA PRO A 4 1.860 84.384 -23.865 1.00 30.39 A C
|
34 |
+
ATOM 34 C PRO A 4 2.707 84.727 -25.085 1.00 29.97 A C
|
35 |
+
ATOM 35 O PRO A 4 3.253 83.850 -25.760 1.00 31.13 A O
|
36 |
+
ATOM 36 CB PRO A 4 0.404 84.759 -24.119 1.00 30.46 A C
|
37 |
+
ATOM 37 CG PRO A 4 -0.066 85.173 -22.778 1.00 33.24 A C
|
38 |
+
ATOM 38 CD PRO A 4 1.061 86.017 -22.276 1.00 34.34 A C
|
39 |
+
ATOM 39 N GLU A 5 2.848 86.023 -25.319 1.00 27.73 A N
|
40 |
+
ATOM 40 CA GLU A 5 3.607 86.541 -26.437 1.00 27.01 A C
|
41 |
+
ATOM 41 C GLU A 5 5.012 85.965 -26.481 1.00 24.21 A C
|
42 |
+
ATOM 42 O GLU A 5 5.488 85.561 -27.532 1.00 27.56 A O
|
43 |
+
ATOM 43 CB GLU A 5 3.690 88.070 -26.353 1.00 31.01 A C
|
44 |
+
ATOM 44 CG GLU A 5 2.342 88.828 -26.453 1.00 43.67 A C
|
45 |
+
ATOM 45 CD GLU A 5 1.448 88.719 -25.210 1.00 49.66 A C
|
46 |
+
ATOM 46 OE1 GLU A 5 1.955 88.464 -24.095 1.00 57.37 A O
|
47 |
+
ATOM 47 OE2 GLU A 5 0.222 88.898 -25.353 1.00 51.34 A O1-
|
48 |
+
ATOM 48 H GLU A 5 2.432 86.661 -24.705 1.00 0.00 A H
|
49 |
+
ATOM 49 N HIS A 6 5.662 85.907 -25.328 1.00 20.87 A N
|
50 |
+
ATOM 50 CA HIS A 6 7.030 85.407 -25.240 1.00 18.21 A C
|
51 |
+
ATOM 51 C HIS A 6 7.128 83.913 -25.504 1.00 16.76 A C
|
52 |
+
ATOM 52 O HIS A 6 8.016 83.462 -26.216 1.00 20.94 A O
|
53 |
+
ATOM 53 CB HIS A 6 7.644 85.758 -23.879 1.00 17.74 A C
|
54 |
+
ATOM 54 CG HIS A 6 8.966 85.097 -23.617 1.00 23.16 A C
|
55 |
+
ATOM 55 CD2 HIS A 6 9.288 84.014 -22.867 1.00 23.51 A C
|
56 |
+
ATOM 56 ND1 HIS A 6 10.148 85.543 -24.171 1.00 26.10 A N
|
57 |
+
ATOM 57 CE1 HIS A 6 11.138 84.764 -23.775 1.00 25.94 A C
|
58 |
+
ATOM 58 NE2 HIS A 6 10.643 83.829 -22.985 1.00 24.12 A N
|
59 |
+
ATOM 59 H HIS A 6 5.203 86.224 -24.518 1.00 0.00 A H
|
60 |
+
ATOM 60 HD1 HIS A 6 10.261 86.309 -24.778 1.00 0.00 A H
|
61 |
+
ATOM 61 HE2 HIS A 6 11.146 83.092 -22.596 1.00 0.00 A H
|
62 |
+
ATOM 62 N MET A 7 6.222 83.145 -24.924 1.00 14.80 A N
|
63 |
+
ATOM 63 CA MET A 7 6.251 81.718 -25.128 1.00 13.22 A C
|
64 |
+
ATOM 64 C MET A 7 5.959 81.421 -26.587 1.00 11.59 A C
|
65 |
+
ATOM 65 O MET A 7 6.500 80.493 -27.161 1.00 12.50 A O
|
66 |
+
ATOM 66 CB MET A 7 5.245 81.037 -24.208 1.00 13.28 A C
|
67 |
+
ATOM 67 CG MET A 7 5.517 81.314 -22.727 1.00 9.93 A C
|
68 |
+
ATOM 68 SD MET A 7 4.532 80.310 -21.631 1.00 15.44 A S
|
69 |
+
ATOM 69 CE MET A 7 3.047 81.335 -21.585 1.00 7.92 A C
|
70 |
+
ATOM 70 H MET A 7 5.517 83.549 -24.369 1.00 0.00 A H
|
71 |
+
ATOM 71 N THR A 8 5.132 82.245 -27.204 1.00 13.23 A N
|
72 |
+
ATOM 72 CA THR A 8 4.793 82.053 -28.600 1.00 14.43 A C
|
73 |
+
ATOM 73 C THR A 8 5.973 82.395 -29.514 1.00 18.05 A C
|
74 |
+
ATOM 74 O THR A 8 6.181 81.732 -30.534 1.00 19.37 A O
|
75 |
+
ATOM 75 CB THR A 8 3.577 82.897 -28.978 1.00 19.26 A C
|
76 |
+
ATOM 76 CG2 THR A 8 3.136 82.608 -30.403 1.00 20.22 A C
|
77 |
+
ATOM 77 OG1 THR A 8 2.500 82.579 -28.086 1.00 14.83 A O
|
78 |
+
ATOM 78 H THR A 8 4.729 82.990 -26.718 1.00 0.00 A H
|
79 |
+
ATOM 79 HG1 THR A 8 2.746 82.797 -27.187 1.00 0.00 A H
|
80 |
+
ATOM 80 N ALA A 9 6.751 83.416 -29.152 1.00 14.07 A N
|
81 |
+
ATOM 81 CA ALA A 9 7.911 83.806 -29.947 1.00 13.33 A C
|
82 |
+
ATOM 82 C ALA A 9 9.000 82.739 -29.838 1.00 12.50 A C
|
83 |
+
ATOM 83 O ALA A 9 9.697 82.462 -30.807 1.00 15.18 A O
|
84 |
+
ATOM 84 CB ALA A 9 8.423 85.131 -29.500 1.00 13.58 A C
|
85 |
+
ATOM 85 H ALA A 9 6.526 83.927 -28.344 1.00 0.00 A H
|
86 |
+
ATOM 86 N VAL A 10 9.118 82.116 -28.671 1.00 11.73 A N
|
87 |
+
ATOM 87 CA VAL A 10 10.090 81.055 -28.464 1.00 12.11 A C
|
88 |
+
ATOM 88 C VAL A 10 9.731 79.823 -29.295 1.00 13.25 A C
|
89 |
+
ATOM 89 O VAL A 10 10.614 79.217 -29.890 1.00 19.98 A O
|
90 |
+
ATOM 90 CB VAL A 10 10.191 80.674 -26.995 1.00 9.17 A C
|
91 |
+
ATOM 91 CG1 VAL A 10 10.931 79.370 -26.838 1.00 6.34 A C
|
92 |
+
ATOM 92 CG2 VAL A 10 10.917 81.772 -26.238 1.00 8.83 A C
|
93 |
+
ATOM 93 H VAL A 10 8.524 82.378 -27.934 1.00 0.00 A H
|
94 |
+
ATOM 94 N VAL A 11 8.451 79.453 -29.354 1.00 14.16 A N
|
95 |
+
ATOM 95 CA VAL A 11 8.036 78.296 -30.153 1.00 8.99 A C
|
96 |
+
ATOM 96 C VAL A 11 8.424 78.566 -31.597 1.00 13.66 A C
|
97 |
+
ATOM 97 O VAL A 11 8.974 77.707 -32.272 1.00 20.39 A O
|
98 |
+
ATOM 98 CB VAL A 11 6.512 78.064 -30.071 1.00 8.71 A C
|
99 |
+
ATOM 99 CG1 VAL A 11 6.051 77.051 -31.104 1.00 6.03 A C
|
100 |
+
ATOM 100 CG2 VAL A 11 6.137 77.589 -28.690 1.00 10.59 A C
|
101 |
+
ATOM 101 H VAL A 11 7.783 79.946 -28.829 1.00 0.00 A H
|
102 |
+
ATOM 102 N GLN A 12 8.171 79.789 -32.048 1.00 17.26 A N
|
103 |
+
ATOM 103 CA GLN A 12 8.478 80.205 -33.405 1.00 16.35 A C
|
104 |
+
ATOM 104 C GLN A 12 9.972 80.219 -33.722 1.00 14.80 A C
|
105 |
+
ATOM 105 O GLN A 12 10.369 79.891 -34.824 1.00 13.57 A O
|
106 |
+
ATOM 106 CB GLN A 12 7.856 81.566 -33.678 1.00 18.90 A C
|
107 |
+
ATOM 107 CG GLN A 12 6.344 81.525 -33.685 1.00 23.55 A C
|
108 |
+
ATOM 108 CD GLN A 12 5.732 82.912 -33.828 1.00 34.39 A C
|
109 |
+
ATOM 109 NE2 GLN A 12 6.071 83.814 -32.905 1.00 35.29 A N
|
110 |
+
ATOM 110 OE1 GLN A 12 4.959 83.169 -34.748 1.00 41.94 A O
|
111 |
+
ATOM 111 H GLN A 12 7.747 80.428 -31.439 1.00 0.00 A H
|
112 |
+
ATOM 112 HE21 GLN A 12 6.672 83.536 -32.191 1.00 0.00 A H
|
113 |
+
ATOM 113 HE22 GLN A 12 5.691 84.710 -33.003 1.00 0.00 A H
|
114 |
+
ATOM 114 N ARG A 13 10.804 80.604 -32.764 1.00 16.01 A N
|
115 |
+
ATOM 115 CA ARG A 13 12.240 80.609 -32.997 1.00 15.24 A C
|
116 |
+
ATOM 116 C ARG A 13 12.729 79.171 -33.079 1.00 15.94 A C
|
117 |
+
ATOM 117 O ARG A 13 13.676 78.860 -33.802 1.00 18.78 A O
|
118 |
+
ATOM 118 CB ARG A 13 12.970 81.328 -31.868 1.00 14.71 A C
|
119 |
+
ATOM 119 CG ARG A 13 12.781 82.814 -31.870 1.00 24.21 A C
|
120 |
+
ATOM 120 CD ARG A 13 13.525 83.449 -30.719 1.00 30.45 A C
|
121 |
+
ATOM 121 NE ARG A 13 13.429 84.899 -30.810 1.00 42.18 A N
|
122 |
+
ATOM 122 CZ ARG A 13 12.766 85.668 -29.951 1.00 46.62 A C
|
123 |
+
ATOM 123 NH1 ARG A 13 12.138 85.130 -28.912 1.00 52.17 A N1+
|
124 |
+
ATOM 124 NH2 ARG A 13 12.700 86.979 -30.163 1.00 53.25 A N
|
125 |
+
ATOM 125 H ARG A 13 10.448 80.892 -31.898 1.00 0.00 A H
|
126 |
+
ATOM 126 HE ARG A 13 13.905 85.284 -31.580 1.00 0.00 A H
|
127 |
+
ATOM 127 HH11 ARG A 13 12.163 84.134 -28.780 1.00 0.00 A H
|
128 |
+
ATOM 128 HH12 ARG A 13 11.626 85.671 -28.239 1.00 0.00 A H
|
129 |
+
ATOM 129 HH21 ARG A 13 13.140 87.388 -30.967 1.00 0.00 A H
|
130 |
+
ATOM 130 HH22 ARG A 13 12.212 87.593 -29.540 1.00 0.00 A H
|
131 |
+
ATOM 131 N TYR A 14 12.069 78.299 -32.327 1.00 13.30 A N
|
132 |
+
ATOM 132 CA TYR A 14 12.402 76.890 -32.286 1.00 10.90 A C
|
133 |
+
ATOM 133 C TYR A 14 12.147 76.240 -33.647 1.00 13.50 A C
|
134 |
+
ATOM 134 O TYR A 14 12.987 75.533 -34.188 1.00 16.92 A O
|
135 |
+
ATOM 135 CB TYR A 14 11.538 76.227 -31.229 1.00 6.03 A C
|
136 |
+
ATOM 136 CG TYR A 14 11.674 74.726 -31.157 1.00 9.19 A C
|
137 |
+
ATOM 137 CD1 TYR A 14 12.809 74.151 -30.607 1.00 11.32 A C
|
138 |
+
ATOM 138 CD2 TYR A 14 10.662 73.885 -31.594 1.00 6.00 A C
|
139 |
+
ATOM 139 CE1 TYR A 14 12.937 72.787 -30.486 1.00 13.36 A C
|
140 |
+
ATOM 140 CE2 TYR A 14 10.787 72.508 -31.473 1.00 7.87 A C
|
141 |
+
ATOM 141 CZ TYR A 14 11.936 71.975 -30.912 1.00 7.44 A C
|
142 |
+
ATOM 142 OH TYR A 14 12.106 70.618 -30.754 1.00 8.52 A O
|
143 |
+
ATOM 143 H TYR A 14 11.336 78.619 -31.760 1.00 0.00 A H
|
144 |
+
ATOM 144 HH TYR A 14 11.279 70.202 -31.029 1.00 0.00 A H
|
145 |
+
ATOM 145 N VAL A 15 10.951 76.458 -34.176 1.00 17.30 A N
|
146 |
+
ATOM 146 CA VAL A 15 10.573 75.905 -35.460 1.00 16.50 A C
|
147 |
+
ATOM 147 C VAL A 15 11.522 76.437 -36.513 1.00 17.14 A C
|
148 |
+
ATOM 148 O VAL A 15 11.978 75.699 -37.366 1.00 20.10 A O
|
149 |
+
ATOM 149 CB VAL A 15 9.135 76.312 -35.818 1.00 14.14 A C
|
150 |
+
ATOM 150 CG1 VAL A 15 8.871 76.099 -37.278 1.00 14.20 A C
|
151 |
+
ATOM 151 CG2 VAL A 15 8.169 75.506 -35.018 1.00 11.49 A C
|
152 |
+
ATOM 152 H VAL A 15 10.306 76.993 -33.666 1.00 0.00 A H
|
153 |
+
ATOM 153 N ALA A 16 11.835 77.723 -36.422 1.00 15.98 A N
|
154 |
+
ATOM 154 CA ALA A 16 12.718 78.372 -37.381 1.00 16.15 A C
|
155 |
+
ATOM 155 C ALA A 16 14.137 77.859 -37.330 1.00 18.88 A C
|
156 |
+
ATOM 156 O ALA A 16 14.744 77.656 -38.380 1.00 26.26 A O
|
157 |
+
ATOM 157 CB ALA A 16 12.708 79.867 -37.179 1.00 17.59 A C
|
158 |
+
ATOM 158 H ALA A 16 11.451 78.264 -35.698 1.00 0.00 A H
|
159 |
+
ATOM 159 N ALA A 17 14.670 77.679 -36.119 1.00 12.81 A N
|
160 |
+
ATOM 160 CA ALA A 17 16.036 77.180 -35.919 1.00 12.41 A C
|
161 |
+
ATOM 161 C ALA A 17 16.144 75.741 -36.425 1.00 15.44 A C
|
162 |
+
ATOM 162 O ALA A 17 17.136 75.357 -37.021 1.00 19.19 A O
|
163 |
+
ATOM 163 CB ALA A 17 16.415 77.268 -34.457 1.00 9.24 A C
|
164 |
+
ATOM 164 H ALA A 17 14.132 77.888 -35.332 1.00 0.00 A H
|
165 |
+
ATOM 165 N LEU A 18 15.088 74.967 -36.223 1.00 17.31 A N
|
166 |
+
ATOM 166 CA LEU A 18 15.009 73.586 -36.680 1.00 15.47 A C
|
167 |
+
ATOM 167 C LEU A 18 15.056 73.590 -38.218 1.00 19.85 A C
|
168 |
+
ATOM 168 O LEU A 18 15.794 72.814 -38.837 1.00 20.17 A O
|
169 |
+
ATOM 169 CB LEU A 18 13.694 72.985 -36.174 1.00 15.33 A C
|
170 |
+
ATOM 170 CG LEU A 18 13.622 71.635 -35.445 1.00 15.11 A C
|
171 |
+
ATOM 171 CD1 LEU A 18 14.784 71.406 -34.532 1.00 9.98 A C
|
172 |
+
ATOM 172 CD2 LEU A 18 12.329 71.585 -34.666 1.00 11.37 A C
|
173 |
+
ATOM 173 H LEU A 18 14.330 75.333 -35.719 1.00 0.00 A H
|
174 |
+
ATOM 174 N ASN A 19 14.319 74.519 -38.822 1.00 19.25 A N
|
175 |
+
ATOM 175 CA ASN A 19 14.279 74.678 -40.282 1.00 18.67 A C
|
176 |
+
ATOM 176 C ASN A 19 15.583 75.183 -40.890 1.00 19.48 A C
|
177 |
+
ATOM 177 O ASN A 19 15.759 75.149 -42.112 1.00 26.00 A O
|
178 |
+
ATOM 178 CB ASN A 19 13.227 75.700 -40.684 1.00 16.40 A C
|
179 |
+
ATOM 179 CG ASN A 19 11.885 75.095 -40.902 1.00 19.81 A C
|
180 |
+
ATOM 180 ND2 ASN A 19 10.846 75.801 -40.448 1.00 19.07 A N
|
181 |
+
ATOM 181 OD1 ASN A 19 11.761 73.998 -41.471 1.00 21.76 A O
|
182 |
+
ATOM 182 H ASN A 19 13.762 75.104 -38.267 1.00 0.00 A H
|
183 |
+
ATOM 183 HD21 ASN A 19 11.004 76.649 -39.995 1.00 0.00 A H
|
184 |
+
ATOM 184 HD22 ASN A 19 9.959 75.414 -40.603 1.00 0.00 A H
|
185 |
+
ATOM 185 N ALA A 20 16.467 75.724 -40.070 1.00 15.92 A N
|
186 |
+
ATOM 186 CA ALA A 20 17.708 76.257 -40.599 1.00 13.83 A C
|
187 |
+
ATOM 187 C ALA A 20 18.952 75.465 -40.200 1.00 15.30 A C
|
188 |
+
ATOM 188 O ALA A 20 20.075 75.893 -40.484 1.00 20.93 A O
|
189 |
+
ATOM 189 CB ALA A 20 17.850 77.723 -40.180 1.00 9.03 A C
|
190 |
+
ATOM 190 H ALA A 20 16.279 75.791 -39.113 1.00 0.00 A H
|
191 |
+
ATOM 191 N GLY A 21 18.765 74.314 -39.559 1.00 14.60 A N
|
192 |
+
ATOM 192 CA GLY A 21 19.901 73.530 -39.112 1.00 6.97 A C
|
193 |
+
ATOM 193 C GLY A 21 20.786 74.355 -38.196 1.00 11.91 A C
|
194 |
+
ATOM 194 O GLY A 21 21.993 74.153 -38.146 1.00 19.27 A O
|
195 |
+
ATOM 195 H GLY A 21 17.855 73.991 -39.412 1.00 0.00 A H
|
196 |
+
ATOM 196 N ASP A 22 20.176 75.272 -37.455 1.00 10.80 A N
|
197 |
+
ATOM 197 CA ASP A 22 20.876 76.156 -36.540 1.00 13.90 A C
|
198 |
+
ATOM 198 C ASP A 22 20.938 75.487 -35.162 1.00 18.34 A C
|
199 |
+
ATOM 199 O ASP A 22 20.055 75.645 -34.319 1.00 21.73 A O
|
200 |
+
ATOM 200 CB ASP A 22 20.107 77.467 -36.460 1.00 15.62 A C
|
201 |
+
ATOM 201 CG ASP A 22 20.716 78.448 -35.481 1.00 21.13 A C
|
202 |
+
ATOM 202 OD1 ASP A 22 21.758 78.127 -34.874 1.00 24.96 A O
|
203 |
+
ATOM 203 OD2 ASP A 22 20.155 79.559 -35.333 1.00 22.13 A O1-
|
204 |
+
ATOM 204 H ASP A 22 19.204 75.373 -37.497 1.00 0.00 A H
|
205 |
+
ATOM 205 N LEU A 23 22.029 74.791 -34.912 1.00 16.13 A N
|
206 |
+
ATOM 206 CA LEU A 23 22.200 74.048 -33.687 1.00 12.88 A C
|
207 |
+
ATOM 207 C LEU A 23 22.297 74.892 -32.455 1.00 13.55 A C
|
208 |
+
ATOM 208 O LEU A 23 21.770 74.531 -31.414 1.00 16.43 A O
|
209 |
+
ATOM 209 CB LEU A 23 23.431 73.159 -33.812 1.00 22.08 A C
|
210 |
+
ATOM 210 CG LEU A 23 23.645 72.107 -32.746 1.00 22.16 A C
|
211 |
+
ATOM 211 CD1 LEU A 23 22.340 71.415 -32.440 1.00 23.03 A C
|
212 |
+
ATOM 212 CD2 LEU A 23 24.679 71.134 -33.248 1.00 22.78 A C
|
213 |
+
ATOM 213 H LEU A 23 22.746 74.826 -35.582 1.00 0.00 A H
|
214 |
+
ATOM 214 N ASP A 24 22.994 76.002 -32.553 1.00 14.90 A N
|
215 |
+
ATOM 215 CA ASP A 24 23.149 76.860 -31.407 1.00 19.66 A C
|
216 |
+
ATOM 216 C ASP A 24 21.920 77.692 -31.129 1.00 21.39 A C
|
217 |
+
ATOM 217 O ASP A 24 21.687 78.078 -29.993 1.00 24.19 A O
|
218 |
+
ATOM 218 CB ASP A 24 24.421 77.697 -31.539 1.00 21.55 A C
|
219 |
+
ATOM 219 CG ASP A 24 25.681 76.846 -31.380 1.00 26.43 A C
|
220 |
+
ATOM 220 OD1 ASP A 24 25.590 75.603 -31.526 1.00 33.14 A O
|
221 |
+
ATOM 221 OD2 ASP A 24 26.765 77.402 -31.111 1.00 34.14 A O1-
|
222 |
+
ATOM 222 H ASP A 24 23.414 76.281 -33.395 1.00 0.00 A H
|
223 |
+
ATOM 223 N GLY A 25 21.113 77.936 -32.157 1.00 24.38 A N
|
224 |
+
ATOM 224 CA GLY A 25 19.888 78.690 -31.980 1.00 19.16 A C
|
225 |
+
ATOM 225 C GLY A 25 18.894 77.848 -31.203 1.00 17.24 A C
|
226 |
+
ATOM 226 O GLY A 25 18.222 78.340 -30.311 1.00 19.60 A O
|
227 |
+
ATOM 227 H GLY A 25 21.353 77.634 -33.059 1.00 0.00 A H
|
228 |
+
ATOM 228 N ILE A 26 18.821 76.566 -31.518 1.00 10.29 A N
|
229 |
+
ATOM 229 CA ILE A 26 17.930 75.669 -30.819 1.00 12.55 A C
|
230 |
+
ATOM 230 C ILE A 26 18.297 75.585 -29.339 1.00 15.29 A C
|
231 |
+
ATOM 231 O ILE A 26 17.474 75.828 -28.461 1.00 17.57 A O
|
232 |
+
ATOM 232 CB ILE A 26 18.004 74.262 -31.406 1.00 9.32 A C
|
233 |
+
ATOM 233 CG1 ILE A 26 17.508 74.273 -32.836 1.00 8.47 A C
|
234 |
+
ATOM 234 CG2 ILE A 26 17.156 73.321 -30.597 1.00 13.37 A C
|
235 |
+
ATOM 235 CD1 ILE A 26 17.655 72.942 -33.496 1.00 12.60 A C
|
236 |
+
ATOM 236 H ILE A 26 19.377 76.251 -32.269 1.00 0.00 A H
|
237 |
+
ATOM 237 N VAL A 27 19.550 75.250 -29.072 1.00 17.20 A N
|
238 |
+
ATOM 238 CA VAL A 27 20.046 75.090 -27.710 1.00 14.33 A C
|
239 |
+
ATOM 239 C VAL A 27 20.013 76.328 -26.818 1.00 13.10 A C
|
240 |
+
ATOM 240 O VAL A 27 19.883 76.202 -25.606 1.00 18.39 A O
|
241 |
+
ATOM 241 CB VAL A 27 21.461 74.464 -27.718 1.00 12.93 A C
|
242 |
+
ATOM 242 CG1 VAL A 27 21.924 74.194 -26.304 1.00 17.80 A C
|
243 |
+
ATOM 243 CG2 VAL A 27 21.436 73.131 -28.520 1.00 14.81 A C
|
244 |
+
ATOM 244 H VAL A 27 20.169 75.108 -29.823 1.00 0.00 A H
|
245 |
+
ATOM 245 N ALA A 28 20.075 77.515 -27.406 1.00 13.76 A N
|
246 |
+
ATOM 246 CA ALA A 28 20.052 78.764 -26.637 1.00 12.16 A C
|
247 |
+
ATOM 247 C ALA A 28 18.720 78.974 -25.931 1.00 16.53 A C
|
248 |
+
ATOM 248 O ALA A 28 18.628 79.725 -24.963 1.00 18.18 A O
|
249 |
+
ATOM 249 CB ALA A 28 20.344 79.958 -27.556 1.00 10.61 A C
|
250 |
+
ATOM 250 H ALA A 28 20.146 77.563 -28.383 1.00 0.00 A H
|
251 |
+
ATOM 251 N LEU A 29 17.693 78.306 -26.443 1.00 16.50 A N
|
252 |
+
ATOM 252 CA LEU A 29 16.332 78.376 -25.940 1.00 12.57 A C
|
253 |
+
ATOM 253 C LEU A 29 16.119 77.660 -24.611 1.00 13.08 A C
|
254 |
+
ATOM 254 O LEU A 29 15.228 78.028 -23.840 1.00 18.83 A O
|
255 |
+
ATOM 255 CB LEU A 29 15.392 77.758 -26.981 1.00 10.87 A C
|
256 |
+
ATOM 256 CG LEU A 29 14.615 78.653 -27.953 1.00 16.58 A C
|
257 |
+
ATOM 257 CD1 LEU A 29 15.336 79.965 -28.233 1.00 17.44 A C
|
258 |
+
ATOM 258 CD2 LEU A 29 14.320 77.880 -29.244 1.00 12.46 A C
|
259 |
+
ATOM 259 H LEU A 29 17.855 77.733 -27.220 1.00 0.00 A H
|
260 |
+
ATOM 260 N PHE A 30 16.905 76.618 -24.363 1.00 15.03 A N
|
261 |
+
ATOM 261 CA PHE A 30 16.769 75.816 -23.153 1.00 13.36 A C
|
262 |
+
ATOM 262 C PHE A 30 17.547 76.305 -21.927 1.00 18.06 A C
|
263 |
+
ATOM 263 O PHE A 30 18.538 77.027 -22.044 1.00 20.40 A O
|
264 |
+
ATOM 264 CB PHE A 30 17.128 74.354 -23.463 1.00 13.54 A C
|
265 |
+
ATOM 265 CG PHE A 30 16.189 73.679 -24.468 1.00 13.97 A C
|
266 |
+
ATOM 266 CD1 PHE A 30 16.391 73.811 -25.833 1.00 12.41 A C
|
267 |
+
ATOM 267 CD2 PHE A 30 15.121 72.890 -24.032 1.00 11.09 A C
|
268 |
+
ATOM 268 CE1 PHE A 30 15.563 73.176 -26.733 1.00 13.85 A C
|
269 |
+
ATOM 269 CE2 PHE A 30 14.289 72.251 -24.930 1.00 8.95 A C
|
270 |
+
ATOM 270 CZ PHE A 30 14.509 72.392 -26.278 1.00 13.73 A C
|
271 |
+
ATOM 271 H PHE A 30 17.631 76.398 -24.981 1.00 0.00 A H
|
272 |
+
ATOM 272 N ALA A 31 17.020 75.999 -20.745 1.00 18.68 A N
|
273 |
+
ATOM 273 CA ALA A 31 17.674 76.353 -19.487 1.00 20.42 A C
|
274 |
+
ATOM 274 C ALA A 31 18.795 75.322 -19.367 1.00 20.67 A C
|
275 |
+
ATOM 275 O ALA A 31 18.672 74.215 -19.864 1.00 20.86 A O
|
276 |
+
ATOM 276 CB ALA A 31 16.705 76.222 -18.325 1.00 13.63 A C
|
277 |
+
ATOM 277 H ALA A 31 16.197 75.481 -20.726 1.00 0.00 A H
|
278 |
+
ATOM 278 N ASP A 32 19.868 75.649 -18.680 1.00 25.24 A N
|
279 |
+
ATOM 279 CA ASP A 32 20.975 74.718 -18.584 1.00 29.66 A C
|
280 |
+
ATOM 280 C ASP A 32 20.645 73.374 -17.954 1.00 26.34 A C
|
281 |
+
ATOM 281 O ASP A 32 21.213 72.337 -18.317 1.00 27.92 A O
|
282 |
+
ATOM 282 CB ASP A 32 22.141 75.424 -17.903 1.00 41.07 A C
|
283 |
+
ATOM 283 CG ASP A 32 22.543 76.697 -18.649 1.00 56.13 A C
|
284 |
+
ATOM 284 OD1 ASP A 32 21.696 77.629 -18.750 1.00 65.00 A O
|
285 |
+
ATOM 285 OD2 ASP A 32 23.672 76.741 -19.194 1.00 63.02 A O1-
|
286 |
+
ATOM 286 H ASP A 32 19.988 76.501 -18.211 1.00 0.00 A H
|
287 |
+
ATOM 287 N ASP A 33 19.654 73.378 -17.082 1.00 24.28 A N
|
288 |
+
ATOM 288 CA ASP A 33 19.230 72.174 -16.393 1.00 25.07 A C
|
289 |
+
ATOM 289 C ASP A 33 17.986 71.549 -17.017 1.00 25.39 A C
|
290 |
+
ATOM 290 O ASP A 33 17.418 70.592 -16.464 1.00 27.75 A O
|
291 |
+
ATOM 291 CB ASP A 33 18.975 72.494 -14.915 1.00 34.12 A C
|
292 |
+
ATOM 292 CG ASP A 33 18.076 73.713 -14.717 1.00 38.66 A C
|
293 |
+
ATOM 293 OD1 ASP A 33 18.398 74.816 -15.213 1.00 41.05 A O
|
294 |
+
ATOM 294 OD2 ASP A 33 17.043 73.574 -14.037 1.00 46.96 A O1-
|
295 |
+
ATOM 295 H ASP A 33 19.193 74.214 -16.864 1.00 0.00 A H
|
296 |
+
ATOM 296 N ALA A 34 17.600 72.044 -18.190 1.00 16.51 A N
|
297 |
+
ATOM 297 CA ALA A 34 16.417 71.557 -18.898 1.00 16.20 A C
|
298 |
+
ATOM 298 C ALA A 34 16.446 70.087 -19.302 1.00 18.11 A C
|
299 |
+
ATOM 299 O ALA A 34 17.509 69.450 -19.332 1.00 22.20 A O
|
300 |
+
ATOM 300 CB ALA A 34 16.163 72.412 -20.117 1.00 13.10 A C
|
301 |
+
ATOM 301 H ALA A 34 18.140 72.752 -18.594 1.00 0.00 A H
|
302 |
+
ATOM 302 N THR A 35 15.267 69.534 -19.566 1.00 17.22 A N
|
303 |
+
ATOM 303 CA THR A 35 15.164 68.143 -19.997 1.00 19.54 A C
|
304 |
+
ATOM 304 C THR A 35 14.404 68.107 -21.314 1.00 20.23 A C
|
305 |
+
ATOM 305 O THR A 35 13.650 69.036 -21.617 1.00 19.48 A O
|
306 |
+
ATOM 306 CB THR A 35 14.393 67.270 -18.995 1.00 19.13 A C
|
307 |
+
ATOM 307 CG2 THR A 35 15.303 66.727 -17.930 1.00 22.28 A C
|
308 |
+
ATOM 308 OG1 THR A 35 13.364 68.047 -18.385 1.00 33.43 A O
|
309 |
+
ATOM 309 H THR A 35 14.440 70.059 -19.505 1.00 0.00 A H
|
310 |
+
ATOM 310 HG1 THR A 35 13.777 68.790 -17.927 1.00 0.00 A H
|
311 |
+
ATOM 311 N VAL A 36 14.589 67.023 -22.066 1.00 17.96 A N
|
312 |
+
ATOM 312 CA VAL A 36 13.938 66.785 -23.362 1.00 13.42 A C
|
313 |
+
ATOM 313 C VAL A 36 13.458 65.335 -23.329 1.00 17.93 A C
|
314 |
+
ATOM 314 O VAL A 36 14.226 64.446 -22.964 1.00 21.00 A O
|
315 |
+
ATOM 315 CB VAL A 36 14.960 66.940 -24.551 1.00 15.38 A C
|
316 |
+
ATOM 316 CG1 VAL A 36 14.490 66.201 -25.779 1.00 9.13 A C
|
317 |
+
ATOM 317 CG2 VAL A 36 15.164 68.416 -24.891 1.00 6.36 A C
|
318 |
+
ATOM 318 H VAL A 36 15.199 66.329 -21.737 1.00 0.00 A H
|
319 |
+
ATOM 319 N GLU A 37 12.180 65.098 -23.604 1.00 17.48 A N
|
320 |
+
ATOM 320 CA GLU A 37 11.667 63.739 -23.638 1.00 16.27 A C
|
321 |
+
ATOM 321 C GLU A 37 11.080 63.616 -25.024 1.00 14.95 A C
|
322 |
+
ATOM 322 O GLU A 37 9.975 64.057 -25.277 1.00 14.81 A O
|
323 |
+
ATOM 323 CB GLU A 37 10.601 63.529 -22.583 1.00 21.55 A C
|
324 |
+
ATOM 324 CG GLU A 37 10.328 62.078 -22.314 1.00 25.91 A C
|
325 |
+
ATOM 325 CD GLU A 37 9.276 61.879 -21.261 1.00 27.26 A C
|
326 |
+
ATOM 326 OE1 GLU A 37 9.249 62.659 -20.294 1.00 36.29 A O
|
327 |
+
ATOM 327 OE2 GLU A 37 8.458 60.950 -21.407 1.00 32.31 A O1-
|
328 |
+
ATOM 328 H GLU A 37 11.555 65.842 -23.761 1.00 0.00 A H
|
329 |
+
ATOM 329 N ASN A 38 11.870 63.089 -25.948 1.00 19.33 A N
|
330 |
+
ATOM 330 CA ASN A 38 11.453 62.956 -27.337 1.00 18.26 A C
|
331 |
+
ATOM 331 C ASN A 38 11.866 61.577 -27.816 1.00 18.79 A C
|
332 |
+
ATOM 332 O ASN A 38 13.049 61.275 -27.911 1.00 23.11 A O
|
333 |
+
ATOM 333 CB ASN A 38 12.159 64.031 -28.170 1.00 15.03 A C
|
334 |
+
ATOM 334 CG ASN A 38 11.539 64.233 -29.541 1.00 13.27 A C
|
335 |
+
ATOM 335 ND2 ASN A 38 11.759 65.409 -30.106 1.00 20.00 A N
|
336 |
+
ATOM 336 OD1 ASN A 38 10.881 63.345 -30.090 1.00 14.68 A O
|
337 |
+
ATOM 337 H ASN A 38 12.755 62.739 -25.703 1.00 0.00 A H
|
338 |
+
ATOM 338 HD21 ASN A 38 12.289 66.102 -29.664 1.00 0.00 A H
|
339 |
+
ATOM 339 HD22 ASN A 38 11.345 65.573 -30.975 1.00 0.00 A H
|
340 |
+
ATOM 340 N PRO A 39 10.893 60.686 -28.029 1.00 19.61 A N
|
341 |
+
ATOM 341 CA PRO A 39 9.463 60.934 -27.834 1.00 20.71 A C
|
342 |
+
ATOM 342 C PRO A 39 9.023 60.632 -26.403 1.00 17.56 A C
|
343 |
+
ATOM 343 O PRO A 39 9.786 60.056 -25.612 1.00 15.92 A O
|
344 |
+
ATOM 344 CB PRO A 39 8.835 59.932 -28.790 1.00 24.55 A C
|
345 |
+
ATOM 345 CG PRO A 39 9.747 58.741 -28.617 1.00 19.39 A C
|
346 |
+
ATOM 346 CD PRO A 39 11.114 59.384 -28.689 1.00 20.94 A C
|
347 |
+
ATOM 347 N VAL A 40 7.792 61.022 -26.084 1.00 17.69 A N
|
348 |
+
ATOM 348 CA VAL A 40 7.211 60.751 -24.770 1.00 20.37 A C
|
349 |
+
ATOM 349 C VAL A 40 7.255 59.234 -24.574 1.00 21.71 A C
|
350 |
+
ATOM 350 O VAL A 40 6.793 58.456 -25.421 1.00 24.84 A O
|
351 |
+
ATOM 351 CB VAL A 40 5.745 61.274 -24.666 1.00 13.69 A C
|
352 |
+
ATOM 352 CG1 VAL A 40 4.933 60.444 -23.697 1.00 21.73 A C
|
353 |
+
ATOM 353 CG2 VAL A 40 5.755 62.678 -24.193 1.00 13.64 A C
|
354 |
+
ATOM 354 H VAL A 40 7.263 61.507 -26.755 1.00 0.00 A H
|
355 |
+
ATOM 355 N GLY A 41 7.833 58.831 -23.452 1.00 25.12 A N
|
356 |
+
ATOM 356 CA GLY A 41 7.974 57.423 -23.140 1.00 25.44 A C
|
357 |
+
ATOM 357 C GLY A 41 9.425 56.959 -23.121 1.00 23.88 A C
|
358 |
+
ATOM 358 O GLY A 41 9.692 55.840 -22.692 1.00 27.74 A O
|
359 |
+
ATOM 359 H GLY A 41 8.186 59.492 -22.810 1.00 0.00 A H
|
360 |
+
ATOM 360 N SER A 42 10.348 57.782 -23.626 1.00 26.93 A N
|
361 |
+
ATOM 361 CA SER A 42 11.775 57.450 -23.641 1.00 27.71 A C
|
362 |
+
ATOM 362 C SER A 42 12.519 58.104 -22.456 1.00 28.62 A C
|
363 |
+
ATOM 363 O SER A 42 11.940 58.880 -21.683 1.00 28.06 A O
|
364 |
+
ATOM 364 CB SER A 42 12.403 57.858 -24.979 1.00 28.05 A C
|
365 |
+
ATOM 365 OG SER A 42 12.277 59.247 -25.211 1.00 26.25 A O
|
366 |
+
ATOM 366 H SER A 42 10.065 58.650 -23.975 1.00 0.00 A H
|
367 |
+
ATOM 367 HG SER A 42 11.405 59.561 -24.962 1.00 0.00 A H
|
368 |
+
ATOM 368 N GLU A 43 13.798 57.785 -22.304 1.00 28.59 A N
|
369 |
+
ATOM 369 CA GLU A 43 14.583 58.337 -21.210 1.00 30.92 A C
|
370 |
+
ATOM 370 C GLU A 43 14.857 59.797 -21.502 1.00 28.51 A C
|
371 |
+
ATOM 371 O GLU A 43 15.324 60.152 -22.588 1.00 30.42 A O
|
372 |
+
ATOM 372 CB GLU A 43 15.922 57.600 -21.045 1.00 44.55 A C
|
373 |
+
ATOM 373 CG GLU A 43 16.019 56.223 -21.722 1.00 62.47 A C
|
374 |
+
ATOM 374 CD GLU A 43 16.565 56.284 -23.155 1.00 70.64 A C
|
375 |
+
ATOM 375 OE1 GLU A 43 15.895 56.851 -24.052 1.00 75.18 A O
|
376 |
+
ATOM 376 OE2 GLU A 43 17.675 55.758 -23.385 1.00 74.27 A O1-
|
377 |
+
ATOM 377 H GLU A 43 14.229 57.192 -22.953 1.00 0.00 A H
|
378 |
+
ATOM 378 N PRO A 44 14.533 60.675 -20.552 1.00 25.20 A N
|
379 |
+
ATOM 379 CA PRO A 44 14.754 62.113 -20.709 1.00 25.10 A C
|
380 |
+
ATOM 380 C PRO A 44 16.243 62.473 -20.872 1.00 26.61 A C
|
381 |
+
ATOM 381 O PRO A 44 17.105 61.850 -20.267 1.00 29.02 A O
|
382 |
+
ATOM 382 CB PRO A 44 14.195 62.679 -19.395 1.00 19.62 A C
|
383 |
+
ATOM 383 CG PRO A 44 13.110 61.749 -19.058 1.00 21.92 A C
|
384 |
+
ATOM 384 CD PRO A 44 13.750 60.402 -19.335 1.00 18.83 A C
|
385 |
+
ATOM 385 N ARG A 45 16.528 63.439 -21.742 1.00 27.43 A N
|
386 |
+
ATOM 386 CA ARG A 45 17.875 63.933 -21.964 1.00 25.28 A C
|
387 |
+
ATOM 387 C ARG A 45 17.876 65.090 -20.981 1.00 22.66 A C
|
388 |
+
ATOM 388 O ARG A 45 16.906 65.850 -20.926 1.00 25.12 A O
|
389 |
+
ATOM 389 CB ARG A 45 18.028 64.488 -23.386 1.00 32.63 A C
|
390 |
+
ATOM 390 CG ARG A 45 17.525 63.562 -24.488 1.00 42.81 A C
|
391 |
+
ATOM 391 CD ARG A 45 18.355 62.295 -24.560 1.00 50.48 A C
|
392 |
+
ATOM 392 NE ARG A 45 17.537 61.091 -24.696 1.00 57.00 A N
|
393 |
+
ATOM 393 CZ ARG A 45 17.594 60.248 -25.728 1.00 60.21 A C
|
394 |
+
ATOM 394 NH1 ARG A 45 18.433 60.483 -26.743 1.00 56.34 A N1+
|
395 |
+
ATOM 395 NH2 ARG A 45 16.865 59.127 -25.706 1.00 64.07 A N
|
396 |
+
ATOM 396 H ARG A 45 15.796 63.843 -22.236 1.00 0.00 A H
|
397 |
+
ATOM 397 HE ARG A 45 16.892 60.897 -23.964 1.00 0.00 A H
|
398 |
+
ATOM 398 HH11 ARG A 45 19.032 61.291 -26.729 1.00 0.00 A H
|
399 |
+
ATOM 399 HH12 ARG A 45 18.503 59.869 -27.529 1.00 0.00 A H
|
400 |
+
ATOM 400 HH21 ARG A 45 16.294 58.876 -24.902 1.00 0.00 A H
|
401 |
+
ATOM 401 HH22 ARG A 45 16.850 58.425 -26.417 1.00 0.00 A H
|
402 |
+
ATOM 402 N SER A 46 18.932 65.219 -20.187 1.00 21.54 A N
|
403 |
+
ATOM 403 CA SER A 46 19.001 66.291 -19.192 1.00 19.49 A C
|
404 |
+
ATOM 404 C SER A 46 20.289 67.084 -19.323 1.00 15.15 A C
|
405 |
+
ATOM 405 O SER A 46 21.369 66.505 -19.426 1.00 21.08 A O
|
406 |
+
ATOM 406 CB SER A 46 18.885 65.686 -17.785 1.00 22.78 A C
|
407 |
+
ATOM 407 OG SER A 46 18.817 66.685 -16.770 1.00 30.92 A O
|
408 |
+
ATOM 408 H SER A 46 19.691 64.606 -20.248 1.00 0.00 A H
|
409 |
+
ATOM 409 HG SER A 46 19.658 67.169 -16.829 1.00 0.00 A H
|
410 |
+
ATOM 410 N GLY A 47 20.175 68.403 -19.383 1.00 9.93 A N
|
411 |
+
ATOM 411 CA GLY A 47 21.360 69.224 -19.488 1.00 15.35 A C
|
412 |
+
ATOM 412 C GLY A 47 21.740 69.608 -20.899 1.00 20.79 A C
|
413 |
+
ATOM 413 O GLY A 47 21.493 68.864 -21.845 1.00 22.57 A O
|
414 |
+
ATOM 414 H GLY A 47 19.297 68.836 -19.380 1.00 0.00 A H
|
415 |
+
ATOM 415 N THR A 48 22.368 70.774 -21.013 1.00 22.30 A N
|
416 |
+
ATOM 416 CA THR A 48 22.815 71.356 -22.272 1.00 27.44 A C
|
417 |
+
ATOM 417 C THR A 48 23.584 70.425 -23.210 1.00 26.51 A C
|
418 |
+
ATOM 418 O THR A 48 23.317 70.391 -24.411 1.00 28.00 A O
|
419 |
+
ATOM 419 CB THR A 48 23.674 72.587 -21.977 1.00 29.56 A C
|
420 |
+
ATOM 420 CG2 THR A 48 23.921 73.382 -23.235 1.00 36.98 A C
|
421 |
+
ATOM 421 OG1 THR A 48 22.983 73.417 -21.035 1.00 38.91 A O
|
422 |
+
ATOM 422 H THR A 48 22.518 71.316 -20.206 1.00 0.00 A H
|
423 |
+
ATOM 423 HG1 THR A 48 23.493 74.188 -20.725 1.00 0.00 A H
|
424 |
+
ATOM 424 N ALA A 49 24.546 69.686 -22.670 1.00 26.42 A N
|
425 |
+
ATOM 425 CA ALA A 49 25.350 68.768 -23.472 1.00 25.21 A C
|
426 |
+
ATOM 426 C ALA A 49 24.525 67.668 -24.103 1.00 22.40 A C
|
427 |
+
ATOM 427 O ALA A 49 24.779 67.304 -25.245 1.00 24.67 A O
|
428 |
+
ATOM 428 CB ALA A 49 26.454 68.160 -22.642 1.00 24.80 A C
|
429 |
+
ATOM 429 H ALA A 49 24.722 69.753 -21.705 1.00 0.00 A H
|
430 |
+
ATOM 430 N ALA A 50 23.552 67.136 -23.361 1.00 18.24 A N
|
431 |
+
ATOM 431 CA ALA A 50 22.692 66.059 -23.862 1.00 16.13 A C
|
432 |
+
ATOM 432 C ALA A 50 21.644 66.566 -24.832 1.00 15.85 A C
|
433 |
+
ATOM 433 O ALA A 50 21.205 65.829 -25.709 1.00 21.53 A O
|
434 |
+
ATOM 434 CB ALA A 50 22.018 65.338 -22.721 1.00 18.85 A C
|
435 |
+
ATOM 435 H ALA A 50 23.408 67.471 -22.450 1.00 0.00 A H
|
436 |
+
ATOM 436 N ILE A 51 21.209 67.807 -24.641 1.00 14.98 A N
|
437 |
+
ATOM 437 CA ILE A 51 20.213 68.425 -25.517 1.00 16.38 A C
|
438 |
+
ATOM 438 C ILE A 51 20.848 68.716 -26.892 1.00 17.29 A C
|
439 |
+
ATOM 439 O ILE A 51 20.271 68.383 -27.932 1.00 17.52 A O
|
440 |
+
ATOM 440 CB ILE A 51 19.589 69.701 -24.851 1.00 17.04 A C
|
441 |
+
ATOM 441 CG1 ILE A 51 18.731 69.286 -23.637 1.00 18.33 A C
|
442 |
+
ATOM 442 CG2 ILE A 51 18.750 70.483 -25.859 1.00 17.15 A C
|
443 |
+
ATOM 443 CD1 ILE A 51 18.279 70.417 -22.747 1.00 14.09 A C
|
444 |
+
ATOM 444 H ILE A 51 21.558 68.311 -23.875 1.00 0.00 A H
|
445 |
+
ATOM 445 N ARG A 52 22.065 69.256 -26.885 1.00 18.63 A N
|
446 |
+
ATOM 446 CA ARG A 52 22.802 69.549 -28.119 1.00 19.29 A C
|
447 |
+
ATOM 447 C ARG A 52 23.059 68.258 -28.919 1.00 19.10 A C
|
448 |
+
ATOM 448 O ARG A 52 22.758 68.180 -30.114 1.00 24.51 A O
|
449 |
+
ATOM 449 CB ARG A 52 24.129 70.235 -27.788 1.00 21.72 A C
|
450 |
+
ATOM 450 CG ARG A 52 24.843 70.737 -29.003 1.00 24.00 A C
|
451 |
+
ATOM 451 CD ARG A 52 26.227 71.218 -28.716 1.00 21.97 A C
|
452 |
+
ATOM 452 NE ARG A 52 26.898 71.544 -29.971 1.00 29.26 A N
|
453 |
+
ATOM 453 CZ ARG A 52 26.958 72.767 -30.486 1.00 30.33 A C
|
454 |
+
ATOM 454 NH1 ARG A 52 26.384 73.771 -29.843 1.00 28.11 A N1+
|
455 |
+
ATOM 455 NH2 ARG A 52 27.608 72.995 -31.626 1.00 22.98 A N
|
456 |
+
ATOM 456 H ARG A 52 22.480 69.475 -26.023 1.00 0.00 A H
|
457 |
+
ATOM 457 HE ARG A 52 27.290 70.783 -30.464 1.00 0.00 A H
|
458 |
+
ATOM 458 HH11 ARG A 52 25.877 73.672 -28.996 1.00 0.00 A H
|
459 |
+
ATOM 459 HH12 ARG A 52 26.387 74.709 -30.252 1.00 0.00 A H
|
460 |
+
ATOM 460 HH21 ARG A 52 28.079 72.275 -32.140 1.00 0.00 A H
|
461 |
+
ATOM 461 HH22 ARG A 52 27.637 73.938 -31.999 1.00 0.00 A H
|
462 |
+
ATOM 462 N GLU A 53 23.544 67.223 -28.242 1.00 19.57 A N
|
463 |
+
ATOM 463 CA GLU A 53 23.818 65.938 -28.880 1.00 22.65 A C
|
464 |
+
ATOM 464 C GLU A 53 22.549 65.369 -29.537 1.00 21.36 A C
|
465 |
+
ATOM 465 O GLU A 53 22.580 64.901 -30.692 1.00 20.72 A O
|
466 |
+
ATOM 466 CB GLU A 53 24.465 64.968 -27.854 1.00 29.62 A C
|
467 |
+
ATOM 467 CG GLU A 53 23.751 63.618 -27.551 1.00 43.16 A C
|
468 |
+
ATOM 468 CD GLU A 53 24.079 62.493 -28.534 1.00 48.92 A C
|
469 |
+
ATOM 469 OE1 GLU A 53 25.097 62.583 -29.251 1.00 49.44 A O
|
470 |
+
ATOM 470 OE2 GLU A 53 23.311 61.508 -28.602 1.00 48.51 A O1-
|
471 |
+
ATOM 471 H GLU A 53 23.760 67.315 -27.289 1.00 0.00 A H
|
472 |
+
ATOM 472 N PHE A 54 21.426 65.473 -28.831 1.00 15.74 A N
|
473 |
+
ATOM 473 CA PHE A 54 20.163 64.981 -29.346 1.00 14.67 A C
|
474 |
+
ATOM 474 C PHE A 54 19.739 65.757 -30.596 1.00 14.61 A C
|
475 |
+
ATOM 475 O PHE A 54 19.359 65.181 -31.611 1.00 16.88 A O
|
476 |
+
ATOM 476 CB PHE A 54 19.076 65.093 -28.284 1.00 16.51 A C
|
477 |
+
ATOM 477 CG PHE A 54 17.700 64.832 -28.818 1.00 15.40 A C
|
478 |
+
ATOM 478 CD1 PHE A 54 17.260 63.526 -29.028 1.00 19.05 A C
|
479 |
+
ATOM 479 CD2 PHE A 54 16.869 65.889 -29.176 1.00 14.87 A C
|
480 |
+
ATOM 480 CE1 PHE A 54 16.018 63.272 -29.591 1.00 20.66 A C
|
481 |
+
ATOM 481 CE2 PHE A 54 15.625 65.655 -29.741 1.00 22.45 A C
|
482 |
+
ATOM 482 CZ PHE A 54 15.196 64.334 -29.952 1.00 21.58 A C
|
483 |
+
ATOM 483 H PHE A 54 21.466 65.860 -27.931 1.00 0.00 A H
|
484 |
+
ATOM 484 N TYR A 55 19.812 67.073 -30.538 1.00 15.57 A N
|
485 |
+
ATOM 485 CA TYR A 55 19.419 67.835 -31.699 1.00 15.97 A C
|
486 |
+
ATOM 486 C TYR A 55 20.386 67.726 -32.851 1.00 15.54 A C
|
487 |
+
ATOM 487 O TYR A 55 19.967 67.836 -34.002 1.00 17.97 A O
|
488 |
+
ATOM 488 CB TYR A 55 19.126 69.271 -31.335 1.00 16.17 A C
|
489 |
+
ATOM 489 CG TYR A 55 17.743 69.413 -30.737 1.00 19.24 A C
|
490 |
+
ATOM 490 CD1 TYR A 55 16.595 69.253 -31.523 1.00 16.59 A C
|
491 |
+
ATOM 491 CD2 TYR A 55 17.580 69.717 -29.384 1.00 21.43 A C
|
492 |
+
ATOM 492 CE1 TYR A 55 15.323 69.397 -30.969 1.00 17.69 A C
|
493 |
+
ATOM 493 CE2 TYR A 55 16.319 69.862 -28.830 1.00 24.02 A C
|
494 |
+
ATOM 494 CZ TYR A 55 15.200 69.702 -29.623 1.00 18.58 A C
|
495 |
+
ATOM 495 OH TYR A 55 13.971 69.854 -29.031 1.00 16.62 A O
|
496 |
+
ATOM 496 H TYR A 55 20.122 67.516 -29.720 1.00 0.00 A H
|
497 |
+
ATOM 497 HH TYR A 55 13.258 69.756 -29.685 1.00 0.00 A H
|
498 |
+
ATOM 498 N ALA A 56 21.664 67.470 -32.552 1.00 13.11 A N
|
499 |
+
ATOM 499 CA ALA A 56 22.676 67.316 -33.597 1.00 13.94 A C
|
500 |
+
ATOM 500 C ALA A 56 22.334 66.072 -34.401 1.00 14.20 A C
|
501 |
+
ATOM 501 O ALA A 56 22.427 66.070 -35.624 1.00 19.71 A O
|
502 |
+
ATOM 502 CB ALA A 56 24.075 67.195 -32.993 1.00 17.34 A C
|
503 |
+
ATOM 503 H ALA A 56 21.936 67.397 -31.617 1.00 0.00 A H
|
504 |
+
ATOM 504 N ASN A 57 21.911 65.022 -33.706 1.00 18.33 A N
|
505 |
+
ATOM 505 CA ASN A 57 21.521 63.772 -34.353 1.00 18.10 A C
|
506 |
+
ATOM 506 C ASN A 57 20.281 63.960 -35.190 1.00 22.69 A C
|
507 |
+
ATOM 507 O ASN A 57 20.120 63.306 -36.214 1.00 25.44 A O
|
508 |
+
ATOM 508 CB ASN A 57 21.177 62.712 -33.319 1.00 21.21 A C
|
509 |
+
ATOM 509 CG ASN A 57 22.358 61.920 -32.885 1.00 29.38 A C
|
510 |
+
ATOM 510 ND2 ASN A 57 22.662 61.977 -31.588 1.00 38.74 A N
|
511 |
+
ATOM 511 OD1 ASN A 57 23.002 61.241 -33.697 1.00 37.38 A O
|
512 |
+
ATOM 512 H ASN A 57 21.873 65.098 -32.727 1.00 0.00 A H
|
513 |
+
ATOM 513 HD21 ASN A 57 22.139 62.582 -31.013 1.00 0.00 A H
|
514 |
+
ATOM 514 HD22 ASN A 57 23.344 61.421 -31.155 1.00 0.00 A H
|
515 |
+
ATOM 515 N SER A 58 19.359 64.777 -34.691 1.00 20.80 A N
|
516 |
+
ATOM 516 CA SER A 58 18.097 65.041 -35.371 1.00 17.38 A C
|
517 |
+
ATOM 517 C SER A 58 18.311 65.791 -36.668 1.00 19.47 A C
|
518 |
+
ATOM 518 O SER A 58 17.671 65.508 -37.674 1.00 21.88 A O
|
519 |
+
ATOM 519 CB SER A 58 17.151 65.827 -34.449 1.00 17.00 A C
|
520 |
+
ATOM 520 OG SER A 58 16.738 65.021 -33.356 1.00 20.95 A O
|
521 |
+
ATOM 521 H SER A 58 19.507 65.199 -33.818 1.00 0.00 A H
|
522 |
+
ATOM 522 HG SER A 58 16.358 64.202 -33.683 1.00 0.00 A H
|
523 |
+
ATOM 523 N LEU A 59 19.245 66.726 -36.652 1.00 18.71 A N
|
524 |
+
ATOM 524 CA LEU A 59 19.536 67.509 -37.835 1.00 16.24 A C
|
525 |
+
ATOM 525 C LEU A 59 20.444 66.809 -38.853 1.00 13.82 A C
|
526 |
+
ATOM 526 O LEU A 59 20.937 67.438 -39.788 1.00 14.80 A O
|
527 |
+
ATOM 527 CB LEU A 59 20.108 68.852 -37.425 1.00 14.05 A C
|
528 |
+
ATOM 528 CG LEU A 59 19.145 69.698 -36.596 1.00 12.88 A C
|
529 |
+
ATOM 529 CD1 LEU A 59 19.927 70.882 -36.106 1.00 17.02 A C
|
530 |
+
ATOM 530 CD2 LEU A 59 17.959 70.167 -37.409 1.00 11.78 A C
|
531 |
+
ATOM 531 H LEU A 59 19.753 66.901 -35.830 1.00 0.00 A H
|
532 |
+
ATOM 532 N LYS A 60 20.658 65.514 -38.657 1.00 12.12 A N
|
533 |
+
ATOM 533 CA LYS A 60 21.456 64.686 -39.551 1.00 13.05 A C
|
534 |
+
ATOM 534 C LYS A 60 20.789 64.780 -40.927 1.00 16.30 A C
|
535 |
+
ATOM 535 O LYS A 60 21.464 64.851 -41.956 1.00 17.91 A O
|
536 |
+
ATOM 536 CB LYS A 60 21.391 63.235 -39.078 1.00 17.73 A C
|
537 |
+
ATOM 537 CG LYS A 60 22.692 62.486 -39.059 1.00 23.84 A C
|
538 |
+
ATOM 538 CD LYS A 60 23.522 62.797 -37.839 1.00 27.57 A C
|
539 |
+
ATOM 539 CE LYS A 60 23.596 61.594 -36.905 1.00 21.21 A C
|
540 |
+
ATOM 540 NZ LYS A 60 24.341 60.433 -37.453 1.00 23.59 A N1+
|
541 |
+
ATOM 541 H LYS A 60 20.266 65.084 -37.873 1.00 0.00 A H
|
542 |
+
ATOM 542 HZ1 LYS A 60 23.931 60.114 -38.358 1.00 0.00 A H
|
543 |
+
ATOM 543 HZ2 LYS A 60 25.314 60.730 -37.672 1.00 0.00 A H
|
544 |
+
ATOM 544 HZ3 LYS A 60 24.354 59.650 -36.780 1.00 0.00 A H
|
545 |
+
ATOM 545 N LEU A 61 19.455 64.758 -40.925 1.00 19.54 A N
|
546 |
+
ATOM 546 CA LEU A 61 18.634 64.842 -42.131 1.00 19.62 A C
|
547 |
+
ATOM 547 C LEU A 61 18.053 66.240 -42.191 1.00 22.00 A C
|
548 |
+
ATOM 548 O LEU A 61 17.749 66.824 -41.156 1.00 23.73 A O
|
549 |
+
ATOM 549 CB LEU A 61 17.455 63.864 -42.056 1.00 21.85 A C
|
550 |
+
ATOM 550 CG LEU A 61 17.726 62.376 -41.884 1.00 26.24 A C
|
551 |
+
ATOM 551 CD1 LEU A 61 16.446 61.604 -42.034 1.00 22.46 A C
|
552 |
+
ATOM 552 CD2 LEU A 61 18.709 61.932 -42.935 1.00 32.42 A C
|
553 |
+
ATOM 553 H LEU A 61 18.984 64.713 -40.066 1.00 0.00 A H
|
554 |
+
ATOM 554 N PRO A 62 17.943 66.824 -43.396 1.00 22.42 A N
|
555 |
+
ATOM 555 CA PRO A 62 17.375 68.170 -43.485 1.00 22.72 A C
|
556 |
+
ATOM 556 C PRO A 62 15.894 68.092 -43.131 1.00 23.49 A C
|
557 |
+
ATOM 557 O PRO A 62 15.174 67.226 -43.638 1.00 22.02 A O
|
558 |
+
ATOM 558 CB PRO A 62 17.614 68.543 -44.945 1.00 26.38 A C
|
559 |
+
ATOM 559 CG PRO A 62 17.666 67.216 -45.647 1.00 24.89 A C
|
560 |
+
ATOM 560 CD PRO A 62 18.425 66.358 -44.707 1.00 21.44 A C
|
561 |
+
ATOM 561 N LEU A 63 15.442 68.975 -42.250 1.00 22.90 A N
|
562 |
+
ATOM 562 CA LEU A 63 14.058 68.935 -41.817 1.00 16.78 A C
|
563 |
+
ATOM 563 C LEU A 63 13.300 70.164 -42.231 1.00 17.38 A C
|
564 |
+
ATOM 564 O LEU A 63 13.857 71.258 -42.293 1.00 18.87 A O
|
565 |
+
ATOM 565 CB LEU A 63 13.998 68.836 -40.297 1.00 11.97 A C
|
566 |
+
ATOM 566 CG LEU A 63 14.823 67.776 -39.574 1.00 10.10 A C
|
567 |
+
ATOM 567 CD1 LEU A 63 14.699 68.081 -38.117 1.00 9.09 A C
|
568 |
+
ATOM 568 CD2 LEU A 63 14.360 66.334 -39.876 1.00 9.67 A C
|
569 |
+
ATOM 569 H LEU A 63 16.025 69.675 -41.895 1.00 0.00 A H
|
570 |
+
ATOM 570 N ALA A 64 12.027 69.964 -42.544 1.00 19.85 A N
|
571 |
+
ATOM 571 CA ALA A 64 11.115 71.045 -42.911 1.00 18.67 A C
|
572 |
+
ATOM 572 C ALA A 64 10.016 70.917 -41.862 1.00 14.97 A C
|
573 |
+
ATOM 573 O ALA A 64 9.274 69.926 -41.837 1.00 15.53 A O
|
574 |
+
ATOM 574 CB ALA A 64 10.548 70.830 -44.307 1.00 18.29 A C
|
575 |
+
ATOM 575 H ALA A 64 11.673 69.048 -42.532 1.00 0.00 A H
|
576 |
+
ATOM 576 N VAL A 65 9.992 71.857 -40.928 1.00 18.71 A N
|
577 |
+
ATOM 577 CA VAL A 65 9.012 71.827 -39.863 1.00 18.12 A C
|
578 |
+
ATOM 578 C VAL A 65 8.152 73.064 -39.940 1.00 14.97 A C
|
579 |
+
ATOM 579 O VAL A 65 8.641 74.129 -40.261 1.00 18.06 A O
|
580 |
+
ATOM 580 CB VAL A 65 9.699 71.646 -38.470 1.00 17.54 A C
|
581 |
+
ATOM 581 CG1 VAL A 65 11.135 72.002 -38.542 1.00 18.32 A C
|
582 |
+
ATOM 582 CG2 VAL A 65 9.028 72.462 -37.429 1.00 17.88 A C
|
583 |
+
ATOM 583 H VAL A 65 10.612 72.612 -40.980 1.00 0.00 A H
|
584 |
+
ATOM 584 N GLU A 66 6.855 72.918 -39.696 1.00 16.34 A N
|
585 |
+
ATOM 585 CA GLU A 66 5.952 74.061 -39.752 1.00 19.15 A C
|
586 |
+
ATOM 586 C GLU A 66 4.853 73.956 -38.722 1.00 15.51 A C
|
587 |
+
ATOM 587 O GLU A 66 4.336 72.874 -38.480 1.00 17.34 A O
|
588 |
+
ATOM 588 CB GLU A 66 5.283 74.137 -41.128 1.00 26.88 A C
|
589 |
+
ATOM 589 CG GLU A 66 4.292 73.007 -41.373 1.00 38.82 A C
|
590 |
+
ATOM 590 CD GLU A 66 3.409 73.196 -42.604 1.00 49.05 A C
|
591 |
+
ATOM 591 OE1 GLU A 66 3.507 74.262 -43.261 1.00 55.93 A O
|
592 |
+
ATOM 592 OE2 GLU A 66 2.612 72.266 -42.909 1.00 52.01 A O1-
|
593 |
+
ATOM 593 H GLU A 66 6.490 72.032 -39.486 1.00 0.00 A H
|
594 |
+
ATOM 594 N LEU A 67 4.489 75.078 -38.118 1.00 14.88 A N
|
595 |
+
ATOM 595 CA LEU A 67 3.388 75.087 -37.152 1.00 16.04 A C
|
596 |
+
ATOM 596 C LEU A 67 2.134 74.870 -37.982 1.00 13.87 A C
|
597 |
+
ATOM 597 O LEU A 67 2.047 75.397 -39.083 1.00 23.45 A O
|
598 |
+
ATOM 598 CB LEU A 67 3.296 76.443 -36.451 1.00 13.88 A C
|
599 |
+
ATOM 599 CG LEU A 67 4.275 76.657 -35.301 1.00 20.67 A C
|
600 |
+
ATOM 600 CD1 LEU A 67 4.330 78.143 -34.900 1.00 25.36 A C
|
601 |
+
ATOM 601 CD2 LEU A 67 3.868 75.767 -34.142 1.00 23.30 A C
|
602 |
+
ATOM 602 H LEU A 67 4.930 75.912 -38.366 1.00 0.00 A H
|
603 |
+
ATOM 603 N THR A 68 1.176 74.089 -37.495 1.00 15.39 A N
|
604 |
+
ATOM 604 CA THR A 68 -0.032 73.866 -38.280 1.00 13.51 A C
|
605 |
+
ATOM 605 C THR A 68 -1.275 74.480 -37.689 1.00 13.48 A C
|
606 |
+
ATOM 606 O THR A 68 -2.317 74.459 -38.310 1.00 14.79 A O
|
607 |
+
ATOM 607 CB THR A 68 -0.297 72.392 -38.512 1.00 9.45 A C
|
608 |
+
ATOM 608 CG2 THR A 68 0.755 71.809 -39.466 1.00 10.27 A C
|
609 |
+
ATOM 609 OG1 THR A 68 -0.247 71.711 -37.257 1.00 15.66 A O
|
610 |
+
ATOM 610 H THR A 68 1.276 73.696 -36.603 1.00 0.00 A H
|
611 |
+
ATOM 611 HG1 THR A 68 0.629 71.871 -36.880 1.00 0.00 A H
|
612 |
+
ATOM 612 N GLN A 69 -1.158 75.032 -36.489 1.00 12.31 A N
|
613 |
+
ATOM 613 CA GLN A 69 -2.273 75.656 -35.783 1.00 11.58 A C
|
614 |
+
ATOM 614 C GLN A 69 -1.700 76.685 -34.837 1.00 15.38 A C
|
615 |
+
ATOM 615 O GLN A 69 -0.498 76.728 -34.605 1.00 15.03 A O
|
616 |
+
ATOM 616 CB GLN A 69 -3.032 74.625 -34.946 1.00 9.10 A C
|
617 |
+
ATOM 617 CG GLN A 69 -4.205 73.982 -35.640 1.00 8.88 A C
|
618 |
+
ATOM 618 CD GLN A 69 -4.936 73.018 -34.760 1.00 14.81 A C
|
619 |
+
ATOM 619 NE2 GLN A 69 -5.731 72.163 -35.371 1.00 14.48 A N
|
620 |
+
ATOM 620 OE1 GLN A 69 -4.797 73.041 -33.527 1.00 24.74 A O
|
621 |
+
ATOM 621 H GLN A 69 -0.290 75.078 -36.036 1.00 0.00 A H
|
622 |
+
ATOM 622 HE21 GLN A 69 -5.800 72.199 -36.347 1.00 0.00 A H
|
623 |
+
ATOM 623 HE22 GLN A 69 -6.234 71.521 -34.825 1.00 0.00 A H
|
624 |
+
ATOM 624 N GLU A 70 -2.575 77.494 -34.267 1.00 17.67 A N
|
625 |
+
ATOM 625 CA GLU A 70 -2.175 78.510 -33.316 1.00 15.37 A C
|
626 |
+
ATOM 626 C GLU A 70 -1.456 77.860 -32.126 1.00 12.95 A C
|
627 |
+
ATOM 627 O GLU A 70 -1.678 76.684 -31.812 1.00 15.83 A O
|
628 |
+
ATOM 628 CB GLU A 70 -3.433 79.251 -32.846 1.00 15.22 A C
|
629 |
+
ATOM 629 CG GLU A 70 -4.542 78.332 -32.266 1.00 13.80 A C
|
630 |
+
ATOM 630 CD GLU A 70 -5.831 79.077 -31.979 1.00 11.85 A C
|
631 |
+
ATOM 631 OE1 GLU A 70 -5.932 79.770 -30.956 1.00 19.78 A O
|
632 |
+
ATOM 632 OE2 GLU A 70 -6.766 78.964 -32.783 1.00 15.86 A O1-
|
633 |
+
ATOM 633 H GLU A 70 -3.520 77.395 -34.476 1.00 0.00 A H
|
634 |
+
ATOM 634 N VAL A 71 -0.545 78.599 -31.514 1.00 13.61 A N
|
635 |
+
ATOM 635 CA VAL A 71 0.179 78.113 -30.348 1.00 12.54 A C
|
636 |
+
ATOM 636 C VAL A 71 -0.741 78.284 -29.115 1.00 13.85 A C
|
637 |
+
ATOM 637 O VAL A 71 -1.479 79.275 -29.003 1.00 11.75 A O
|
638 |
+
ATOM 638 CB VAL A 71 1.510 78.921 -30.124 1.00 10.04 A C
|
639 |
+
ATOM 639 CG1 VAL A 71 2.231 78.466 -28.840 1.00 9.76 A C
|
640 |
+
ATOM 640 CG2 VAL A 71 2.448 78.743 -31.301 1.00 6.51 A C
|
641 |
+
ATOM 641 H VAL A 71 -0.376 79.504 -31.849 1.00 0.00 A H
|
642 |
+
ATOM 642 N ARG A 72 -0.777 77.282 -28.245 1.00 11.83 A N
|
643 |
+
ATOM 643 CA ARG A 72 -1.580 77.367 -27.037 1.00 11.86 A C
|
644 |
+
ATOM 644 C ARG A 72 -0.613 77.779 -25.955 1.00 13.28 A C
|
645 |
+
ATOM 645 O ARG A 72 0.319 77.054 -25.665 1.00 20.35 A O
|
646 |
+
ATOM 646 CB ARG A 72 -2.187 76.014 -26.706 1.00 6.01 A C
|
647 |
+
ATOM 647 CG ARG A 72 -3.535 75.777 -27.333 1.00 9.72 A C
|
648 |
+
ATOM 648 CD ARG A 72 -3.509 75.793 -28.857 1.00 6.03 A C
|
649 |
+
ATOM 649 NE ARG A 72 -4.710 75.143 -29.389 1.00 7.77 A N
|
650 |
+
ATOM 650 CZ ARG A 72 -4.841 74.651 -30.617 1.00 6.68 A C
|
651 |
+
ATOM 651 NH1 ARG A 72 -3.855 74.736 -31.503 1.00 12.81 A N1+
|
652 |
+
ATOM 652 NH2 ARG A 72 -5.928 73.971 -30.929 1.00 6.00 A N
|
653 |
+
ATOM 653 H ARG A 72 -0.231 76.481 -28.394 1.00 0.00 A H
|
654 |
+
ATOM 654 HE ARG A 72 -5.477 75.136 -28.778 1.00 0.00 A H
|
655 |
+
ATOM 655 HH11 ARG A 72 -2.995 75.220 -31.296 1.00 0.00 A H
|
656 |
+
ATOM 656 HH12 ARG A 72 -3.972 74.350 -32.426 1.00 0.00 A H
|
657 |
+
ATOM 657 HH21 ARG A 72 -6.650 73.797 -30.256 1.00 0.00 A H
|
658 |
+
ATOM 658 HH22 ARG A 72 -6.066 73.585 -31.852 1.00 0.00 A H
|
659 |
+
ATOM 659 N ALA A 73 -0.803 78.947 -25.375 1.00 11.78 A N
|
660 |
+
ATOM 660 CA ALA A 73 0.105 79.402 -24.342 1.00 14.51 A C
|
661 |
+
ATOM 661 C ALA A 73 -0.669 79.861 -23.090 1.00 18.31 A C
|
662 |
+
ATOM 662 O ALA A 73 -1.619 80.658 -23.182 1.00 23.63 A O
|
663 |
+
ATOM 663 CB ALA A 73 0.982 80.523 -24.896 1.00 8.71 A C
|
664 |
+
ATOM 664 H ALA A 73 -1.566 79.514 -25.615 1.00 0.00 A H
|
665 |
+
ATOM 665 N VAL A 74 -0.263 79.356 -21.924 1.00 18.62 A N
|
666 |
+
ATOM 666 CA VAL A 74 -0.920 79.698 -20.666 1.00 18.32 A C
|
667 |
+
ATOM 667 C VAL A 74 -0.087 79.243 -19.471 1.00 20.08 A C
|
668 |
+
ATOM 668 O VAL A 74 0.538 78.184 -19.518 1.00 22.78 A O
|
669 |
+
ATOM 669 CB VAL A 74 -2.351 79.043 -20.571 1.00 12.55 A C
|
670 |
+
ATOM 670 CG1 VAL A 74 -2.258 77.535 -20.280 1.00 8.98 A C
|
671 |
+
ATOM 671 CG2 VAL A 74 -3.185 79.729 -19.505 1.00 10.59 A C
|
672 |
+
ATOM 672 H VAL A 74 0.495 78.730 -21.882 1.00 0.00 A H
|
673 |
+
ATOM 673 N ALA A 75 -0.092 80.054 -18.411 1.00 22.66 A N
|
674 |
+
ATOM 674 CA ALA A 75 0.615 79.784 -17.147 1.00 22.52 A C
|
675 |
+
ATOM 675 C ALA A 75 1.994 79.147 -17.259 1.00 22.63 A C
|
676 |
+
ATOM 676 O ALA A 75 2.236 78.075 -16.699 1.00 33.25 A O
|
677 |
+
ATOM 677 CB ALA A 75 -0.264 78.951 -16.220 1.00 21.28 A C
|
678 |
+
ATOM 678 H ALA A 75 -0.602 80.881 -18.484 1.00 0.00 A H
|
679 |
+
ATOM 679 N ASN A 76 2.903 79.823 -17.952 1.00 20.58 A N
|
680 |
+
ATOM 680 CA ASN A 76 4.268 79.325 -18.133 1.00 22.59 A C
|
681 |
+
ATOM 681 C ASN A 76 4.377 78.008 -18.900 1.00 20.82 A C
|
682 |
+
ATOM 682 O ASN A 76 5.376 77.305 -18.773 1.00 21.38 A O
|
683 |
+
ATOM 683 CB ASN A 76 4.978 79.168 -16.784 1.00 22.74 A C
|
684 |
+
ATOM 684 CG ASN A 76 4.982 80.450 -15.981 1.00 27.62 A C
|
685 |
+
ATOM 685 ND2 ASN A 76 4.488 80.373 -14.765 1.00 27.46 A N
|
686 |
+
ATOM 686 OD1 ASN A 76 5.386 81.504 -16.462 1.00 32.04 A O
|
687 |
+
ATOM 687 H ASN A 76 2.639 80.681 -18.349 1.00 0.00 A H
|
688 |
+
ATOM 688 HD21 ASN A 76 4.146 79.496 -14.486 1.00 0.00 A H
|
689 |
+
ATOM 689 HD22 ASN A 76 4.489 81.171 -14.201 1.00 0.00 A H
|
690 |
+
ATOM 690 N GLU A 77 3.355 77.661 -19.671 1.00 15.14 A N
|
691 |
+
ATOM 691 CA GLU A 77 3.375 76.442 -20.455 1.00 11.27 A C
|
692 |
+
ATOM 692 C GLU A 77 2.881 76.771 -21.852 1.00 12.76 A C
|
693 |
+
ATOM 693 O GLU A 77 2.173 77.770 -22.038 1.00 11.79 A O
|
694 |
+
ATOM 694 CB GLU A 77 2.519 75.364 -19.797 1.00 6.01 A C
|
695 |
+
ATOM 695 CG GLU A 77 3.034 74.986 -18.430 1.00 11.87 A C
|
696 |
+
ATOM 696 CD GLU A 77 2.296 73.836 -17.819 1.00 12.81 A C
|
697 |
+
ATOM 697 OE1 GLU A 77 2.053 72.814 -18.511 1.00 17.80 A O
|
698 |
+
ATOM 698 OE2 GLU A 77 1.971 73.918 -16.625 1.00 23.63 A O1-
|
699 |
+
ATOM 699 H GLU A 77 2.539 78.194 -19.717 1.00 0.00 A H
|
700 |
+
ATOM 700 N ALA A 78 3.348 76.002 -22.838 1.00 7.70 A N
|
701 |
+
ATOM 701 CA ALA A 78 2.955 76.190 -24.235 1.00 6.02 A C
|
702 |
+
ATOM 702 C ALA A 78 2.866 74.833 -24.881 1.00 7.74 A C
|
703 |
+
ATOM 703 O ALA A 78 3.611 73.927 -24.526 1.00 10.37 A O
|
704 |
+
ATOM 704 CB ALA A 78 3.958 77.040 -24.964 1.00 6.00 A C
|
705 |
+
ATOM 705 H ALA A 78 3.971 75.276 -22.627 1.00 0.00 A H
|
706 |
+
ATOM 706 N ALA A 79 1.893 74.651 -25.762 1.00 11.83 A N
|
707 |
+
ATOM 707 CA ALA A 79 1.727 73.386 -26.475 1.00 10.41 A C
|
708 |
+
ATOM 708 C ALA A 79 1.440 73.791 -27.912 1.00 14.15 A C
|
709 |
+
ATOM 709 O ALA A 79 0.759 74.788 -28.157 1.00 13.66 A O
|
710 |
+
ATOM 710 CB ALA A 79 0.569 72.601 -25.901 1.00 6.06 A C
|
711 |
+
ATOM 711 H ALA A 79 1.275 75.385 -25.969 1.00 0.00 A H
|
712 |
+
ATOM 712 N PHE A 80 1.972 73.048 -28.872 1.00 11.10 A N
|
713 |
+
ATOM 713 CA PHE A 80 1.749 73.380 -30.271 1.00 9.14 A C
|
714 |
+
ATOM 714 C PHE A 80 1.715 72.121 -31.120 1.00 9.38 A C
|
715 |
+
ATOM 715 O PHE A 80 2.266 71.090 -30.733 1.00 10.43 A O
|
716 |
+
ATOM 716 CB PHE A 80 2.826 74.360 -30.796 1.00 8.83 A C
|
717 |
+
ATOM 717 CG PHE A 80 4.252 73.914 -30.536 1.00 11.32 A C
|
718 |
+
ATOM 718 CD1 PHE A 80 4.785 73.926 -29.238 1.00 8.20 A C
|
719 |
+
ATOM 719 CD2 PHE A 80 5.069 73.502 -31.575 1.00 13.44 A C
|
720 |
+
ATOM 720 CE1 PHE A 80 6.102 73.534 -28.993 1.00 9.03 A C
|
721 |
+
ATOM 721 CE2 PHE A 80 6.389 73.114 -31.334 1.00 12.24 A C
|
722 |
+
ATOM 722 CZ PHE A 80 6.905 73.129 -30.041 1.00 11.05 A C
|
723 |
+
ATOM 723 H PHE A 80 2.509 72.255 -28.638 1.00 0.00 A H
|
724 |
+
ATOM 724 N ALA A 81 1.016 72.214 -32.251 1.00 10.99 A N
|
725 |
+
ATOM 725 CA ALA A 81 0.884 71.124 -33.206 1.00 8.80 A C
|
726 |
+
ATOM 726 C ALA A 81 1.687 71.571 -34.428 1.00 7.95 A C
|
727 |
+
ATOM 727 O ALA A 81 1.658 72.740 -34.816 1.00 7.94 A O
|
728 |
+
ATOM 728 CB ALA A 81 -0.578 70.908 -33.551 1.00 10.70 A C
|
729 |
+
ATOM 729 H ALA A 81 0.592 73.066 -32.479 1.00 0.00 A H
|
730 |
+
ATOM 730 N PHE A 82 2.454 70.658 -34.999 1.00 10.87 A N
|
731 |
+
ATOM 731 CA PHE A 82 3.289 70.998 -36.139 1.00 10.92 A C
|
732 |
+
ATOM 732 C PHE A 82 3.673 69.736 -36.902 1.00 9.53 A C
|
733 |
+
ATOM 733 O PHE A 82 3.261 68.641 -36.537 1.00 15.36 A O
|
734 |
+
ATOM 734 CB PHE A 82 4.553 71.730 -35.642 1.00 9.06 A C
|
735 |
+
ATOM 735 CG PHE A 82 5.463 70.889 -34.772 1.00 16.63 A C
|
736 |
+
ATOM 736 CD1 PHE A 82 5.068 70.480 -33.495 1.00 13.23 A C
|
737 |
+
ATOM 737 CD2 PHE A 82 6.724 70.506 -35.225 1.00 13.14 A C
|
738 |
+
ATOM 738 CE1 PHE A 82 5.917 69.703 -32.700 1.00 9.87 A C
|
739 |
+
ATOM 739 CE2 PHE A 82 7.587 69.724 -34.427 1.00 10.48 A C
|
740 |
+
ATOM 740 CZ PHE A 82 7.184 69.326 -33.175 1.00 9.88 A C
|
741 |
+
ATOM 741 H PHE A 82 2.475 69.730 -34.669 1.00 0.00 A H
|
742 |
+
ATOM 742 N ILE A 83 4.364 69.879 -38.017 1.00 10.92 A N
|
743 |
+
ATOM 743 CA ILE A 83 4.817 68.690 -38.730 1.00 10.71 A C
|
744 |
+
ATOM 744 C ILE A 83 6.314 68.819 -38.947 1.00 13.47 A C
|
745 |
+
ATOM 745 O ILE A 83 6.840 69.938 -38.968 1.00 13.07 A O
|
746 |
+
ATOM 746 CB ILE A 83 4.110 68.484 -40.105 1.00 12.41 A C
|
747 |
+
ATOM 747 CG1 ILE A 83 4.151 69.749 -40.958 1.00 15.47 A C
|
748 |
+
ATOM 748 CG2 ILE A 83 2.702 67.968 -39.913 1.00 19.29 A C
|
749 |
+
ATOM 749 CD1 ILE A 83 5.425 69.913 -41.777 1.00 32.58 A C
|
750 |
+
ATOM 750 H ILE A 83 4.593 70.768 -38.367 1.00 0.00 A H
|
751 |
+
ATOM 751 N VAL A 84 6.995 67.678 -39.010 1.00 16.93 A N
|
752 |
+
ATOM 752 CA VAL A 84 8.429 67.609 -39.284 1.00 15.26 A C
|
753 |
+
ATOM 753 C VAL A 84 8.482 66.681 -40.487 1.00 12.30 A C
|
754 |
+
ATOM 754 O VAL A 84 8.094 65.516 -40.389 1.00 10.20 A O
|
755 |
+
ATOM 755 CB VAL A 84 9.225 66.946 -38.156 1.00 20.95 A C
|
756 |
+
ATOM 756 CG1 VAL A 84 10.699 66.907 -38.516 1.00 22.12 A C
|
757 |
+
ATOM 757 CG2 VAL A 84 9.028 67.695 -36.859 1.00 22.87 A C
|
758 |
+
ATOM 758 H VAL A 84 6.512 66.832 -38.875 1.00 0.00 A H
|
759 |
+
ATOM 759 N SER A 85 8.877 67.216 -41.637 1.00 17.81 A N
|
760 |
+
ATOM 760 CA SER A 85 8.956 66.428 -42.865 1.00 18.91 A C
|
761 |
+
ATOM 761 C SER A 85 10.412 66.233 -43.220 1.00 21.30 A C
|
762 |
+
ATOM 762 O SER A 85 11.228 67.122 -43.003 1.00 21.25 A O
|
763 |
+
ATOM 763 CB SER A 85 8.298 67.176 -44.023 1.00 17.72 A C
|
764 |
+
ATOM 764 OG SER A 85 7.306 68.083 -43.570 1.00 36.30 A O
|
765 |
+
ATOM 765 H SER A 85 9.138 68.145 -41.668 1.00 0.00 A H
|
766 |
+
ATOM 766 HG SER A 85 7.749 68.805 -43.105 1.00 0.00 A H
|
767 |
+
ATOM 767 N PHE A 86 10.722 65.086 -43.807 1.00 24.08 A N
|
768 |
+
ATOM 768 CA PHE A 86 12.080 64.760 -44.234 1.00 25.09 A C
|
769 |
+
ATOM 769 C PHE A 86 11.997 63.587 -45.209 1.00 29.80 A C
|
770 |
+
ATOM 770 O PHE A 86 10.937 62.971 -45.368 1.00 31.28 A O
|
771 |
+
ATOM 771 CB PHE A 86 12.968 64.396 -43.028 1.00 25.83 A C
|
772 |
+
ATOM 772 CG PHE A 86 12.464 63.227 -42.234 1.00 23.95 A C
|
773 |
+
ATOM 773 CD1 PHE A 86 11.511 63.411 -41.228 1.00 25.22 A C
|
774 |
+
ATOM 774 CD2 PHE A 86 12.887 61.940 -42.528 1.00 25.79 A C
|
775 |
+
ATOM 775 CE1 PHE A 86 10.975 62.322 -40.527 1.00 28.59 A C
|
776 |
+
ATOM 776 CE2 PHE A 86 12.362 60.842 -41.837 1.00 25.89 A C
|
777 |
+
ATOM 777 CZ PHE A 86 11.397 61.036 -40.832 1.00 26.91 A C
|
778 |
+
ATOM 778 H PHE A 86 10.021 64.413 -43.964 1.00 0.00 A H
|
779 |
+
ATOM 779 N GLU A 87 13.100 63.296 -45.885 1.00 37.39 A N
|
780 |
+
ATOM 780 CA GLU A 87 13.135 62.186 -46.819 1.00 43.62 A C
|
781 |
+
ATOM 781 C GLU A 87 14.269 61.285 -46.409 1.00 45.60 A C
|
782 |
+
ATOM 782 O GLU A 87 15.385 61.763 -46.198 1.00 45.45 A O
|
783 |
+
ATOM 783 CB GLU A 87 13.404 62.673 -48.239 1.00 49.85 A C
|
784 |
+
ATOM 784 CG GLU A 87 12.398 63.669 -48.769 1.00 60.90 A C
|
785 |
+
ATOM 785 CD GLU A 87 12.611 63.979 -50.240 1.00 64.35 A C
|
786 |
+
ATOM 786 OE1 GLU A 87 12.669 63.021 -51.050 1.00 68.83 A O
|
787 |
+
ATOM 787 OE2 GLU A 87 12.721 65.180 -50.583 1.00 65.74 A O1-
|
788 |
+
ATOM 788 H GLU A 87 13.916 63.827 -45.774 1.00 0.00 A H
|
789 |
+
ATOM 789 N TYR A 88 13.985 60.008 -46.191 1.00 51.49 A N
|
790 |
+
ATOM 790 CA TYR A 88 15.065 59.097 -45.859 1.00 60.92 A C
|
791 |
+
ATOM 791 C TYR A 88 15.253 58.240 -47.088 1.00 66.96 A C
|
792 |
+
ATOM 792 O TYR A 88 14.530 57.261 -47.297 1.00 68.37 A O
|
793 |
+
ATOM 793 CB TYR A 88 14.781 58.224 -44.639 1.00 62.44 A C
|
794 |
+
ATOM 794 CG TYR A 88 15.987 57.378 -44.266 1.00 68.24 A C
|
795 |
+
ATOM 795 CD1 TYR A 88 17.243 57.967 -44.093 1.00 67.81 A C
|
796 |
+
ATOM 796 CD2 TYR A 88 15.891 55.990 -44.133 1.00 71.27 A C
|
797 |
+
ATOM 797 CE1 TYR A 88 18.372 57.205 -43.803 1.00 70.22 A C
|
798 |
+
ATOM 798 CE2 TYR A 88 17.023 55.212 -43.841 1.00 71.36 A C
|
799 |
+
ATOM 799 CZ TYR A 88 18.259 55.832 -43.678 1.00 71.55 A C
|
800 |
+
ATOM 800 OH TYR A 88 19.386 55.092 -43.397 1.00 73.19 A O
|
801 |
+
ATOM 801 H TYR A 88 13.069 59.692 -46.317 1.00 0.00 A H
|
802 |
+
ATOM 802 HH TYR A 88 20.138 55.683 -43.299 1.00 0.00 A H
|
803 |
+
ATOM 803 N GLN A 89 16.189 58.671 -47.926 1.00 71.77 A N
|
804 |
+
ATOM 804 CA GLN A 89 16.519 57.997 -49.175 1.00 75.65 A C
|
805 |
+
ATOM 805 C GLN A 89 15.393 58.122 -50.206 1.00 75.76 A C
|
806 |
+
ATOM 806 O GLN A 89 14.710 57.141 -50.523 1.00 77.08 A O
|
807 |
+
ATOM 807 CB GLN A 89 16.888 56.524 -48.921 1.00 78.19 A C
|
808 |
+
ATOM 808 CG GLN A 89 17.898 56.313 -47.781 1.00 84.01 A C
|
809 |
+
ATOM 809 CD GLN A 89 18.912 57.452 -47.658 1.00 88.89 A C
|
810 |
+
ATOM 810 NE2 GLN A 89 20.064 57.298 -48.302 1.00 91.91 A N
|
811 |
+
ATOM 811 OE1 GLN A 89 18.651 58.462 -46.995 1.00 92.08 A O
|
812 |
+
ATOM 812 H GLN A 89 16.710 59.458 -47.651 1.00 0.00 A H
|
813 |
+
ATOM 813 HE21 GLN A 89 20.213 56.483 -48.823 1.00 0.00 A H
|
814 |
+
ATOM 814 HE22 GLN A 89 20.726 58.016 -48.216 1.00 0.00 A H
|
815 |
+
ATOM 815 N GLY A 90 15.184 59.355 -50.678 1.00 74.54 A N
|
816 |
+
ATOM 816 CA GLY A 90 14.160 59.650 -51.678 1.00 70.86 A C
|
817 |
+
ATOM 817 C GLY A 90 12.725 59.353 -51.272 1.00 68.55 A C
|
818 |
+
ATOM 818 O GLY A 90 11.787 59.539 -52.058 1.00 67.92 A O
|
819 |
+
ATOM 819 H GLY A 90 15.715 60.101 -50.336 1.00 0.00 A H
|
820 |
+
ATOM 820 N ARG A 91 12.564 58.894 -50.033 1.00 67.07 A N
|
821 |
+
ATOM 821 CA ARG A 91 11.269 58.543 -49.470 1.00 64.55 A C
|
822 |
+
ATOM 822 C ARG A 91 10.844 59.601 -48.444 1.00 56.77 A C
|
823 |
+
ATOM 823 O ARG A 91 11.498 59.765 -47.412 1.00 53.37 A O
|
824 |
+
ATOM 824 CB ARG A 91 11.381 57.157 -48.821 1.00 70.87 A C
|
825 |
+
ATOM 825 CG ARG A 91 10.072 56.536 -48.353 1.00 82.61 A C
|
826 |
+
ATOM 826 CD ARG A 91 9.843 56.764 -46.866 1.00 91.33 A C
|
827 |
+
ATOM 827 NE ARG A 91 10.904 56.179 -46.047 1.00 99.54 A N
|
828 |
+
ATOM 828 CZ ARG A 91 11.706 56.874 -45.244 1.00104.71 A C
|
829 |
+
ATOM 829 NH1 ARG A 91 11.584 58.194 -45.143 1.00106.46 A N1+
|
830 |
+
ATOM 830 NH2 ARG A 91 12.619 56.239 -44.520 1.00108.62 A N
|
831 |
+
ATOM 831 H ARG A 91 13.335 58.777 -49.455 1.00 0.00 A H
|
832 |
+
ATOM 832 HE ARG A 91 11.009 55.209 -46.140 1.00 0.00 A H
|
833 |
+
ATOM 833 HH11 ARG A 91 10.861 58.681 -45.653 1.00 0.00 A H
|
834 |
+
ATOM 834 HH12 ARG A 91 12.138 58.762 -44.541 1.00 0.00 A H
|
835 |
+
ATOM 835 HH21 ARG A 91 12.708 55.244 -44.590 1.00 0.00 A H
|
836 |
+
ATOM 836 HH22 ARG A 91 13.232 56.706 -43.887 1.00 0.00 A H
|
837 |
+
ATOM 837 N LYS A 92 9.773 60.332 -48.745 1.00 49.39 A N
|
838 |
+
ATOM 838 CA LYS A 92 9.280 61.362 -47.838 1.00 44.20 A C
|
839 |
+
ATOM 839 C LYS A 92 8.488 60.782 -46.665 1.00 37.95 A C
|
840 |
+
ATOM 840 O LYS A 92 7.862 59.730 -46.768 1.00 36.08 A O
|
841 |
+
ATOM 841 CB LYS A 92 8.438 62.404 -48.580 1.00 47.71 A C
|
842 |
+
ATOM 842 CG LYS A 92 7.989 63.562 -47.681 1.00 54.34 A C
|
843 |
+
ATOM 843 CD LYS A 92 7.121 64.568 -48.414 1.00 61.58 A C
|
844 |
+
ATOM 844 CE LYS A 92 6.096 65.191 -47.477 1.00 66.71 A C
|
845 |
+
ATOM 845 NZ LYS A 92 5.102 64.179 -46.991 1.00 72.99 A N1+
|
846 |
+
ATOM 846 H LYS A 92 9.328 60.189 -49.603 1.00 0.00 A H
|
847 |
+
ATOM 847 HZ1 LYS A 92 4.604 63.670 -47.752 1.00 0.00 A H
|
848 |
+
ATOM 848 HZ2 LYS A 92 5.431 63.405 -46.381 1.00 0.00 A H
|
849 |
+
ATOM 849 HZ3 LYS A 92 4.294 64.558 -46.437 1.00 0.00 A H
|
850 |
+
ATOM 850 N THR A 93 8.506 61.483 -45.548 1.00 31.62 A N
|
851 |
+
ATOM 851 CA THR A 93 7.819 61.026 -44.368 1.00 26.75 A C
|
852 |
+
ATOM 852 C THR A 93 7.462 62.279 -43.596 1.00 24.78 A C
|
853 |
+
ATOM 853 O THR A 93 8.219 63.251 -43.612 1.00 23.01 A O
|
854 |
+
ATOM 854 CB THR A 93 8.769 60.117 -43.588 1.00 30.50 A C
|
855 |
+
ATOM 855 CG2 THR A 93 8.131 59.574 -42.325 1.00 28.73 A C
|
856 |
+
ATOM 856 OG1 THR A 93 9.162 59.034 -44.440 1.00 27.75 A O
|
857 |
+
ATOM 857 H THR A 93 9.028 62.313 -45.474 1.00 0.00 A H
|
858 |
+
ATOM 858 HG1 THR A 93 8.374 58.813 -44.959 1.00 0.00 A H
|
859 |
+
ATOM 859 N VAL A 94 6.275 62.288 -43.000 1.00 21.14 A N
|
860 |
+
ATOM 860 CA VAL A 94 5.775 63.426 -42.232 1.00 21.95 A C
|
861 |
+
ATOM 861 C VAL A 94 5.327 62.965 -40.853 1.00 23.50 A C
|
862 |
+
ATOM 862 O VAL A 94 4.408 62.148 -40.745 1.00 25.33 A O
|
863 |
+
ATOM 863 CB VAL A 94 4.537 64.069 -42.904 1.00 19.68 A C
|
864 |
+
ATOM 864 CG1 VAL A 94 3.883 65.037 -41.942 1.00 16.72 A C
|
865 |
+
ATOM 865 CG2 VAL A 94 4.927 64.795 -44.163 1.00 15.77 A C
|
866 |
+
ATOM 866 H VAL A 94 5.699 61.526 -43.010 1.00 0.00 A H
|
867 |
+
ATOM 867 N VAL A 95 5.963 63.478 -39.801 1.00 20.80 A N
|
868 |
+
ATOM 868 CA VAL A 95 5.589 63.108 -38.432 1.00 14.02 A C
|
869 |
+
ATOM 869 C VAL A 95 4.834 64.298 -37.872 1.00 14.94 A C
|
870 |
+
ATOM 870 O VAL A 95 5.287 65.442 -38.003 1.00 16.50 A O
|
871 |
+
ATOM 871 CB VAL A 95 6.818 62.832 -37.558 1.00 12.89 A C
|
872 |
+
ATOM 872 CG1 VAL A 95 6.403 62.428 -36.191 1.00 13.40 A C
|
873 |
+
ATOM 873 CG2 VAL A 95 7.661 61.729 -38.181 1.00 18.43 A C
|
874 |
+
ATOM 874 H VAL A 95 6.682 64.134 -39.938 1.00 0.00 A H
|
875 |
+
ATOM 875 N ALA A 96 3.673 64.034 -37.281 1.00 13.39 A N
|
876 |
+
ATOM 876 CA ALA A 96 2.839 65.089 -36.704 1.00 13.84 A C
|
877 |
+
ATOM 877 C ALA A 96 2.736 64.955 -35.178 1.00 13.73 A C
|
878 |
+
ATOM 878 O ALA A 96 1.908 64.210 -34.647 1.00 14.82 A O
|
879 |
+
ATOM 879 CB ALA A 96 1.446 65.064 -37.339 1.00 10.54 A C
|
880 |
+
ATOM 880 H ALA A 96 3.360 63.116 -37.200 1.00 0.00 A H
|
881 |
+
ATOM 881 N PRO A 97 3.606 65.653 -34.449 1.00 13.05 A N
|
882 |
+
ATOM 882 CA PRO A 97 3.549 65.558 -32.998 1.00 10.55 A C
|
883 |
+
ATOM 883 C PRO A 97 2.922 66.810 -32.410 1.00 13.35 A C
|
884 |
+
ATOM 884 O PRO A 97 2.604 67.760 -33.140 1.00 11.58 A O
|
885 |
+
ATOM 885 CB PRO A 97 5.033 65.520 -32.609 1.00 6.00 A C
|
886 |
+
ATOM 886 CG PRO A 97 5.820 65.918 -33.910 1.00 7.23 A C
|
887 |
+
ATOM 887 CD PRO A 97 4.788 66.416 -34.870 1.00 10.87 A C
|
888 |
+
ATOM 888 N ILE A 98 2.685 66.767 -31.107 1.00 11.92 A N
|
889 |
+
ATOM 889 CA ILE A 98 2.211 67.929 -30.374 1.00 12.48 A C
|
890 |
+
ATOM 890 C ILE A 98 3.323 68.053 -29.320 1.00 10.15 A C
|
891 |
+
ATOM 891 O ILE A 98 3.655 67.071 -28.655 1.00 11.57 A O
|
892 |
+
ATOM 892 CB ILE A 98 0.796 67.718 -29.684 1.00 15.20 A C
|
893 |
+
ATOM 893 CG1 ILE A 98 -0.334 67.826 -30.724 1.00 8.52 A C
|
894 |
+
ATOM 894 CG2 ILE A 98 0.540 68.823 -28.631 1.00 9.28 A C
|
895 |
+
ATOM 895 CD1 ILE A 98 -1.721 67.689 -30.134 1.00 6.04 A C
|
896 |
+
ATOM 896 H ILE A 98 2.859 65.943 -30.595 1.00 0.00 A H
|
897 |
+
ATOM 897 N ASP A 99 3.986 69.198 -29.264 1.00 6.01 A N
|
898 |
+
ATOM 898 CA ASP A 99 5.028 69.386 -28.276 1.00 7.15 A C
|
899 |
+
ATOM 899 C ASP A 99 4.491 70.205 -27.127 1.00 10.60 A C
|
900 |
+
ATOM 900 O ASP A 99 3.639 71.081 -27.326 1.00 10.01 A O
|
901 |
+
ATOM 901 CB ASP A 99 6.205 70.150 -28.841 1.00 10.22 A C
|
902 |
+
ATOM 902 CG ASP A 99 7.175 69.277 -29.617 1.00 6.01 A C
|
903 |
+
ATOM 903 OD1 ASP A 99 6.924 68.080 -29.890 1.00 11.95 A O
|
904 |
+
ATOM 904 OD2 ASP A 99 8.219 69.846 -29.961 1.00 8.57 A O1-
|
905 |
+
ATOM 905 H ASP A 99 3.765 69.922 -29.882 1.00 0.00 A H
|
906 |
+
ATOM 906 HD2 ASP A 99 8.781 69.224 -30.451 1.00 0.00 A H
|
907 |
+
ATOM 907 N HIS A 100 5.043 69.955 -25.940 1.00 11.43 A N
|
908 |
+
ATOM 908 CA HIS A 100 4.678 70.666 -24.725 1.00 8.74 A C
|
909 |
+
ATOM 909 C HIS A 100 5.947 71.261 -24.104 1.00 12.40 A C
|
910 |
+
ATOM 910 O HIS A 100 6.892 70.547 -23.794 1.00 10.22 A O
|
911 |
+
ATOM 911 CB HIS A 100 3.998 69.713 -23.733 1.00 12.71 A C
|
912 |
+
ATOM 912 CG HIS A 100 3.659 70.350 -22.414 1.00 20.40 A C
|
913 |
+
ATOM 913 CD2 HIS A 100 3.150 71.568 -22.119 1.00 13.89 A C
|
914 |
+
ATOM 914 ND1 HIS A 100 3.837 69.711 -21.205 1.00 18.95 A N
|
915 |
+
ATOM 915 CE1 HIS A 100 3.447 70.507 -20.226 1.00 22.32 A C
|
916 |
+
ATOM 916 NE2 HIS A 100 3.028 71.641 -20.755 1.00 20.66 A N
|
917 |
+
ATOM 917 H HIS A 100 5.734 69.262 -25.919 1.00 0.00 A H
|
918 |
+
ATOM 918 HD1 HIS A 100 4.288 68.856 -21.040 1.00 0.00 A H
|
919 |
+
ATOM 919 HE2 HIS A 100 2.731 72.447 -20.266 1.00 0.00 A H
|
920 |
+
ATOM 920 N PHE A 101 5.970 72.579 -23.994 1.00 10.81 A N
|
921 |
+
ATOM 921 CA PHE A 101 7.086 73.315 -23.422 1.00 10.19 A C
|
922 |
+
ATOM 922 C PHE A 101 6.664 73.821 -22.050 1.00 12.64 A C
|
923 |
+
ATOM 923 O PHE A 101 5.489 74.119 -21.824 1.00 11.76 A O
|
924 |
+
ATOM 924 CB PHE A 101 7.395 74.560 -24.265 1.00 11.58 A C
|
925 |
+
ATOM 925 CG PHE A 101 8.232 74.303 -25.503 1.00 9.82 A C
|
926 |
+
ATOM 926 CD1 PHE A 101 8.433 73.019 -25.996 1.00 15.10 A C
|
927 |
+
ATOM 927 CD2 PHE A 101 8.847 75.374 -26.159 1.00 13.96 A C
|
928 |
+
ATOM 928 CE1 PHE A 101 9.241 72.813 -27.122 1.00 18.99 A C
|
929 |
+
ATOM 929 CE2 PHE A 101 9.655 75.174 -27.285 1.00 13.55 A C
|
930 |
+
ATOM 930 CZ PHE A 101 9.853 73.895 -27.763 1.00 8.83 A C
|
931 |
+
ATOM 931 H PHE A 101 5.212 73.095 -24.316 1.00 0.00 A H
|
932 |
+
ATOM 932 N ARG A 102 7.628 73.976 -21.154 1.00 13.07 A N
|
933 |
+
ATOM 933 CA ARG A 102 7.343 74.491 -19.835 1.00 13.20 A C
|
934 |
+
ATOM 934 C ARG A 102 8.472 75.491 -19.618 1.00 11.79 A C
|
935 |
+
ATOM 935 O ARG A 102 9.641 75.171 -19.825 1.00 13.69 A O
|
936 |
+
ATOM 936 CB ARG A 102 7.353 73.355 -18.820 1.00 20.65 A C
|
937 |
+
ATOM 937 CG ARG A 102 6.599 73.668 -17.560 1.00 31.29 A C
|
938 |
+
ATOM 938 CD ARG A 102 6.617 72.499 -16.595 1.00 43.38 A C
|
939 |
+
ATOM 939 NE ARG A 102 5.788 71.387 -17.057 1.00 46.48 A N
|
940 |
+
ATOM 940 CZ ARG A 102 4.617 71.049 -16.514 1.00 50.74 A C
|
941 |
+
ATOM 941 NH1 ARG A 102 4.133 71.744 -15.484 1.00 50.96 A N1+
|
942 |
+
ATOM 942 NH2 ARG A 102 3.940 70.000 -16.978 1.00 53.28 A N
|
943 |
+
ATOM 943 H ARG A 102 8.549 73.733 -21.383 1.00 0.00 A H
|
944 |
+
ATOM 944 HE ARG A 102 6.158 70.882 -17.819 1.00 0.00 A H
|
945 |
+
ATOM 945 HH11 ARG A 102 4.637 72.549 -15.155 1.00 0.00 A H
|
946 |
+
ATOM 946 HH12 ARG A 102 3.251 71.575 -15.046 1.00 0.00 A H
|
947 |
+
ATOM 947 HH21 ARG A 102 4.295 69.403 -17.709 1.00 0.00 A H
|
948 |
+
ATOM 948 HH22 ARG A 102 3.074 69.714 -16.579 1.00 0.00 A H
|
949 |
+
ATOM 949 N PHE A 103 8.116 76.726 -19.302 1.00 9.98 A N
|
950 |
+
ATOM 950 CA PHE A 103 9.104 77.772 -19.129 1.00 15.39 A C
|
951 |
+
ATOM 951 C PHE A 103 9.374 78.094 -17.667 1.00 20.19 A C
|
952 |
+
ATOM 952 O PHE A 103 8.618 77.699 -16.771 1.00 20.52 A O
|
953 |
+
ATOM 953 CB PHE A 103 8.662 79.056 -19.847 1.00 12.56 A C
|
954 |
+
ATOM 954 CG PHE A 103 8.441 78.896 -21.334 1.00 11.89 A C
|
955 |
+
ATOM 955 CD1 PHE A 103 7.328 78.209 -21.827 1.00 16.99 A C
|
956 |
+
ATOM 956 CD2 PHE A 103 9.297 79.485 -22.239 1.00 14.06 A C
|
957 |
+
ATOM 957 CE1 PHE A 103 7.068 78.118 -23.204 1.00 12.90 A C
|
958 |
+
ATOM 958 CE2 PHE A 103 9.051 79.401 -23.605 1.00 16.35 A C
|
959 |
+
ATOM 959 CZ PHE A 103 7.923 78.711 -24.086 1.00 16.18 A C
|
960 |
+
ATOM 960 H PHE A 103 7.174 76.928 -19.137 1.00 0.00 A H
|
961 |
+
ATOM 961 N ASN A 104 10.470 78.806 -17.432 1.00 18.03 A N
|
962 |
+
ATOM 962 CA ASN A 104 10.837 79.196 -16.083 1.00 18.86 A C
|
963 |
+
ATOM 963 C ASN A 104 10.821 80.696 -15.968 1.00 21.20 A C
|
964 |
+
ATOM 964 O ASN A 104 10.576 81.383 -16.966 1.00 26.75 A O
|
965 |
+
ATOM 965 CB ASN A 104 12.205 78.639 -15.693 1.00 18.97 A C
|
966 |
+
ATOM 966 CG ASN A 104 13.339 79.145 -16.565 1.00 15.78 A C
|
967 |
+
ATOM 967 ND2 ASN A 104 13.099 80.160 -17.373 1.00 21.10 A N
|
968 |
+
ATOM 968 OD1 ASN A 104 14.432 78.598 -16.511 1.00 23.62 A O
|
969 |
+
ATOM 969 H ASN A 104 11.032 79.032 -18.204 1.00 0.00 A H
|
970 |
+
ATOM 970 HD21 ASN A 104 12.267 80.624 -17.550 1.00 0.00 A H
|
971 |
+
ATOM 971 HD22 ASN A 104 13.944 80.392 -17.810 1.00 0.00 A H
|
972 |
+
ATOM 972 N GLY A 105 11.125 81.204 -14.775 1.00 25.04 A N
|
973 |
+
ATOM 973 CA GLY A 105 11.128 82.641 -14.549 1.00 26.04 A C
|
974 |
+
ATOM 974 C GLY A 105 11.921 83.455 -15.559 1.00 26.75 A C
|
975 |
+
ATOM 975 O GLY A 105 11.496 84.537 -15.977 1.00 32.50 A O
|
976 |
+
ATOM 976 H GLY A 105 11.324 80.606 -14.024 1.00 0.00 A H
|
977 |
+
ATOM 977 N ALA A 106 13.060 82.922 -15.982 1.00 26.82 A N
|
978 |
+
ATOM 978 CA ALA A 106 13.918 83.599 -16.947 1.00 25.44 A C
|
979 |
+
ATOM 979 C ALA A 106 13.375 83.589 -18.381 1.00 26.75 A C
|
980 |
+
ATOM 980 O ALA A 106 13.932 84.242 -19.274 1.00 31.90 A O
|
981 |
+
ATOM 981 CB ALA A 106 15.297 82.985 -16.912 1.00 27.30 A C
|
982 |
+
ATOM 982 H ALA A 106 13.345 82.059 -15.621 1.00 0.00 A H
|
983 |
+
ATOM 983 N GLY A 107 12.278 82.876 -18.600 1.00 21.30 A N
|
984 |
+
ATOM 984 CA GLY A 107 11.710 82.811 -19.928 1.00 19.15 A C
|
985 |
+
ATOM 985 C GLY A 107 12.295 81.715 -20.793 1.00 16.57 A C
|
986 |
+
ATOM 986 O GLY A 107 11.977 81.637 -21.979 1.00 19.15 A O
|
987 |
+
ATOM 987 H GLY A 107 11.817 82.417 -17.881 1.00 0.00 A H
|
988 |
+
ATOM 988 N LYS A 108 13.149 80.880 -20.221 1.00 15.74 A N
|
989 |
+
ATOM 989 CA LYS A 108 13.755 79.785 -20.953 1.00 15.79 A C
|
990 |
+
ATOM 990 C LYS A 108 12.959 78.509 -20.722 1.00 16.00 A C
|
991 |
+
ATOM 991 O LYS A 108 12.232 78.380 -19.728 1.00 19.98 A O
|
992 |
+
ATOM 992 CB LYS A 108 15.221 79.576 -20.536 1.00 21.57 A C
|
993 |
+
ATOM 993 CG LYS A 108 16.163 80.753 -20.884 1.00 30.19 A C
|
994 |
+
ATOM 994 CD LYS A 108 17.600 80.313 -21.253 1.00 41.27 A C
|
995 |
+
ATOM 995 CE LYS A 108 18.449 79.827 -20.044 1.00 48.96 A C
|
996 |
+
ATOM 996 NZ LYS A 108 19.775 79.201 -20.448 1.00 62.20 A N1+
|
997 |
+
ATOM 997 H LYS A 108 13.372 80.997 -19.288 1.00 0.00 A H
|
998 |
+
ATOM 998 HZ1 LYS A 108 19.585 78.387 -21.083 1.00 0.00 A H
|
999 |
+
ATOM 999 HZ2 LYS A 108 20.373 79.857 -20.978 1.00 0.00 A H
|
1000 |
+
ATOM 1000 HZ3 LYS A 108 20.367 78.823 -19.657 1.00 0.00 A H
|
1001 |
+
ATOM 1001 N VAL A 109 13.094 77.581 -21.661 1.00 12.86 A N
|
1002 |
+
ATOM 1002 CA VAL A 109 12.422 76.295 -21.622 1.00 10.69 A C
|
1003 |
+
ATOM 1003 C VAL A 109 13.176 75.384 -20.647 1.00 16.12 A C
|
1004 |
+
ATOM 1004 O VAL A 109 14.391 75.199 -20.754 1.00 18.17 A O
|
1005 |
+
ATOM 1005 CB VAL A 109 12.387 75.663 -23.067 1.00 6.00 A C
|
1006 |
+
ATOM 1006 CG1 VAL A 109 11.695 74.319 -23.068 1.00 6.01 A C
|
1007 |
+
ATOM 1007 CG2 VAL A 109 11.663 76.584 -24.017 1.00 7.37 A C
|
1008 |
+
ATOM 1008 H VAL A 109 13.697 77.765 -22.416 1.00 0.00 A H
|
1009 |
+
ATOM 1009 N VAL A 110 12.474 74.846 -19.661 1.00 14.02 A N
|
1010 |
+
ATOM 1010 CA VAL A 110 13.125 73.959 -18.716 1.00 11.14 A C
|
1011 |
+
ATOM 1011 C VAL A 110 12.697 72.532 -18.976 1.00 15.43 A C
|
1012 |
+
ATOM 1012 O VAL A 110 13.341 71.588 -18.504 1.00 19.88 A O
|
1013 |
+
ATOM 1013 CB VAL A 110 12.800 74.338 -17.246 1.00 14.86 A C
|
1014 |
+
ATOM 1014 CG1 VAL A 110 13.421 75.672 -16.892 1.00 17.88 A C
|
1015 |
+
ATOM 1015 CG2 VAL A 110 11.296 74.409 -17.032 1.00 18.60 A C
|
1016 |
+
ATOM 1016 H VAL A 110 11.530 75.078 -19.560 1.00 0.00 A H
|
1017 |
+
ATOM 1017 N SER A 111 11.650 72.372 -19.785 1.00 21.33 A N
|
1018 |
+
ATOM 1018 CA SER A 111 11.111 71.050 -20.101 1.00 18.49 A C
|
1019 |
+
ATOM 1019 C SER A 111 10.358 70.982 -21.431 1.00 19.05 A C
|
1020 |
+
ATOM 1020 O SER A 111 9.522 71.837 -21.713 1.00 17.55 A O
|
1021 |
+
ATOM 1021 CB SER A 111 10.160 70.630 -18.972 1.00 15.96 A C
|
1022 |
+
ATOM 1022 OG SER A 111 9.363 69.523 -19.337 1.00 23.59 A O
|
1023 |
+
ATOM 1023 H SER A 111 11.210 73.148 -20.188 1.00 0.00 A H
|
1024 |
+
ATOM 1024 HG SER A 111 9.937 68.749 -19.487 1.00 0.00 A H
|
1025 |
+
ATOM 1025 N MET A 112 10.665 69.991 -22.264 1.00 17.04 A N
|
1026 |
+
ATOM 1026 CA MET A 112 9.929 69.842 -23.510 1.00 12.13 A C
|
1027 |
+
ATOM 1027 C MET A 112 9.613 68.375 -23.688 1.00 9.89 A C
|
1028 |
+
ATOM 1028 O MET A 112 10.415 67.518 -23.360 1.00 9.32 A O
|
1029 |
+
ATOM 1029 CB MET A 112 10.668 70.447 -24.722 1.00 16.06 A C
|
1030 |
+
ATOM 1030 CG MET A 112 11.581 69.539 -25.551 1.00 16.45 A C
|
1031 |
+
ATOM 1031 SD MET A 112 10.783 68.225 -26.550 1.00 16.21 A S
|
1032 |
+
ATOM 1032 CE MET A 112 11.040 68.840 -28.146 1.00 26.96 A C
|
1033 |
+
ATOM 1033 H MET A 112 11.372 69.342 -22.047 1.00 0.00 A H
|
1034 |
+
ATOM 1034 N ARG A 113 8.396 68.084 -24.121 1.00 8.79 A N
|
1035 |
+
ATOM 1035 CA ARG A 113 7.982 66.701 -24.340 1.00 7.97 A C
|
1036 |
+
ATOM 1036 C ARG A 113 7.285 66.645 -25.684 1.00 6.41 A C
|
1037 |
+
ATOM 1037 O ARG A 113 6.378 67.434 -25.935 1.00 13.99 A O
|
1038 |
+
ATOM 1038 CB ARG A 113 7.030 66.251 -23.233 1.00 9.02 A C
|
1039 |
+
ATOM 1039 CG ARG A 113 7.625 66.370 -21.867 1.00 15.14 A C
|
1040 |
+
ATOM 1040 CD ARG A 113 6.696 65.766 -20.873 1.00 16.20 A C
|
1041 |
+
ATOM 1041 NE ARG A 113 6.778 64.314 -20.880 1.00 14.51 A N
|
1042 |
+
ATOM 1042 CZ ARG A 113 5.727 63.509 -20.833 1.00 10.39 A C
|
1043 |
+
ATOM 1043 NH1 ARG A 113 4.507 64.006 -20.784 1.00 13.94 A N1+
|
1044 |
+
ATOM 1044 NH2 ARG A 113 5.907 62.204 -20.788 1.00 13.69 A N
|
1045 |
+
ATOM 1045 H ARG A 113 7.768 68.807 -24.323 1.00 0.00 A H
|
1046 |
+
ATOM 1046 HE ARG A 113 7.691 63.913 -20.902 1.00 0.00 A H
|
1047 |
+
ATOM 1047 HH11 ARG A 113 4.320 64.981 -20.767 1.00 0.00 A H
|
1048 |
+
ATOM 1048 HH12 ARG A 113 3.746 63.347 -20.805 1.00 0.00 A H
|
1049 |
+
ATOM 1049 HH21 ARG A 113 6.856 61.827 -20.804 1.00 0.00 A H
|
1050 |
+
ATOM 1050 HH22 ARG A 113 5.170 61.521 -20.786 1.00 0.00 A H
|
1051 |
+
ATOM 1051 N ALA A 114 7.729 65.724 -26.538 1.00 12.66 A N
|
1052 |
+
ATOM 1052 CA ALA A 114 7.198 65.566 -27.880 1.00 6.59 A C
|
1053 |
+
ATOM 1053 C ALA A 114 6.288 64.349 -27.911 1.00 6.02 A C
|
1054 |
+
ATOM 1054 O ALA A 114 6.739 63.219 -27.744 1.00 11.50 A O
|
1055 |
+
ATOM 1055 CB ALA A 114 8.349 65.421 -28.876 1.00 7.36 A C
|
1056 |
+
ATOM 1056 H ALA A 114 8.438 65.113 -26.244 1.00 0.00 A H
|
1057 |
+
ATOM 1057 N LEU A 115 5.000 64.590 -28.126 1.00 7.62 A N
|
1058 |
+
ATOM 1058 CA LEU A 115 3.997 63.535 -28.149 1.00 10.84 A C
|
1059 |
+
ATOM 1059 C LEU A 115 3.587 63.101 -29.541 1.00 13.62 A C
|
1060 |
+
ATOM 1060 O LEU A 115 3.045 63.908 -30.288 1.00 17.93 A O
|
1061 |
+
ATOM 1061 CB LEU A 115 2.762 64.036 -27.412 1.00 11.12 A C
|
1062 |
+
ATOM 1062 CG LEU A 115 2.448 63.462 -26.042 1.00 13.33 A C
|
1063 |
+
ATOM 1063 CD1 LEU A 115 1.773 64.522 -25.185 1.00 21.56 A C
|
1064 |
+
ATOM 1064 CD2 LEU A 115 1.549 62.271 -26.233 1.00 19.09 A C
|
1065 |
+
ATOM 1065 H LEU A 115 4.719 65.518 -28.285 1.00 0.00 A H
|
1066 |
+
ATOM 1066 N PHE A 116 3.887 61.854 -29.901 1.00 17.39 A N
|
1067 |
+
ATOM 1067 CA PHE A 116 3.512 61.279 -31.205 1.00 15.62 A C
|
1068 |
+
ATOM 1068 C PHE A 116 3.773 59.782 -31.214 1.00 14.98 A C
|
1069 |
+
ATOM 1069 O PHE A 116 4.750 59.330 -30.638 1.00 21.35 A O
|
1070 |
+
ATOM 1070 CB PHE A 116 4.255 61.931 -32.388 1.00 12.89 A C
|
1071 |
+
ATOM 1071 CG PHE A 116 5.742 61.674 -32.402 1.00 15.41 A C
|
1072 |
+
ATOM 1072 CD1 PHE A 116 6.260 60.540 -33.006 1.00 18.05 A C
|
1073 |
+
ATOM 1073 CD2 PHE A 116 6.627 62.574 -31.794 1.00 16.24 A C
|
1074 |
+
ATOM 1074 CE1 PHE A 116 7.644 60.294 -33.007 1.00 19.50 A C
|
1075 |
+
ATOM 1075 CE2 PHE A 116 8.000 62.335 -31.790 1.00 17.91 A C
|
1076 |
+
ATOM 1076 CZ PHE A 116 8.509 61.189 -32.400 1.00 17.62 A C
|
1077 |
+
ATOM 1077 H PHE A 116 4.395 61.271 -29.298 1.00 0.00 A H
|
1078 |
+
ATOM 1078 N GLY A 117 2.874 59.021 -31.831 1.00 17.65 A N
|
1079 |
+
ATOM 1079 CA GLY A 117 3.034 57.579 -31.914 1.00 20.71 A C
|
1080 |
+
ATOM 1080 C GLY A 117 3.137 57.144 -33.370 1.00 23.09 A C
|
1081 |
+
ATOM 1081 O GLY A 117 3.117 57.988 -34.263 1.00 26.53 A O
|
1082 |
+
ATOM 1082 H GLY A 117 2.104 59.433 -32.250 1.00 0.00 A H
|
1083 |
+
ATOM 1083 N GLU A 118 3.195 55.833 -33.617 1.00 29.70 A N
|
1084 |
+
ATOM 1084 CA GLU A 118 3.312 55.274 -34.973 1.00 34.79 A C
|
1085 |
+
ATOM 1085 C GLU A 118 2.274 55.834 -35.940 1.00 33.08 A C
|
1086 |
+
ATOM 1086 O GLU A 118 2.573 56.087 -37.101 1.00 32.88 A O
|
1087 |
+
ATOM 1087 CB GLU A 118 3.207 53.745 -34.939 1.00 46.74 A C
|
1088 |
+
ATOM 1088 CG GLU A 118 1.967 53.236 -34.174 1.00 64.19 A C
|
1089 |
+
ATOM 1089 CD GLU A 118 1.551 51.794 -34.510 1.00 73.18 A C
|
1090 |
+
ATOM 1090 OE1 GLU A 118 2.285 51.093 -35.248 1.00 75.99 A O
|
1091 |
+
ATOM 1091 OE2 GLU A 118 0.460 51.381 -34.038 1.00 78.00 A O1-
|
1092 |
+
ATOM 1092 H GLU A 118 3.159 55.215 -32.856 1.00 0.00 A H
|
1093 |
+
ATOM 1093 N LYS A 119 1.059 56.056 -35.451 1.00 31.94 A N
|
1094 |
+
ATOM 1094 CA LYS A 119 -0.012 56.595 -36.285 1.00 28.74 A C
|
1095 |
+
ATOM 1095 C LYS A 119 0.134 58.092 -36.630 1.00 21.50 A C
|
1096 |
+
ATOM 1096 O LYS A 119 -0.639 58.627 -37.418 1.00 22.81 A O
|
1097 |
+
ATOM 1097 CB LYS A 119 -1.382 56.269 -35.658 1.00 36.51 A C
|
1098 |
+
ATOM 1098 CG LYS A 119 -1.695 54.759 -35.695 1.00 47.40 A C
|
1099 |
+
ATOM 1099 CD LYS A 119 -3.014 54.351 -35.029 1.00 55.43 A C
|
1100 |
+
ATOM 1100 CE LYS A 119 -3.265 52.816 -35.130 1.00 60.03 A C
|
1101 |
+
ATOM 1101 NZ LYS A 119 -2.308 51.931 -34.351 1.00 67.90 A N1+
|
1102 |
+
ATOM 1102 H LYS A 119 0.889 55.839 -34.514 1.00 0.00 A H
|
1103 |
+
ATOM 1103 HZ1 LYS A 119 -1.283 51.987 -34.576 1.00 0.00 A H
|
1104 |
+
ATOM 1104 HZ2 LYS A 119 -2.367 52.066 -33.316 1.00 0.00 A H
|
1105 |
+
ATOM 1105 HZ3 LYS A 119 -2.545 50.920 -34.392 1.00 0.00 A H
|
1106 |
+
ATOM 1106 N ASN A 120 1.146 58.748 -36.069 1.00 18.19 A N
|
1107 |
+
ATOM 1107 CA ASN A 120 1.410 60.162 -36.331 1.00 14.95 A C
|
1108 |
+
ATOM 1108 C ASN A 120 2.621 60.253 -37.240 1.00 14.05 A C
|
1109 |
+
ATOM 1109 O ASN A 120 3.229 61.300 -37.399 1.00 16.23 A O
|
1110 |
+
ATOM 1110 CB ASN A 120 1.649 60.927 -35.022 1.00 16.83 A C
|
1111 |
+
ATOM 1111 CG ASN A 120 0.500 60.768 -34.058 1.00 11.86 A C
|
1112 |
+
ATOM 1112 ND2 ASN A 120 -0.704 61.061 -34.534 1.00 13.11 A N
|
1113 |
+
ATOM 1113 OD1 ASN A 120 0.670 60.299 -32.930 1.00 15.82 A O
|
1114 |
+
ATOM 1114 H ASN A 120 1.752 58.275 -35.475 1.00 0.00 A H
|
1115 |
+
ATOM 1115 HD21 ASN A 120 -0.770 61.365 -35.461 1.00 0.00 A H
|
1116 |
+
ATOM 1116 HD22 ASN A 120 -1.492 60.971 -33.959 1.00 0.00 A H
|
1117 |
+
ATOM 1117 N ILE A 121 2.983 59.130 -37.829 1.00 15.46 A N
|
1118 |
+
ATOM 1118 CA ILE A 121 4.110 59.084 -38.731 1.00 19.96 A C
|
1119 |
+
ATOM 1119 C ILE A 121 3.507 58.777 -40.088 1.00 24.14 A C
|
1120 |
+
ATOM 1120 O ILE A 121 2.828 57.778 -40.251 1.00 23.40 A O
|
1121 |
+
ATOM 1121 CB ILE A 121 5.075 57.989 -38.307 1.00 17.39 A C
|
1122 |
+
ATOM 1122 CG1 ILE A 121 5.643 58.351 -36.940 1.00 12.27 A C
|
1123 |
+
ATOM 1123 CG2 ILE A 121 6.173 57.820 -39.330 1.00 17.35 A C
|
1124 |
+
ATOM 1124 CD1 ILE A 121 6.604 57.357 -36.398 1.00 23.38 A C
|
1125 |
+
ATOM 1125 H ILE A 121 2.500 58.289 -37.685 1.00 0.00 A H
|
1126 |
+
ATOM 1126 N HIS A 122 3.705 59.662 -41.048 1.00 23.88 A N
|
1127 |
+
ATOM 1127 CA HIS A 122 3.128 59.445 -42.357 1.00 36.19 A C
|
1128 |
+
ATOM 1128 C HIS A 122 4.148 59.369 -43.482 1.00 46.30 A C
|
1129 |
+
ATOM 1129 O HIS A 122 4.765 60.358 -43.840 1.00 47.68 A O
|
1130 |
+
ATOM 1130 CB HIS A 122 2.085 60.519 -42.630 1.00 29.55 A C
|
1131 |
+
ATOM 1131 CG HIS A 122 1.052 60.620 -41.556 1.00 29.07 A C
|
1132 |
+
ATOM 1132 CD2 HIS A 122 0.190 59.698 -41.060 1.00 25.53 A C
|
1133 |
+
ATOM 1133 ND1 HIS A 122 0.885 61.751 -40.788 1.00 26.56 A N
|
1134 |
+
ATOM 1134 CE1 HIS A 122 -0.029 61.522 -39.862 1.00 25.43 A C
|
1135 |
+
ATOM 1135 NE2 HIS A 122 -0.464 60.282 -40.005 1.00 23.16 A N
|
1136 |
+
ATOM 1136 H HIS A 122 4.299 60.404 -40.884 1.00 0.00 A H
|
1137 |
+
ATOM 1137 HD1 HIS A 122 1.376 62.596 -40.895 1.00 0.00 A H
|
1138 |
+
ATOM 1138 HE2 HIS A 122 -1.100 59.859 -39.377 1.00 0.00 A H
|
1139 |
+
ATOM 1139 N ALA A 123 4.395 58.165 -43.976 1.00 59.66 A N
|
1140 |
+
ATOM 1140 CA ALA A 123 5.331 58.000 -45.074 1.00 73.75 A C
|
1141 |
+
ATOM 1141 C ALA A 123 4.609 58.586 -46.276 1.00 82.21 A C
|
1142 |
+
ATOM 1142 O ALA A 123 3.377 58.581 -46.320 1.00 85.95 A O
|
1143 |
+
ATOM 1143 CB ALA A 123 5.643 56.528 -45.297 1.00 73.39 A C
|
1144 |
+
ATOM 1144 H ALA A 123 3.918 57.391 -43.620 1.00 0.00 A H
|
1145 |
+
ATOM 1145 N GLY A 124 5.368 59.086 -47.242 1.00 90.51 A N
|
1146 |
+
ATOM 1146 CA GLY A 124 4.773 59.683 -48.421 1.00101.54 A C
|
1147 |
+
ATOM 1147 C GLY A 124 4.023 60.951 -48.070 1.00108.75 A C
|
1148 |
+
ATOM 1148 O GLY A 124 4.498 62.055 -48.351 1.00109.98 A O
|
1149 |
+
ATOM 1149 H GLY A 124 6.323 59.002 -47.173 1.00 0.00 A H
|
1150 |
+
ATOM 1150 N ALA A 125 2.855 60.788 -47.454 1.00114.55 A N
|
1151 |
+
ATOM 1151 CA ALA A 125 2.012 61.902 -47.043 1.00119.60 A C
|
1152 |
+
ATOM 1152 C ALA A 125 2.779 62.840 -46.123 1.00121.61 A C
|
1153 |
+
ATOM 1153 O ALA A 125 2.439 64.040 -46.086 1.00122.70 A O
|
1154 |
+
ATOM 1154 CB ALA A 125 0.768 61.380 -46.343 1.00121.44 A C
|
1155 |
+
ATOM 1155 OXT ALA A 125 3.759 62.383 -45.497 1.00123.42 A O1-
|
1156 |
+
ATOM 1156 H ALA A 125 2.534 59.892 -47.260 1.00 0.00 A H
|
af_backprop/examples/sc_hall/1QJS_starting.pdb
ADDED
@@ -0,0 +1,880 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MODEL 1
|
2 |
+
ATOM 1 N HIS A 1 -11.161 5.339 22.224 1.00 0.00 N
|
3 |
+
ATOM 2 CA HIS A 1 -9.750 5.488 21.883 1.00 0.00 C
|
4 |
+
ATOM 3 C HIS A 1 -9.362 4.571 20.728 1.00 0.00 C
|
5 |
+
ATOM 4 CB HIS A 1 -8.871 5.198 23.102 1.00 0.00 C
|
6 |
+
ATOM 5 O HIS A 1 -9.646 3.372 20.760 1.00 0.00 O
|
7 |
+
ATOM 6 CG HIS A 1 -9.124 6.115 24.256 1.00 0.00 C
|
8 |
+
ATOM 7 CD2 HIS A 1 -9.874 5.954 25.372 1.00 0.00 C
|
9 |
+
ATOM 8 ND1 HIS A 1 -8.571 7.375 24.340 1.00 0.00 N
|
10 |
+
ATOM 9 CE1 HIS A 1 -8.971 7.950 25.462 1.00 0.00 C
|
11 |
+
ATOM 10 NE2 HIS A 1 -9.762 7.109 26.106 1.00 0.00 N
|
12 |
+
ATOM 11 N CYS A 2 -8.947 5.187 19.589 1.00 0.00 N
|
13 |
+
ATOM 12 CA CYS A 2 -8.553 4.393 18.430 1.00 0.00 C
|
14 |
+
ATOM 13 C CYS A 2 -7.058 4.097 18.455 1.00 0.00 C
|
15 |
+
ATOM 14 CB CYS A 2 -8.916 5.119 17.135 1.00 0.00 C
|
16 |
+
ATOM 15 O CYS A 2 -6.262 4.934 18.882 1.00 0.00 O
|
17 |
+
ATOM 16 SG CYS A 2 -10.678 5.483 16.975 1.00 0.00 S
|
18 |
+
ATOM 17 N TYR A 3 -6.659 2.830 18.088 1.00 0.00 N
|
19 |
+
ATOM 18 CA TYR A 3 -5.257 2.439 18.175 1.00 0.00 C
|
20 |
+
ATOM 19 C TYR A 3 -4.640 2.308 16.787 1.00 0.00 C
|
21 |
+
ATOM 20 CB TYR A 3 -5.114 1.117 18.936 1.00 0.00 C
|
22 |
+
ATOM 21 O TYR A 3 -5.356 2.162 15.794 1.00 0.00 O
|
23 |
+
ATOM 22 CG TYR A 3 -6.148 0.084 18.559 1.00 0.00 C
|
24 |
+
ATOM 23 CD1 TYR A 3 -7.368 0.015 19.228 1.00 0.00 C
|
25 |
+
ATOM 24 CD2 TYR A 3 -5.907 -0.825 17.534 1.00 0.00 C
|
26 |
+
ATOM 25 CE1 TYR A 3 -8.323 -0.936 18.885 1.00 0.00 C
|
27 |
+
ATOM 26 CE2 TYR A 3 -6.855 -1.780 17.183 1.00 0.00 C
|
28 |
+
ATOM 27 OH TYR A 3 -9.000 -2.772 17.519 1.00 0.00 O
|
29 |
+
ATOM 28 CZ TYR A 3 -8.058 -1.828 17.863 1.00 0.00 C
|
30 |
+
ATOM 29 N ASN A 4 -3.262 2.608 16.714 1.00 0.00 N
|
31 |
+
ATOM 30 CA ASN A 4 -2.461 2.337 15.525 1.00 0.00 C
|
32 |
+
ATOM 31 C ASN A 4 -2.395 0.842 15.223 1.00 0.00 C
|
33 |
+
ATOM 32 CB ASN A 4 -1.051 2.909 15.684 1.00 0.00 C
|
34 |
+
ATOM 33 O ASN A 4 -2.342 0.021 16.140 1.00 0.00 O
|
35 |
+
ATOM 34 CG ASN A 4 -1.049 4.414 15.872 1.00 0.00 C
|
36 |
+
ATOM 35 ND2 ASN A 4 -0.034 4.925 16.557 1.00 0.00 N
|
37 |
+
ATOM 36 OD1 ASN A 4 -1.954 5.111 15.405 1.00 0.00 O
|
38 |
+
ATOM 37 N THR A 5 -2.544 0.524 13.919 1.00 0.00 N
|
39 |
+
ATOM 38 CA THR A 5 -2.446 -0.879 13.530 1.00 0.00 C
|
40 |
+
ATOM 39 C THR A 5 -1.345 -1.076 12.492 1.00 0.00 C
|
41 |
+
ATOM 40 CB THR A 5 -3.783 -1.399 12.970 1.00 0.00 C
|
42 |
+
ATOM 41 O THR A 5 -0.994 -0.143 11.767 1.00 0.00 O
|
43 |
+
ATOM 42 CG2 THR A 5 -4.888 -1.312 14.018 1.00 0.00 C
|
44 |
+
ATOM 43 OG1 THR A 5 -4.156 -0.612 11.832 1.00 0.00 O
|
45 |
+
ATOM 44 N HIS A 6 -0.770 -2.218 12.577 1.00 0.00 N
|
46 |
+
ATOM 45 CA HIS A 6 0.214 -2.702 11.616 1.00 0.00 C
|
47 |
+
ATOM 46 C HIS A 6 -0.116 -4.117 11.155 1.00 0.00 C
|
48 |
+
ATOM 47 CB HIS A 6 1.619 -2.661 12.220 1.00 0.00 C
|
49 |
+
ATOM 48 O HIS A 6 -0.128 -5.050 11.961 1.00 0.00 O
|
50 |
+
ATOM 49 CG HIS A 6 2.697 -3.055 11.261 1.00 0.00 C
|
51 |
+
ATOM 50 CD2 HIS A 6 3.413 -4.199 11.150 1.00 0.00 C
|
52 |
+
ATOM 51 ND1 HIS A 6 3.149 -2.218 10.264 1.00 0.00 N
|
53 |
+
ATOM 52 CE1 HIS A 6 4.099 -2.832 9.579 1.00 0.00 C
|
54 |
+
ATOM 53 NE2 HIS A 6 4.278 -4.036 10.097 1.00 0.00 N
|
55 |
+
ATOM 54 N GLU A 7 -0.382 -4.286 9.867 1.00 0.00 N
|
56 |
+
ATOM 55 CA GLU A 7 -0.811 -5.584 9.356 1.00 0.00 C
|
57 |
+
ATOM 56 C GLU A 7 0.045 -6.021 8.171 1.00 0.00 C
|
58 |
+
ATOM 57 CB GLU A 7 -2.287 -5.541 8.952 1.00 0.00 C
|
59 |
+
ATOM 58 O GLU A 7 0.361 -5.213 7.296 1.00 0.00 O
|
60 |
+
ATOM 59 CG GLU A 7 -3.240 -5.342 10.121 1.00 0.00 C
|
61 |
+
ATOM 60 CD GLU A 7 -4.701 -5.297 9.704 1.00 0.00 C
|
62 |
+
ATOM 61 OE1 GLU A 7 -5.587 -5.386 10.584 1.00 0.00 O
|
63 |
+
ATOM 62 OE2 GLU A 7 -4.963 -5.174 8.486 1.00 0.00 O
|
64 |
+
ATOM 63 N HIS A 8 0.419 -7.223 8.285 1.00 0.00 N
|
65 |
+
ATOM 64 CA HIS A 8 1.167 -7.903 7.234 1.00 0.00 C
|
66 |
+
ATOM 65 C HIS A 8 0.236 -8.691 6.317 1.00 0.00 C
|
67 |
+
ATOM 66 CB HIS A 8 2.218 -8.835 7.840 1.00 0.00 C
|
68 |
+
ATOM 67 O HIS A 8 -0.668 -9.384 6.790 1.00 0.00 O
|
69 |
+
ATOM 68 CG HIS A 8 3.038 -9.558 6.819 1.00 0.00 C
|
70 |
+
ATOM 69 CD2 HIS A 8 3.959 -9.106 5.936 1.00 0.00 C
|
71 |
+
ATOM 70 ND1 HIS A 8 2.951 -10.920 6.625 1.00 0.00 N
|
72 |
+
ATOM 71 CE1 HIS A 8 3.787 -11.275 5.663 1.00 0.00 C
|
73 |
+
ATOM 72 NE2 HIS A 8 4.410 -10.193 5.228 1.00 0.00 N
|
74 |
+
ATOM 73 N PHE A 9 0.399 -8.517 4.937 1.00 0.00 N
|
75 |
+
ATOM 74 CA PHE A 9 -0.393 -9.342 4.032 1.00 0.00 C
|
76 |
+
ATOM 75 C PHE A 9 0.429 -9.757 2.818 1.00 0.00 C
|
77 |
+
ATOM 76 CB PHE A 9 -1.652 -8.593 3.583 1.00 0.00 C
|
78 |
+
ATOM 77 O PHE A 9 1.455 -9.142 2.517 1.00 0.00 O
|
79 |
+
ATOM 78 CG PHE A 9 -1.366 -7.296 2.875 1.00 0.00 C
|
80 |
+
ATOM 79 CD1 PHE A 9 -1.153 -6.127 3.596 1.00 0.00 C
|
81 |
+
ATOM 80 CD2 PHE A 9 -1.311 -7.246 1.488 1.00 0.00 C
|
82 |
+
ATOM 81 CE1 PHE A 9 -0.889 -4.925 2.943 1.00 0.00 C
|
83 |
+
ATOM 82 CE2 PHE A 9 -1.047 -6.049 0.829 1.00 0.00 C
|
84 |
+
ATOM 83 CZ PHE A 9 -0.837 -4.889 1.558 1.00 0.00 C
|
85 |
+
ATOM 84 N ARG A 10 0.178 -10.978 2.316 1.00 0.00 N
|
86 |
+
ATOM 85 CA ARG A 10 0.973 -11.510 1.213 1.00 0.00 C
|
87 |
+
ATOM 86 C ARG A 10 0.149 -11.587 -0.068 1.00 0.00 C
|
88 |
+
ATOM 87 CB ARG A 10 1.525 -12.892 1.566 1.00 0.00 C
|
89 |
+
ATOM 88 O ARG A 10 -1.047 -11.885 -0.026 1.00 0.00 O
|
90 |
+
ATOM 89 CG ARG A 10 2.623 -12.868 2.618 1.00 0.00 C
|
91 |
+
ATOM 90 CD ARG A 10 3.229 -14.247 2.833 1.00 0.00 C
|
92 |
+
ATOM 91 NE ARG A 10 4.333 -14.501 1.913 1.00 0.00 N
|
93 |
+
ATOM 92 NH1 ARG A 10 5.449 -15.950 3.324 1.00 0.00 N
|
94 |
+
ATOM 93 NH2 ARG A 10 6.314 -15.462 1.257 1.00 0.00 N
|
95 |
+
ATOM 94 CZ ARG A 10 5.363 -15.304 2.167 1.00 0.00 C
|
96 |
+
ATOM 95 N LEU A 11 0.804 -11.094 -1.103 1.00 0.00 N
|
97 |
+
ATOM 96 CA LEU A 11 0.320 -11.410 -2.443 1.00 0.00 C
|
98 |
+
ATOM 97 C LEU A 11 0.991 -12.670 -2.980 1.00 0.00 C
|
99 |
+
ATOM 98 CB LEU A 11 0.572 -10.238 -3.395 1.00 0.00 C
|
100 |
+
ATOM 99 O LEU A 11 1.837 -13.263 -2.307 1.00 0.00 O
|
101 |
+
ATOM 100 CG LEU A 11 -0.077 -8.906 -3.016 1.00 0.00 C
|
102 |
+
ATOM 101 CD1 LEU A 11 0.400 -7.799 -3.952 1.00 0.00 C
|
103 |
+
ATOM 102 CD2 LEU A 11 -1.597 -9.023 -3.049 1.00 0.00 C
|
104 |
+
ATOM 103 N ASP A 12 0.341 -13.343 -4.077 1.00 0.00 N
|
105 |
+
ATOM 104 CA ASP A 12 0.855 -14.552 -4.714 1.00 0.00 C
|
106 |
+
ATOM 105 C ASP A 12 2.306 -14.367 -5.151 1.00 0.00 C
|
107 |
+
ATOM 106 CB ASP A 12 -0.011 -14.936 -5.915 1.00 0.00 C
|
108 |
+
ATOM 107 O ASP A 12 2.690 -14.792 -6.243 1.00 0.00 O
|
109 |
+
ATOM 108 CG ASP A 12 -1.397 -15.415 -5.519 1.00 0.00 C
|
110 |
+
ATOM 109 OD1 ASP A 12 -1.546 -16.017 -4.434 1.00 0.00 O
|
111 |
+
ATOM 110 OD2 ASP A 12 -2.347 -15.191 -6.300 1.00 0.00 O
|
112 |
+
ATOM 111 N ASP A 13 3.110 -13.435 -4.576 1.00 0.00 N
|
113 |
+
ATOM 112 CA ASP A 13 4.545 -13.254 -4.773 1.00 0.00 C
|
114 |
+
ATOM 113 C ASP A 13 5.321 -13.603 -3.505 1.00 0.00 C
|
115 |
+
ATOM 114 CB ASP A 13 4.851 -11.817 -5.199 1.00 0.00 C
|
116 |
+
ATOM 115 O ASP A 13 5.243 -12.885 -2.506 1.00 0.00 O
|
117 |
+
ATOM 116 CG ASP A 13 6.323 -11.586 -5.491 1.00 0.00 C
|
118 |
+
ATOM 117 OD1 ASP A 13 7.094 -12.568 -5.553 1.00 0.00 O
|
119 |
+
ATOM 118 OD2 ASP A 13 6.716 -10.412 -5.658 1.00 0.00 O
|
120 |
+
ATOM 119 N PRO A 14 6.029 -14.766 -3.543 1.00 0.00 N
|
121 |
+
ATOM 120 CA PRO A 14 6.658 -15.283 -2.325 1.00 0.00 C
|
122 |
+
ATOM 121 C PRO A 14 7.790 -14.392 -1.821 1.00 0.00 C
|
123 |
+
ATOM 122 CB PRO A 14 7.191 -16.652 -2.757 1.00 0.00 C
|
124 |
+
ATOM 123 O PRO A 14 8.147 -14.448 -0.641 1.00 0.00 O
|
125 |
+
ATOM 124 CG PRO A 14 7.284 -16.572 -4.246 1.00 0.00 C
|
126 |
+
ATOM 125 CD PRO A 14 6.262 -15.588 -4.736 1.00 0.00 C
|
127 |
+
ATOM 126 N TRP A 15 8.193 -13.454 -2.636 1.00 0.00 N
|
128 |
+
ATOM 127 CA TRP A 15 9.394 -12.710 -2.274 1.00 0.00 C
|
129 |
+
ATOM 128 C TRP A 15 9.039 -11.318 -1.760 1.00 0.00 C
|
130 |
+
ATOM 129 CB TRP A 15 10.341 -12.598 -3.472 1.00 0.00 C
|
131 |
+
ATOM 130 O TRP A 15 9.853 -10.666 -1.102 1.00 0.00 O
|
132 |
+
ATOM 131 CG TRP A 15 10.788 -13.920 -4.020 1.00 0.00 C
|
133 |
+
ATOM 132 CD1 TRP A 15 11.486 -14.892 -3.358 1.00 0.00 C
|
134 |
+
ATOM 133 CD2 TRP A 15 10.573 -14.413 -5.346 1.00 0.00 C
|
135 |
+
ATOM 134 CE2 TRP A 15 11.167 -15.692 -5.418 1.00 0.00 C
|
136 |
+
ATOM 135 CE3 TRP A 15 9.934 -13.896 -6.481 1.00 0.00 C
|
137 |
+
ATOM 136 NE1 TRP A 15 11.716 -15.961 -4.193 1.00 0.00 N
|
138 |
+
ATOM 137 CH2 TRP A 15 10.509 -15.934 -7.678 1.00 0.00 C
|
139 |
+
ATOM 138 CZ2 TRP A 15 11.141 -16.463 -6.583 1.00 0.00 C
|
140 |
+
ATOM 139 CZ3 TRP A 15 9.909 -14.665 -7.639 1.00 0.00 C
|
141 |
+
ATOM 140 N THR A 16 7.868 -10.913 -2.013 1.00 0.00 N
|
142 |
+
ATOM 141 CA THR A 16 7.495 -9.538 -1.702 1.00 0.00 C
|
143 |
+
ATOM 142 C THR A 16 6.582 -9.488 -0.480 1.00 0.00 C
|
144 |
+
ATOM 143 CB THR A 16 6.794 -8.866 -2.898 1.00 0.00 C
|
145 |
+
ATOM 144 O THR A 16 5.600 -10.229 -0.402 1.00 0.00 O
|
146 |
+
ATOM 145 CG2 THR A 16 6.483 -7.402 -2.602 1.00 0.00 C
|
147 |
+
ATOM 146 OG1 THR A 16 7.646 -8.939 -4.047 1.00 0.00 O
|
148 |
+
ATOM 147 N GLU A 17 6.987 -8.731 0.402 1.00 0.00 N
|
149 |
+
ATOM 148 CA GLU A 17 6.175 -8.520 1.596 1.00 0.00 C
|
150 |
+
ATOM 149 C GLU A 17 5.456 -7.175 1.545 1.00 0.00 C
|
151 |
+
ATOM 150 CB GLU A 17 7.039 -8.607 2.857 1.00 0.00 C
|
152 |
+
ATOM 151 O GLU A 17 6.060 -6.155 1.205 1.00 0.00 O
|
153 |
+
ATOM 152 CG GLU A 17 7.672 -9.973 3.076 1.00 0.00 C
|
154 |
+
ATOM 153 CD GLU A 17 8.498 -10.055 4.349 1.00 0.00 C
|
155 |
+
ATOM 154 OE1 GLU A 17 8.478 -11.113 5.019 1.00 0.00 O
|
156 |
+
ATOM 155 OE2 GLU A 17 9.170 -9.053 4.681 1.00 0.00 O
|
157 |
+
ATOM 156 N PHE A 18 4.157 -7.211 1.917 1.00 0.00 N
|
158 |
+
ATOM 157 CA PHE A 18 3.352 -5.996 1.948 1.00 0.00 C
|
159 |
+
ATOM 158 C PHE A 18 2.861 -5.707 3.362 1.00 0.00 C
|
160 |
+
ATOM 159 CB PHE A 18 2.160 -6.115 0.993 1.00 0.00 C
|
161 |
+
ATOM 160 O PHE A 18 2.413 -6.614 4.066 1.00 0.00 O
|
162 |
+
ATOM 161 CG PHE A 18 2.553 -6.268 -0.452 1.00 0.00 C
|
163 |
+
ATOM 162 CD1 PHE A 18 2.624 -5.160 -1.288 1.00 0.00 C
|
164 |
+
ATOM 163 CD2 PHE A 18 2.851 -7.520 -0.974 1.00 0.00 C
|
165 |
+
ATOM 164 CE1 PHE A 18 2.988 -5.299 -2.626 1.00 0.00 C
|
166 |
+
ATOM 165 CE2 PHE A 18 3.216 -7.666 -2.310 1.00 0.00 C
|
167 |
+
ATOM 166 CZ PHE A 18 3.282 -6.554 -3.134 1.00 0.00 C
|
168 |
+
ATOM 167 N TYR A 19 3.014 -4.483 3.756 1.00 0.00 N
|
169 |
+
ATOM 168 CA TYR A 19 2.556 -4.040 5.069 1.00 0.00 C
|
170 |
+
ATOM 169 C TYR A 19 1.598 -2.861 4.943 1.00 0.00 C
|
171 |
+
ATOM 170 CB TYR A 19 3.746 -3.653 5.952 1.00 0.00 C
|
172 |
+
ATOM 171 O TYR A 19 1.757 -2.016 4.059 1.00 0.00 O
|
173 |
+
ATOM 172 CG TYR A 19 4.807 -4.723 6.042 1.00 0.00 C
|
174 |
+
ATOM 173 CD1 TYR A 19 4.770 -5.685 7.049 1.00 0.00 C
|
175 |
+
ATOM 174 CD2 TYR A 19 5.849 -4.773 5.122 1.00 0.00 C
|
176 |
+
ATOM 175 CE1 TYR A 19 5.747 -6.671 7.138 1.00 0.00 C
|
177 |
+
ATOM 176 CE2 TYR A 19 6.831 -5.755 5.201 1.00 0.00 C
|
178 |
+
ATOM 177 OH TYR A 19 7.742 -7.672 6.294 1.00 0.00 O
|
179 |
+
ATOM 178 CZ TYR A 19 6.772 -6.698 6.211 1.00 0.00 C
|
180 |
+
ATOM 179 N ARG A 20 0.661 -2.887 5.726 1.00 0.00 N
|
181 |
+
ATOM 180 CA ARG A 20 -0.194 -1.722 5.928 1.00 0.00 C
|
182 |
+
ATOM 181 C ARG A 20 -0.080 -1.200 7.357 1.00 0.00 C
|
183 |
+
ATOM 182 CB ARG A 20 -1.652 -2.062 5.610 1.00 0.00 C
|
184 |
+
ATOM 183 O ARG A 20 -0.319 -1.939 8.314 1.00 0.00 O
|
185 |
+
ATOM 184 CG ARG A 20 -2.611 -0.897 5.793 1.00 0.00 C
|
186 |
+
ATOM 185 CD ARG A 20 -3.943 -1.349 6.375 1.00 0.00 C
|
187 |
+
ATOM 186 NE ARG A 20 -3.783 -1.914 7.712 1.00 0.00 N
|
188 |
+
ATOM 187 NH1 ARG A 20 -6.044 -1.973 8.182 1.00 0.00 N
|
189 |
+
ATOM 188 NH2 ARG A 20 -4.527 -2.709 9.734 1.00 0.00 N
|
190 |
+
ATOM 189 CZ ARG A 20 -4.785 -2.198 8.540 1.00 0.00 C
|
191 |
+
ATOM 190 N THR A 21 0.265 0.071 7.504 1.00 0.00 N
|
192 |
+
ATOM 191 CA THR A 21 0.341 0.733 8.802 1.00 0.00 C
|
193 |
+
ATOM 192 C THR A 21 -0.671 1.872 8.889 1.00 0.00 C
|
194 |
+
ATOM 193 CB THR A 21 1.756 1.278 9.069 1.00 0.00 C
|
195 |
+
ATOM 194 O THR A 21 -0.741 2.716 7.993 1.00 0.00 O
|
196 |
+
ATOM 195 CG2 THR A 21 1.849 1.911 10.454 1.00 0.00 C
|
197 |
+
ATOM 196 OG1 THR A 21 2.700 0.204 8.982 1.00 0.00 O
|
198 |
+
ATOM 197 N LEU A 22 -1.459 1.839 9.957 1.00 0.00 N
|
199 |
+
ATOM 198 CA LEU A 22 -2.458 2.875 10.198 1.00 0.00 C
|
200 |
+
ATOM 199 C LEU A 22 -2.082 3.722 11.409 1.00 0.00 C
|
201 |
+
ATOM 200 CB LEU A 22 -3.840 2.250 10.406 1.00 0.00 C
|
202 |
+
ATOM 201 O LEU A 22 -1.766 3.184 12.473 1.00 0.00 O
|
203 |
+
ATOM 202 CG LEU A 22 -5.013 3.224 10.530 1.00 0.00 C
|
204 |
+
ATOM 203 CD1 LEU A 22 -6.236 2.675 9.804 1.00 0.00 C
|
205 |
+
ATOM 204 CD2 LEU A 22 -5.331 3.493 11.997 1.00 0.00 C
|
206 |
+
ATOM 205 N ASN A 23 -2.010 5.058 11.207 1.00 0.00 N
|
207 |
+
ATOM 206 CA ASN A 23 -1.916 6.025 12.296 1.00 0.00 C
|
208 |
+
ATOM 207 C ASN A 23 -3.262 6.686 12.578 1.00 0.00 C
|
209 |
+
ATOM 208 CB ASN A 23 -0.859 7.086 11.982 1.00 0.00 C
|
210 |
+
ATOM 209 O ASN A 23 -3.700 7.558 11.826 1.00 0.00 O
|
211 |
+
ATOM 210 CG ASN A 23 -0.544 7.968 13.174 1.00 0.00 C
|
212 |
+
ATOM 211 ND2 ASN A 23 0.738 8.240 13.389 1.00 0.00 N
|
213 |
+
ATOM 212 OD1 ASN A 23 -1.445 8.400 13.897 1.00 0.00 O
|
214 |
+
ATOM 213 N ALA A 24 -3.964 6.163 13.657 1.00 0.00 N
|
215 |
+
ATOM 214 CA ALA A 24 -5.343 6.534 13.965 1.00 0.00 C
|
216 |
+
ATOM 215 C ALA A 24 -5.445 8.012 14.331 1.00 0.00 C
|
217 |
+
ATOM 216 CB ALA A 24 -5.885 5.669 15.101 1.00 0.00 C
|
218 |
+
ATOM 217 O ALA A 24 -6.448 8.664 14.033 1.00 0.00 O
|
219 |
+
ATOM 218 N ARG A 25 -4.362 8.538 14.892 1.00 0.00 N
|
220 |
+
ATOM 219 CA ARG A 25 -4.384 9.932 15.323 1.00 0.00 C
|
221 |
+
ATOM 220 C ARG A 25 -4.331 10.876 14.126 1.00 0.00 C
|
222 |
+
ATOM 221 CB ARG A 25 -3.219 10.220 16.271 1.00 0.00 C
|
223 |
+
ATOM 222 O ARG A 25 -5.124 11.815 14.034 1.00 0.00 O
|
224 |
+
ATOM 223 CG ARG A 25 -3.211 11.637 16.824 1.00 0.00 C
|
225 |
+
ATOM 224 CD ARG A 25 -2.139 11.820 17.889 1.00 0.00 C
|
226 |
+
ATOM 225 NE ARG A 25 -2.145 13.176 18.432 1.00 0.00 N
|
227 |
+
ATOM 226 NH1 ARG A 25 -0.364 12.832 19.863 1.00 0.00 N
|
228 |
+
ATOM 227 NH2 ARG A 25 -1.397 14.877 19.781 1.00 0.00 N
|
229 |
+
ATOM 228 CZ ARG A 25 -1.302 13.625 19.358 1.00 0.00 C
|
230 |
+
ATOM 229 N SER A 26 -3.376 10.637 13.145 1.00 0.00 N
|
231 |
+
ATOM 230 CA SER A 26 -3.171 11.513 11.996 1.00 0.00 C
|
232 |
+
ATOM 231 C SER A 26 -4.052 11.100 10.822 1.00 0.00 C
|
233 |
+
ATOM 232 CB SER A 26 -1.702 11.504 11.569 1.00 0.00 C
|
234 |
+
ATOM 233 O SER A 26 -4.116 11.801 9.810 1.00 0.00 O
|
235 |
+
ATOM 234 OG SER A 26 -1.312 10.211 11.138 1.00 0.00 O
|
236 |
+
ATOM 235 N LYS A 27 -4.836 10.033 11.095 1.00 0.00 N
|
237 |
+
ATOM 236 CA LYS A 27 -5.656 9.457 10.033 1.00 0.00 C
|
238 |
+
ATOM 237 C LYS A 27 -4.835 9.227 8.768 1.00 0.00 C
|
239 |
+
ATOM 238 CB LYS A 27 -6.850 10.362 9.726 1.00 0.00 C
|
240 |
+
ATOM 239 O LYS A 27 -5.263 9.587 7.669 1.00 0.00 O
|
241 |
+
ATOM 240 CG LYS A 27 -7.799 10.556 10.899 1.00 0.00 C
|
242 |
+
ATOM 241 CD LYS A 27 -8.515 9.260 11.258 1.00 0.00 C
|
243 |
+
ATOM 242 CE LYS A 27 -9.598 9.490 12.304 1.00 0.00 C
|
244 |
+
ATOM 243 NZ LYS A 27 -10.257 8.213 12.709 1.00 0.00 N
|
245 |
+
ATOM 244 N THR A 28 -3.684 8.620 8.927 1.00 0.00 N
|
246 |
+
ATOM 245 CA THR A 28 -2.783 8.326 7.817 1.00 0.00 C
|
247 |
+
ATOM 246 C THR A 28 -2.604 6.820 7.650 1.00 0.00 C
|
248 |
+
ATOM 247 CB THR A 28 -1.409 8.990 8.023 1.00 0.00 C
|
249 |
+
ATOM 248 O THR A 28 -2.497 6.089 8.637 1.00 0.00 O
|
250 |
+
ATOM 249 CG2 THR A 28 -0.480 8.706 6.848 1.00 0.00 C
|
251 |
+
ATOM 250 OG1 THR A 28 -1.584 10.407 8.148 1.00 0.00 O
|
252 |
+
ATOM 251 N CYS A 29 -2.718 6.348 6.470 1.00 0.00 N
|
253 |
+
ATOM 252 CA CYS A 29 -2.452 4.960 6.109 1.00 0.00 C
|
254 |
+
ATOM 253 C CYS A 29 -1.190 4.850 5.262 1.00 0.00 C
|
255 |
+
ATOM 254 CB CYS A 29 -3.639 4.366 5.352 1.00 0.00 C
|
256 |
+
ATOM 255 O CYS A 29 -1.018 5.597 4.297 1.00 0.00 O
|
257 |
+
ATOM 256 SG CYS A 29 -3.561 2.571 5.170 1.00 0.00 S
|
258 |
+
ATOM 257 N ILE A 30 -0.309 3.925 5.603 1.00 0.00 N
|
259 |
+
ATOM 258 CA ILE A 30 0.942 3.715 4.882 1.00 0.00 C
|
260 |
+
ATOM 259 C ILE A 30 0.989 2.290 4.335 1.00 0.00 C
|
261 |
+
ATOM 260 CB ILE A 30 2.166 3.984 5.786 1.00 0.00 C
|
262 |
+
ATOM 261 O ILE A 30 0.818 1.325 5.084 1.00 0.00 O
|
263 |
+
ATOM 262 CG1 ILE A 30 2.083 5.388 6.394 1.00 0.00 C
|
264 |
+
ATOM 263 CG2 ILE A 30 3.468 3.803 4.999 1.00 0.00 C
|
265 |
+
ATOM 264 CD1 ILE A 30 3.072 5.632 7.526 1.00 0.00 C
|
266 |
+
ATOM 265 N VAL A 31 1.098 2.123 3.077 1.00 0.00 N
|
267 |
+
ATOM 266 CA VAL A 31 1.317 0.825 2.449 1.00 0.00 C
|
268 |
+
ATOM 267 C VAL A 31 2.785 0.684 2.053 1.00 0.00 C
|
269 |
+
ATOM 268 CB VAL A 31 0.411 0.632 1.212 1.00 0.00 C
|
270 |
+
ATOM 269 O VAL A 31 3.310 1.498 1.289 1.00 0.00 O
|
271 |
+
ATOM 270 CG1 VAL A 31 0.662 -0.729 0.566 1.00 0.00 C
|
272 |
+
ATOM 271 CG2 VAL A 31 -1.059 0.779 1.601 1.00 0.00 C
|
273 |
+
ATOM 272 N THR A 32 3.455 -0.320 2.592 1.00 0.00 N
|
274 |
+
ATOM 273 CA THR A 32 4.880 -0.565 2.391 1.00 0.00 C
|
275 |
+
ATOM 274 C THR A 32 5.102 -1.860 1.616 1.00 0.00 C
|
276 |
+
ATOM 275 CB THR A 32 5.630 -0.631 3.734 1.00 0.00 C
|
277 |
+
ATOM 276 O THR A 32 4.464 -2.876 1.899 1.00 0.00 O
|
278 |
+
ATOM 277 CG2 THR A 32 7.129 -0.812 3.519 1.00 0.00 C
|
279 |
+
ATOM 278 OG1 THR A 32 5.405 0.583 4.461 1.00 0.00 O
|
280 |
+
ATOM 279 N VAL A 33 5.927 -1.787 0.613 1.00 0.00 N
|
281 |
+
ATOM 280 CA VAL A 33 6.436 -2.966 -0.081 1.00 0.00 C
|
282 |
+
ATOM 281 C VAL A 33 7.879 -3.231 0.342 1.00 0.00 C
|
283 |
+
ATOM 282 CB VAL A 33 6.351 -2.802 -1.615 1.00 0.00 C
|
284 |
+
ATOM 283 O VAL A 33 8.737 -2.352 0.230 1.00 0.00 O
|
285 |
+
ATOM 284 CG1 VAL A 33 6.835 -4.067 -2.321 1.00 0.00 C
|
286 |
+
ATOM 285 CG2 VAL A 33 4.922 -2.464 -2.036 1.00 0.00 C
|
287 |
+
ATOM 286 N ASP A 34 8.169 -4.392 0.805 1.00 0.00 N
|
288 |
+
ATOM 287 CA ASP A 34 9.479 -4.825 1.284 1.00 0.00 C
|
289 |
+
ATOM 288 C ASP A 34 10.014 -5.983 0.446 1.00 0.00 C
|
290 |
+
ATOM 289 CB ASP A 34 9.404 -5.231 2.757 1.00 0.00 C
|
291 |
+
ATOM 290 O ASP A 34 9.513 -7.106 0.537 1.00 0.00 O
|
292 |
+
ATOM 291 CG ASP A 34 10.762 -5.570 3.348 1.00 0.00 C
|
293 |
+
ATOM 292 OD1 ASP A 34 11.727 -5.774 2.581 1.00 0.00 O
|
294 |
+
ATOM 293 OD2 ASP A 34 10.866 -5.636 4.592 1.00 0.00 O
|
295 |
+
ATOM 294 N GLN A 35 11.073 -5.726 -0.286 1.00 0.00 N
|
296 |
+
ATOM 295 CA GLN A 35 11.675 -6.742 -1.144 1.00 0.00 C
|
297 |
+
ATOM 296 C GLN A 35 13.024 -7.197 -0.595 1.00 0.00 C
|
298 |
+
ATOM 297 CB GLN A 35 11.840 -6.212 -2.569 1.00 0.00 C
|
299 |
+
ATOM 298 O GLN A 35 13.897 -7.623 -1.353 1.00 0.00 O
|
300 |
+
ATOM 299 CG GLN A 35 10.522 -5.921 -3.274 1.00 0.00 C
|
301 |
+
ATOM 300 CD GLN A 35 9.870 -7.169 -3.838 1.00 0.00 C
|
302 |
+
ATOM 301 NE2 GLN A 35 8.547 -7.240 -3.747 1.00 0.00 N
|
303 |
+
ATOM 302 OE1 GLN A 35 10.551 -8.062 -4.353 1.00 0.00 O
|
304 |
+
ATOM 303 N THR A 36 13.298 -6.855 0.668 1.00 0.00 N
|
305 |
+
ATOM 304 CA THR A 36 14.573 -7.196 1.289 1.00 0.00 C
|
306 |
+
ATOM 305 C THR A 36 14.903 -8.670 1.071 1.00 0.00 C
|
307 |
+
ATOM 306 CB THR A 36 14.560 -6.888 2.797 1.00 0.00 C
|
308 |
+
ATOM 307 O THR A 36 16.070 -9.034 0.913 1.00 0.00 O
|
309 |
+
ATOM 308 CG2 THR A 36 15.919 -7.172 3.428 1.00 0.00 C
|
310 |
+
ATOM 309 OG1 THR A 36 14.233 -5.506 2.994 1.00 0.00 O
|
311 |
+
ATOM 310 N ASN A 37 13.791 -9.502 0.940 1.00 0.00 N
|
312 |
+
ATOM 311 CA ASN A 37 14.006 -10.939 0.811 1.00 0.00 C
|
313 |
+
ATOM 312 C ASN A 37 13.990 -11.380 -0.650 1.00 0.00 C
|
314 |
+
ATOM 313 CB ASN A 37 12.955 -11.713 1.610 1.00 0.00 C
|
315 |
+
ATOM 314 O ASN A 37 13.982 -12.577 -0.942 1.00 0.00 O
|
316 |
+
ATOM 315 CG ASN A 37 13.047 -11.453 3.101 1.00 0.00 C
|
317 |
+
ATOM 316 ND2 ASN A 37 11.898 -11.333 3.754 1.00 0.00 N
|
318 |
+
ATOM 317 OD1 ASN A 37 14.143 -11.360 3.661 1.00 0.00 O
|
319 |
+
ATOM 318 N ASN A 38 13.915 -10.430 -1.517 1.00 0.00 N
|
320 |
+
ATOM 319 CA ASN A 38 13.930 -10.725 -2.946 1.00 0.00 C
|
321 |
+
ATOM 320 C ASN A 38 15.353 -10.908 -3.466 1.00 0.00 C
|
322 |
+
ATOM 321 CB ASN A 38 13.216 -9.621 -3.730 1.00 0.00 C
|
323 |
+
ATOM 322 O ASN A 38 16.154 -9.972 -3.437 1.00 0.00 O
|
324 |
+
ATOM 323 CG ASN A 38 12.940 -10.011 -5.168 1.00 0.00 C
|
325 |
+
ATOM 324 ND2 ASN A 38 12.018 -9.303 -5.808 1.00 0.00 N
|
326 |
+
ATOM 325 OD1 ASN A 38 13.551 -10.942 -5.700 1.00 0.00 O
|
327 |
+
ATOM 326 N PRO A 39 15.718 -12.109 -3.794 1.00 0.00 N
|
328 |
+
ATOM 327 CA PRO A 39 17.079 -12.430 -4.229 1.00 0.00 C
|
329 |
+
ATOM 328 C PRO A 39 17.430 -11.806 -5.578 1.00 0.00 C
|
330 |
+
ATOM 329 CB PRO A 39 17.068 -13.958 -4.322 1.00 0.00 C
|
331 |
+
ATOM 330 O PRO A 39 18.598 -11.804 -5.975 1.00 0.00 O
|
332 |
+
ATOM 331 CG PRO A 39 15.632 -14.318 -4.529 1.00 0.00 C
|
333 |
+
ATOM 332 CD PRO A 39 14.778 -13.272 -3.872 1.00 0.00 C
|
334 |
+
ATOM 333 N GLN A 40 16.394 -11.251 -6.150 1.00 0.00 N
|
335 |
+
ATOM 334 CA GLN A 40 16.646 -10.746 -7.496 1.00 0.00 C
|
336 |
+
ATOM 335 C GLN A 40 17.358 -9.397 -7.452 1.00 0.00 C
|
337 |
+
ATOM 336 CB GLN A 40 15.338 -10.625 -8.278 1.00 0.00 C
|
338 |
+
ATOM 337 O GLN A 40 16.883 -8.459 -6.810 1.00 0.00 O
|
339 |
+
ATOM 338 CG GLN A 40 14.617 -11.951 -8.481 1.00 0.00 C
|
340 |
+
ATOM 339 CD GLN A 40 13.302 -11.796 -9.221 1.00 0.00 C
|
341 |
+
ATOM 340 NE2 GLN A 40 12.511 -12.864 -9.254 1.00 0.00 N
|
342 |
+
ATOM 341 OE1 GLN A 40 12.999 -10.727 -9.759 1.00 0.00 O
|
343 |
+
ATOM 342 N GLU A 41 18.658 -9.506 -7.750 1.00 0.00 N
|
344 |
+
ATOM 343 CA GLU A 41 19.475 -8.300 -7.847 1.00 0.00 C
|
345 |
+
ATOM 344 C GLU A 41 19.146 -7.510 -9.110 1.00 0.00 C
|
346 |
+
ATOM 345 CB GLU A 41 20.964 -8.656 -7.822 1.00 0.00 C
|
347 |
+
ATOM 346 O GLU A 41 18.572 -8.053 -10.056 1.00 0.00 O
|
348 |
+
ATOM 347 CG GLU A 41 21.428 -9.272 -6.510 1.00 0.00 C
|
349 |
+
ATOM 348 CD GLU A 41 22.928 -9.511 -6.457 1.00 0.00 C
|
350 |
+
ATOM 349 OE1 GLU A 41 23.442 -9.906 -5.385 1.00 0.00 O
|
351 |
+
ATOM 350 OE2 GLU A 41 23.595 -9.301 -7.494 1.00 0.00 O
|
352 |
+
ATOM 351 N ASN A 42 18.825 -6.245 -8.943 1.00 0.00 N
|
353 |
+
ATOM 352 CA ASN A 42 18.748 -5.279 -10.034 1.00 0.00 C
|
354 |
+
ATOM 353 C ASN A 42 17.322 -5.141 -10.561 1.00 0.00 C
|
355 |
+
ATOM 354 CB ASN A 42 19.697 -5.672 -11.168 1.00 0.00 C
|
356 |
+
ATOM 355 O ASN A 42 17.113 -4.986 -11.765 1.00 0.00 O
|
357 |
+
ATOM 356 CG ASN A 42 21.153 -5.643 -10.747 1.00 0.00 C
|
358 |
+
ATOM 357 ND2 ASN A 42 21.930 -6.604 -11.232 1.00 0.00 N
|
359 |
+
ATOM 358 OD1 ASN A 42 21.576 -4.764 -9.991 1.00 0.00 O
|
360 |
+
ATOM 359 N MET A 43 16.406 -5.433 -9.710 1.00 0.00 N
|
361 |
+
ATOM 360 CA MET A 43 15.006 -5.237 -10.074 1.00 0.00 C
|
362 |
+
ATOM 361 C MET A 43 14.398 -4.075 -9.295 1.00 0.00 C
|
363 |
+
ATOM 362 CB MET A 43 14.201 -6.513 -9.824 1.00 0.00 C
|
364 |
+
ATOM 363 O MET A 43 14.758 -3.842 -8.140 1.00 0.00 O
|
365 |
+
ATOM 364 CG MET A 43 14.599 -7.674 -10.720 1.00 0.00 C
|
366 |
+
ATOM 365 SD MET A 43 13.455 -9.102 -10.570 1.00 0.00 S
|
367 |
+
ATOM 366 CE MET A 43 13.902 -9.697 -8.916 1.00 0.00 C
|
368 |
+
ATOM 367 N GLY A 44 13.752 -3.201 -10.052 1.00 0.00 N
|
369 |
+
ATOM 368 CA GLY A 44 12.963 -2.172 -9.393 1.00 0.00 C
|
370 |
+
ATOM 369 C GLY A 44 11.483 -2.499 -9.336 1.00 0.00 C
|
371 |
+
ATOM 370 O GLY A 44 11.043 -3.506 -9.895 1.00 0.00 O
|
372 |
+
ATOM 371 N PHE A 45 10.800 -1.846 -8.459 1.00 0.00 N
|
373 |
+
ATOM 372 CA PHE A 45 9.356 -2.022 -8.356 1.00 0.00 C
|
374 |
+
ATOM 373 C PHE A 45 8.664 -0.687 -8.106 1.00 0.00 C
|
375 |
+
ATOM 374 CB PHE A 45 9.013 -3.009 -7.235 1.00 0.00 C
|
376 |
+
ATOM 375 O PHE A 45 9.316 0.302 -7.767 1.00 0.00 O
|
377 |
+
ATOM 376 CG PHE A 45 9.419 -2.537 -5.865 1.00 0.00 C
|
378 |
+
ATOM 377 CD1 PHE A 45 10.703 -2.769 -5.387 1.00 0.00 C
|
379 |
+
ATOM 378 CD2 PHE A 45 8.516 -1.860 -5.055 1.00 0.00 C
|
380 |
+
ATOM 379 CE1 PHE A 45 11.082 -2.334 -4.119 1.00 0.00 C
|
381 |
+
ATOM 380 CE2 PHE A 45 8.888 -1.422 -3.787 1.00 0.00 C
|
382 |
+
ATOM 381 CZ PHE A 45 10.170 -1.661 -3.321 1.00 0.00 C
|
383 |
+
ATOM 382 N ALA A 46 7.363 -0.706 -8.425 1.00 0.00 N
|
384 |
+
ATOM 383 CA ALA A 46 6.515 0.457 -8.177 1.00 0.00 C
|
385 |
+
ATOM 384 C ALA A 46 5.186 0.044 -7.550 1.00 0.00 C
|
386 |
+
ATOM 385 CB ALA A 46 6.271 1.225 -9.474 1.00 0.00 C
|
387 |
+
ATOM 386 O ALA A 46 4.642 -1.014 -7.875 1.00 0.00 O
|
388 |
+
ATOM 387 N ILE A 47 4.724 0.753 -6.545 1.00 0.00 N
|
389 |
+
ATOM 388 CA ILE A 47 3.375 0.616 -6.007 1.00 0.00 C
|
390 |
+
ATOM 389 C ILE A 47 2.622 1.935 -6.157 1.00 0.00 C
|
391 |
+
ATOM 390 CB ILE A 47 3.400 0.179 -4.525 1.00 0.00 C
|
392 |
+
ATOM 391 O ILE A 47 3.171 3.005 -5.882 1.00 0.00 O
|
393 |
+
ATOM 392 CG1 ILE A 47 4.194 1.185 -3.684 1.00 0.00 C
|
394 |
+
ATOM 393 CG2 ILE A 47 3.983 -1.231 -4.387 1.00 0.00 C
|
395 |
+
ATOM 394 CD1 ILE A 47 4.036 0.998 -2.182 1.00 0.00 C
|
396 |
+
ATOM 395 N MET A 48 1.411 1.770 -6.657 1.00 0.00 N
|
397 |
+
ATOM 396 CA MET A 48 0.618 2.953 -6.979 1.00 0.00 C
|
398 |
+
ATOM 397 C MET A 48 -0.783 2.847 -6.387 1.00 0.00 C
|
399 |
+
ATOM 398 CB MET A 48 0.533 3.148 -8.494 1.00 0.00 C
|
400 |
+
ATOM 399 O MET A 48 -1.425 1.800 -6.483 1.00 0.00 O
|
401 |
+
ATOM 400 CG MET A 48 -0.272 4.368 -8.911 1.00 0.00 C
|
402 |
+
ATOM 401 SD MET A 48 -0.347 4.571 -10.733 1.00 0.00 S
|
403 |
+
ATOM 402 CE MET A 48 1.088 3.577 -11.226 1.00 0.00 C
|
404 |
+
ATOM 403 N LEU A 49 -1.241 3.937 -5.762 1.00 0.00 N
|
405 |
+
ATOM 404 CA LEU A 49 -2.646 4.072 -5.392 1.00 0.00 C
|
406 |
+
ATOM 405 C LEU A 49 -3.494 4.445 -6.605 1.00 0.00 C
|
407 |
+
ATOM 406 CB LEU A 49 -2.813 5.127 -4.295 1.00 0.00 C
|
408 |
+
ATOM 407 O LEU A 49 -3.363 5.546 -7.144 1.00 0.00 O
|
409 |
+
ATOM 408 CG LEU A 49 -4.234 5.341 -3.771 1.00 0.00 C
|
410 |
+
ATOM 409 CD1 LEU A 49 -4.792 4.039 -3.206 1.00 0.00 C
|
411 |
+
ATOM 410 CD2 LEU A 49 -4.255 6.441 -2.715 1.00 0.00 C
|
412 |
+
ATOM 411 N ILE A 50 -4.427 3.567 -7.073 1.00 0.00 N
|
413 |
+
ATOM 412 CA ILE A 50 -5.183 3.724 -8.311 1.00 0.00 C
|
414 |
+
ATOM 413 C ILE A 50 -6.056 4.974 -8.227 1.00 0.00 C
|
415 |
+
ATOM 414 CB ILE A 50 -6.053 2.481 -8.603 1.00 0.00 C
|
416 |
+
ATOM 415 O ILE A 50 -6.663 5.247 -7.188 1.00 0.00 O
|
417 |
+
ATOM 416 CG1 ILE A 50 -5.166 1.261 -8.875 1.00 0.00 C
|
418 |
+
ATOM 417 CG2 ILE A 50 -6.995 2.746 -9.780 1.00 0.00 C
|
419 |
+
ATOM 418 CD1 ILE A 50 -5.936 -0.045 -9.017 1.00 0.00 C
|
420 |
+
ATOM 419 N ASP A 51 -6.069 5.759 -9.226 1.00 0.00 N
|
421 |
+
ATOM 420 CA ASP A 51 -6.921 6.925 -9.438 1.00 0.00 C
|
422 |
+
ATOM 421 C ASP A 51 -6.398 8.135 -8.667 1.00 0.00 C
|
423 |
+
ATOM 422 CB ASP A 51 -8.362 6.620 -9.025 1.00 0.00 C
|
424 |
+
ATOM 423 O ASP A 51 -7.171 9.018 -8.290 1.00 0.00 O
|
425 |
+
ATOM 424 CG ASP A 51 -9.006 5.537 -9.873 1.00 0.00 C
|
426 |
+
ATOM 425 OD1 ASP A 51 -8.719 5.462 -11.087 1.00 0.00 O
|
427 |
+
ATOM 426 OD2 ASP A 51 -9.809 4.753 -9.323 1.00 0.00 O
|
428 |
+
ATOM 427 N THR A 52 -5.116 8.064 -8.253 1.00 0.00 N
|
429 |
+
ATOM 428 CA THR A 52 -4.446 9.180 -7.595 1.00 0.00 C
|
430 |
+
ATOM 429 C THR A 52 -3.068 9.419 -8.204 1.00 0.00 C
|
431 |
+
ATOM 430 CB THR A 52 -4.305 8.933 -6.081 1.00 0.00 C
|
432 |
+
ATOM 431 O THR A 52 -2.650 8.695 -9.111 1.00 0.00 O
|
433 |
+
ATOM 432 CG2 THR A 52 -5.582 8.334 -5.500 1.00 0.00 C
|
434 |
+
ATOM 433 OG1 THR A 52 -3.218 8.029 -5.850 1.00 0.00 O
|
435 |
+
ATOM 434 N ASP A 53 -2.398 10.489 -7.806 1.00 0.00 N
|
436 |
+
ATOM 435 CA ASP A 53 -1.026 10.761 -8.225 1.00 0.00 C
|
437 |
+
ATOM 436 C ASP A 53 -0.028 10.312 -7.160 1.00 0.00 C
|
438 |
+
ATOM 437 CB ASP A 53 -0.842 12.251 -8.524 1.00 0.00 C
|
439 |
+
ATOM 438 O ASP A 53 1.125 10.750 -7.159 1.00 0.00 O
|
440 |
+
ATOM 439 CG ASP A 53 -1.641 12.717 -9.728 1.00 0.00 C
|
441 |
+
ATOM 440 OD1 ASP A 53 -1.728 11.972 -10.728 1.00 0.00 O
|
442 |
+
ATOM 441 OD2 ASP A 53 -2.186 13.841 -9.678 1.00 0.00 O
|
443 |
+
ATOM 442 N ILE A 54 -0.442 9.451 -6.327 1.00 0.00 N
|
444 |
+
ATOM 443 CA ILE A 54 0.406 8.993 -5.232 1.00 0.00 C
|
445 |
+
ATOM 444 C ILE A 54 1.056 7.662 -5.603 1.00 0.00 C
|
446 |
+
ATOM 445 CB ILE A 54 -0.395 8.850 -3.918 1.00 0.00 C
|
447 |
+
ATOM 446 O ILE A 54 0.362 6.682 -5.881 1.00 0.00 O
|
448 |
+
ATOM 447 CG1 ILE A 54 -1.083 10.174 -3.566 1.00 0.00 C
|
449 |
+
ATOM 448 CG2 ILE A 54 0.516 8.386 -2.778 1.00 0.00 C
|
450 |
+
ATOM 449 CD1 ILE A 54 -2.074 10.069 -2.414 1.00 0.00 C
|
451 |
+
ATOM 450 N TRP A 55 2.279 7.611 -5.677 1.00 0.00 N
|
452 |
+
ATOM 451 CA TRP A 55 2.996 6.392 -6.034 1.00 0.00 C
|
453 |
+
ATOM 452 C TRP A 55 4.426 6.427 -5.505 1.00 0.00 C
|
454 |
+
ATOM 453 CB TRP A 55 3.004 6.197 -7.552 1.00 0.00 C
|
455 |
+
ATOM 454 O TRP A 55 4.907 7.474 -5.065 1.00 0.00 O
|
456 |
+
ATOM 455 CG TRP A 55 3.649 7.321 -8.307 1.00 0.00 C
|
457 |
+
ATOM 456 CD1 TRP A 55 3.095 8.533 -8.612 1.00 0.00 C
|
458 |
+
ATOM 457 CD2 TRP A 55 4.970 7.333 -8.857 1.00 0.00 C
|
459 |
+
ATOM 458 CE2 TRP A 55 5.150 8.587 -9.482 1.00 0.00 C
|
460 |
+
ATOM 459 CE3 TRP A 55 6.020 6.405 -8.881 1.00 0.00 C
|
461 |
+
ATOM 460 NE1 TRP A 55 3.993 9.299 -9.318 1.00 0.00 N
|
462 |
+
ATOM 461 CH2 TRP A 55 7.351 8.010 -10.134 1.00 0.00 C
|
463 |
+
ATOM 462 CZ2 TRP A 55 6.341 8.936 -10.125 1.00 0.00 C
|
464 |
+
ATOM 463 CZ3 TRP A 55 7.203 6.755 -9.522 1.00 0.00 C
|
465 |
+
ATOM 464 N CYS A 56 5.000 5.260 -5.391 1.00 0.00 N
|
466 |
+
ATOM 465 CA CYS A 56 6.389 5.078 -4.984 1.00 0.00 C
|
467 |
+
ATOM 466 C CYS A 56 7.106 4.104 -5.911 1.00 0.00 C
|
468 |
+
ATOM 467 CB CYS A 56 6.464 4.574 -3.543 1.00 0.00 C
|
469 |
+
ATOM 468 O CYS A 56 6.576 3.038 -6.228 1.00 0.00 O
|
470 |
+
ATOM 469 SG CYS A 56 8.149 4.302 -2.954 1.00 0.00 S
|
471 |
+
ATOM 470 N MET A 57 8.257 4.518 -6.421 1.00 0.00 N
|
472 |
+
ATOM 471 CA MET A 57 9.125 3.657 -7.220 1.00 0.00 C
|
473 |
+
ATOM 472 C MET A 57 10.511 3.550 -6.593 1.00 0.00 C
|
474 |
+
ATOM 473 CB MET A 57 9.237 4.186 -8.651 1.00 0.00 C
|
475 |
+
ATOM 474 O MET A 57 11.049 4.539 -6.094 1.00 0.00 O
|
476 |
+
ATOM 475 CG MET A 57 7.924 4.169 -9.416 1.00 0.00 C
|
477 |
+
ATOM 476 SD MET A 57 8.115 4.724 -11.154 1.00 0.00 S
|
478 |
+
ATOM 477 CE MET A 57 9.877 4.372 -11.410 1.00 0.00 C
|
479 |
+
ATOM 478 N SER A 58 10.989 2.288 -6.576 1.00 0.00 N
|
480 |
+
ATOM 479 CA SER A 58 12.264 2.156 -5.879 1.00 0.00 C
|
481 |
+
ATOM 480 C SER A 58 13.082 0.996 -6.438 1.00 0.00 C
|
482 |
+
ATOM 481 CB SER A 58 12.038 1.955 -4.380 1.00 0.00 C
|
483 |
+
ATOM 482 O SER A 58 12.521 -0.003 -6.894 1.00 0.00 O
|
484 |
+
ATOM 483 OG SER A 58 13.273 1.808 -3.701 1.00 0.00 O
|
485 |
+
ATOM 484 N PHE A 59 14.446 1.264 -6.479 1.00 0.00 N
|
486 |
+
ATOM 485 CA PHE A 59 15.391 0.171 -6.677 1.00 0.00 C
|
487 |
+
ATOM 486 C PHE A 59 15.963 -0.298 -5.345 1.00 0.00 C
|
488 |
+
ATOM 487 CB PHE A 59 16.524 0.601 -7.614 1.00 0.00 C
|
489 |
+
ATOM 488 O PHE A 59 16.693 -1.291 -5.292 1.00 0.00 O
|
490 |
+
ATOM 489 CG PHE A 59 16.135 0.625 -9.067 1.00 0.00 C
|
491 |
+
ATOM 490 CD1 PHE A 59 16.399 -0.463 -9.890 1.00 0.00 C
|
492 |
+
ATOM 491 CD2 PHE A 59 15.504 1.736 -9.611 1.00 0.00 C
|
493 |
+
ATOM 492 CE1 PHE A 59 16.040 -0.444 -11.235 1.00 0.00 C
|
494 |
+
ATOM 493 CE2 PHE A 59 15.143 1.763 -10.955 1.00 0.00 C
|
495 |
+
ATOM 494 CZ PHE A 59 15.412 0.672 -11.766 1.00 0.00 C
|
496 |
+
ATOM 495 N ALA A 60 15.648 0.435 -4.357 1.00 0.00 N
|
497 |
+
ATOM 496 CA ALA A 60 15.954 0.014 -2.992 1.00 0.00 C
|
498 |
+
ATOM 497 C ALA A 60 14.951 -1.025 -2.500 1.00 0.00 C
|
499 |
+
ATOM 498 CB ALA A 60 15.969 1.219 -2.054 1.00 0.00 C
|
500 |
+
ATOM 499 O ALA A 60 13.911 -1.239 -3.128 1.00 0.00 O
|
501 |
+
ATOM 500 N PRO A 61 15.294 -1.798 -1.450 1.00 0.00 N
|
502 |
+
ATOM 501 CA PRO A 61 14.452 -2.918 -1.021 1.00 0.00 C
|
503 |
+
ATOM 502 C PRO A 61 13.119 -2.462 -0.431 1.00 0.00 C
|
504 |
+
ATOM 503 CB PRO A 61 15.308 -3.616 0.038 1.00 0.00 C
|
505 |
+
ATOM 504 O PRO A 61 12.211 -3.276 -0.247 1.00 0.00 O
|
506 |
+
ATOM 505 CG PRO A 61 16.273 -2.571 0.500 1.00 0.00 C
|
507 |
+
ATOM 506 CD PRO A 61 16.512 -1.612 -0.630 1.00 0.00 C
|
508 |
+
ATOM 507 N LEU A 62 12.946 -1.118 -0.268 1.00 0.00 N
|
509 |
+
ATOM 508 CA LEU A 62 11.732 -0.669 0.405 1.00 0.00 C
|
510 |
+
ATOM 509 C LEU A 62 11.131 0.538 -0.307 1.00 0.00 C
|
511 |
+
ATOM 510 CB LEU A 62 12.027 -0.321 1.866 1.00 0.00 C
|
512 |
+
ATOM 511 O LEU A 62 11.859 1.434 -0.741 1.00 0.00 O
|
513 |
+
ATOM 512 CG LEU A 62 12.417 -1.486 2.776 1.00 0.00 C
|
514 |
+
ATOM 513 CD1 LEU A 62 12.964 -0.965 4.101 1.00 0.00 C
|
515 |
+
ATOM 514 CD2 LEU A 62 11.223 -2.406 3.010 1.00 0.00 C
|
516 |
+
ATOM 515 N CYS A 63 9.795 0.579 -0.388 1.00 0.00 N
|
517 |
+
ATOM 516 CA CYS A 63 9.049 1.716 -0.916 1.00 0.00 C
|
518 |
+
ATOM 517 C CYS A 63 7.730 1.894 -0.174 1.00 0.00 C
|
519 |
+
ATOM 518 CB CYS A 63 8.783 1.534 -2.411 1.00 0.00 C
|
520 |
+
ATOM 519 O CYS A 63 7.066 0.914 0.166 1.00 0.00 O
|
521 |
+
ATOM 520 SG CYS A 63 8.129 3.012 -3.218 1.00 0.00 S
|
522 |
+
ATOM 521 N GLU A 64 7.273 3.137 0.060 1.00 0.00 N
|
523 |
+
ATOM 522 CA GLU A 64 6.045 3.385 0.811 1.00 0.00 C
|
524 |
+
ATOM 523 C GLU A 64 5.144 4.378 0.083 1.00 0.00 C
|
525 |
+
ATOM 524 CB GLU A 64 6.369 3.900 2.216 1.00 0.00 C
|
526 |
+
ATOM 525 O GLU A 64 5.631 5.312 -0.557 1.00 0.00 O
|
527 |
+
ATOM 526 CG GLU A 64 7.158 2.914 3.066 1.00 0.00 C
|
528 |
+
ATOM 527 CD GLU A 64 7.576 3.483 4.412 1.00 0.00 C
|
529 |
+
ATOM 528 OE1 GLU A 64 8.127 2.728 5.245 1.00 0.00 O
|
530 |
+
ATOM 529 OE2 GLU A 64 7.351 4.693 4.636 1.00 0.00 O
|
531 |
+
ATOM 530 N VAL A 65 3.891 4.163 0.164 1.00 0.00 N
|
532 |
+
ATOM 531 CA VAL A 65 2.869 5.126 -0.231 1.00 0.00 C
|
533 |
+
ATOM 532 C VAL A 65 2.061 5.553 0.992 1.00 0.00 C
|
534 |
+
ATOM 533 CB VAL A 65 1.931 4.547 -1.314 1.00 0.00 C
|
535 |
+
ATOM 534 O VAL A 65 1.446 4.719 1.660 1.00 0.00 O
|
536 |
+
ATOM 535 CG1 VAL A 65 0.787 5.515 -1.612 1.00 0.00 C
|
537 |
+
ATOM 536 CG2 VAL A 65 2.716 4.234 -2.587 1.00 0.00 C
|
538 |
+
ATOM 537 N LYS A 66 2.125 6.823 1.301 1.00 0.00 N
|
539 |
+
ATOM 538 CA LYS A 66 1.412 7.397 2.439 1.00 0.00 C
|
540 |
+
ATOM 539 C LYS A 66 0.230 8.244 1.976 1.00 0.00 C
|
541 |
+
ATOM 540 CB LYS A 66 2.357 8.240 3.295 1.00 0.00 C
|
542 |
+
ATOM 541 O LYS A 66 0.372 9.083 1.084 1.00 0.00 O
|
543 |
+
ATOM 542 CG LYS A 66 1.740 8.734 4.595 1.00 0.00 C
|
544 |
+
ATOM 543 CD LYS A 66 2.762 9.464 5.458 1.00 0.00 C
|
545 |
+
ATOM 544 CE LYS A 66 2.137 9.991 6.742 1.00 0.00 C
|
546 |
+
ATOM 545 NZ LYS A 66 3.131 10.723 7.582 1.00 0.00 N
|
547 |
+
ATOM 546 N PHE A 67 -0.965 8.044 2.601 1.00 0.00 N
|
548 |
+
ATOM 547 CA PHE A 67 -2.137 8.813 2.199 1.00 0.00 C
|
549 |
+
ATOM 548 C PHE A 67 -3.119 8.946 3.357 1.00 0.00 C
|
550 |
+
ATOM 549 CB PHE A 67 -2.826 8.158 0.999 1.00 0.00 C
|
551 |
+
ATOM 550 O PHE A 67 -3.043 8.196 4.332 1.00 0.00 O
|
552 |
+
ATOM 551 CG PHE A 67 -3.210 6.721 1.229 1.00 0.00 C
|
553 |
+
ATOM 552 CD1 PHE A 67 -2.288 5.700 1.029 1.00 0.00 C
|
554 |
+
ATOM 553 CD2 PHE A 67 -4.493 6.391 1.645 1.00 0.00 C
|
555 |
+
ATOM 554 CE1 PHE A 67 -2.641 4.369 1.241 1.00 0.00 C
|
556 |
+
ATOM 555 CE2 PHE A 67 -4.852 5.063 1.859 1.00 0.00 C
|
557 |
+
ATOM 556 CZ PHE A 67 -3.925 4.054 1.655 1.00 0.00 C
|
558 |
+
ATOM 557 N SER A 68 -3.986 9.931 3.229 1.00 0.00 N
|
559 |
+
ATOM 558 CA SER A 68 -5.005 10.179 4.243 1.00 0.00 C
|
560 |
+
ATOM 559 C SER A 68 -6.269 9.372 3.966 1.00 0.00 C
|
561 |
+
ATOM 560 CB SER A 68 -5.346 11.668 4.308 1.00 0.00 C
|
562 |
+
ATOM 561 O SER A 68 -6.585 9.080 2.811 1.00 0.00 O
|
563 |
+
ATOM 562 OG SER A 68 -4.210 12.428 4.685 1.00 0.00 O
|
564 |
+
ATOM 563 N TYR A 69 -6.917 8.883 5.050 1.00 0.00 N
|
565 |
+
ATOM 564 CA TYR A 69 -8.184 8.178 4.893 1.00 0.00 C
|
566 |
+
ATOM 565 C TYR A 69 -9.270 8.807 5.758 1.00 0.00 C
|
567 |
+
ATOM 566 CB TYR A 69 -8.024 6.697 5.251 1.00 0.00 C
|
568 |
+
ATOM 567 O TYR A 69 -8.972 9.565 6.684 1.00 0.00 O
|
569 |
+
ATOM 568 CG TYR A 69 -7.710 6.456 6.707 1.00 0.00 C
|
570 |
+
ATOM 569 CD1 TYR A 69 -6.406 6.554 7.185 1.00 0.00 C
|
571 |
+
ATOM 570 CD2 TYR A 69 -8.718 6.128 7.608 1.00 0.00 C
|
572 |
+
ATOM 571 CE1 TYR A 69 -6.112 6.331 8.526 1.00 0.00 C
|
573 |
+
ATOM 572 CE2 TYR A 69 -8.436 5.902 8.952 1.00 0.00 C
|
574 |
+
ATOM 573 OH TYR A 69 -6.847 5.784 10.729 1.00 0.00 O
|
575 |
+
ATOM 574 CZ TYR A 69 -7.132 6.006 9.401 1.00 0.00 C
|
576 |
+
ATOM 575 N ARG A 70 -10.583 8.696 5.262 1.00 0.00 N
|
577 |
+
ATOM 576 CA ARG A 70 -11.721 9.219 6.010 1.00 0.00 C
|
578 |
+
ATOM 577 C ARG A 70 -12.611 8.089 6.514 1.00 0.00 C
|
579 |
+
ATOM 578 CB ARG A 70 -12.536 10.183 5.146 1.00 0.00 C
|
580 |
+
ATOM 579 O ARG A 70 -12.681 7.024 5.897 1.00 0.00 O
|
581 |
+
ATOM 580 CG ARG A 70 -11.777 11.434 4.734 1.00 0.00 C
|
582 |
+
ATOM 581 CD ARG A 70 -12.666 12.410 3.977 1.00 0.00 C
|
583 |
+
ATOM 582 NE ARG A 70 -11.945 13.627 3.614 1.00 0.00 N
|
584 |
+
ATOM 583 NH1 ARG A 70 -13.751 14.639 2.588 1.00 0.00 N
|
585 |
+
ATOM 584 NH2 ARG A 70 -11.730 15.716 2.684 1.00 0.00 N
|
586 |
+
ATOM 585 CZ ARG A 70 -12.477 14.658 2.963 1.00 0.00 C
|
587 |
+
ATOM 586 N GLY A 71 -13.241 8.307 7.725 1.00 0.00 N
|
588 |
+
ATOM 587 CA GLY A 71 -14.135 7.313 8.296 1.00 0.00 C
|
589 |
+
ATOM 588 C GLY A 71 -13.408 6.233 9.075 1.00 0.00 C
|
590 |
+
ATOM 589 O GLY A 71 -12.192 6.309 9.263 1.00 0.00 O
|
591 |
+
ATOM 590 N MET A 72 -14.158 5.323 9.659 1.00 0.00 N
|
592 |
+
ATOM 591 CA MET A 72 -13.613 4.253 10.491 1.00 0.00 C
|
593 |
+
ATOM 592 C MET A 72 -13.139 3.085 9.632 1.00 0.00 C
|
594 |
+
ATOM 593 CB MET A 72 -14.656 3.771 11.500 1.00 0.00 C
|
595 |
+
ATOM 594 O MET A 72 -12.324 2.274 10.076 1.00 0.00 O
|
596 |
+
ATOM 595 CG MET A 72 -15.002 4.801 12.563 1.00 0.00 C
|
597 |
+
ATOM 596 SD MET A 72 -16.056 4.109 13.896 1.00 0.00 S
|
598 |
+
ATOM 597 CE MET A 72 -15.027 2.711 14.424 1.00 0.00 C
|
599 |
+
ATOM 598 N LYS A 73 -13.499 3.031 8.394 1.00 0.00 N
|
600 |
+
ATOM 599 CA LYS A 73 -13.158 1.998 7.420 1.00 0.00 C
|
601 |
+
ATOM 600 C LYS A 73 -13.157 2.558 6.001 1.00 0.00 C
|
602 |
+
ATOM 601 CB LYS A 73 -14.134 0.824 7.519 1.00 0.00 C
|
603 |
+
ATOM 602 O LYS A 73 -14.116 3.212 5.585 1.00 0.00 O
|
604 |
+
ATOM 603 CG LYS A 73 -13.805 -0.335 6.589 1.00 0.00 C
|
605 |
+
ATOM 604 CD LYS A 73 -14.814 -1.467 6.729 1.00 0.00 C
|
606 |
+
ATOM 605 CE LYS A 73 -14.536 -2.590 5.738 1.00 0.00 C
|
607 |
+
ATOM 606 NZ LYS A 73 -15.551 -3.681 5.839 1.00 0.00 N
|
608 |
+
ATOM 607 N ALA A 74 -12.007 2.391 5.220 1.00 0.00 N
|
609 |
+
ATOM 608 CA ALA A 74 -11.877 2.842 3.837 1.00 0.00 C
|
610 |
+
ATOM 609 C ALA A 74 -11.034 1.868 3.019 1.00 0.00 C
|
611 |
+
ATOM 610 CB ALA A 74 -11.266 4.241 3.788 1.00 0.00 C
|
612 |
+
ATOM 611 O ALA A 74 -10.053 1.315 3.520 1.00 0.00 O
|
613 |
+
ATOM 612 N MET A 75 -11.483 1.658 1.812 1.00 0.00 N
|
614 |
+
ATOM 613 CA MET A 75 -10.803 0.722 0.923 1.00 0.00 C
|
615 |
+
ATOM 614 C MET A 75 -10.086 1.463 -0.202 1.00 0.00 C
|
616 |
+
ATOM 615 CB MET A 75 -11.796 -0.283 0.337 1.00 0.00 C
|
617 |
+
ATOM 616 O MET A 75 -10.672 2.331 -0.851 1.00 0.00 O
|
618 |
+
ATOM 617 CG MET A 75 -12.428 -1.196 1.375 1.00 0.00 C
|
619 |
+
ATOM 618 SD MET A 75 -13.576 -2.419 0.630 1.00 0.00 S
|
620 |
+
ATOM 619 CE MET A 75 -15.015 -1.358 0.318 1.00 0.00 C
|
621 |
+
ATOM 620 N PHE A 76 -8.828 0.988 -0.456 1.00 0.00 N
|
622 |
+
ATOM 621 CA PHE A 76 -8.002 1.608 -1.486 1.00 0.00 C
|
623 |
+
ATOM 622 C PHE A 76 -7.388 0.550 -2.396 1.00 0.00 C
|
624 |
+
ATOM 623 CB PHE A 76 -6.897 2.459 -0.851 1.00 0.00 C
|
625 |
+
ATOM 624 O PHE A 76 -6.937 -0.495 -1.925 1.00 0.00 O
|
626 |
+
ATOM 625 CG PHE A 76 -7.413 3.553 0.044 1.00 0.00 C
|
627 |
+
ATOM 626 CD1 PHE A 76 -7.706 4.810 -0.471 1.00 0.00 C
|
628 |
+
ATOM 627 CD2 PHE A 76 -7.604 3.325 1.400 1.00 0.00 C
|
629 |
+
ATOM 628 CE1 PHE A 76 -8.184 5.825 0.355 1.00 0.00 C
|
630 |
+
ATOM 629 CE2 PHE A 76 -8.081 4.334 2.232 1.00 0.00 C
|
631 |
+
ATOM 630 CZ PHE A 76 -8.369 5.584 1.708 1.00 0.00 C
|
632 |
+
ATOM 631 N SER A 77 -7.380 0.793 -3.671 1.00 0.00 N
|
633 |
+
ATOM 632 CA SER A 77 -6.849 -0.138 -4.661 1.00 0.00 C
|
634 |
+
ATOM 633 C SER A 77 -5.443 0.262 -5.098 1.00 0.00 C
|
635 |
+
ATOM 634 CB SER A 77 -7.770 -0.206 -5.880 1.00 0.00 C
|
636 |
+
ATOM 635 O SER A 77 -5.182 1.436 -5.368 1.00 0.00 O
|
637 |
+
ATOM 636 OG SER A 77 -9.051 -0.690 -5.516 1.00 0.00 O
|
638 |
+
ATOM 637 N PHE A 78 -4.547 -0.700 -5.147 1.00 0.00 N
|
639 |
+
ATOM 638 CA PHE A 78 -3.150 -0.484 -5.504 1.00 0.00 C
|
640 |
+
ATOM 639 C PHE A 78 -2.766 -1.319 -6.720 1.00 0.00 C
|
641 |
+
ATOM 640 CB PHE A 78 -2.234 -0.822 -4.324 1.00 0.00 C
|
642 |
+
ATOM 641 O PHE A 78 -3.356 -2.373 -6.967 1.00 0.00 O
|
643 |
+
ATOM 642 CG PHE A 78 -2.257 0.204 -3.224 1.00 0.00 C
|
644 |
+
ATOM 643 CD1 PHE A 78 -1.255 1.162 -3.127 1.00 0.00 C
|
645 |
+
ATOM 644 CD2 PHE A 78 -3.281 0.210 -2.286 1.00 0.00 C
|
646 |
+
ATOM 645 CE1 PHE A 78 -1.274 2.113 -2.109 1.00 0.00 C
|
647 |
+
ATOM 646 CE2 PHE A 78 -3.307 1.158 -1.266 1.00 0.00 C
|
648 |
+
ATOM 647 CZ PHE A 78 -2.302 2.108 -1.179 1.00 0.00 C
|
649 |
+
ATOM 648 N ARG A 79 -1.852 -0.697 -7.408 1.00 0.00 N
|
650 |
+
ATOM 649 CA ARG A 79 -1.158 -1.385 -8.491 1.00 0.00 C
|
651 |
+
ATOM 650 C ARG A 79 0.318 -1.580 -8.159 1.00 0.00 C
|
652 |
+
ATOM 651 CB ARG A 79 -1.302 -0.609 -9.802 1.00 0.00 C
|
653 |
+
ATOM 652 O ARG A 79 1.017 -0.619 -7.830 1.00 0.00 O
|
654 |
+
ATOM 653 CG ARG A 79 -0.640 -1.283 -10.993 1.00 0.00 C
|
655 |
+
ATOM 654 CD ARG A 79 -0.783 -0.453 -12.262 1.00 0.00 C
|
656 |
+
ATOM 655 NE ARG A 79 -0.132 -1.094 -13.401 1.00 0.00 N
|
657 |
+
ATOM 656 NH1 ARG A 79 -0.537 0.641 -14.872 1.00 0.00 N
|
658 |
+
ATOM 657 NH2 ARG A 79 0.583 -1.230 -15.579 1.00 0.00 N
|
659 |
+
ATOM 658 CZ ARG A 79 -0.030 -0.560 -14.615 1.00 0.00 C
|
660 |
+
ATOM 659 N TYR A 80 0.823 -2.845 -8.184 1.00 0.00 N
|
661 |
+
ATOM 660 CA TYR A 80 2.220 -3.211 -7.979 1.00 0.00 C
|
662 |
+
ATOM 661 C TYR A 80 2.861 -3.665 -9.284 1.00 0.00 C
|
663 |
+
ATOM 662 CB TYR A 80 2.337 -4.317 -6.927 1.00 0.00 C
|
664 |
+
ATOM 663 O TYR A 80 2.344 -4.559 -9.959 1.00 0.00 O
|
665 |
+
ATOM 664 CG TYR A 80 3.724 -4.901 -6.812 1.00 0.00 C
|
666 |
+
ATOM 665 CD1 TYR A 80 4.021 -6.153 -7.347 1.00 0.00 C
|
667 |
+
ATOM 666 CD2 TYR A 80 4.741 -4.204 -6.168 1.00 0.00 C
|
668 |
+
ATOM 667 CE1 TYR A 80 5.297 -6.696 -7.242 1.00 0.00 C
|
669 |
+
ATOM 668 CE2 TYR A 80 6.020 -4.737 -6.057 1.00 0.00 C
|
670 |
+
ATOM 669 OH TYR A 80 7.554 -6.514 -6.490 1.00 0.00 O
|
671 |
+
ATOM 670 CZ TYR A 80 6.289 -5.982 -6.597 1.00 0.00 C
|
672 |
+
ATOM 671 N ILE A 81 3.977 -3.045 -9.726 1.00 0.00 N
|
673 |
+
ATOM 672 CA ILE A 81 4.663 -3.362 -10.974 1.00 0.00 C
|
674 |
+
ATOM 673 C ILE A 81 6.141 -3.626 -10.697 1.00 0.00 C
|
675 |
+
ATOM 674 CB ILE A 81 4.506 -2.226 -12.010 1.00 0.00 C
|
676 |
+
ATOM 675 O ILE A 81 6.795 -2.855 -9.991 1.00 0.00 O
|
677 |
+
ATOM 676 CG1 ILE A 81 3.023 -1.919 -12.245 1.00 0.00 C
|
678 |
+
ATOM 677 CG2 ILE A 81 5.205 -2.593 -13.323 1.00 0.00 C
|
679 |
+
ATOM 678 CD1 ILE A 81 2.772 -0.622 -13.001 1.00 0.00 C
|
680 |
+
ATOM 679 N MET A 82 6.624 -4.650 -11.272 1.00 0.00 N
|
681 |
+
ATOM 680 CA MET A 82 8.056 -4.932 -11.247 1.00 0.00 C
|
682 |
+
ATOM 681 C MET A 82 8.708 -4.553 -12.572 1.00 0.00 C
|
683 |
+
ATOM 682 CB MET A 82 8.309 -6.410 -10.943 1.00 0.00 C
|
684 |
+
ATOM 683 O MET A 82 8.136 -4.782 -13.639 1.00 0.00 O
|
685 |
+
ATOM 684 CG MET A 82 7.919 -6.821 -9.533 1.00 0.00 C
|
686 |
+
ATOM 685 SD MET A 82 8.465 -8.524 -9.121 1.00 0.00 S
|
687 |
+
ATOM 686 CE MET A 82 7.498 -9.472 -10.328 1.00 0.00 C
|
688 |
+
ATOM 687 N TYR A 83 9.885 -3.839 -12.450 1.00 0.00 N
|
689 |
+
ATOM 688 CA TYR A 83 10.575 -3.496 -13.688 1.00 0.00 C
|
690 |
+
ATOM 689 C TYR A 83 12.059 -3.831 -13.597 1.00 0.00 C
|
691 |
+
ATOM 690 CB TYR A 83 10.394 -2.010 -14.011 1.00 0.00 C
|
692 |
+
ATOM 691 O TYR A 83 12.607 -3.957 -12.499 1.00 0.00 O
|
693 |
+
ATOM 692 CG TYR A 83 10.659 -1.097 -12.838 1.00 0.00 C
|
694 |
+
ATOM 693 CD1 TYR A 83 9.644 -0.769 -11.943 1.00 0.00 C
|
695 |
+
ATOM 694 CD2 TYR A 83 11.924 -0.561 -12.624 1.00 0.00 C
|
696 |
+
ATOM 695 CE1 TYR A 83 9.883 0.073 -10.861 1.00 0.00 C
|
697 |
+
ATOM 696 CE2 TYR A 83 12.175 0.282 -11.546 1.00 0.00 C
|
698 |
+
ATOM 697 OH TYR A 83 11.392 1.426 -9.603 1.00 0.00 O
|
699 |
+
ATOM 698 CZ TYR A 83 11.150 0.592 -10.671 1.00 0.00 C
|
700 |
+
ATOM 699 N ASP A 84 12.579 -4.184 -14.769 1.00 0.00 N
|
701 |
+
ATOM 700 CA ASP A 84 14.004 -4.497 -14.807 1.00 0.00 C
|
702 |
+
ATOM 701 C ASP A 84 14.846 -3.224 -14.847 1.00 0.00 C
|
703 |
+
ATOM 702 CB ASP A 84 14.328 -5.379 -16.015 1.00 0.00 C
|
704 |
+
ATOM 703 O ASP A 84 14.307 -2.116 -14.805 1.00 0.00 O
|
705 |
+
ATOM 704 CG ASP A 84 14.207 -4.641 -17.337 1.00 0.00 C
|
706 |
+
ATOM 705 OD1 ASP A 84 14.151 -3.393 -17.336 1.00 0.00 O
|
707 |
+
ATOM 706 OD2 ASP A 84 14.166 -5.315 -18.390 1.00 0.00 O
|
708 |
+
ATOM 707 N GLN A 85 16.104 -3.107 -14.825 1.00 0.00 N
|
709 |
+
ATOM 708 CA GLN A 85 17.029 -1.983 -14.722 1.00 0.00 C
|
710 |
+
ATOM 709 C GLN A 85 16.939 -1.083 -15.951 1.00 0.00 C
|
711 |
+
ATOM 710 CB GLN A 85 18.463 -2.481 -14.540 1.00 0.00 C
|
712 |
+
ATOM 711 O GLN A 85 17.373 0.070 -15.916 1.00 0.00 O
|
713 |
+
ATOM 712 CG GLN A 85 18.972 -3.327 -15.700 1.00 0.00 C
|
714 |
+
ATOM 713 CD GLN A 85 20.353 -3.901 -15.446 1.00 0.00 C
|
715 |
+
ATOM 714 NE2 GLN A 85 20.790 -4.803 -16.318 1.00 0.00 N
|
716 |
+
ATOM 715 OE1 GLN A 85 21.023 -3.536 -14.474 1.00 0.00 O
|
717 |
+
ATOM 716 N ASN A 86 16.347 -1.719 -16.985 1.00 0.00 N
|
718 |
+
ATOM 717 CA ASN A 86 16.216 -0.932 -18.206 1.00 0.00 C
|
719 |
+
ATOM 718 C ASN A 86 14.876 -0.203 -18.262 1.00 0.00 C
|
720 |
+
ATOM 719 CB ASN A 86 16.389 -1.821 -19.439 1.00 0.00 C
|
721 |
+
ATOM 720 O ASN A 86 14.590 0.504 -19.229 1.00 0.00 O
|
722 |
+
ATOM 721 CG ASN A 86 17.770 -2.440 -19.524 1.00 0.00 C
|
723 |
+
ATOM 722 ND2 ASN A 86 17.829 -3.711 -19.903 1.00 0.00 N
|
724 |
+
ATOM 723 OD1 ASN A 86 18.777 -1.781 -19.250 1.00 0.00 O
|
725 |
+
ATOM 724 N GLY A 87 14.047 -0.451 -17.157 1.00 0.00 N
|
726 |
+
ATOM 725 CA GLY A 87 12.774 0.245 -17.074 1.00 0.00 C
|
727 |
+
ATOM 726 C GLY A 87 11.645 -0.491 -17.770 1.00 0.00 C
|
728 |
+
ATOM 727 O GLY A 87 10.551 0.053 -17.935 1.00 0.00 O
|
729 |
+
ATOM 728 N HIS A 88 11.979 -1.693 -18.203 1.00 0.00 N
|
730 |
+
ATOM 729 CA HIS A 88 10.969 -2.521 -18.853 1.00 0.00 C
|
731 |
+
ATOM 730 C HIS A 88 10.107 -3.246 -17.825 1.00 0.00 C
|
732 |
+
ATOM 731 CB HIS A 88 11.629 -3.533 -19.792 1.00 0.00 C
|
733 |
+
ATOM 732 O HIS A 88 10.623 -3.781 -16.841 1.00 0.00 O
|
734 |
+
ATOM 733 CG HIS A 88 12.426 -2.902 -20.889 1.00 0.00 C
|
735 |
+
ATOM 734 CD2 HIS A 88 13.745 -2.608 -20.974 1.00 0.00 C
|
736 |
+
ATOM 735 ND1 HIS A 88 11.862 -2.492 -22.078 1.00 0.00 N
|
737 |
+
ATOM 736 CE1 HIS A 88 12.803 -1.973 -22.849 1.00 0.00 C
|
738 |
+
ATOM 737 NE2 HIS A 88 13.954 -2.032 -22.202 1.00 0.00 N
|
739 |
+
ATOM 738 N ASP A 89 8.762 -3.058 -18.051 1.00 0.00 N
|
740 |
+
ATOM 739 CA ASP A 89 7.804 -3.799 -17.236 1.00 0.00 C
|
741 |
+
ATOM 740 C ASP A 89 8.001 -5.305 -17.390 1.00 0.00 C
|
742 |
+
ATOM 741 CB ASP A 89 6.371 -3.415 -17.610 1.00 0.00 C
|
743 |
+
ATOM 742 O ASP A 89 8.056 -5.818 -18.509 1.00 0.00 O
|
744 |
+
ATOM 743 CG ASP A 89 5.330 -4.062 -16.713 1.00 0.00 C
|
745 |
+
ATOM 744 OD1 ASP A 89 5.698 -4.883 -15.846 1.00 0.00 O
|
746 |
+
ATOM 745 OD2 ASP A 89 4.131 -3.750 -16.877 1.00 0.00 O
|
747 |
+
ATOM 746 N LEU A 90 8.334 -6.022 -16.320 1.00 0.00 N
|
748 |
+
ATOM 747 CA LEU A 90 8.513 -7.469 -16.350 1.00 0.00 C
|
749 |
+
ATOM 748 C LEU A 90 7.166 -8.183 -16.380 1.00 0.00 C
|
750 |
+
ATOM 749 CB LEU A 90 9.323 -7.935 -15.137 1.00 0.00 C
|
751 |
+
ATOM 750 O LEU A 90 7.109 -9.414 -16.328 1.00 0.00 O
|
752 |
+
ATOM 751 CG LEU A 90 10.771 -7.447 -15.061 1.00 0.00 C
|
753 |
+
ATOM 752 CD1 LEU A 90 11.421 -7.917 -13.764 1.00 0.00 C
|
754 |
+
ATOM 753 CD2 LEU A 90 11.563 -7.933 -16.269 1.00 0.00 C
|
755 |
+
ATOM 754 N CYS A 91 6.145 -7.647 -16.960 1.00 0.00 N
|
756 |
+
ATOM 755 CA CYS A 91 4.838 -8.217 -17.265 1.00 0.00 C
|
757 |
+
ATOM 756 C CYS A 91 4.203 -8.824 -16.019 1.00 0.00 C
|
758 |
+
ATOM 757 CB CYS A 91 4.958 -9.281 -18.356 1.00 0.00 C
|
759 |
+
ATOM 758 O CYS A 91 3.278 -9.632 -16.119 1.00 0.00 O
|
760 |
+
ATOM 759 SG CYS A 91 5.305 -8.606 -19.995 1.00 0.00 S
|
761 |
+
ATOM 760 N SER A 92 4.554 -8.421 -14.785 1.00 0.00 N
|
762 |
+
ATOM 761 CA SER A 92 3.890 -8.942 -13.594 1.00 0.00 C
|
763 |
+
ATOM 762 C SER A 92 3.226 -7.824 -12.798 1.00 0.00 C
|
764 |
+
ATOM 763 CB SER A 92 4.887 -9.688 -12.707 1.00 0.00 C
|
765 |
+
ATOM 764 O SER A 92 3.909 -6.970 -12.228 1.00 0.00 O
|
766 |
+
ATOM 765 OG SER A 92 5.438 -10.799 -13.394 1.00 0.00 O
|
767 |
+
ATOM 766 N GLN A 93 2.048 -7.488 -13.242 1.00 0.00 N
|
768 |
+
ATOM 767 CA GLN A 93 1.265 -6.515 -12.488 1.00 0.00 C
|
769 |
+
ATOM 768 C GLN A 93 0.295 -7.209 -11.535 1.00 0.00 C
|
770 |
+
ATOM 769 CB GLN A 93 0.499 -5.591 -13.435 1.00 0.00 C
|
771 |
+
ATOM 770 O GLN A 93 -0.357 -8.187 -11.908 1.00 0.00 O
|
772 |
+
ATOM 771 CG GLN A 93 1.388 -4.851 -14.425 1.00 0.00 C
|
773 |
+
ATOM 772 CD GLN A 93 0.606 -3.933 -15.345 1.00 0.00 C
|
774 |
+
ATOM 773 NE2 GLN A 93 0.928 -3.968 -16.633 1.00 0.00 N
|
775 |
+
ATOM 774 OE1 GLN A 93 -0.282 -3.198 -14.901 1.00 0.00 O
|
776 |
+
ATOM 775 N ILE A 94 0.338 -6.724 -10.308 1.00 0.00 N
|
777 |
+
ATOM 776 CA ILE A 94 -0.576 -7.263 -9.307 1.00 0.00 C
|
778 |
+
ATOM 777 C ILE A 94 -1.499 -6.156 -8.801 1.00 0.00 C
|
779 |
+
ATOM 778 CB ILE A 94 0.191 -7.903 -8.128 1.00 0.00 C
|
780 |
+
ATOM 779 O ILE A 94 -1.043 -5.052 -8.494 1.00 0.00 O
|
781 |
+
ATOM 780 CG1 ILE A 94 1.112 -9.019 -8.633 1.00 0.00 C
|
782 |
+
ATOM 781 CG2 ILE A 94 -0.785 -8.433 -7.074 1.00 0.00 C
|
783 |
+
ATOM 782 CD1 ILE A 94 2.092 -9.533 -7.587 1.00 0.00 C
|
784 |
+
ATOM 783 N PHE A 95 -2.760 -6.352 -8.842 1.00 0.00 N
|
785 |
+
ATOM 784 CA PHE A 95 -3.734 -5.442 -8.251 1.00 0.00 C
|
786 |
+
ATOM 785 C PHE A 95 -4.216 -5.966 -6.904 1.00 0.00 C
|
787 |
+
ATOM 786 CB PHE A 95 -4.925 -5.241 -9.194 1.00 0.00 C
|
788 |
+
ATOM 787 O PHE A 95 -4.517 -7.153 -6.765 1.00 0.00 O
|
789 |
+
ATOM 788 CG PHE A 95 -4.559 -4.610 -10.509 1.00 0.00 C
|
790 |
+
ATOM 789 CD1 PHE A 95 -4.613 -3.231 -10.674 1.00 0.00 C
|
791 |
+
ATOM 790 CD2 PHE A 95 -4.159 -5.395 -11.583 1.00 0.00 C
|
792 |
+
ATOM 791 CE1 PHE A 95 -4.274 -2.644 -11.891 1.00 0.00 C
|
793 |
+
ATOM 792 CE2 PHE A 95 -3.819 -4.816 -12.802 1.00 0.00 C
|
794 |
+
ATOM 793 CZ PHE A 95 -3.878 -3.440 -12.954 1.00 0.00 C
|
795 |
+
ATOM 794 N PHE A 96 -4.198 -5.106 -5.926 1.00 0.00 N
|
796 |
+
ATOM 795 CA PHE A 96 -4.673 -5.552 -4.622 1.00 0.00 C
|
797 |
+
ATOM 796 C PHE A 96 -5.353 -4.411 -3.874 1.00 0.00 C
|
798 |
+
ATOM 797 CB PHE A 96 -3.515 -6.110 -3.788 1.00 0.00 C
|
799 |
+
ATOM 798 O PHE A 96 -5.223 -3.246 -4.257 1.00 0.00 O
|
800 |
+
ATOM 799 CG PHE A 96 -2.419 -5.112 -3.528 1.00 0.00 C
|
801 |
+
ATOM 800 CD1 PHE A 96 -1.422 -4.893 -4.470 1.00 0.00 C
|
802 |
+
ATOM 801 CD2 PHE A 96 -2.386 -4.393 -2.340 1.00 0.00 C
|
803 |
+
ATOM 802 CE1 PHE A 96 -0.406 -3.970 -4.231 1.00 0.00 C
|
804 |
+
ATOM 803 CE2 PHE A 96 -1.375 -3.470 -2.094 1.00 0.00 C
|
805 |
+
ATOM 804 CZ PHE A 96 -0.385 -3.260 -3.041 1.00 0.00 C
|
806 |
+
ATOM 805 N THR A 97 -6.180 -4.778 -2.859 1.00 0.00 N
|
807 |
+
ATOM 806 CA THR A 97 -6.980 -3.835 -2.084 1.00 0.00 C
|
808 |
+
ATOM 807 C THR A 97 -6.479 -3.756 -0.645 1.00 0.00 C
|
809 |
+
ATOM 808 CB THR A 97 -8.469 -4.227 -2.092 1.00 0.00 C
|
810 |
+
ATOM 809 O THR A 97 -6.224 -4.784 -0.014 1.00 0.00 O
|
811 |
+
ATOM 810 CG2 THR A 97 -9.298 -3.253 -1.261 1.00 0.00 C
|
812 |
+
ATOM 811 OG1 THR A 97 -8.952 -4.218 -3.441 1.00 0.00 O
|
813 |
+
ATOM 812 N VAL A 98 -6.234 -2.578 -0.176 1.00 0.00 N
|
814 |
+
ATOM 813 CA VAL A 98 -5.879 -2.335 1.218 1.00 0.00 C
|
815 |
+
ATOM 814 C VAL A 98 -7.047 -1.667 1.940 1.00 0.00 C
|
816 |
+
ATOM 815 CB VAL A 98 -4.609 -1.463 1.337 1.00 0.00 C
|
817 |
+
ATOM 816 O VAL A 98 -7.631 -0.706 1.432 1.00 0.00 O
|
818 |
+
ATOM 817 CG1 VAL A 98 -4.398 -1.011 2.781 1.00 0.00 C
|
819 |
+
ATOM 818 CG2 VAL A 98 -3.388 -2.228 0.830 1.00 0.00 C
|
820 |
+
ATOM 819 N ILE A 99 -7.430 -2.236 3.089 1.00 0.00 N
|
821 |
+
ATOM 820 CA ILE A 99 -8.491 -1.681 3.923 1.00 0.00 C
|
822 |
+
ATOM 821 C ILE A 99 -7.881 -0.965 5.126 1.00 0.00 C
|
823 |
+
ATOM 822 CB ILE A 99 -9.472 -2.778 4.394 1.00 0.00 C
|
824 |
+
ATOM 823 O ILE A 99 -7.148 -1.571 5.911 1.00 0.00 O
|
825 |
+
ATOM 824 CG1 ILE A 99 -10.082 -3.502 3.188 1.00 0.00 C
|
826 |
+
ATOM 825 CG2 ILE A 99 -10.565 -2.179 5.283 1.00 0.00 C
|
827 |
+
ATOM 826 CD1 ILE A 99 -10.874 -4.750 3.551 1.00 0.00 C
|
828 |
+
ATOM 827 N CYS A 100 -8.094 0.384 5.239 1.00 0.00 N
|
829 |
+
ATOM 828 CA CYS A 100 -7.717 1.159 6.416 1.00 0.00 C
|
830 |
+
ATOM 829 C CYS A 100 -8.858 1.208 7.425 1.00 0.00 C
|
831 |
+
ATOM 830 CB CYS A 100 -7.315 2.579 6.018 1.00 0.00 C
|
832 |
+
ATOM 831 O CYS A 100 -9.811 1.971 7.255 1.00 0.00 O
|
833 |
+
ATOM 832 SG CYS A 100 -5.913 2.646 4.881 1.00 0.00 S
|
834 |
+
ATOM 833 N ARG A 101 -8.734 0.291 8.419 1.00 0.00 N
|
835 |
+
ATOM 834 CA ARG A 101 -9.778 0.171 9.433 1.00 0.00 C
|
836 |
+
ATOM 835 C ARG A 101 -9.235 0.503 10.819 1.00 0.00 C
|
837 |
+
ATOM 836 CB ARG A 101 -10.374 -1.239 9.427 1.00 0.00 C
|
838 |
+
ATOM 837 O ARG A 101 -8.234 -0.070 11.253 1.00 0.00 O
|
839 |
+
ATOM 838 CG ARG A 101 -11.479 -1.444 10.451 1.00 0.00 C
|
840 |
+
ATOM 839 CD ARG A 101 -12.048 -2.855 10.390 1.00 0.00 C
|
841 |
+
ATOM 840 NE ARG A 101 -13.253 -2.985 11.205 1.00 0.00 N
|
842 |
+
ATOM 841 NH1 ARG A 101 -13.729 -5.141 10.524 1.00 0.00 N
|
843 |
+
ATOM 842 NH2 ARG A 101 -15.098 -4.079 12.026 1.00 0.00 N
|
844 |
+
ATOM 843 CZ ARG A 101 -14.024 -4.068 11.250 1.00 0.00 C
|
845 |
+
ATOM 844 N GLU A 102 -9.902 1.446 11.474 1.00 0.00 N
|
846 |
+
ATOM 845 CA GLU A 102 -9.563 1.790 12.851 1.00 0.00 C
|
847 |
+
ATOM 846 C GLU A 102 -10.253 0.854 13.840 1.00 0.00 C
|
848 |
+
ATOM 847 CB GLU A 102 -9.940 3.242 13.152 1.00 0.00 C
|
849 |
+
ATOM 848 O GLU A 102 -11.402 0.458 13.630 1.00 0.00 O
|
850 |
+
ATOM 849 CG GLU A 102 -9.221 4.258 12.277 1.00 0.00 C
|
851 |
+
ATOM 850 CD GLU A 102 -9.635 5.694 12.559 1.00 0.00 C
|
852 |
+
ATOM 851 OE1 GLU A 102 -9.298 6.592 11.755 1.00 0.00 O
|
853 |
+
ATOM 852 OE2 GLU A 102 -10.302 5.922 13.593 1.00 0.00 O
|
854 |
+
ATOM 853 N TYR A 103 -9.489 0.409 14.757 1.00 0.00 N
|
855 |
+
ATOM 854 CA TYR A 103 -10.036 -0.359 15.869 1.00 0.00 C
|
856 |
+
ATOM 855 C TYR A 103 -10.129 0.495 17.128 1.00 0.00 C
|
857 |
+
ATOM 856 CB TYR A 103 -9.178 -1.598 16.141 1.00 0.00 C
|
858 |
+
ATOM 857 O TYR A 103 -9.113 0.975 17.636 1.00 0.00 O
|
859 |
+
ATOM 858 CG TYR A 103 -9.123 -2.564 14.982 1.00 0.00 C
|
860 |
+
ATOM 859 CD1 TYR A 103 -10.142 -3.491 14.774 1.00 0.00 C
|
861 |
+
ATOM 860 CD2 TYR A 103 -8.052 -2.553 14.095 1.00 0.00 C
|
862 |
+
ATOM 861 CE1 TYR A 103 -10.094 -4.384 13.709 1.00 0.00 C
|
863 |
+
ATOM 862 CE2 TYR A 103 -7.994 -3.442 13.027 1.00 0.00 C
|
864 |
+
ATOM 863 OH TYR A 103 -8.966 -5.234 11.786 1.00 0.00 O
|
865 |
+
ATOM 864 CZ TYR A 103 -9.018 -4.352 12.842 1.00 0.00 C
|
866 |
+
ATOM 865 N CYS A 104 -11.495 0.756 17.579 1.00 0.00 N
|
867 |
+
ATOM 866 CA CYS A 104 -11.696 1.681 18.689 1.00 0.00 C
|
868 |
+
ATOM 867 C CYS A 104 -12.302 0.967 19.891 1.00 0.00 C
|
869 |
+
ATOM 868 CB CYS A 104 -12.597 2.840 18.264 1.00 0.00 C
|
870 |
+
ATOM 869 O CYS A 104 -13.088 0.031 19.731 1.00 0.00 O
|
871 |
+
ATOM 870 SG CYS A 104 -12.045 3.679 16.763 1.00 0.00 S
|
872 |
+
ATOM 871 N CYS A 105 -11.819 1.196 21.115 1.00 0.00 N
|
873 |
+
ATOM 872 CA CYS A 105 -12.422 0.718 22.354 1.00 0.00 C
|
874 |
+
ATOM 873 C CYS A 105 -13.019 1.872 23.151 1.00 0.00 C
|
875 |
+
ATOM 874 CB CYS A 105 -11.386 -0.018 23.204 1.00 0.00 C
|
876 |
+
ATOM 875 O CYS A 105 -12.550 3.008 23.054 1.00 0.00 O
|
877 |
+
ATOM 876 SG CYS A 105 -10.010 1.023 23.738 1.00 0.00 S
|
878 |
+
TER 877 CYS A 105
|
879 |
+
ENDMDL
|
880 |
+
END
|