Spaces:

igashov
/

DiffLinker

Sleeping

App Files Files Community

igashov committed on Apr 17

Commit

88b37fb

•

1 Parent(s): c2d5999

updated code

Browse files

Files changed (7) hide show

app.py +8 -4
src/datasets.py +114 -9
src/egnn.py +48 -13
src/lightning.py +8 -4
src/linker_size.py +0 -4
src/linker_size_lightning.py +6 -1
src/utils.py +14 -0

app.py CHANGED Viewed

@@ -35,12 +35,16 @@ MODELS_METADATA = {
         'path': 'models/geom_difflinker_given_anchors.ckpt',
     },
     'pockets_difflinker': {
-        'link': 'https://zenodo.org/record/7775568/files/pockets_difflinker_full_no_anchors.ckpt?download=1',
-        'path': 'models/pockets_difflinker.ckpt',
     },
     'pockets_difflinker_given_anchors': {
-        'link': 'https://zenodo.org/record/7775568/files/pockets_difflinker_full.ckpt?download=1',
-        'path': 'models/pockets_difflinker_given_anchors.ckpt',
     },
 }

         'path': 'models/geom_difflinker_given_anchors.ckpt',
     },
     'pockets_difflinker': {
+        # 'link': 'https://zenodo.org/record/7775568/files/pockets_difflinker_full_no_anchors.ckpt?download=1',
+        # 'path': 'models/pockets_difflinker.ckpt',
+        'link': 'https://zenodo.org/records/10988017/files/pockets_difflinker_full_no_anchors_fc_pdb_excluded.ckpt?download=1',
+        'path': 'models/pockets_difflinker_full_no_anchors_fc_pdb_excluded.ckpt',
     },
     'pockets_difflinker_given_anchors': {
+        # 'link': 'https://zenodo.org/record/7775568/files/pockets_difflinker_full.ckpt?download=1',
+        # 'path': 'models/pockets_difflinker_given_anchors.ckpt',
+        'link': 'https://zenodo.org/records/10988017/files/pockets_difflinker_full_fc_pdb_excluded.ckpt?download=1',
+        'path': 'models/pockets_difflinker_full_fc_pdb_excluded.ckpt',
     },
 }

src/datasets.py CHANGED Viewed

@@ -148,6 +148,15 @@ class MOADDataset(Dataset):
             total=len(table)
         )
         for (_, row), fragments, linker, pocket_data in generator:
             uuid = row['uuid']
             name = row['molecule']
             frag_pos, frag_one_hot, frag_charges = parse_molecule(fragments, is_geom=is_geom)
@@ -212,16 +221,112 @@ class MOADDataset(Dataset):
         return data
     @staticmethod
-    def create_edges(positions, fragment_mask_only, linker_mask_only):  # Removed by this commit. Returned [rows, cols] index arrays of the adjacency built below.
-        ligand_mask = fragment_mask_only.astype(bool) | linker_mask_only.astype(bool)  # True for atoms flagged in either mask
-        ligand_adj = ligand_mask[:, None] & ligand_mask[None, :]  # every ligand-ligand pair is connected
-        proximity_adj = np.linalg.norm(positions[:, None, :] - positions[None, :, :], axis=-1) <= 6  # any pair at distance <= 6 (units not shown here; presumably Angstrom - TODO confirm)
-        full_adj = ligand_adj | proximity_adj
-        full_adj &= ~np.eye(len(positions)).astype(bool)  # drop self-loops
-        curr_rows, curr_cols = np.where(full_adj)
-        return [curr_rows, curr_cols]
 def collate(batch):
@@ -231,7 +336,7 @@ def collate(batch):
     # if 'pocket_mask' not in batch[0].keys():
     #    batch = [data for data in batch if data['num_atoms'] <= 50]
     # else:
-    # batch = [data for data in batch if data['num_atoms'] <= 1000]
     for i, data in enumerate(batch):
         for key, value in data.items():

             total=len(table)
         )
         for (_, row), fragments, linker, pocket_data in generator:
+            pdb = row['molecule_name'].split('_')[0]
+            if pdb in {
+                '5ou2', '5ou3', '6hay',
+                '5mo8', '5mo5', '5mo7', '5ctp', '5cu2', '5cu4', '5mmr', '5mmf',
+                '5moe', '3iw7', '4i9n', '3fi2', '3fi3',
+            }:
+                print(f'Skipping pdb={pdb}')
+                continue
             uuid = row['uuid']
             name = row['molecule']
             frag_pos, frag_one_hot, frag_charges = parse_molecule(fragments, is_geom=is_geom)
         return data
+class OptimisedMOADDataset(MOADDataset):
+    # TODO: finish testing. Variant of MOADDataset whose data is split into a list of per-fragmentation records and a dict of position/feature tensors keyed by molecule name.
+    def __len__(self):
+        return len(self.data['fragmentation_level_data'])  # one item per fragmentation record
+    def __getitem__(self, item):
+        fragmentation_level_data = self.data['fragmentation_level_data'][item]
+        protein_level_data = self.data['protein_level_data'][fragmentation_level_data['name']]  # looked up by the record's 'name'
+        return {
+            **fragmentation_level_data,
+            **protein_level_data,  # unpacked last, so these keys would win on a name clash
+        }
     @staticmethod
+    def preprocess(data_path, prefix, pocket_mode, device):  # Reads '<prefix>_table.csv', '<prefix>_frag.sdf', '<prefix>_link.sdf' and '<prefix>_pockets.pkl' from data_path and returns the two-part dict built below
+        print('Preprocessing optimised version of the dataset')
+        protein_level_data = {}
+        fragmentation_level_data = []
+        table_path = os.path.join(data_path, f'{prefix}_table.csv')
+        fragments_path = os.path.join(data_path, f'{prefix}_frag.sdf')
+        linkers_path = os.path.join(data_path, f'{prefix}_link.sdf')
+        pockets_path = os.path.join(data_path, f'{prefix}_pockets.pkl')
+        is_geom = True  # hard-coded; forwarded to parse_molecule
+        is_multifrag = 'multifrag' in prefix  # selects how anchors are read from the table row
+        with open(pockets_path, 'rb') as f:
+            pockets = pickle.load(f)  # NOTE(review): unpickling can execute arbitrary code; only load trusted pocket files
+        table = pd.read_csv(table_path)
+        generator = tqdm(
+            zip(table.iterrows(), read_sdf(fragments_path), read_sdf(linkers_path), pockets),  # zip stops silently at the shortest input
+            total=len(table)
+        )
+        for (_, row), fragments, linker, pocket_data in generator:
+            uuid = row['uuid']
+            name = row['molecule']
+            frag_pos, frag_one_hot, frag_charges = parse_molecule(fragments, is_geom=is_geom)
+            link_pos, link_one_hot, link_charges = parse_molecule(linker, is_geom=is_geom)
+            # Parsing pocket data: coordinates and atom types are read from the '<pocket_mode>_coord' / '<pocket_mode>_types' entries
+            pocket_pos = pocket_data[f'{pocket_mode}_coord']
+            pocket_one_hot = []
+            pocket_charges = []
+            for atom_type in pocket_data[f'{pocket_mode}_types']:
+                pocket_one_hot.append(get_one_hot(atom_type, const.GEOM_ATOM2IDX))
+                pocket_charges.append(const.GEOM_CHARGES[atom_type])
+            pocket_one_hot = np.array(pocket_one_hot)
+            pocket_charges = np.array(pocket_charges)
+            positions = np.concatenate([frag_pos, pocket_pos, link_pos], axis=0)  # atom order everywhere below: fragments, pocket, linker
+            one_hot = np.concatenate([frag_one_hot, pocket_one_hot, link_one_hot], axis=0)
+            charges = np.concatenate([frag_charges, pocket_charges, link_charges], axis=0)
+            anchors = np.zeros_like(charges)  # per-atom flag array; 1 marks an anchor atom
+            if is_multifrag:
+                for anchor_idx in map(int, row['anchors'].split('-')):  # '-'-separated integer indices
+                    anchors[anchor_idx] = 1
+            else:
+                anchors[row['anchor_1']] = 1  # presumably fragment-local indices, valid here because fragments come first - TODO confirm
+                anchors[row['anchor_2']] = 1
+            fragment_only_mask = np.concatenate([  # 1 on fragment atoms only
+                np.ones_like(frag_charges),
+                np.zeros_like(pocket_charges),
+                np.zeros_like(link_charges)
+            ])
+            pocket_mask = np.concatenate([  # 1 on pocket atoms only
+                np.zeros_like(frag_charges),
+                np.ones_like(pocket_charges),
+                np.zeros_like(link_charges)
+            ])
+            linker_mask = np.concatenate([  # 1 on linker atoms only
+                np.zeros_like(frag_charges),
+                np.zeros_like(pocket_charges),
+                np.ones_like(link_charges)
+            ])
+            fragment_mask = np.concatenate([  # 1 on fragment AND pocket atoms, 0 on linker atoms
+                np.ones_like(frag_charges),
+                np.ones_like(pocket_charges),
+                np.zeros_like(link_charges)
+            ])
+            fragmentation_level_data.append({
+                'uuid': uuid,
+                'name': name,
+                'anchors': torch.tensor(anchors, dtype=const.TORCH_FLOAT, device=device),
+                'fragment_only_mask': torch.tensor(fragment_only_mask, dtype=const.TORCH_FLOAT, device=device),
+                'pocket_mask': torch.tensor(pocket_mask, dtype=const.TORCH_FLOAT, device=device),
+                'fragment_mask': torch.tensor(fragment_mask, dtype=const.TORCH_FLOAT, device=device),
+                'linker_mask': torch.tensor(linker_mask, dtype=const.TORCH_FLOAT, device=device),
+            })
+            protein_level_data[name] = {  # NOTE(review): a later row with the same name overwrites this entry, while earlier masks were built for their own fragment/linker split - verify atom ordering stays consistent
+                'positions': torch.tensor(positions, dtype=const.TORCH_FLOAT, device=device),
+                'one_hot': torch.tensor(one_hot, dtype=const.TORCH_FLOAT, device=device),
+                'charges': torch.tensor(charges, dtype=const.TORCH_FLOAT, device=device),
+                'num_atoms': len(positions),
+            }
+        return {
+            'fragmentation_level_data': fragmentation_level_data,
+            'protein_level_data': protein_level_data,
+        }
 def collate(batch):
     # if 'pocket_mask' not in batch[0].keys():
     #    batch = [data for data in batch if data['num_atoms'] <= 50]
     # else:
+    #    batch = [data for data in batch if data['num_atoms'] <= 1000]
     for i, data in enumerate(batch):
         for key, value in data.items():

src/egnn.py CHANGED Viewed

@@ -315,7 +315,7 @@ class Dynamics(nn.Module):
             self, n_dims, in_node_nf, context_node_nf, hidden_nf=64, device='cpu', activation=nn.SiLU(),
             n_layers=4, attention=False, condition_time=True, tanh=False, norm_constant=0, inv_sublayers=2,
             sin_embedding=False, normalization_factor=100, aggregation_method='sum', model='egnn_dynamics',
-            normalization=None, centering=False,
     ):
         super().__init__()
         self.device = device
@@ -324,6 +324,7 @@ class Dynamics(nn.Module):
         self.condition_time = condition_time
         self.model = model
         self.centering = centering
         in_node_nf = in_node_nf + context_node_nf + condition_time
         if self.model == 'egnn_dynamics':
@@ -369,6 +370,8 @@ class Dynamics(nn.Module):
         - context: (B, N, C)
         """
         bs, n_nodes = xh.shape[0], xh.shape[1]
         edges = self.get_edges(n_nodes, bs)  # (2, B*N)
         node_mask = node_mask.view(bs * n_nodes, 1)  # (B*N, 1)
@@ -421,16 +424,6 @@ class Dynamics(nn.Module):
         if self.condition_time:
             h_final = h_final[:, :-1]
-        if torch.any(torch.isnan(vel)):
-            print('Found NaN values in velocities')
-            nan_mask = torch.isnan(vel).float()
-            vel = x * nan_mask + torch.nan_to_num(vel) * (1 - nan_mask)
-        if torch.any(torch.isnan(h_final)):
-            print('Found NaN values in features')
-            nan_mask = torch.isnan(h_final).float()
-            h_final = h[:, :h_final.shape[1]] * nan_mask + torch.nan_to_num(h_final) * (1 - nan_mask)
         vel = vel.view(bs, n_nodes, -1)  # (B, N, 3)
         h_final = h_final.view(bs, n_nodes, -1)  # (B, N, D)
         node_mask = node_mask.view(bs, n_nodes, 1)  # (B, N, 1)
@@ -477,12 +470,21 @@ class DynamicsWithPockets(Dynamics):
         if linker_mask is not None:
             linker_mask = linker_mask.view(bs * n_nodes, 1)  # (B*N, 1)
         # Reshaping node features & adding time feature
         xh = xh.view(bs * n_nodes, -1).clone() * node_mask  # (B*N, D)
         x = xh[:, :self.n_dims].clone()  # (B*N, 3)
         h = xh[:, self.n_dims:].clone()  # (B*N, nf)
-        edges = self.get_dist_edges(x, node_mask, edge_mask)
         if self.condition_time:
             if np.prod(t.size()) == 1:
                 # t is the same for all elements in batch.
@@ -537,7 +539,7 @@ class DynamicsWithPockets(Dynamics):
         return torch.cat([vel, h_final], dim=2)
     @staticmethod
-    def get_dist_edges(x, node_mask, batch_mask):
         node_mask = node_mask.squeeze().bool()
         batch_adj = (batch_mask[:, None] == batch_mask[None, :])
         nodes_adj = (node_mask[:, None] & node_mask[None, :])
@@ -546,3 +548,36 @@ class DynamicsWithPockets(Dynamics):
         adj = batch_adj & nodes_adj & dists_adj & rm_self_loops
         edges = torch.stack(torch.where(adj))
         return edges

             self, n_dims, in_node_nf, context_node_nf, hidden_nf=64, device='cpu', activation=nn.SiLU(),
             n_layers=4, attention=False, condition_time=True, tanh=False, norm_constant=0, inv_sublayers=2,
             sin_embedding=False, normalization_factor=100, aggregation_method='sum', model='egnn_dynamics',
+            normalization=None, centering=False, graph_type='FC',
     ):
         super().__init__()
         self.device = device
         self.condition_time = condition_time
         self.model = model
         self.centering = centering
+        self.graph_type = graph_type
         in_node_nf = in_node_nf + context_node_nf + condition_time
         if self.model == 'egnn_dynamics':
         - context: (B, N, C)
         """
+        assert self.graph_type == 'FC'
         bs, n_nodes = xh.shape[0], xh.shape[1]
         edges = self.get_edges(n_nodes, bs)  # (2, B*N)
         node_mask = node_mask.view(bs * n_nodes, 1)  # (B*N, 1)
         if self.condition_time:
             h_final = h_final[:, :-1]
         vel = vel.view(bs, n_nodes, -1)  # (B, N, 3)
         h_final = h_final.view(bs, n_nodes, -1)  # (B, N, D)
         node_mask = node_mask.view(bs, n_nodes, 1)  # (B, N, 1)
         if linker_mask is not None:
             linker_mask = linker_mask.view(bs * n_nodes, 1)  # (B*N, 1)
+        fragment_only_mask = context[..., -2].view(bs * n_nodes, 1)  # (B*N, 1)
+        pocket_only_mask = context[..., -1].view(bs * n_nodes, 1)  # (B*N, 1)
+        assert torch.all(fragment_only_mask.bool() | pocket_only_mask.bool() | linker_mask.bool() == node_mask.bool())
         # Reshaping node features & adding time feature
         xh = xh.view(bs * n_nodes, -1).clone() * node_mask  # (B*N, D)
         x = xh[:, :self.n_dims].clone()  # (B*N, 3)
         h = xh[:, self.n_dims:].clone()  # (B*N, nf)
+        assert self.graph_type in ['4A', 'FC-4A', 'FC-10A-4A']
+        if self.graph_type == '4A' or self.graph_type is None:
+            edges = self.get_dist_edges_4A(x, node_mask, edge_mask)
+        else:
+            edges = self.get_dist_edges(x, node_mask, edge_mask, linker_mask, fragment_only_mask, pocket_only_mask)
         if self.condition_time:
             if np.prod(t.size()) == 1:
                 # t is the same for all elements in batch.
         return torch.cat([vel, h_final], dim=2)
     @staticmethod
+    def get_dist_edges_4A(x, node_mask, batch_mask):
         node_mask = node_mask.squeeze().bool()
         batch_adj = (batch_mask[:, None] == batch_mask[None, :])
         nodes_adj = (node_mask[:, None] & node_mask[None, :])
         adj = batch_adj & nodes_adj & dists_adj & rm_self_loops
         edges = torch.stack(torch.where(adj))
         return edges
+    def get_dist_edges(self, x, node_mask, batch_mask, linker_mask, fragment_only_mask, pocket_only_mask):  # Returns a stacked (row, col) index tensor of edges: ligand pairs fully connected, pocket-pocket and pocket-ligand pairs limited by distance
+        node_mask = node_mask.squeeze().bool()  # presumably (B*N, 1) -> (B*N,), matching the caller's reshape - TODO confirm
+        linker_mask = linker_mask.squeeze().bool() & node_mask  # the role masks are restricted to unmasked nodes
+        fragment_only_mask = fragment_only_mask.squeeze().bool() & node_mask
+        pocket_only_mask = pocket_only_mask.squeeze().bool() & node_mask
+        ligand_mask = linker_mask | fragment_only_mask  # ligand = linker atoms + fragment atoms
+        # General constraints: same batch element, both endpoints unmasked, no self-loops
+        batch_adj = (batch_mask[:, None] == batch_mask[None, :])
+        nodes_adj = (node_mask[:, None] & node_mask[None, :])
+        rm_self_loops = ~torch.eye(x.size(0), dtype=torch.bool, device=x.device)
+        constraints = batch_adj & nodes_adj & rm_self_loops
+        # Ligand atoms - fully-connected graph (no distance cutoff)
+        ligand_adj = (ligand_mask[:, None] & ligand_mask[None, :])
+        ligand_interactions = ligand_adj & constraints
+        # Pocket atoms - within 4A
+        pocket_adj = (pocket_only_mask[:, None] & pocket_only_mask[None, :])
+        pocket_dists_adj = (torch.cdist(x, x) <= 4)
+        pocket_interactions = pocket_adj & pocket_dists_adj & constraints
+        # Pocket-ligand atoms - within 4A when graph_type is 'FC-4A', otherwise within 10A
+        pocket_ligand_cutoff = 4 if self.graph_type == 'FC-4A' else 10
+        pocket_ligand_adj = (ligand_mask[:, None] & pocket_only_mask[None, :])
+        pocket_ligand_adj = pocket_ligand_adj | (pocket_only_mask[:, None] & ligand_mask[None, :])  # add the reverse direction so the relation is symmetric
+        pocket_ligand_dists_adj = (torch.cdist(x, x) <= pocket_ligand_cutoff)  # NOTE(review): same pairwise distances as computed for the pocket cutoff above; could be computed once
+        pocket_ligand_interactions = pocket_ligand_adj & pocket_ligand_dists_adj & constraints
+        adj = ligand_interactions | pocket_interactions | pocket_ligand_interactions
+        edges = torch.stack(torch.where(adj))  # row 0: source indices, row 1: target indices of every True entry in adj
+        return edges

src/lightning.py CHANGED Viewed

@@ -44,7 +44,7 @@ class DDPM(pl.LightningModule):
         normalize_factors, include_charges, model,
         data_path, train_data_prefix, val_data_prefix, batch_size, lr, torch_device, test_epochs, n_stability_samples,
         normalization=None, log_iterations=None, samples_dir=None, data_augmentation=False,
-        center_of_mass='fragments', inpainting=False, anchors_context=True,
     ):
         super(DDPM, self).__init__()
@@ -54,7 +54,7 @@ class DDPM(pl.LightningModule):
         self.val_data_prefix = val_data_prefix
         self.batch_size = batch_size
         self.lr = lr
-        self.torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.include_charges = include_charges
         self.test_epochs = test_epochs
         self.n_stability_samples = n_stability_samples
@@ -72,6 +72,9 @@ class DDPM(pl.LightningModule):
         self.is_geom = ('geom' in self.train_data_prefix) or ('MOAD' in self.train_data_prefix)
         if type(activation) is str:
             activation = get_activation(activation)
@@ -80,7 +83,7 @@ class DDPM(pl.LightningModule):
             in_node_nf=in_node_nf,
             n_dims=n_dims,
             context_node_nf=context_node_nf,
-            device=self.torch_device,
             hidden_nf=hidden_nf,
             activation=activation,
             n_layers=n_layers,
@@ -94,6 +97,7 @@ class DDPM(pl.LightningModule):
             model=model,
             normalization=normalization,
             centering=inpainting,
         )
         edm_class = InpaintingEDM if inpainting else EDM
         self.edm = edm_class(
@@ -424,7 +428,7 @@ class DDPM(pl.LightningModule):
             context = fragment_mask
         # Add information about pocket to the context
-        if isinstance(self.val_dataset, MOADDataset):
             fragment_pocket_mask = fragment_mask
             fragment_only_mask = template_data['fragment_only_mask']
             pocket_only_mask = fragment_pocket_mask - fragment_only_mask

         normalize_factors, include_charges, model,
         data_path, train_data_prefix, val_data_prefix, batch_size, lr, torch_device, test_epochs, n_stability_samples,
         normalization=None, log_iterations=None, samples_dir=None, data_augmentation=False,
+        center_of_mass='fragments', inpainting=False, anchors_context=True, graph_type=None,
     ):
         super(DDPM, self).__init__()
         self.val_data_prefix = val_data_prefix
         self.batch_size = batch_size
         self.lr = lr
+        self.torch_device = torch_device
         self.include_charges = include_charges
         self.test_epochs = test_epochs
         self.n_stability_samples = n_stability_samples
         self.is_geom = ('geom' in self.train_data_prefix) or ('MOAD' in self.train_data_prefix)
+        if graph_type is None:
+            graph_type = '4A' if '.' in train_data_prefix else 'FC'
         if type(activation) is str:
             activation = get_activation(activation)
             in_node_nf=in_node_nf,
             n_dims=n_dims,
             context_node_nf=context_node_nf,
+            device=torch_device,
             hidden_nf=hidden_nf,
             activation=activation,
             n_layers=n_layers,
             model=model,
             normalization=normalization,
             centering=inpainting,
+            graph_type=graph_type,
         )
         edm_class = InpaintingEDM if inpainting else EDM
         self.edm = edm_class(
             context = fragment_mask
         # Add information about pocket to the context
+        if '.' in self.train_data_prefix:
             fragment_pocket_mask = fragment_mask
             fragment_only_mask = template_data['fragment_only_mask']
             pocket_only_mask = fragment_pocket_mask - fragment_only_mask

src/linker_size.py CHANGED Viewed

@@ -21,10 +21,6 @@ class DistributionNodes:
         prob = prob/np.sum(prob)
         self.prob = torch.from_numpy(prob).float()
-        entropy = torch.sum(self.prob * torch.log(self.prob + 1e-30))
-        print("Entropy of n_nodes: H[N]", entropy.item())
         self.m = Categorical(torch.tensor(prob))
     def sample(self, n_samples=1):

         prob = prob/np.sum(prob)
         self.prob = torch.from_numpy(prob).float()
         self.m = Categorical(torch.tensor(prob))
     def sample(self, n_samples=1):

src/linker_size_lightning.py CHANGED Viewed

@@ -40,6 +40,7 @@ class SizeClassifier(pl.LightningModule):
         self.lr = lr
         self.torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.loss_weights = None if loss_weights is None else torch.tensor(loss_weights, device=self.torch_device)
         self.gnn = SizeGNN(
             in_node_nf=in_node_nf,
             hidden_nf=hidden_nf,
@@ -79,7 +80,7 @@ class SizeClassifier(pl.LightningModule):
     def test_dataloader(self):
         return get_dataloader(self.test_dataset, self.batch_size, collate_fn=collate_with_fragment_edges)
-    def forward(self, data, return_loss=True, with_pocket=False):
         h = data['one_hot']
         x = data['positions']
         fragment_mask = data['fragment_only_mask'] if with_pocket else data['fragment_mask']
@@ -91,6 +92,10 @@ class SizeClassifier(pl.LightningModule):
         x = x * fragment_mask
         h = h * fragment_mask
         # Reshaping
         bs, n_nodes = x.shape[0], x.shape[1]
         fragment_mask = fragment_mask.view(bs * n_nodes, 1)

         self.lr = lr
         self.torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.loss_weights = None if loss_weights is None else torch.tensor(loss_weights, device=self.torch_device)
+        self.in_node_nf = in_node_nf
         self.gnn = SizeGNN(
             in_node_nf=in_node_nf,
             hidden_nf=hidden_nf,
     def test_dataloader(self):
         return get_dataloader(self.test_dataset, self.batch_size, collate_fn=collate_with_fragment_edges)
+    def forward(self, data, return_loss=True, with_pocket=False, adjust_shape=False):
         h = data['one_hot']
         x = data['positions']
         fragment_mask = data['fragment_only_mask'] if with_pocket else data['fragment_mask']
         x = x * fragment_mask
         h = h * fragment_mask
+        if h.shape[-1] != self.in_node_nf and adjust_shape:
+            assert torch.allclose(h[..., -1], torch.zeros_like(h[..., -1]))
+            h = h[..., :-1]
         # Reshaping
         bs, n_nodes = x.shape[0], x.shape[1]
         fragment_mask = fragment_mask.view(bs * n_nodes, 1)

src/utils.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import sys
 from datetime import datetime
 import torch
@@ -21,9 +22,11 @@ class Logger(object):
         # you might want to specify some extra behavior here.
         pass
 def log(*args):
     print(f'[{datetime.now()}]', *args)
 class EMA:
     def __init__(self, beta):
         super().__init__()
@@ -257,6 +260,17 @@ def disable_rdkit_logging():
     rkrb.DisableLog('rdApp.error')
 class FoundNaNException(Exception):
     def __init__(self, x, h):
         x_nan_idx = self.find_nan_idx(x)

 import sys
+import random
 from datetime import datetime
 import torch
         # you might want to specify some extra behavior here.
         pass
 def log(*args):
     print(f'[{datetime.now()}]', *args)
 class EMA:
     def __init__(self, beta):
         super().__init__()
     rkrb.DisableLog('rdApp.error')
+def set_deterministic(seed):  # Seeds the Python, NumPy and PyTorch RNGs with `seed` and switches cuDNN to deterministic mode
+    random.seed(seed)
+    np.random.seed(seed)  # NOTE(review): no numpy import is visible in the shown part of this file - confirm `np` is in scope
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)  # seeds all CUDA devices
+    torch.backends.cudnn.deterministic = True  # set unconditionally, even without CUDA
+    torch.backends.cudnn.benchmark = False  # turn off cuDNN benchmark mode
 class FoundNaNException(Exception):
     def __init__(self, x, h):
         x_nan_idx = self.find_nan_idx(x)