""" foldingdiff implements a diffusion model for generating protein structures. Inspired by the biological folding process, we perform diffusion on the angles between amino acid residues rather than the absolute 3D coordinates of each residue. By effectively treating each residue as its own reference frame, we shift the equivariance constraints into the representation space itself; this allows us to use a vanilla transformer model as our model. Here, we provide a simple online interface for generating single backbones with a given length, starting from a given random seed. Tips for generating proteins: * The maximum sequence sequence length this model has been trained on is 128 residues. The shorter a sequence is, the more likely it will be "designable" (see our preprint). * FoldingDiff does *not* generate the amino acid sequence for its structures, it simply fills the structure with Glycine residues; use a tool like ESM-IF1 to generate amino acids corresponding to generated structure. See our preprint at https://arxiv.org/abs/2209.15611 and our full codebase at https://github.com/microsoft/foldingdiff """ import os import gradio as gr import torch from foldingdiff import sampling from foldingdiff import angles_and_coords as ac def read_mol(molpath: str) -> str: with open(molpath, "r") as fp: lines = fp.readlines() mol = "" for l in lines: mol += l return mol def molecule(input_pdb: str) -> str: """Get the string to view the given pdb in 3dmol.js""" mol = read_mol(input_pdb) x = ( """
""" ) return f"""""" def sample_at_length(l:int, seed:int): """ Sample a single structure at the given length """ torch.manual_seed(seed) l = int(l) # Sample the angles s = sampling.sample_simple("wukevin/foldingdiff_cath", n=1, sweep_lengths=(l, l+1))[0] # Create a PDB file after building out the structure in 3D coordinates outdir = os.path.join(os.getcwd(), "output") os.makedirs(outdir, exist_ok=True) pdb_file = ac.create_new_chain_nerf(os.path.join(outdir, "generated.pdb"), s) return molecule(pdb_file), pdb_file interface = gr.Interface( fn=sample_at_length, title="foldingdiff - protein backbone structure generation with diffusion models", description=__doc__, inputs=[ gr.Number(value=80, label="Protein backbone length to generate", show_label=True, precision=0), gr.Number(value=42, label="Random seed", show_label=True, precision=0), ], outputs=[ gr.HTML(), gr.File(label="Generated structure in PDB format (cartesian coordinates)"), # gr.Dataframe(label="Generated angles defining structure", max_rows=8), ], ) interface.launch()