Spaces:
Running
Running
File size: 4,232 Bytes
2388920 2348010 2388920 47e3cde 035577c 2293b93 035577c 47e3cde 035577c 1eff7fb 2293b93 47e3cde 2293b93 035577c 2508b3c 47e3cde 2508b3c 47e3cde 2508b3c 035577c 23ee17c 2388920 2293b93 4d512f9 2293b93 47e3cde 1eff7fb 47e3cde 2508b3c 47e3cde 035577c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
"""
foldingdiff implements a diffusion model for generating protein structures. Inspired by the biological folding process,
we perform diffusion on the angles between amino acid residues rather than the absolute 3D coordinates of each residue.
By effectively treating each residue as its own reference frame, we shift the equivariance constraints into the
representation space itself; this allows us to use a vanilla transformer model as our model. Here, we provide a simple
online interface for generating single backbones with a given length, starting from a given random seed.
Tips for generating proteins:
* The maximum sequence length this model has been trained on is 128 residues. The shorter a sequence is, the more likely it will be "designable" (see our preprint).
* FoldingDiff does *not* generate the amino acid sequence for its structures; it simply fills the structure with Glycine residues. Use a tool like ESM-IF1 to generate amino acids corresponding to the generated structure.
See our preprint at https://arxiv.org/abs/2209.15611 and our full codebase at https://github.com/microsoft/foldingdiff
"""
import html
import json
import os

import gradio as gr
import torch
from foldingdiff import angles_and_coords as ac
from foldingdiff import sampling
def read_mol(molpath: str) -> str:
    """Return the entire text content of the file at ``molpath``.

    Args:
        molpath: Path of the text file to read (used here for PDB files).

    Returns:
        The file's contents as a single string; an empty file yields ``""``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # A single read() replaces the former readlines() + repeated `+=` loop,
    # which produced exactly the same string with extra copying.
    with open(molpath, "r") as fp:
        return fp.read()
def molecule(input_pdb: str) -> str:
    """Get the string to view the given pdb in 3dmol.js

    The PDB file is read from disk and embedded into a small standalone HTML
    document, which is in turn embedded into the ``srcdoc`` attribute of an
    ``<iframe>``.

    Args:
        input_pdb: Path to the PDB file to display.

    Returns:
        An ``<iframe>`` HTML snippet whose ``srcdoc`` holds the viewer page.
    """
    mol = read_mol(input_pdb)

    # Embed the file as a JavaScript string literal rather than pasting the raw
    # text between backticks: json.dumps escapes quotes, backslashes and
    # newlines, so file contents such as "`" or "${" can no longer terminate or
    # alter the script. "<" is additionally written as \u003c so that a
    # "</script>" sequence in the file cannot close the script element early.
    pdb_literal = json.dumps(mol).replace("<", "\\u003c")

    # Plain DOM APIs are used instead of jQuery's `$`: this document only loads
    # 3Dmol-min.js, so `$` is undefined unless that bundle happens to ship
    # jQuery.
    document = (
        """<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
<style>
body{
font-family:sans-serif
}
.mol-container {
width: 100%;
height: 600px;
position: relative;
}
.mol-container select{
background-image:None;
}
</style>
<script src="https://3Dmol.csb.pitt.edu/build/3Dmol-min.js"></script>
</head>
<body>
<div id="container" class="mol-container"></div>
<script>
let pdb = """
        + pdb_literal
        + """;
document.addEventListener("DOMContentLoaded", function () {
let element = document.getElementById("container");
let config = { backgroundColor: "black" };
let viewer = $3Dmol.createViewer(element, config);
viewer.addModel(pdb, "pdb");
viewer.getModel(0).setStyle({}, { stick: { colorscheme:"whiteCarbon" } });
viewer.zoomTo();
viewer.render();
viewer.zoom(0.8, 2000);
});
</script>
</body></html>"""
    )

    # The page is carried inside a single-quoted attribute, so it must be
    # HTML-escaped (including both quote characters); the browser decodes the
    # entities again when it builds the iframe document from srcdoc.
    x = html.escape(document, quote=True)
    return f"""<iframe style="width: 100%; height: 600px" name="result" allow="midi; geolocation; microphone; camera;
display-capture; encrypted-media;" sandbox="allow-modals allow-forms
allow-scripts allow-same-origin allow-popups
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>"""
def sample_at_length(l: int, seed: int):
    """
    Sample a single structure at the given length.

    Seeds torch's random number generator, samples one set of angles from the
    "wukevin/foldingdiff_cath" model, writes the resulting structure to
    ``output/generated.pdb`` under the current working directory, and returns
    a tuple of (HTML viewer snippet for that file, path of the PDB file).

    ``l`` is converted with ``int()`` before use.
    """
    torch.manual_seed(seed)
    n_residues = int(l)

    # Sample the angles; only the first (and only requested) sample is kept.
    # NOTE(review): (n, n + 1) presumably denotes a half-open length range
    # covering exactly n -- confirm against sampling.sample_simple.
    samples = sampling.sample_simple(
        "wukevin/foldingdiff_cath",
        n=1,
        sweep_lengths=(n_residues, n_residues + 1),
    )
    angles = samples[0]

    # Create a PDB file after building out the structure in 3D coordinates.
    out_dir = os.path.join(os.getcwd(), "output")
    os.makedirs(out_dir, exist_ok=True)
    target_path = os.path.join(out_dir, "generated.pdb")
    written_pdb = ac.create_new_chain_nerf(target_path, angles)

    viewer_html = molecule(written_pdb)
    return viewer_html, written_pdb
# Input widgets, in the positional order expected by sample_at_length(l, seed).
_INPUT_WIDGETS = [
    gr.Number(value=85, label="Protein backbone length to generate", show_label=True, precision=0),
    gr.Number(value=123, label="Random seed", show_label=True, precision=0),
]

# Output widgets, matching the (viewer HTML, PDB file path) tuple that
# sample_at_length returns.
_OUTPUT_WIDGETS = [
    gr.HTML(),
    gr.File(label="Generated structure in PDB format (cartesian coordinates)"),
    # gr.Dataframe(label="Generated angles defining structure", max_rows=8),
]

# The module docstring doubles as the description text shown in the interface.
interface = gr.Interface(
    title="foldingdiff - protein backbone structure generation with diffusion models",
    description=__doc__,
    fn=sample_at_length,
    inputs=_INPUT_WIDGETS,
    outputs=_OUTPUT_WIDGETS,
)

# Start the web app as soon as the module is executed.
interface.launch()