Spaces:
Running
Running
""" | |
foldingdiff implements a diffusion model for generating protein structures. Inspired by the biological folding process, | |
we perform diffusion on the angles between amino acid residues rather than the absolute 3D coordinates of each residue. | |
By effectively treating each residue as its own reference frame, we shift the equivariance constraints into the | |
representation space itself; this allows us to use a vanilla transformer model as our model. Here, we provide a simple | |
online interface for generating single backbones with a given length, starting from a given random seed. | |
Tips for generating proteins: | |
* The maximum sequence length this model has been trained on is 128 residues. The shorter a sequence is, the more likely it will be "designable" (see our preprint).
* FoldingDiff does *not* generate the amino acid sequence for its structures; it simply fills the structure with Glycine residues. Use a tool like ESM-IF1 to generate amino acids corresponding to the generated structure.
See our preprint at https://arxiv.org/abs/2209.15611 and our full codebase at https://github.com/microsoft/foldingdiff | |
""" | |
import html
import json
import os

import gradio as gr
import torch
from foldingdiff import sampling
from foldingdiff import angles_and_coords as ac
def read_mol(molpath: str) -> str:
    """Return the full text of the file at ``molpath``.

    Args:
        molpath: Path of the file to read; it is opened in text mode.

    Returns:
        The file's contents as a single string, line endings included.
    """
    with open(molpath, "r") as fp:
        # One read() yields the same text as concatenating readlines(),
        # without growing the result string one line at a time.
        return fp.read()
def molecule(input_pdb: str) -> str:
    """Get the string to view the given pdb in 3dmol.js.

    Reads the PDB file, embeds its text in a standalone HTML page that loads
    3Dmol.js and draws the model with the "stick" style on a black
    background, then wraps that page in an ``<iframe>`` via ``srcdoc``.

    Args:
        input_pdb: Path to the PDB file to display.

    Returns:
        HTML markup for an iframe whose ``srcdoc`` holds the viewer page.
    """
    mol = read_mol(input_pdb)
    # Embed the PDB text as a JSON-encoded JavaScript string literal so that
    # backticks, "${", backslashes or quotes in the file cannot terminate or
    # alter the literal. "</" is split so the text cannot close <script>.
    pdb_literal = json.dumps(mol).replace("</", "<\\/")
    page = (
        """<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
<style>
body{
font-family:sans-serif
}
.mol-container {
width: 100%;
height: 600px;
position: relative;
}
.mol-container select{
background-image:None;
}
</style>
<script src="https://3Dmol.csb.pitt.edu/build/3Dmol-min.js"></script>
</head>
<body>
<div id="container" class="mol-container"></div>
<script>
let pdb = """
        + pdb_literal
        + """;
$(document).ready(function () {
let element = $("#container");
let config = { backgroundColor: "black" };
let viewer = $3Dmol.createViewer(element, config);
viewer.addModel(pdb, "pdb");
viewer.getModel(0).setStyle({}, { stick: { colorscheme:"whiteCarbon" } });
viewer.zoomTo();
viewer.render();
viewer.zoom(0.8, 2000);
})
</script>
</body></html>"""
    )
    # The page is placed inside a quoted attribute, so it must be
    # HTML-escaped: an unescaped apostrophe (e.g. the atom name O5') would
    # otherwise end the srcdoc value early. The browser decodes the entities
    # back into the original document when it loads the frame.
    srcdoc = html.escape(page, quote=True)
    return f"""<iframe style="width: 100%; height: 600px" name="result" allow="midi; geolocation; microphone; camera;
display-capture; encrypted-media;" sandbox="allow-modals allow-forms
allow-scripts allow-same-origin allow-popups
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
allowpaymentrequest="" frameborder="0" srcdoc='{srcdoc}'></iframe>"""
def sample_at_length(l: int, seed: int):
    """
    Sample a single structure at the given length.

    Args:
        l: Number of residues in the backbone to generate; coerced with int().
        seed: Value passed to torch.manual_seed; coerced with int().

    Returns:
        A 2-tuple ``(viewer_html, pdb_path)``: the markup produced by
        ``molecule`` for the generated structure, and the PDB file path.

    Raises:
        ValueError: If the requested length is less than 1.
    """
    l = int(l)
    # Reject impossible lengths up front instead of failing inside the sampler.
    if l < 1:
        raise ValueError(f"Protein backbone length must be at least 1, got {l}")
    # Coerce the seed the same way as the length before seeding torch.
    torch.manual_seed(int(seed))
    # Sample the angles; [0] takes the first (only requested) sample.
    # Presumably sweep_lengths is a half-open range so that only length l is
    # sampled -- confirm against foldingdiff.sampling.
    s = sampling.sample_simple("wukevin/foldingdiff_cath", n=1, sweep_lengths=(l, l + 1))[0]
    # Create a PDB file after building out the structure in 3D coordinates
    outdir = os.path.join(os.getcwd(), "output")
    os.makedirs(outdir, exist_ok=True)
    # NOTE(review): every call writes to the same output/generated.pdb, so
    # overlapping requests would overwrite each other's file.
    # The return value of create_new_chain_nerf is used as the written path.
    pdb_file = ac.create_new_chain_nerf(os.path.join(outdir, "generated.pdb"), s)
    return molecule(pdb_file), pdb_file
def _build_interface(description):
    """Assemble the Gradio interface around ``sample_at_length``.

    Two integer inputs (backbone length and random seed) feed the sampler;
    its results are shown as an HTML viewer and a downloadable file.
    """
    # Inputs: both are whole numbers (precision=0) with preset defaults.
    length_box = gr.Number(value=85, label="Protein backbone length to generate", show_label=True, precision=0)
    seed_box = gr.Number(value=123, label="Random seed", show_label=True, precision=0)
    # Outputs: the embedded 3D viewer and the PDB file.
    viewer_panel = gr.HTML()
    pdb_download = gr.File(label="Generated structure in PDB format (cartesian coordinates)")
    # An angle table output is currently disabled:
    # gr.Dataframe(label="Generated angles defining structure", max_rows=8),
    return gr.Interface(
        fn=sample_at_length,
        title="foldingdiff - protein backbone structure generation with diffusion models",
        description=description,
        inputs=[length_box, seed_box],
        outputs=[viewer_panel, pdb_download],
    )


# The module docstring doubles as the description shown in the UI.
interface = _build_interface(__doc__)
interface.launch()