LittleNyima committed
Commit a651084
Parent(s): 7699ada
Initial commit

Files changed:
- README.md +111 -0
- config.json +51 -0
- diffusion_pytorch_model.safetensors +3 -0
README.md
ADDED
@@ -0,0 +1,111 @@
# Abstract

**DDPM** model trained on the [huggan/anime-faces](https://huggingface.co/datasets/huggan/anime-faces) dataset.

## Training Arguments

| Argument                    | Value  |
| :-------------------------: | :----: |
| image_size                  | 64     |
| train_batch_size            | 16     |
| eval_batch_size             | 16     |
| num_epochs                  | 50     |
| gradient_accumulation_steps | 1      |
| learning_rate               | 1e-4   |
| lr_warmup_steps             | 500    |
| mixed_precision             | "fp16" |

For the training code, please refer to [this link](https://github.com/LittleNyima/code-snippets/blob/master/ddpm-tutorial/ddpm_training.py).
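As a rough sketch (not the linked script), these arguments map onto a standard noise-prediction training loop using the `DDPM` class defined in the Inference section below; `dataloader`, the optimizer, and the LR schedule here are assumptions, and mixed-precision handling is omitted.

```python
import torch
import torch.nn.functional as F
from diffusers import UNet2DModel
from diffusers.optimization import get_cosine_schedule_with_warmup

# Assumption: a DataLoader of 64x64 images normalized to [-1, 1].
dataloader = ...

# Build the UNet architecture from config.json (random init), matching the table above.
model = UNet2DModel.from_config(UNet2DModel.load_config('ddpm-animefaces-64')).cuda()
ddpm = DDPM()  # the from-scratch scheduler defined in the Inference section below
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
lr_scheduler = get_cosine_schedule_with_warmup(
    optimizer, num_warmup_steps=500, num_training_steps=50 * len(dataloader)
)

for epoch in range(50):
    for images in dataloader:
        images = images.cuda()
        noise = torch.randn_like(images)
        timesteps = torch.randint(0, ddpm.num_train_timesteps, (images.shape[0],), device=images.device)
        noisy_images = ddpm.add_noise(images, noise, timesteps)  # forward process q(x_t | x_0)
        pred_noise = model(noisy_images, timesteps).sample       # epsilon prediction
        loss = F.mse_loss(pred_noise, noise)
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
```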
# Inference

This project aims to implement DDPM from scratch, so `DDPMScheduler` is not used. Instead, I use only `UNet2DModel` and implement a simple scheduler myself. The inference code is:
```python
import torch
from tqdm import tqdm
from diffusers import UNet2DModel

class DDPM:
    def __init__(
        self,
        num_train_timesteps: int = 1000,
        beta_start: float = 0.0001,
        beta_end: float = 0.02,
    ):
        self.num_train_timesteps = num_train_timesteps
        self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
        self.alphas = 1.0 - self.betas
        self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
        self.timesteps = torch.arange(num_train_timesteps - 1, -1, -1)

    def add_noise(
        self,
        original_samples: torch.Tensor,
        noise: torch.Tensor,
        timesteps: torch.Tensor,
    ):
        alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
        noise = noise.to(original_samples.device)
        timesteps = timesteps.to(original_samples.device)

        # \sqrt{\bar\alpha_t}
        sqrt_alpha_prod = alphas_cumprod[timesteps].flatten() ** 0.5
        while len(sqrt_alpha_prod.shape) < len(original_samples.shape):
            sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)

        # \sqrt{1 - \bar\alpha_t}
        sqrt_one_minus_alpha_prod = (1.0 - alphas_cumprod[timesteps]).flatten() ** 0.5
        while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape):
            sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)

        return sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise

    @torch.no_grad()
    def sample(
        self,
        unet: UNet2DModel,
        batch_size: int,
        in_channels: int,
        sample_size: int,
    ):
        betas = self.betas.to(unet.device)
        alphas = self.alphas.to(unet.device)
        alphas_cumprod = self.alphas_cumprod.to(unet.device)
        timesteps = self.timesteps.to(unet.device)
        images = torch.randn((batch_size, in_channels, sample_size, sample_size), device=unet.device)
        for timestep in tqdm(timesteps, desc='Sampling'):
            pred_noise: torch.Tensor = unet(images, timestep).sample

            # mean of q(x_{t-1}|x_t, x_0), computed from the predicted noise
            alpha_t = alphas[timestep]
            alpha_cumprod_t = alphas_cumprod[timestep]
            sqrt_alpha_t = alpha_t ** 0.5
            one_minus_alpha_t = 1.0 - alpha_t
            sqrt_one_minus_alpha_cumprod_t = (1 - alpha_cumprod_t) ** 0.5
            mean = (images - one_minus_alpha_t / sqrt_one_minus_alpha_cumprod_t * pred_noise) / sqrt_alpha_t

            # standard deviation of q(x_{t-1}|x_t, x_0); no noise is added at the last steps
            if timestep > 1:
                beta_t = betas[timestep]
                one_minus_alpha_cumprod_t_minus_one = 1.0 - alphas_cumprod[timestep - 1]
                one_divided_by_sigma_square = alpha_t / beta_t + 1.0 / one_minus_alpha_cumprod_t_minus_one
                std = (1.0 / one_divided_by_sigma_square) ** 0.5
            else:
                std = torch.zeros_like(timestep)

            epsilon = torch.randn_like(images)
            images = mean + std * epsilon
        images = (images / 2.0 + 0.5).clamp(0, 1).cpu().permute(0, 2, 3, 1).numpy()
        return images

model = UNet2DModel.from_pretrained('ddpm-animefaces-64').cuda()
ddpm = DDPM()
images = ddpm.sample(model, 32, 3, 64)

from diffusers.utils import make_image_grid, numpy_to_pil
image_grid = make_image_grid(numpy_to_pil(images), rows=4, cols=8)
image_grid.save('ddpm-sample-results.png')
```

This can also be found at [this link](https://github.com/LittleNyima/code-snippets/blob/master/ddpm-tutorial/ddpm_sampling.py).
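For comparison with the from-scratch scheduler above, here is a minimal sketch of the same ancestral sampling using the library's `DDPMScheduler`. It is not part of this repository, and the scheduler hyperparameters are assumed to mirror the `DDPM` defaults above.

```python
import torch
from diffusers import DDPMScheduler, UNet2DModel
from diffusers.utils import make_image_grid, numpy_to_pil

# Assumption: the library scheduler mirrors the from-scratch defaults (1000 steps, linear betas).
scheduler = DDPMScheduler(num_train_timesteps=1000, beta_start=0.0001, beta_end=0.02)
unet = UNet2DModel.from_pretrained('ddpm-animefaces-64').cuda()

scheduler.set_timesteps(1000)  # full 1000-step ancestral sampling
images = torch.randn((32, 3, 64, 64), device=unet.device)
with torch.no_grad():
    for t in scheduler.timesteps:
        pred_noise = unet(images, t).sample
        images = scheduler.step(pred_noise, t, images).prev_sample
images = (images / 2.0 + 0.5).clamp(0, 1).cpu().permute(0, 2, 3, 1).numpy()
make_image_grid(numpy_to_pil(images), rows=4, cols=8).save('ddpm-scheduler-sample-results.png')
```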
config.json
ADDED
@@ -0,0 +1,51 @@
{
  "_class_name": "UNet2DModel",
  "_diffusers_version": "0.28.1",
  "act_fn": "silu",
  "add_attention": true,
  "attention_head_dim": 8,
  "attn_norm_num_groups": null,
  "block_out_channels": [
    128,
    128,
    256,
    256,
    512,
    512
  ],
  "center_input_sample": false,
  "class_embed_type": null,
  "down_block_types": [
    "DownBlock2D",
    "DownBlock2D",
    "DownBlock2D",
    "DownBlock2D",
    "AttnDownBlock2D",
    "DownBlock2D"
  ],
  "downsample_padding": 1,
  "downsample_type": "conv",
  "dropout": 0.0,
  "flip_sin_to_cos": true,
  "freq_shift": 0,
  "in_channels": 3,
  "layers_per_block": 2,
  "mid_block_scale_factor": 1,
  "norm_eps": 1e-05,
  "norm_num_groups": 32,
  "num_class_embeds": null,
  "num_train_timesteps": null,
  "out_channels": 3,
  "resnet_time_scale_shift": "default",
  "sample_size": 64,
  "time_embedding_type": "positional",
  "up_block_types": [
    "UpBlock2D",
    "AttnUpBlock2D",
    "UpBlock2D",
    "UpBlock2D",
    "UpBlock2D",
    "UpBlock2D"
  ],
  "upsample_type": "conv"
}
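For illustration, this config can be used to rebuild the UNet with diffusers, either as architecture only or together with the trained weights. A minimal sketch, assuming the repository id 'ddpm-animefaces-64' used in the README:

```python
from diffusers import UNet2DModel

# Architecture only (reads config.json, weights are randomly initialized).
config = UNet2DModel.load_config('ddpm-animefaces-64')
model = UNet2DModel.from_config(config)

# Architecture plus trained weights from diffusion_pytorch_model.safetensors.
model = UNet2DModel.from_pretrained('ddpm-animefaces-64')
print(model.config.sample_size, model.config.block_out_channels)
```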
diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e75355c71e36f8e36d802f61b3c7f04cad455e591278e5e8a9a1ad89c4e2f990
size 454741108
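This git-LFS pointer records only the checksum and size of the real weight file. A minimal sketch for verifying a downloaded checkpoint against these fields, assuming `git lfs pull` has fetched the actual file into the working directory:

```python
import hashlib
import os

path = 'diffusion_pytorch_model.safetensors'

# Stream the file through SHA-256 in 1 MiB chunks.
sha256 = hashlib.sha256()
with open(path, 'rb') as f:
    for chunk in iter(lambda: f.read(1 << 20), b''):
        sha256.update(chunk)

print(sha256.hexdigest())        # should match the oid above
print(os.path.getsize(path))     # should match the size above (454741108 bytes)
```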