Spaces:

dbaranchuk
/

Switti

Running on Zero

App Files Files Community

realantonvoronov committed 10 days ago

Commit

1e17711

•

1 Parent(s): e5b0112

update sampling parameters in pipeline and arguments in app

Browse files

Files changed (2) hide show

app.py +22 -2
models/pipeline.py +13 -3

app.py CHANGED Viewed

@@ -27,11 +27,16 @@ def infer(
     more_smooth=True,
     smooth_start_si=2,
     turn_off_cfg_start_si=10,
     progress=gr.Progress(track_tqdm=True),
 ):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     image = pipe(
         prompt=prompt,
         null_prompt=negative_prompt,
@@ -41,6 +46,7 @@ def infer(
         more_smooth=more_smooth,
         smooth_start_si=smooth_start_si,
         turn_off_cfg_start_si=turn_off_cfg_start_si,
         seed=seed,
     )[0]
@@ -103,7 +109,7 @@ with gr.Blocks(css=css) as demo:
             minimum=0.0,
             maximum=10.,
             step=0.5,
-            value=4.,
         )
         with gr.Accordion("Advanced Settings", open=False):
@@ -140,12 +146,24 @@ with gr.Blocks(css=css) as demo:
                     value=2,
                 )
                 turn_off_cfg_start_si = gr.Slider(
-                    label="Disable CFG from scale",
                     minimum=0,
                     maximum=10,
                     step=1,
                     value=8,
                 )
         gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=True)# cache_mode="lazy")
@@ -163,6 +181,8 @@ with gr.Blocks(css=css) as demo:
             more_smooth,
             smooth_start_si,
             turn_off_cfg_start_si,
         ],
         outputs=[result, seed],
     )

     more_smooth=True,
     smooth_start_si=2,
     turn_off_cfg_start_si=10,
+    more_diverse=True,
+    last_scale_temp=None,
     progress=gr.Progress(track_tqdm=True),
 ):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
+    turn_on_cfg_start_si = 2 if more_diverse else 0
     image = pipe(
         prompt=prompt,
         null_prompt=negative_prompt,
         more_smooth=more_smooth,
         smooth_start_si=smooth_start_si,
         turn_off_cfg_start_si=turn_off_cfg_start_si,
+        turn_on_cfg_start_si=turn_on_cfg_start_si,
         seed=seed,
     )[0]
             minimum=0.0,
             maximum=10.,
             step=0.5,
+            value=6.,
         )
         with gr.Accordion("Advanced Settings", open=False):
                     value=2,
                 )
                 turn_off_cfg_start_si = gr.Slider(
+                    label="Disable CFG starting scale",
                     minimum=0,
                     maximum=10,
                     step=1,
                     value=8,
                 )
+            with gr.Row():
+                more_diverse = gr.Checkbox(label="More diverse", value=True)
+                apply_late_temperature = gr.Checkbox(label="Temperature after disabling CFG", value=False)
+                last_scale_temp = gr.Slider(
+                    label="Late temperature value",
+                    minimum=0.1,
+                    maximum=10,
+                    step=0.1,
+                    value=1,
+                )
+                if not apply_late_temperature:
+                    last_scale_temp = None
         gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=True)# cache_mode="lazy")
             more_smooth,
             smooth_start_si,
             turn_off_cfg_start_si,
+            more_diverse,
+            last_scale_temp,
         ],
         outputs=[result, seed],
     )

models/pipeline.py CHANGED Viewed

@@ -91,7 +91,9 @@ class SwittiPipeline:
         return_pil: bool = True,
         smooth_start_si: int = 0,
         turn_off_cfg_start_si: int = 10,
         image_size: tuple[int, int] = (512, 512),
     ) -> torch.Tensor | list[PILImage]:
         """
         only used for inference, on autoregressive mode
@@ -155,7 +157,8 @@ class SwittiPipeline:
             else:
                 freqs_cis = switti.freqs_cis
-            if si >= turn_off_cfg_start_si:
                 x_BLC = x_BLC[:B]
                 context = context[:B]
                 context_attn_bias = context_attn_bias[:B]
@@ -170,6 +173,8 @@ class SwittiPipeline:
                     if b.cross_attn.caching  and b.cross_attn.cached_k is not None:
                         b.cross_attn.cached_k = b.cross_attn.cached_k[:B]
                         b.cross_attn.cached_v = b.cross_attn.cached_v[:B]
             for block in switti.blocks:
                 x_BLC = block(
@@ -186,11 +191,16 @@ class SwittiPipeline:
             logits_BlV = switti.get_logits(x_BLC, cond_BD)
             # Guidance
-            if si < turn_off_cfg_start_si:
                 t = cfg
                 logits_BlV = (1 + t) * logits_BlV[:B] - t * logits_BlV[B:]
-            if more_smooth and si >= smooth_start_si:
                 # not used when evaluating FID/IS/Precision/Recall
                 gum_t = max(0.27 * (1 - ratio * 0.95), 0.005)  # refer to mask-git
                 idx_Bl = gumbel_softmax_with_rng(

         return_pil: bool = True,
         smooth_start_si: int = 0,
         turn_off_cfg_start_si: int = 10,
+        turn_on_cfg_start_si: int = 0,
         image_size: tuple[int, int] = (512, 512),
+        last_scale_temp: None | float = None,
     ) -> torch.Tensor | list[PILImage]:
         """
         only used for inference, on autoregressive mode
             else:
                 freqs_cis = switti.freqs_cis
+            if si < turn_on_cfg_start_si or si >= turn_off_cfg_start_si:
+                apply_smooth = False
                 x_BLC = x_BLC[:B]
                 context = context[:B]
                 context_attn_bias = context_attn_bias[:B]
                     if b.cross_attn.caching  and b.cross_attn.cached_k is not None:
                         b.cross_attn.cached_k = b.cross_attn.cached_k[:B]
                         b.cross_attn.cached_v = b.cross_attn.cached_v[:B]
+            else:
+                apply_smooth = more_smooth
             for block in switti.blocks:
                 x_BLC = block(
             logits_BlV = switti.get_logits(x_BLC, cond_BD)
             # Guidance
+            if si < turn_on_cfg_start_si:
+                t = 0 # no guidance
+            elif si >= turn_on_cfg_start_si and si < turn_off_cfg_start_si:
+                # default const cfg
                 t = cfg
                 logits_BlV = (1 + t) * logits_BlV[:B] - t * logits_BlV[B:]
+            elif last_scale_temp is not None:
+                logits_BlV = logits_BlV / last_scale_temp
+            if apply_smooth and si >= smooth_start_si:
                 # not used when evaluating FID/IS/Precision/Recall
                 gum_t = max(0.27 * (1 - ratio * 0.95), 0.005)  # refer to mask-git
                 idx_Bl = gumbel_softmax_with_rng(