Sapir Weissbuch committed
Commit
05cb3e4
2 parents: ba73063, 5940103

Merge pull request #30 from LightricksResearch/fix-no-flash-attention


model: fix flash attention enabling - do not check device type at this point

xora/models/transformers/attention.py CHANGED
@@ -179,15 +179,14 @@ class BasicTransformerBlock(nn.Module):
         self._chunk_size = None
         self._chunk_dim = 0
 
-    def set_use_tpu_flash_attention(self, device):
+    def set_use_tpu_flash_attention(self):
         r"""
         Function sets the flag in this object and propagates down the children. The flag will enforce the usage of TPU
         attention kernel.
         """
-        if device == "xla":
-            self.use_tpu_flash_attention = True
-            self.attn1.set_use_tpu_flash_attention(device)
-            self.attn2.set_use_tpu_flash_attention(device)
+        self.use_tpu_flash_attention = True
+        self.attn1.set_use_tpu_flash_attention()
+        self.attn2.set_use_tpu_flash_attention()
 
     def set_chunk_feed_forward(self, chunk_size: Optional[int], dim: int = 0):
         # Sets chunk feed-forward
@@ -508,12 +507,11 @@ class Attention(nn.Module):
         processor = AttnProcessor2_0()
         self.set_processor(processor)
 
-    def set_use_tpu_flash_attention(self, device_type):
+    def set_use_tpu_flash_attention(self):
         r"""
         Function sets the flag in this object. The flag will enforce the usage of TPU attention kernel.
         """
-        if device_type == "xla":
-            self.use_tpu_flash_attention = True
+        self.use_tpu_flash_attention = True
 
     def set_processor(self, processor: "AttnProcessor") -> None:
         r"""
xora/models/transformers/transformer3d.py CHANGED
@@ -160,13 +160,11 @@ class Transformer3DModel(ModelMixin, ConfigMixin):
         Function sets the flag in this object and propagates down the children. The flag will enforce the usage of TPU
         attention kernel.
         """
-        logger.info(" ENABLE TPU FLASH ATTENTION -> TRUE")
-        # if using TPU -> configure components to use TPU flash attention
-        if self.device.type == "xla":
-            self.use_tpu_flash_attention = True
-            # push config down to the attention modules
-            for block in self.transformer_blocks:
-                block.set_use_tpu_flash_attention(self.device.type)
+        logger.info("ENABLE TPU FLASH ATTENTION -> TRUE")
+        self.use_tpu_flash_attention = True
+        # push config down to the attention modules
+        for block in self.transformer_blocks:
+            block.set_use_tpu_flash_attention()
 
     def initialize(self, embedding_std: float, mode: Literal["xora", "legacy"]):
         def _basic_init(module):
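
Since the device-type check was removed from the setters, deciding when TPU flash attention applies is now up to the caller. A hedged sketch of what that call site could look like: maybe_enable_tpu_flash_attention is a hypothetical helper, not part of this commit, and it relies on the model's device property the same way the old in-setter check did.

from xora.models.transformers.transformer3d import Transformer3DModel

def maybe_enable_tpu_flash_attention(model: Transformer3DModel) -> None:
    # Hypothetical helper: the `device.type == "xla"` check that used to live
    # inside the setters is performed here, at the call site, instead.
    if model.device.type == "xla":
        model.set_use_tpu_flash_attention()

# Usage sketch: after moving the model to an XLA device,
#   maybe_enable_tpu_flash_attention(transformer)
# On non-TPU devices the call is a no-op, preserving the previous behaviour
# while keeping the model code itself device-agnostic.
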