Spaces:

flax-community
/

dalle-mini

Running

App Files Files Community

boris committed on Jan 5, 2022

Commit

772415c

•

1 Parent(s): 5c84978

feat: allow abstract_init

Browse files

Files changed (2) hide show

dalle_mini/model/modeling.py +42 -1
tools/train/train.py +4 -1

dalle_mini/model/modeling.py CHANGED Viewed

@@ -16,7 +16,7 @@
 import math
 from functools import partial
-from typing import Optional
 import flax.linen as nn
 import jax
@@ -298,10 +298,51 @@ class FlaxBartPreTrainedModel(FlaxBartPreTrainedModel):
     Edits:
     - added num_params property
     - config_class replaced to DalleBartConfig
     """
     config_class = DalleBartConfig
     @property
     def num_params(self):
         num_params = jax.tree_map(

 import math
 from functools import partial
+from typing import Optional, Tuple
 import flax.linen as nn
 import jax
     Edits:
     - added num_params property
     - config_class replaced to DalleBartConfig
+    - __init__ accepts abstract_init, which initializes the model from parameter shapes only (no actual weight values)
     """
     config_class = DalleBartConfig
+    def __init__(
+        self,
+        config: DalleBartConfig,
+        # NOTE(review): Tuple[int] denotes a 1-tuple while the default is a
+        # 2-tuple; Tuple[int, ...] is probably what was intended - confirm.
+        input_shape: Tuple[int] = (1, 1),
+        seed: int = 0,
+        dtype: jnp.dtype = jnp.float32,
+        abstract_init: bool = False,
+        **kwargs,
+    ):
+        """Build the module and initialize its parameters, optionally abstractly.
+
+        Args:
+            config: model configuration, forwarded to ``self.module_class``.
+            input_shape: shape passed to ``self.init_weights``.
+            seed: seed used to create the PRNG key stored in ``self.key``.
+            dtype: dtype forwarded to ``self.module_class`` and stored in
+                ``self.dtype``.
+            abstract_init: when True, ``self.init_weights`` is evaluated
+                through ``jax.eval_shape`` so ``self.params`` only carries
+                shape information and real weights must be loaded later;
+                when False, ``self.init_weights`` is called directly.
+            **kwargs: extra keyword arguments forwarded to
+                ``self.module_class``.
+
+        Raises:
+            ValueError: if ``config`` or the constructed module is None.
+        """
+        module = self.module_class(config=config, dtype=dtype, **kwargs)
+        # adapted from HuggingFace FlaxPreTrainedModel
+        # NOTE(review): these checks run after `config` has already been handed
+        # to `module_class` above, so they do not guard the module construction.
+        if config is None:
+            raise ValueError("config cannot be None")
+        if module is None:
+            raise ValueError("module cannot be None")
+        # These are private so they can be exposed as typed properties on derived classes.
+        self._config = config
+        self._module = module
+        # These are public because their type is the same for every derived class.
+        self.key = PRNGKey(seed)
+        self.dtype = dtype
+        # initialize parameters, either abstractly (shapes only) or randomly
+        if abstract_init:
+            # Initialize the model weights only abstractly: eval_shape returns a pytree
+            # with the same structure as the weights but without any actual values; it
+            # only contains the shape information. Weights need to be loaded later.
+            init_fn = partial(self.init_weights, input_shape=input_shape)
+            random_params = jax.eval_shape(init_fn, self.key)
+        else:
+            random_params = self.init_weights(self.key, input_shape)
+        # save the flattened parameter keys as the set of required params
+        self._required_params = set(flatten_dict(unfreeze(random_params)).keys())
+        self.params = random_params
     @property
     def num_params(self):
         num_params = jax.tree_map(

tools/train/train.py CHANGED Viewed

@@ -434,7 +434,9 @@ def main():
         artifact_dir = artifact.download()
         # load model
-        model = DalleBart.from_pretrained(artifact_dir)
         # avoid OOM on TPU: see https://github.com/google/flax/issues/1658
         print(model.params)
@@ -458,6 +460,7 @@ def main():
                 config=config,
                 seed=training_args.seed_model,
                 dtype=getattr(jnp, model_args.dtype),
             )
             # avoid OOM on TPU: see https://github.com/google/flax/issues/1658
             print(model.params)

         artifact_dir = artifact.download()
         # load model
+        model = DalleBart.from_pretrained(
+            artifact_dir, dtype=getattr(jnp, model_args.dtype), abstract_init=True
+        )
         # avoid OOM on TPU: see https://github.com/google/flax/issues/1658
         print(model.params)
                 config=config,
                 seed=training_args.seed_model,
                 dtype=getattr(jnp, model_args.dtype),
+                abstract_init=True,
             )
             # avoid OOM on TPU: see https://github.com/google/flax/issues/1658
             print(model.params)