feat(data): super conditioning (#141)

* feat(data): online filtering
* feat(generate): super conditioning
* feat: add processor
- README.md +1 -1
- src/dalle_mini/__init__.py +3 -1
- src/dalle_mini/data.py +69 -20
- src/dalle_mini/model/__init__.py +1 -0
- src/dalle_mini/model/modeling.py +337 -1
- src/dalle_mini/model/processor.py +58 -0
- src/dalle_mini/{text.py → model/text.py} +0 -0
- tools/inference/inference_pipeline.ipynb +25 -44
- tools/train/train.py +21 -1
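Note: the heart of super conditioning (a sampling-time form of classifier-free guidance, credited to @RiversHaveWings) is the logit-mixing step added to `_sample` in modeling.py below. A minimal sketch of just that step, with made-up logit values:

import jax.numpy as jnp

# next-token logits conditioned on the prompt vs. on an empty prompt
logits_cond = jnp.array([[1.0, 2.0, 0.5]])
logits_uncond = jnp.array([[0.8, 1.0, 0.9]])

condition_scale = 3.0  # 1.0 recovers ordinary conditional sampling
logits = logits_uncond + condition_scale * (logits_cond - logits_uncond)

A scale above 1.0 boosts tokens that the prompt makes more likely than the unconditional model, which is why the code below special-cases `condition_scale != 1.0`.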
README.md
CHANGED
@@ -35,7 +35,6 @@ To generate sample predictions and understand the inference pipeline step by step
 Join the community on the [DALLE-Pytorch Discord](https://discord.gg/xBPBXfcFHd).
 Any contribution is welcome, from reporting issues to proposing fixes/improvements or testing the model with cool prompts!
 
-
 ## Development
 
 ### Dependencies Installation
@@ -95,6 +94,7 @@ Many thanks to the people who helped make it better:
 
 - the [DALLE-Pytorch](https://discord.gg/xBPBXfcFHd) and [EleutherAI](https://www.eleuther.ai/) communities for testing and exchanging cool ideas
 - [Rohan Anil](https://github.com/rohan-anil) for adding Distributed Shampoo optimizer
+- [Katherine Crowson](https://github.com/crowsonkb) for [super conditioning](https://twitter.com/RiversHaveWings/status/1478093658716966912)
 
 ## Citing DALL·E mini
 
src/dalle_mini/__init__.py
CHANGED
@@ -1 +1,3 @@
-__version__ = "0.0.…
+__version__ = "0.0.3"
+
+from .model import DalleBart, DalleBartProcessor
src/dalle_mini/data.py
CHANGED
@@ -7,7 +7,7 @@ import numpy as np
 from braceexpand import braceexpand
 from datasets import Dataset, load_dataset
 
-from .text import TextNormalizer
+from .model.text import TextNormalizer
 
 
 @dataclass
@@ -28,6 +28,11 @@ class Dataset:
     seed_dataset: int = None
     shard_by_host: bool = False
     blank_caption_prob: float = 0.0
+    clip_score_column: str = "clip_score"
+    min_clip_score: float = None
+    max_clip_score: float = None
+    filter_column: str = None
+    filter_value: str = None
     train_dataset: Dataset = field(init=False)
     eval_dataset: Dataset = field(init=False)
     rng_dataset: jnp.ndarray = field(init=False)
@@ -36,6 +41,7 @@ class Dataset:
     def __post_init__(self):
         self.multi_hosts = jax.process_count() > 1
         # feed blank captions only in streaming mode for now
+        # otherwise dataset could be cached with same blanked captions
         if self.blank_caption_prob:
             assert (
                 self.streaming is True
@@ -107,23 +113,30 @@ class Dataset:
             self.seed_dataset = np.random.get_state()[1][0]
         self.rng_dataset = jax.random.PRNGKey(self.seed_dataset)
 
-        # …
-        [16 more removed lines, content lost in the page capture]
+        # filter data
+        partial_filter_function = partial(
+            filter_function,
+            filter_column=self.filter_column,
+            filter_value=self.filter_value,
+            clip_score_column=self.clip_score_column,
+            min_clip_score=self.min_clip_score,
+            max_clip_score=self.max_clip_score,
+        )
+        for ds in ["train_dataset", "eval_dataset"]:
+            if hasattr(self, ds):
+                setattr(
+                    self,
+                    ds,
+                    (
+                        getattr(self, ds).filter(partial_filter_function)
+                        if self.streaming
+                        else getattr(self, ds).filter(
+                            partial_filter_function,
+                            num_proc=self.preprocessing_num_workers,
+                            load_from_cache_file=not self.overwrite_cache,
+                            desc="Filtering datasets",
+                        )
+                    ),
                 )
 
         # normalize text
@@ -151,6 +164,25 @@ class Dataset:
             ),
         )
 
+        # blank captions
+        if self.blank_caption_prob:
+            partial_blank_caption_function = partial(
+                blank_caption_function,
+                text_column=self.text_column,
+                blank_caption_prob=self.blank_caption_prob,
+            )
+            if hasattr(self, "train_dataset"):
+                self.train_dataset = (
+                    self.train_dataset.map(partial_blank_caption_function)
+                    if self.streaming
+                    else self.train_dataset.map(
+                        partial_blank_caption_function,
+                        num_proc=self.preprocessing_num_workers,
+                        load_from_cache_file=False,
+                        desc="Blanking some captions",
+                    )
+                )
+
         # preprocess
         partial_preprocess_function = partial(
             preprocess_function,
@@ -230,8 +262,8 @@ class Dataset:
                 dataset.set_epoch(epoch)
                 epoch += 1
                 for item in dataset:
-                    for k…
-                        batch[k].append(…
+                    for k in keys:
+                        batch[k].append(item[k])
                     if len(batch[keys[0]]) == batch_size:
                         batch = {k: jnp.array(v) for k, v in batch.items()}
                         yield batch
@@ -292,6 +324,23 @@ def normalize_function(example, text_column, text_normalizer):
     return example
 
 
+def filter_function(
+    example,
+    min_clip_score,
+    max_clip_score,
+    clip_score_column,
+    filter_column,
+    filter_value,
+):
+    if min_clip_score is not None and example[clip_score_column] < min_clip_score:
+        return False
+    if max_clip_score is not None and example[clip_score_column] > max_clip_score:
+        return False
+    if filter_column is not None and example[filter_column] != filter_value:
+        return False
+    return True
+
+
 def preprocess_function(
     examples,
     tokenizer,
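Because `filter_function` is a pure predicate over a single example, the new online-filtering options are easy to verify in isolation. An illustrative call (the example dict and threshold values are made up):

# keep examples whose clip_score falls in [0.3, 1.0] and whose
# "source" column equals "web"
example = {"clip_score": 0.31, "source": "web"}
keep = filter_function(
    example,
    min_clip_score=0.3,
    max_clip_score=1.0,
    clip_score_column="clip_score",
    filter_column="source",
    filter_value="web",
)
assert keep is True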
src/dalle_mini/model/__init__.py
CHANGED
@@ -1,4 +1,5 @@
 from .configuration import DalleBartConfig
 from .modeling import DalleBart
 from .partitions import set_partitions
+from .processor import DalleBartProcessor
 from .tokenizer import DalleBartTokenizer
src/dalle_mini/model/modeling.py
CHANGED
@@ -18,8 +18,9 @@ import math
 import os
 from functools import partial
 from pickle import UnpicklingError
-from typing import Optional, Tuple, Union
+from typing import Dict, Optional, Tuple, Union
 
+import flax
 import flax.linen as nn
 import jax
 import jax.numpy as jnp
@@ -39,6 +40,7 @@ from transformers.file_utils import (
     is_offline_mode,
     is_remote_url,
 )
+from transformers.generation_flax_utils import FlaxSampleOutput
 from transformers.modeling_flax_outputs import (
     FlaxCausalLMOutputWithCrossAttentions,
     FlaxSeq2SeqLMOutput,
@@ -691,6 +693,17 @@ class FlaxBartForConditionalGenerationModule(FlaxBartForConditionalGenerationModule
         )
 
 
+@flax.struct.dataclass
+class SampleState:
+    cur_len: jnp.ndarray
+    sequences: jnp.ndarray
+    running_token: jnp.ndarray
+    is_sent_finished: jnp.ndarray
+    prng_key: jnp.ndarray
+    model_kwargs: Dict[str, jnp.ndarray]
+    model_kwargs_uncond: Dict[str, jnp.ndarray]
+
+
 class DalleBart(
     PretrainedFromWandbMixin, FlaxBartPreTrainedModel, FlaxBartForConditionalGeneration
 ):
@@ -702,6 +715,7 @@ class DalleBart(
     - no bias in decode method
     - custom prepare_inputs_for_generation using "max_length - 1" to avoid issues
       related to position embedding during model.generate()
+    - custom generate method to allow super conditioning
     """
 
     module_class = FlaxBartForConditionalGenerationModule
@@ -872,3 +886,325 @@ class DalleBart(
             "decoder_attention_mask": extended_attention_mask,
             "decoder_position_ids": position_ids,
         }
+
+    def generate(
+        self,
+        input_ids: jnp.ndarray,
+        attention_mask: Optional[jnp.ndarray] = None,
+        max_length: Optional[int] = None,
+        pad_token_id: Optional[int] = None,
+        bos_token_id: Optional[int] = None,
+        eos_token_id: Optional[int] = None,
+        decoder_start_token_id: Optional[int] = None,
+        do_sample: Optional[bool] = None,
+        prng_key: Optional[jnp.ndarray] = None,
+        top_k: Optional[int] = None,
+        top_p: Optional[float] = None,
+        temperature: Optional[float] = None,
+        num_beams: Optional[int] = None,
+        no_repeat_ngram_size: Optional[int] = None,
+        min_length: Optional[int] = None,
+        forced_bos_token_id: Optional[int] = None,
+        forced_eos_token_id: Optional[int] = None,
+        length_penalty: Optional[float] = None,
+        early_stopping: Optional[bool] = None,
+        trace: bool = True,
+        params: Optional[Dict[str, jnp.ndarray]] = None,
+        condition_scale: Optional[float] = 1.0,
+        input_ids_uncond: Optional[jnp.ndarray] = None,
+        attention_mask_uncond: Optional[jnp.ndarray] = None,
+        **model_kwargs,
+    ):
+        """Edit: Allow super conditioning."""
+
+        # set init values
+        max_length = max_length if max_length is not None else self.config.max_length
+        bos_token_id = (
+            bos_token_id if bos_token_id is not None else self.config.bos_token_id
+        )
+        pad_token_id = (
+            pad_token_id if pad_token_id is not None else self.config.pad_token_id
+        )
+        eos_token_id = (
+            eos_token_id if eos_token_id is not None else self.config.eos_token_id
+        )
+        decoder_start_token_id = (
+            decoder_start_token_id
+            if decoder_start_token_id
+            else self.config.decoder_start_token_id
+        )
+        prng_key = prng_key if prng_key is not None else jax.random.PRNGKey(0)
+
+        if decoder_start_token_id is None and self.config.is_encoder_decoder:
+            raise ValueError(
+                "`decoder_start_token_id` has to be defined for encoder-decoder generation."
+            )
+
+        do_sample = do_sample if do_sample is not None else self.config.do_sample
+        num_beams = num_beams if num_beams is not None else self.config.num_beams
+
+        if self.config.is_encoder_decoder:
+            # add encoder_outputs to model_kwargs
+            if model_kwargs.get("encoder_outputs") is None:
+                model_kwargs_input = dict(model_kwargs)
+                model_kwargs = self._prepare_encoder_decoder_kwargs_for_generation(
+                    input_ids,
+                    params,
+                    {"attention_mask": attention_mask, **model_kwargs_input},
+                )
+            if condition_scale != 1.0:
+                assert (
+                    input_ids_uncond is not None
+                ), "`input_ids_uncond` has to be defined for super conditioning."
+                assert (
+                    do_sample is True
+                ), "`do_sample` has to be True for super conditioning."
+                assert (
+                    num_beams == 1
+                ), "`num_beams` has to be 1 for super conditioning."
+                model_kwargs_uncond = (
+                    self._prepare_encoder_decoder_kwargs_for_generation(
+                        input_ids_uncond,
+                        params,
+                        {
+                            "attention_mask": attention_mask_uncond,
+                            **model_kwargs_input,
+                        },
+                    )
+                )
+            else:
+                model_kwargs_uncond = None
+            # prepare decoder_input_ids for generation
+            input_ids = (
+                jnp.ones((input_ids.shape[0], 1), dtype="i4") * decoder_start_token_id
+            )
+
+        if not do_sample and num_beams == 1:
+            logits_processor = self._get_logits_processor(
+                no_repeat_ngram_size,
+                min_length,
+                max_length,
+                eos_token_id,
+                forced_bos_token_id,
+                forced_eos_token_id,
+            )
+            return self._greedy_search(
+                input_ids,
+                max_length,
+                pad_token_id,
+                eos_token_id,
+                logits_processor=logits_processor,
+                trace=trace,
+                params=params,
+                model_kwargs=model_kwargs,
+            )
+        elif do_sample and num_beams == 1:
+            logits_warper = self._get_logits_warper(
+                top_k=top_k, top_p=top_p, temperature=temperature
+            )
+            logits_processor = self._get_logits_processor(
+                no_repeat_ngram_size,
+                min_length,
+                max_length,
+                eos_token_id,
+                forced_bos_token_id,
+                forced_eos_token_id,
+            )
+            return self._sample(
+                input_ids,
+                max_length,
+                pad_token_id,
+                eos_token_id,
+                prng_key,
+                logits_warper=logits_warper,
+                logits_processor=logits_processor,
+                trace=trace,
+                params=params,
+                model_kwargs=model_kwargs,
+                condition_scale=condition_scale,
+                model_kwargs_uncond=model_kwargs_uncond,
+            )
+        elif not do_sample and num_beams > 1:
+            # broadcast input_ids & encoder_outputs
+            input_ids = self._expand_to_num_beams(input_ids, num_beams=num_beams)
+
+            if "encoder_outputs" in model_kwargs:
+                model_kwargs["encoder_outputs"][
+                    "last_hidden_state"
+                ] = self._expand_to_num_beams(
+                    model_kwargs["encoder_outputs"]["last_hidden_state"],
+                    num_beams=num_beams,
+                )
+
+            if "attention_mask" in model_kwargs:
+                model_kwargs["attention_mask"] = self._expand_to_num_beams(
+                    model_kwargs["attention_mask"], num_beams=num_beams
+                )
+
+            logits_processor = self._get_logits_processor(
+                no_repeat_ngram_size,
+                min_length,
+                max_length,
+                eos_token_id,
+                forced_bos_token_id,
+                forced_eos_token_id,
+            )
+
+            return self._beam_search(
+                input_ids,
+                max_length,
+                pad_token_id,
+                eos_token_id,
+                length_penalty=length_penalty,
+                early_stopping=early_stopping,
+                logits_processor=logits_processor,
+                trace=trace,
+                params=params,
+                model_kwargs=model_kwargs,
+            )
+        else:
+            raise NotImplementedError("Beam sampling is currently not implemented.")
+
+    def _sample(
+        self,
+        input_ids: None,
+        max_length: Optional[int] = None,
+        pad_token_id: Optional[int] = None,
+        eos_token_id: Optional[int] = None,
+        prng_key: Optional[jnp.ndarray] = None,
+        logits_processor=None,
+        logits_warper=None,
+        trace: bool = True,
+        params: Optional[Dict[str, jnp.ndarray]] = None,
+        model_kwargs: Optional[Dict[str, jnp.ndarray]] = None,
+        condition_scale: float = 1.0,
+        model_kwargs_uncond: Optional[Dict[str, jnp.ndarray]] = None,
+    ):
+        # init values
+        max_length = max_length if max_length is not None else self.config.max_length
+        pad_token_id = (
+            pad_token_id if pad_token_id is not None else self.config.pad_token_id
+        )
+        eos_token_id = (
+            eos_token_id if eos_token_id is not None else self.config.eos_token_id
+        )
+        prng_key = prng_key if prng_key is not None else jax.random.PRNGKey(0)
+
+        batch_size, cur_len = input_ids.shape
+
+        eos_token_id = jnp.array(eos_token_id)
+        pad_token_id = jnp.array(pad_token_id)
+        cur_len = jnp.array(cur_len)
+
+        # per batch-item holding current token in loop.
+        sequences = jnp.full((batch_size, max_length), pad_token_id, dtype=jnp.int32)
+        sequences = lax.dynamic_update_slice(sequences, input_ids, (0, 0))
+
+        # per batch-item state bit indicating if sentence has finished.
+        is_sent_finished = jnp.zeros((batch_size,), dtype=jnp.bool_)
+
+        # For Seq2Seq generation, we only need to use the decoder instead of the whole model in generation loop
+        # and pass it the `encoder_outputs`, which are part of the `model_kwargs`.
+        model = self.decode if self.config.is_encoder_decoder else self
+
+        # initialize model specific kwargs
+        model_kwargs = self.prepare_inputs_for_generation(
+            input_ids, max_length, **model_kwargs
+        )
+        if condition_scale != 1.0:
+            model_kwargs_uncond = self.prepare_inputs_for_generation(
+                input_ids, max_length, **model_kwargs_uncond
+            )
+
+        # initialize state
+        state = SampleState(
+            cur_len=cur_len,
+            sequences=sequences,
+            running_token=input_ids,
+            is_sent_finished=is_sent_finished,
+            prng_key=prng_key,
+            model_kwargs=model_kwargs,
+            model_kwargs_uncond=model_kwargs_uncond,
+        )
+
+        def sample_search_cond_fn(state):
+            """state termination condition fn."""
+            has_reached_max_length = state.cur_len == max_length
+            all_sequence_finished = jnp.all(state.is_sent_finished)
+            finish_generation = jnp.logical_or(
+                has_reached_max_length, all_sequence_finished
+            )
+            return ~finish_generation
+
+        def sample_search_body_fn(state):
+            """state update fn."""
+            prng_key, prng_key_next = jax.random.split(state.prng_key)
+            model_outputs = model(
+                state.running_token, params=params, **state.model_kwargs
+            )
+
+            logits = model_outputs.logits[:, -1]
+
+            # perform super conditioning
+            # Source: @RiversHaveWings - https://twitter.com/RiversHaveWings/status/1478093658716966912?s=20&t=xdm-wZ61Wf7OLnE_NJHZ1w
+            if condition_scale != 1.0:
+                model_outputs_uncond = model(
+                    state.running_token, params=params, **state.model_kwargs_uncond
+                )
+                logits_uncond = model_outputs_uncond.logits[:, -1]
+                logits = logits_uncond + condition_scale * (logits - logits_uncond)
+            else:
+                model_outputs_uncond = None
+
+            # apply min_length, ...
+            logits = logits_processor(state.sequences, logits, state.cur_len)
+            # apply top_k, top_p, temperature
+            logits = logits_warper(logits, logits, state.cur_len)
+
+            next_token = jax.random.categorical(prng_key, logits, axis=-1)
+
+            next_is_sent_finished = state.is_sent_finished | (
+                next_token == eos_token_id
+            )
+            next_token = (
+                next_token * ~next_is_sent_finished
+                + pad_token_id * next_is_sent_finished
+            )
+            next_token = next_token[:, None]
+
+            next_sequences = lax.dynamic_update_slice(
+                state.sequences, next_token, (0, state.cur_len)
+            )
+            next_model_kwargs = self.update_inputs_for_generation(
+                model_outputs, state.model_kwargs
+            )
+            next_model_kwargs_uncond = (
+                self.update_inputs_for_generation(
+                    model_outputs_uncond, state.model_kwargs_uncond
+                )
+                if condition_scale != 1.0
+                else None
+            )
+
+            return SampleState(
+                cur_len=state.cur_len + 1,
+                sequences=next_sequences,
+                running_token=next_token,
+                is_sent_finished=next_is_sent_finished,
+                model_kwargs=next_model_kwargs,
+                model_kwargs_uncond=next_model_kwargs_uncond,
+                prng_key=prng_key_next,
+            )
+
+        # The very first prompt often has sequence length > 1, so run outside of `lax.while_loop` to comply with TPU
+        if input_ids.shape[1] > 1:
+            state = sample_search_body_fn(state)
+
+        if not trace:
+            state = self._run_loop_in_debug(
+                sample_search_cond_fn, sample_search_body_fn, state
+            )
+        else:
+            state = lax.while_loop(sample_search_cond_fn, sample_search_body_fn, state)
+
+        return FlaxSampleOutput(sequences=state.sequences)
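With the processor added below, the custom `generate` can be exercised without `jax.pmap` roughly as follows. This is only a sketch: the checkpoint reference is the one used in the inference notebook, and `do_sample`/`num_beams` are passed explicitly because the asserts above require sampling with a single beam whenever `condition_scale != 1.0`:

import jax

MODEL = "dalle-mini/dalle-mini/model-2vm4itcx:latest"  # wandb artifact from the notebook
model = DalleBart.from_pretrained(MODEL)
processor = DalleBartProcessor.from_pretrained(MODEL)

# the processor returns input_ids/attention_mask plus their *_uncond twins
tokens = processor(["a blue table"])
encoded = model.generate(
    **tokens,
    prng_key=jax.random.PRNGKey(0),
    params=model.params,
    do_sample=True,
    num_beams=1,
    condition_scale=3.0,
)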
src/dalle_mini/model/processor.py
ADDED
@@ -0,0 +1,58 @@
+""" DalleBart processor """
+
+import jax.numpy as jnp
+
+from .configuration import DalleBartConfig
+from .text import TextNormalizer
+from .tokenizer import DalleBartTokenizer
+from .utils import PretrainedFromWandbMixin
+
+
+class DalleBartProcessorBase:
+    def __init__(
+        self, tokenizer: DalleBartTokenizer, normalize_text: bool, max_text_length: int
+    ):
+        self.tokenizer = tokenizer
+        self.normalize_text = normalize_text
+        self.max_text_length = max_text_length
+        if normalize_text:
+            self.text_processor = TextNormalizer()
+        # create unconditional tokens
+        uncond = self.tokenizer(
+            "",
+            return_tensors="jax",
+            padding="max_length",
+            truncation=True,
+            max_length=self.max_text_length,
+        ).data
+        self.input_ids_uncond = uncond["input_ids"]
+        self.attention_mask_uncond = uncond["attention_mask"]
+
+    def __call__(self, text: str = None):
+        # check that text is not a string
+        assert not isinstance(text, str), "text must be a list of strings"
+
+        if self.normalize_text:
+            text = [self.text_processor(t) for t in text]
+        res = self.tokenizer(
+            text,
+            return_tensors="jax",
+            padding="max_length",
+            truncation=True,
+            max_length=self.max_text_length,
+        ).data
+        # tokens used only with super conditioning
+        n = len(text)
+        res["input_ids_uncond"] = jnp.repeat(self.input_ids_uncond, n, axis=0)
+        res["attention_mask_uncond"] = jnp.repeat(self.attention_mask_uncond, n, axis=0)
+        return res
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        tokenizer = DalleBartTokenizer.from_pretrained(*args, **kwargs)
+        config = DalleBartConfig.from_pretrained(*args, **kwargs)
+        return cls(tokenizer, config.normalize_text, config.max_text_length)
+
+
+class DalleBartProcessor(PretrainedFromWandbMixin, DalleBartProcessorBase):
+    pass
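A quick sanity check of the processor's output contract (the prompts are illustrative; the checkpoint reference is again the notebook's):

processor = DalleBartProcessor.from_pretrained("dalle-mini/dalle-mini/model-2vm4itcx:latest")
tokens = processor(["a blue table", "a red chair"])

# one row per prompt, plus matching unconditional rows for super conditioning
assert tokens["input_ids"].shape[0] == 2
assert tokens["input_ids_uncond"].shape == tokens["input_ids"].shape
assert tokens["attention_mask_uncond"].shape == tokens["attention_mask"].shape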
src/dalle_mini/{text.py → model/text.py}
RENAMED
File without changes
tools/inference/inference_pipeline.ipynb
CHANGED
@@ -75,7 +75,7 @@
 "# Model references\n",
 "\n",
 "# dalle-mini\n",
-"DALLE_MODEL = \"dalle-mini/dalle-mini/model-…
+"DALLE_MODEL = \"dalle-mini/dalle-mini/model-2vm4itcx:latest\"  # can be wandb artifact or 🤗 Hub or local folder or google bucket\n",
 "DALLE_COMMIT_ID = None\n",
 "\n",
 "# VQGAN model\n",
@@ -126,7 +126,7 @@
 "outputs": [],
 "source": [
 "# Load models & tokenizer\n",
-"from dalle_mini…
+"from dalle_mini import DalleBart, DalleBartProcessor\n",
 "from vqgan_jax.modeling_flax_vqgan import VQModel\n",
 "from transformers import CLIPProcessor, FlaxCLIPModel\n",
 "\n",
@@ -134,14 +134,13 @@
 "model = DalleBart.from_pretrained(\n",
 "    DALLE_MODEL, revision=DALLE_COMMIT_ID, dtype=dtype, abstract_init=True\n",
 ")\n",
-"tokenizer = DalleBartTokenizer.from_pretrained(DALLE_MODEL, revision=DALLE_COMMIT_ID)\n",
 "\n",
 "# Load VQGAN\n",
 "vqgan = VQModel.from_pretrained(VQGAN_REPO, revision=VQGAN_COMMIT_ID)\n",
 "\n",
 "# Load CLIP\n",
 "clip = FlaxCLIPModel.from_pretrained(CLIP_REPO, revision=CLIP_COMMIT_ID)\n",
-"…
+"clip_processor = CLIPProcessor.from_pretrained(CLIP_REPO, revision=CLIP_COMMIT_ID)"
 ]
 },
 {
@@ -192,17 +191,18 @@
 "from functools import partial\n",
 "\n",
 "# model inference\n",
-"@partial(jax.pmap, axis_name=\"batch\", static_broadcasted_argnums=(3, 4))\n",
-"def p_generate(…
+"@partial(jax.pmap, axis_name=\"batch\", static_broadcasted_argnums=(3, 4, 5, 6))\n",
+"def p_generate(\n",
+"    tokenized_prompt, key, params, top_k, top_p, temperature, condition_scale\n",
+"):\n",
 "    return model.generate(\n",
 "        **tokenized_prompt,\n",
-"        do_sample=True,\n",
-"        num_beams=1,\n",
 "        prng_key=key,\n",
 "        params=params,\n",
 "        top_k=top_k,\n",
 "        top_p=top_p,\n",
-"…
+"        temperature=temperature,\n",
+"        condition_scale=condition_scale,\n",
 "    )\n",
 "\n",
 "\n",
@@ -258,7 +258,7 @@
 "id": "rsmj0Aj5OQox"
 },
 "source": [
-"Our model…
+"Our model requires processing prompts."
 ]
 },
 {
@@ -269,9 +269,9 @@
 },
 "outputs": [],
 "source": [
-"from dalle_mini…
+"from dalle_mini import DalleBartProcessor\n",
 "\n",
-"…
+"processor = DalleBartProcessor.from_pretrained(DALLE_MODEL, revision=DALLE_COMMIT_ID)"
 ]
 },
 {
@@ -291,7 +291,7 @@
 },
 "outputs": [],
 "source": [
-"prompt = \"…
+"prompt = \"a blue table\""
 ]
 },
 {
@@ -302,34 +302,7 @@
 },
 "outputs": [],
 "source": [
-"…
-"processed_prompt"
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {
-"id": "QUzYACWxOe5z"
-},
-"source": [
-"We tokenize the prompt."
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {
-"id": "n8e7MvGwOe5z"
-},
-"outputs": [],
-"source": [
-"tokenized_prompt = tokenizer(\n",
-"    processed_prompt,\n",
-"    return_tensors=\"jax\",\n",
-"    padding=\"max_length\",\n",
-"    truncation=True,\n",
-"    max_length=128,\n",
-").data\n",
+"tokenized_prompt = processor([prompt])\n",
 "tokenized_prompt"
 ]
 },
@@ -390,7 +363,9 @@
 "\n",
 "# We can customize top_k/top_p used for generating samples\n",
 "gen_top_k = None\n",
-"gen_top_p = None"
+"gen_top_p = None\n",
+"temperature = 0.85\n",
+"cond_scale = 3.0"
 ]
 },
 {
@@ -413,7 +388,13 @@
 "    key, subkey = jax.random.split(key)\n",
 "    # generate images\n",
 "    encoded_images = p_generate(\n",
-"        tokenized_prompt…
+"        tokenized_prompt,\n",
+"        shard_prng_key(subkey),\n",
+"        model.params,\n",
+"        gen_top_k,\n",
+"        gen_top_p,\n",
+"        temperature,\n",
+"        cond_scale,\n",
 "    )\n",
 "    # remove BOS\n",
 "    encoded_images = encoded_images.sequences[..., 1:]\n",
@@ -444,7 +425,7 @@
 "from flax.training.common_utils import shard\n",
 "\n",
 "# get clip scores\n",
-"clip_inputs =…
+"clip_inputs = clip_processor(\n",
 "    text=[prompt] * jax.device_count(),\n",
 "    images=images,\n",
 "    return_tensors=\"np\",\n",
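One detail in the notebook change: `temperature` and `condition_scale` join `static_broadcasted_argnums`, so `jax.pmap` bakes them into the compiled program and changing either value triggers a recompilation, exactly as with `top_k`/`top_p` before. A toy sketch of that behavior:

from functools import partial

import jax
import jax.numpy as jnp

@partial(jax.pmap, axis_name="batch", static_broadcasted_argnums=(1,))
def scale_fn(x, scale):
    return x * scale

x = jnp.ones((jax.device_count(), 4))
y = scale_fn(x, 2.0)  # first call compiles for scale=2.0
y = scale_fn(x, 3.0)  # a new static value recompiles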
tools/train/train.py
CHANGED
@@ -103,7 +103,7 @@ class ModelArguments:
 
     def __post_init__(self):
         if self.tokenizer_name is None:
-            self.tokenizer_name…
+            self.tokenizer_name = self.model_name_or_path
         assert (
             self.tokenizer_name is not None
         ), "Tokenizer name or model name/path needs to be specified"
@@ -209,6 +209,26 @@ class DataTrainingArguments:
             "help": "Probability of removing some captions for classifier-free guidance."
         },
     )
+    clip_score_column: Optional[str] = field(
+        default="clip_score",
+        metadata={"help": "Column that contains the clip score used for filtering."},
+    )
+    min_clip_score: Optional[float] = field(
+        default=None,
+        metadata={"help": "Minimum clip score required."},
+    )
+    max_clip_score: Optional[float] = field(
+        default=None,
+        metadata={"help": "Maximum clip score required."},
+    )
+    filter_column: Optional[str] = field(
+        default=None,
+        metadata={"help": "Column that contains the classes to be filtered."},
+    )
+    filter_value: Optional[str] = field(
+        default=None,
+        metadata={"help": "Class value to be kept during filtering."},
+    )
     max_train_samples: Optional[int] = field(
         default=None,
         metadata={
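These `DataTrainingArguments` fields mirror the new `Dataset` filtering options one-for-one, and `HfArgumentParser` derives a command-line flag from each dataclass field name. A self-contained sketch using a stripped-down stand-in dataclass (parsing the real `DataTrainingArguments` would additionally require its other arguments):

from dataclasses import dataclass, field
from typing import Optional

from transformers import HfArgumentParser

@dataclass
class FilterArgs:
    # stand-in for the fields added above
    clip_score_column: Optional[str] = field(default="clip_score")
    min_clip_score: Optional[float] = field(default=None)
    max_clip_score: Optional[float] = field(default=None)
    filter_column: Optional[str] = field(default=None)
    filter_value: Optional[str] = field(default=None)

parser = HfArgumentParser(FilterArgs)
(args,) = parser.parse_args_into_dataclasses(
    args=["--min_clip_score", "0.3", "--filter_column", "source", "--filter_value", "web"]
)
assert args.min_clip_score == 0.3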