Spaces:
Runtime error
Runtime error
import argparse | |
import ast | |
def get_default_params(model_name):
    """Return the default Adam hyper-parameters for ``model_name``.

    The lookup is case-insensitive: any name containing ``"vit"`` gets one
    set of ``beta2``/``eps`` values, every other name gets the other set.
    ``lr`` and ``beta1`` are the same for both.

    Args:
        model_name: Name of the model architecture (a string).

    Returns:
        A dict with the keys ``"lr"``, ``"beta1"``, ``"beta2"`` and ``"eps"``.
    """
    # Params from paper (https://arxiv.org/pdf/2103.00020.pdf)
    is_vit = "vit" in model_name.lower()
    beta2, eps = (0.98, 1.0e-6) if is_vit else (0.999, 1.0e-8)
    return {"lr": 5.0e-4, "beta1": 0.9, "beta2": beta2, "eps": eps}
class ParseKwargs(argparse.Action):
    """argparse action that collects ``KEY=VALUE`` tokens into a dict.

    Each value is interpreted as a Python literal when possible (so ``n=3``
    yields the int ``3`` and ``scale=(0.5,1.0)`` yields a tuple); anything
    that is not a valid literal is kept as the raw string, which avoids the
    need to quote plain strings on the command line.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Parse ``values`` and store the resulting dict on ``namespace``.

        Args:
            parser: The ``ArgumentParser`` invoking this action (unused).
            namespace: Namespace object that receives the parsed dict under
                ``self.dest``.
            values: Iterable of ``KEY=VALUE`` strings.
            option_string: The option flag that triggered the action, if any.

        Raises:
            ValueError: If a token does not contain ``=``.
        """
        parsed = {}
        for item in values:
            # Split on the first '=' only, so the value itself may contain '='.
            key, sep, raw = item.partition('=')
            if not sep:
                raise ValueError(
                    f"expected KEY=VALUE for {option_string or self.dest}, got {item!r}"
                )
            try:
                parsed[key] = ast.literal_eval(raw)
            except (ValueError, SyntaxError, TypeError):
                # literal_eval signals non-literals with any of these; fall
                # back to the raw string (avoid need to escape on command line).
                parsed[key] = raw
        setattr(namespace, self.dest, parsed)
def parse_args(args):
    """Build the command-line parser, parse ``args`` and fill optimizer defaults.

    After parsing, any of ``lr``, ``beta1``, ``beta2`` and ``eps`` that is
    still ``None`` is replaced by the value ``get_default_params`` returns for
    ``args.model``.

    Args:
        args: Sequence of argument strings handed to
            ``ArgumentParser.parse_args`` (argparse reads ``sys.argv[1:]``
            when this is ``None``).

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser()

    # ------------------------------------------------------------------
    # Experiment / data sources
    # ------------------------------------------------------------------
    parser.add_argument(
        "--exp-name",
        type=str,
        help="the name of experiment",
    )
    parser.add_argument(
        "--no-text-condition",
        action='store_true',
        help="If set, do not use the text condition.",
    )
    parser.add_argument(
        "--train-data",
        type=str,
        default=None,
        help="Path to file(s) with training data. When using webdataset, multiple datasources can be combined using the `::` separator.",
        nargs='+',
    )
    parser.add_argument(
        "--train-data-sample-ratio",
        type=float,
        default=[1.0],
        help="When using multiple data sources, this controls the sample ratio. ",
        nargs='+',
    )
    parser.add_argument(
        "--train-data-upsampling-factors",
        type=str,
        default=None,
        help=(
            "When using multiple data sources with webdataset and sampling with replacement, this can be used to upsample specific data sources. "
            "Similar to --train-data, this should be a string with as many numbers as there are data sources, separated by `::` (e.g. 1::2::0.5) "
            "By default, datapoints are sampled uniformly regardless of the dataset sizes."
        ),
    )
    parser.add_argument(
        "--ignore-in-train",
        type=int,
        default=[0],
        help="Whether to ignore the corresponding dataset during training",
        nargs='+',
    )
    parser.add_argument(
        "--ignore-in-val",
        type=int,
        default=[0],
        help="Whether to ignore the corresponding dataset during val",
        nargs='+',
    )
    parser.add_argument(
        "--extra-train-data",
        type=str,
        default=None,
        help="Path to file(s) with training data",
    )
    parser.add_argument(
        "--val-data",
        type=str,
        default=[None],
        nargs='+',
        help="Path to file(s) with validation data",
    )
    parser.add_argument(
        "--extra-val-data",
        type=str,
        default=None,
        help="Path to file(s) with validation data",
    )
    parser.add_argument(
        "--train-folder",
        type=str,
        default=None,
        help="Path to images of training data",
        nargs='+',
    )
    parser.add_argument(
        "--extra-train-folder",
        type=str,
        default=None,
        help="Path to images of training data",
    )
    parser.add_argument(
        "--val-folder",
        type=str,
        default=[None],
        nargs='+',
        help="Path to images of val data",
    )
    parser.add_argument(
        "--extra-val-folder",
        type=str,
        default=None,
        help="Path to images of val data",
    )
    parser.add_argument(
        "--save-path",
        type=str,
        default=None,
        help="Path to save checkpoints",
    )
    parser.add_argument(
        "--train-num-samples",
        type=int,
        default=None,
        help="Number of samples in dataset. Required for webdataset if not available in info file.",
    )
    parser.add_argument(
        "--val-num-samples",
        type=int,
        default=None,
        help="Number of samples in dataset. Useful for webdataset if not available in info file.",
    )
    # NOTE: with nargs='+' a value given on the command line is a list, while
    # the default below is the bare string "auto"; consumers see either form.
    parser.add_argument(
        "--dataset-type",
        choices=["webdataset", "csv", "synthetic", "auto", "preference", "rating", "regional", "ranking", "ImageReward", "HPD"],
        default="auto",
        help="Which type of dataset to process.",
        nargs='+',
    )
    parser.add_argument(
        "--dataset-resampled",
        default=False,
        action="store_true",
        help="Whether to use sampling with replacement for webdataset shard selection.",
    )
    parser.add_argument(
        "--csv-separator",
        type=str,
        default="\t",
        help="For csv-like datasets, which separator to use.",
    )
    parser.add_argument(
        "--csv-img-key",
        type=str,
        default="filepath",
        help="For csv-like datasets, the name of the key for the image paths.",
    )
    parser.add_argument(
        "--csv-caption-key",
        type=str,
        default="title",
        help="For csv-like datasets, the name of the key for the captions.",
    )
    parser.add_argument(
        "--imagenet-val",
        type=str,
        default=None,
        help="Path to imagenet val set for conducting zero shot evaluation.",
    )
    parser.add_argument(
        "--imagenet-v2",
        type=str,
        default=None,
        help="Path to imagenet v2 for conducting zero shot evaluation.",
    )

    # ------------------------------------------------------------------
    # Logging
    # ------------------------------------------------------------------
    parser.add_argument(
        "--logs",
        type=str,
        default="./logs/",
        help="Where to store tensorboard logs. Use None to avoid storing logs.",
    )
    parser.add_argument(
        "--log-local",
        action="store_true",
        default=False,
        help="log files on local master, otherwise global master only.",
    )
    parser.add_argument(
        "--name",
        type=str,
        default=None,
        help="Optional identifier for the experiment when storing logs. Otherwise use current time.",
    )

    # ------------------------------------------------------------------
    # Loader sizes, schedule and optimizer
    # ------------------------------------------------------------------
    # NOTE: the next three options use nargs='+', so a command-line value is a
    # list of ints, whereas the default is a single int.
    parser.add_argument(
        "--workers", type=int, default=1, help="Number of dataloader workers per GPU.", nargs='+'
    )
    parser.add_argument(
        "--batch-size", type=int, default=64, help="Batch size per GPU.", nargs='+'
    )
    parser.add_argument(
        "--val-batch-size", type=int, default=64, help="Validation batch size per GPU.", nargs='+'
    )
    parser.add_argument(
        "--iterations", type=int, default=None, help="Number of iterations to train for."
    )
    parser.add_argument(
        "--iters-cooldown", type=int, default=None
    )
    # lr / beta1 / beta2 / eps default to None so the model-specific values
    # can be filled in after parsing (see the end of this function).
    parser.add_argument("--lr", type=float, default=None, help="Learning rate.")
    parser.add_argument("--beta1", type=float, default=None, help="Adam beta 1.")
    parser.add_argument("--beta2", type=float, default=None, help="Adam beta 2.")
    parser.add_argument("--eps", type=float, default=None, help="Adam epsilon.")
    parser.add_argument("--wd", type=float, default=0.2, help="Weight decay.")
    parser.add_argument(
        "--warmup", type=int, default=10000, help="Number of steps to warmup for."
    )
    parser.add_argument(
        "--use-bn-sync",
        default=False,
        action="store_true",
        help="Whether to use batch norm sync.",
    )
    parser.add_argument(
        "--skip-scheduler",
        action="store_true",
        default=False,
        help="Use this flag to skip the learning rate decay.",
    )
    parser.add_argument(
        "--lr-scheduler",
        type=str,
        default='cosine',
        help="LR scheduler. One of: 'cosine', 'const' (constant), 'const-cooldown' (constant w/ cooldown). Default: cosine",
    )
    parser.add_argument(
        "--lr-cooldown-end", type=float, default=0.0,
        help="End learning rate for cooldown schedule. Default: 0",
    )
    parser.add_argument(
        "--lr-cooldown-power", type=float, default=1.0,
        help="Power for polynomial cooldown schedule. Default: 1.0 (linear decay)",
    )
    parser.add_argument(
        "--save-most-recent",
        action="store_true",
        default=False,
        help="Always save the most recent model trained to epoch_latest.pt.",
    )
    parser.add_argument(
        "--zeroshot-frequency", type=int, default=2, help="How often to run zero shot."
    )
    parser.add_argument(
        "--resume",
        default=None,
        type=str,
        help="path to latest checkpoint (default: none)",
    )
    parser.add_argument(
        "--precision",
        choices=["amp", "amp_bf16", "amp_bfloat16", "bf16", "fp16", "fp32"],
        default="amp",
        help="Floating point precision.",
    )

    # ------------------------------------------------------------------
    # Model
    # ------------------------------------------------------------------
    parser.add_argument(
        "--model",
        type=str,
        default="RN50",
        help="Name of the vision backbone to use.",
    )
    parser.add_argument(
        "--pretrained",
        default='',
        type=str,
        help="Use a pretrained CLIP model weights with the specified tag or file path.",
    )
    parser.add_argument(
        "--pretrained-image",
        default=False,
        action='store_true',
        help="Load imagenet pretrained weights for image tower backbone if available.",
    )
    parser.add_argument(
        "--lock-image",
        default=False,
        action='store_true',
        help="Lock full image tower by disabling gradients.",
    )
    parser.add_argument(
        "--lock-image-unlocked-groups",
        type=int,
        default=0,
        help="Leave last n image tower layer groups unlocked.",
    )
    parser.add_argument(
        "--lock-image-freeze-bn-stats",
        default=False,
        action='store_true',
        help="Freeze BatchNorm running stats in image tower for any locked layers.",
    )
    parser.add_argument(
        '--image-mean', type=float, nargs='+', default=None, metavar='MEAN',
        help='Override default image mean value of dataset')
    parser.add_argument(
        '--image-std', type=float, nargs='+', default=None, metavar='STD',
        help='Override default image std deviation of dataset')
    parser.add_argument('--aug-cfg', nargs='*', default={}, action=ParseKwargs)
    parser.add_argument(
        '--light-augmentation', action='store_true',
        help='')
    parser.add_argument(
        "--grad-checkpointing",
        default=False,
        action='store_true',
        help="Enable gradient checkpointing.",
    )
    parser.add_argument(
        "--local-loss",
        default=False,
        action="store_true",
        help="calculate loss w/ local features @ global (instead of realizing full global @ global matrix)",
    )
    parser.add_argument(
        "--gather-with-grad",
        default=False,
        action="store_true",
        help="enable full distributed gradient for feature gather",
    )
    parser.add_argument(
        '--force-image-size', type=int, nargs='+', default=None,
        help='Override default image size',
    )
    parser.add_argument(
        "--force-quick-gelu",
        default=False,
        action='store_true',
        help="Force use of QuickGELU activation for non-OpenAI transformer models.",
    )
    parser.add_argument(
        "--force-patch-dropout",
        default=None,
        type=float,
        help="Override the patch dropout during training, for fine tuning with no dropout near the end as in the paper",
    )
    parser.add_argument(
        "--force-custom-text",
        default=False,
        action='store_true',
        help="Force use of CustomTextCLIP model (separate text-tower).",
    )
    parser.add_argument(
        "--torchscript",
        default=False,
        action='store_true',
        help="torch.jit.script the model, also uses jit version of OpenAI models if pretrained=='openai'",
    )
    parser.add_argument(
        "--trace",
        default=False,
        action='store_true',
        help="torch.jit.trace the model for inference / eval only",
    )
    parser.add_argument(
        "--accum-freq", type=int, default=1, help="Update the model every --accum-freq steps."
    )

    # ------------------------------------------------------------------
    # arguments for distributed training
    # ------------------------------------------------------------------
    parser.add_argument(
        "--dist-url",
        default="env://",
        type=str,
        help="url used to set up distributed training",
    )
    parser.add_argument(
        "--dist-backend", default="nccl", type=str, help="distributed backend"
    )
    parser.add_argument(
        "--report-to",
        default='',
        type=str,
        help="Options are ['tensorboard']",
    )
    parser.add_argument(
        "--debug",
        default=False,
        action="store_true",
        help="If true, more information is logged.",
    )
    parser.add_argument(
        "--copy-codebase",
        default=False,
        action="store_true",
        help="If true, we copy the entire base on the log directory, and execute from there.",
    )
    parser.add_argument(
        "--horovod",
        default=False,
        action="store_true",
        help="Use horovod for distributed training.",
    )
    parser.add_argument(
        "--ddp-static-graph",
        default=False,
        action='store_true',
        help="Enable static graph optimization for DDP in PyTorch >= 1.11.",
    )
    parser.add_argument(
        "--no-set-device-rank",
        default=False,
        action="store_true",
        help="Don't set device index from local rank (when CUDA_VISIBLE_DEVICES restricted to one per proc).",
    )
    parser.add_argument(
        "--seed", type=int, default=16, help="Default random seed."
    )
    parser.add_argument(
        "--grad-clip-norm", type=float, default=None, help="Gradient clip."
    )

    # ------------------------------------------------------------------
    # Text tower locking, losses, logging cadence
    # ------------------------------------------------------------------
    parser.add_argument(
        "--lock-text",
        default=False,
        action='store_true',
        help="Lock full text tower by disabling gradients.",
    )
    parser.add_argument(
        "--lock-text-unlocked-layers",
        type=int,
        default=0,
        help="Leave last n text tower layer groups unlocked.",
    )
    parser.add_argument(
        "--margin",
        type=float,
        default=1.0,
        help="hyper parameter for margin loss in ranking",
    )
    parser.add_argument(
        "--lock-text-freeze-layer-norm",
        default=False,
        action='store_true',
        help="Freeze LayerNorm running stats in text tower for any locked layers.",
    )
    parser.add_argument(
        "--log-every-n-steps",
        type=int,
        default=10,
        help="Log every n steps to tensorboard/console.",
    )
    parser.add_argument(
        "--coca-caption-loss-weight",
        type=float,
        default=2.0,
        help="Weight assigned to caption loss in CoCa.",
    )
    parser.add_argument(
        "--coca-contrastive-loss-weight",
        type=float,
        default=1.0,
        help="Weight assigned to contrastive loss when training CoCa.",
    )

    # ------------------------------------------------------------------
    # Remote sync, checkpoint retention, distillation
    # ------------------------------------------------------------------
    parser.add_argument(
        "--remote-sync",
        type=str,
        default=None,
        help="Optionally sync with a remote path specified by this arg",
    )
    parser.add_argument(
        "--remote-sync-frequency",
        type=int,
        default=300,
        help="How frequently to sync to a remote directly if --remote-sync is not None.",
    )
    parser.add_argument(
        "--remote-sync-protocol",
        choices=["s3", "fsspec"],
        default="s3",
        help="How to do the remote sync backup if --remote-sync is not None.",
    )
    parser.add_argument(
        "--delete-previous-checkpoint",
        default=False,
        action="store_true",
        help="If true, delete previous checkpoint after storing a new one.",
    )
    parser.add_argument(
        "--distill-model",
        default=None,
        help='Which model arch to distill from, if any.',
    )
    parser.add_argument(
        "--distill-pretrained",
        default=None,
        help='Which pre-trained weights to distill from, if any.',
    )

    args = parser.parse_args(args)

    # If some params are not passed, we use the default values based on model name.
    default_params = get_default_params(args.model)
    for name, val in default_params.items():
        if getattr(args, name) is None:
            setattr(args, name, val)

    return args