---
license: apache-2.0
datasets:
- Skylion007/openwebtext
language:
- en
pipeline_tag: text-generation
---
### GPT trained with nanoGPT

Configs:
- batch size = 32
- bias = False
- block_size = 1024
- n heads = 8
- n layers = 6
- dropout = 0.0
- n embed = 768
- vocab size = 50304
- gradient_accumulation_steps = 1
- learning_rate = 1e-3
- iters = 7250
- lr_decay_iters = 5000
- min_lr = 1e-5
- warmup_iters = 400
- mfu = 30.45935
- train_loss = 3.89759
- val_loss = 3.91001