AI-Scientist / templates /nanoGPT_lite /backup_seed_ideas.json
pradachan's picture
Upload folder using huggingface_hub
f71c233 verified
raw
history blame
1.03 kB
[
{
"Name": "adaptive_block_size",
"Title": "Adaptive Block Size: Dynamic Context Window Adjustment for Efficient Training",
"Experiment": "Modify the model to dynamically adjust its block size during training, starting with a smaller block size and gradually increasing it. This could lead to faster initial training and better long-range dependency learning.",
"Interestingness": 6,
"Feasibility": 4,
"Novelty": 4
},
{
"Name": "layerwise_learning_rates",
"Title": "Layer-wise Learning Rate Adaptation: Optimizing Training Dynamics in Transformer Models",
"Experiment": "Implement layer-wise learning rates, where each transformer layer has its own learning rate. Modify the configure_optimizers function to assign different learning rates to different layers, with deeper layers having lower learning rates. Compare the training dynamics, convergence speed, and final performance with the baseline model.",
"Interestingness": 4,
"Feasibility": 6,
"Novelty": 2
}
]