[
  {
    "Name": "adaptive_block_size",
    "Title": "Adaptive Block Size: Dynamic Context Window Adjustment for Efficient Training",
    "Experiment": "Modify the model to dynamically adjust its block size during training, starting with a smaller block size and gradually increasing it. This could potentially lead to faster initial training and better long-range dependency learning.",
    "Interestingness": 6,
    "Feasibility": 4,
    "Novelty": 4
  },
  {
    "Name": "layerwise_learning_rates",
    "Title": "Layer-wise Learning Rate Adaptation: Optimizing Training Dynamics in Transformer Models",
    "Experiment": "Implement layer-wise learning rates, where each transformer layer has its own learning rate. Modify the configure_optimizers function to assign different learning rates to different layers, with deeper layers having lower learning rates. Compare the training dynamics, convergence speed, and final performance with the baseline model.",
    "Interestingness": 4,
    "Feasibility": 6,
    "Novelty": 2
  }
]