|
\documentclass{article} |
|
\usepackage{iclr2024_conference,times} |
|
|
|
\usepackage[utf8]{inputenc} |
|
\usepackage[T1]{fontenc} |
|
\usepackage{hyperref} |
|
\usepackage{url} |
|
\usepackage{booktabs} |
|
\usepackage{amsfonts} |
|
\usepackage{nicefrac} |
|
\usepackage{microtype} |
|
\usepackage{titletoc} |
|
|
|
\usepackage{subcaption} |
|
\usepackage{graphicx} |
|
\usepackage{amsmath} |
|
\usepackage{multirow} |
|
\usepackage{color} |
|
\usepackage{colortbl} |
|
\usepackage{cleveref} |
|
\usepackage{algorithm} |
|
\usepackage{algorithmicx} |
|
\usepackage{algpseudocode} |
|
|
|
\DeclareMathOperator*{\argmin}{arg\,min} |
|
\DeclareMathOperator*{\argmax}{arg\,max} |
|
|
|
\graphicspath{{../}} |
|
\begin{filecontents}{references.bib} |
|
@article{lu2024aiscientist, |
|
title={The {AI} {S}cientist: Towards Fully Automated Open-Ended Scientific Discovery}, |
|
author={Lu, Chris and Lu, Cong and Lange, Robert Tjarko and Foerster, Jakob and Clune, Jeff and Ha, David}, |
|
journal={arXiv preprint arXiv:2408.06292}, |
|
year={2024} |
|
} |
|
|
|
@book{goodfellow2016deep, |
|
title={Deep learning}, |
|
author={Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
|
volume={1}, |
|
year={2016}, |
|
publisher={MIT Press} |
|
} |
|
|
|
@article{power2022grokking, |
|
title={Grokking: Generalization beyond overfitting on small algorithmic datasets}, |
|
author={Power, Alethea and Burda, Yuri and Edwards, Harri and Babuschkin, Igor and Misra, Vedant}, |
|
journal={arXiv preprint arXiv:2201.02177}, |
|
year={2022} |
|
} |
|
|
|
@article{vaswani2017attention, |
|
title={Attention is all you need}, |
|
author={Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia}, |
|
journal={Advances in neural information processing systems}, |
|
volume={30}, |
|
year={2017} |
|
} |
|
|
|
@article{kingma2014adam, |
|
title={Adam: A method for stochastic optimization}, |
|
author={Kingma, Diederik P and Ba, Jimmy}, |
|
journal={arXiv preprint arXiv:1412.6980}, |
|
year={2014} |
|
} |
|
|
|
@article{ba2016layer, |
|
title={Layer normalization}, |
|
author={Ba, Jimmy Lei and Kiros, Jamie Ryan and Hinton, Geoffrey E}, |
|
journal={arXiv preprint arXiv:1607.06450}, |
|
year={2016} |
|
} |
|
|
|
@article{loshchilov2017adamw, |
|
title={Decoupled weight decay regularization}, |
|
author={Loshchilov, Ilya and Hutter, Frank}, |
|
journal={arXiv preprint arXiv:1711.05101}, |
|
year={2017} |
|
} |
|
|
|
@article{radford2019language, |
|
title={Language Models are Unsupervised Multitask Learners}, |
|
author={Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya},

journal={OpenAI blog},

year={2019}
|
} |
|
|
|
@article{bahdanau2014neural, |
|
title={Neural machine translation by jointly learning to align and translate}, |
|
author={Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua}, |
|
journal={arXiv preprint arXiv:1409.0473}, |
|
year={2014} |
|
} |
|
|
|
@article{paszke2019pytorch, |
|
title={{PyTorch}: An imperative style, high-performance deep learning library},
|
author={Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and others}, |
|
journal={Advances in neural information processing systems}, |
|
volume={32}, |
|
year={2019} |
|
} |
|
\end{filecontents} |
|
|
|
\title{TITLE HERE} |
|
|
|
\author{GPT-4o \& Claude\\
Department of Computer Science\\
University of LLMs\\
}
|
|
|
\newcommand{\fix}{\marginpar{FIX}} |
|
\newcommand{\new}{\marginpar{NEW}} |
|
|
|
\begin{document} |
|
|
|
\maketitle |
|
|
|
\begin{abstract} |
|
ABSTRACT HERE |
|
\end{abstract} |
|
|
|
\section{Introduction} |
|
\label{sec:intro} |
|
INTRO HERE |
|
|
|
\section{Related Work} |
|
\label{sec:related} |
|
RELATED WORK HERE |
|
|
|
\section{Background} |
|
\label{sec:background} |
|
BACKGROUND HERE |
|
|
|
\section{Method} |
|
\label{sec:method} |
|
METHOD HERE |
|
|
|
\section{Experimental Setup} |
|
\label{sec:experimental} |
|
EXPERIMENTAL SETUP HERE |
|
|
|
\section{Results} |
|
\label{sec:results} |
|
RESULTS HERE |
|
|
|
|
|
\begin{figure}[h]
\centering
\begin{subfigure}{0.49\textwidth}
\includegraphics[width=\textwidth]{train_acc_x_div_y.png}
\label{fig:first-run}
\end{subfigure}
\hfill
\begin{subfigure}{0.49\textwidth}
\includegraphics[width=\textwidth]{train_loss_x_div_y.png}
\label{fig:second-run}
\end{subfigure}
\caption{PLEASE FILL IN CAPTION HERE}
\label{fig:first_figure}
\end{figure}
|
|
|
\section{Conclusions and Future Work} |
|
\label{sec:conclusion} |
|
CONCLUSIONS HERE |
|
|
|
This work was generated by \textsc{The AI Scientist} \citep{lu2024aiscientist}. |
|
|
|
\bibliographystyle{iclr2024_conference} |
|
\bibliography{references} |
|
|
|
\end{document} |
|
|