pradachan's picture
Upload folder using huggingface_hub
f71c233 verified
raw
history blame
4.93 kB
%% LaTeX2e file `references.bib'
%% generated by the `filecontents' environment
%% from source `template' on 2024/08/08.
%%
% Deep Learning textbook (MIT Press, 2016).
% The exported record listed Yoshua Bengio twice in the author field and
% carried a spurious volume number; the book has three authors and is a
% single volume, so both artifacts are removed here.
@book{goodfellow2016deep,
  author    = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
  title     = {Deep learning},
  publisher = {MIT Press},
  year      = {2016},
}
% "Attention is all you need", published in the NeurIPS proceedings series.
% A proceedings venue belongs in booktitle on an inproceedings entry rather
% than in journal on an article entry, so the entry type is corrected here.
% The citation key is unchanged.
@inproceedings{vaswani2017attention,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {Attention is all you need},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {30},
  year      = {2017},
}
% nanoGPT source repository by Andrej Karpathy.
% A code repository is not a journal article: the link now lives in the url
% field instead of being typed into journal with a literal "URL " prefix, and
% the entry type is misc. The mixed-case project name is brace-protected so
% styles that sentence-case titles do not lowercase it.
% NOTE(review): bibliography styles without url support will not print the
% link; confirm the style used by the document renders the url field.
@misc{karpathy2023nanogpt,
  author = {Karpathy, Andrej},
  title  = {{nanoGPT}},
  year   = {2023},
  url    = {https://github.com/karpathy/nanoGPT/tree/master},
  note   = {GitHub repository},
}
% Adam optimizer paper by Kingma and Ba, cited as an arXiv preprint
% (identifier 1412.6980).
@article{kingma2014adam,
  author  = {Kingma, Diederik P and Ba, Jimmy},
  title   = {Adam: A method for stochastic optimization},
  journal = {arXiv preprint arXiv:1412.6980},
  year    = {2014},
}
% Layer normalization paper by Ba, Kiros and Hinton, cited as an arXiv
% preprint (identifier 1607.06450).
@article{ba2016layer,
  author  = {Ba, Jimmy Lei and Kiros, Jamie Ryan and Hinton, Geoffrey E},
  title   = {Layer normalization},
  journal = {arXiv preprint arXiv:1607.06450},
  year    = {2016},
}
% Decoupled weight decay paper by Loshchilov and Hutter, cited as an arXiv
% preprint (identifier 1711.05101).
@article{loshchilov2017adamw,
  author  = {Loshchilov, Ilya and Hutter, Frank},
  title   = {Decoupled weight decay regularization},
  journal = {arXiv preprint arXiv:1711.05101},
  year    = {2017},
}
% "Language Models are Unsupervised Multitask Learners" by Radford et al.
% The record had the article type but no journal, which is a required field
% for that type and produces an empty-field warning. The work was released as
% a report rather than in a journal, so it is typed as techreport with the
% issuing institution. The citation key is unchanged.
% NOTE(review): institution is taken to be OpenAI; confirm against the
% report's title page.
@techreport{radford2019language,
  author      = {Radford, Alec and Wu, Jeff and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya},
  title       = {Language Models are Unsupervised Multitask Learners},
  institution = {OpenAI},
  year        = {2019},
}
% Neural machine translation with attention, by Bahdanau, Cho and Bengio,
% cited as an arXiv preprint (identifier 1409.0473).
@article{bahdanau2014neural,
  author  = {Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua},
  title   = {Neural machine translation by jointly learning to align and translate},
  journal = {arXiv preprint arXiv:1409.0473},
  year    = {2014},
}
% PyTorch library paper, published in the NeurIPS proceedings series.
% Two fixes: the product name is spelled "PyTorch" and brace-protected so
% sentence-casing styles keep its capitalisation, and the proceedings venue is
% moved from journal (article) to booktitle (inproceedings).
% The trailing "and others" is the BibTeX token for a truncated author list.
@inproceedings{paszke2019pytorch,
  author    = {Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and others},
  title     = {{PyTorch}: An imperative style, high-performance deep learning library},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {32},
  year      = {2019},
}
% GPT-4 technical report with OpenAI as corporate author, identified by its
% arXiv eprint number and class.
@misc{gpt4,
  author        = {OpenAI},
  title         = {GPT-4 Technical Report},
  year          = {2024},
  eprint        = {2303.08774},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2303.08774}
}
% Style transfer by cross-alignment, by Shen, Lei, Barzilay and Jaakkola.
% The exported record was typed as an article yet carried a booktitle (ignored
% for articles), used "ArXiv" as the journal and put the arXiv identifier in
% volume. The record's own booktitle names the NeurIPS conference, so the
% entry is typed as inproceedings; the arXiv identifier moves to the eprint
% fields. Given names are written out and names use "Last, First" form.
% NOTE(review): volume 30 is the 2017 NeurIPS proceedings volume; confirm.
@inproceedings{Shen2017StyleTF,
  author        = {Shen, Tianxiao and Lei, Tao and Barzilay, Regina and Jaakkola, Tommi},
  title         = {Style Transfer from Non-Parallel Text by Cross-Alignment},
  booktitle     = {Advances in Neural Information Processing Systems},
  volume        = {30},
  year          = {2017},
  eprint        = {1705.09655},
  archivePrefix = {arXiv},
}
% AdapterFusion paper by Pfeiffer et al., cited as its 2020 arXiv preprint.
% Fixes: accented letters in the third author's surname are written as BibTeX
% special characters so classic 8-bit BibTeX sorts and prints them correctly;
% the booktitle field (ignored for articles) is dropped; and the arXiv
% identifier moves out of volume into the "arXiv preprint" journal form used
% by the other preprint entries in this file. The product name in the title
% is brace-protected against sentence-casing.
% NOTE(review): the removed booktitle pointed at the EACL conference; if the
% published version should be cited instead, switch to inproceedings.
@article{Pfeiffer2020AdapterFusionNT,
  author  = {Pfeiffer, Jonas and Kamath, Aishwarya and R{\"u}ckl{\'e}, Andreas and Cho, Kyunghyun and Gurevych, Iryna},
  title   = {{AdapterFusion}: Non-Destructive Task Composition for Transfer Learning},
  journal = {arXiv preprint arXiv:2005.00247},
  year    = {2020},
}
% SIGGRAPH 2023 course on diffusion models, by Meng et al.
% The record was typed as a book but had no publisher (required for books)
% and carried journal and booktitle fields that books do not use. A course
% contribution inside the SIGGRAPH courses collection fits inproceedings, with
% the collection name as booktitle. The acronym in the title is
% brace-protected and names use "Last, First" form.
@inproceedings{Meng2023SIGGRAPH2C,
  author    = {Meng, Chenlin and Song, Jiaming and Li, Shuang and Zhu, Jun-Yan and Ermon, Stefano and Lin, Tsung-Yi and Lin, Chen-Hsuan and Kreis, Karsten},
  title     = {{SIGGRAPH} 2023 Course on Diffusion Models},
  booktitle = {ACM SIGGRAPH 2023 Courses},
  year      = {2023},
}
% CTRL conditional transformer language model, by Keskar et al., cited as an
% arXiv preprint.
% This key was defined five times with identical content, which BibTeX
% reports as a repeated-entry error; a single definition is kept.
% Also fixed: the booktitle field (ignored for articles) is dropped, the
% arXiv identifier moves out of volume into the "arXiv preprint" journal form
% used by the other preprint entries in this file, the acronym in the title
% is brace-protected, and names use "Last, First" form.
% NOTE(review): abbreviated given names were expanded from the published
% author list; confirm the spellings.
@article{Keskar2019CTRLAC,
  author  = {Keskar, Nitish Shirish and McCann, Bryan and Varshney, Lav R. and Xiong, Caiming and Socher, Richard},
  title   = {{CTRL}: A Conditional Transformer Language Model for Controllable Generation},
  journal = {arXiv preprint arXiv:1909.05858},
  year    = {2019},
}