{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "os.environ[\"OPENAI_API_KEY\"] = \"\"\n", "s" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{\n", " \"createdAt\": \"2024-06-19T09:44:35.578Z\",\n", " \"id\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"metadata\": {},\n", " \"name\": \"TEST\",\n", " \"participant\": {},\n", " \"steps\": [\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:44:48.221\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"1ab30fe6-99cb-4327-bd7d-e3128b2c1a62\",\n", " \"input\": {\n", " \"question\": \"What is inductive bias in CNNs?\"\n", " },\n", " \"metadata\": {},\n", " \"name\": \"RAG\",\n", " \"output\": {\n", " \"answer\": \"Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.\",\n", " \"chat_history\": [],\n", " \"original_question\": \"What is inductive bias in CNNs?\",\n", " \"question\": \"What is inductive bias in CNNs?\",\n", " \"rephrased_question\": \"What is inductive bias in CNNs?\",\n", " \"source_documents\": [\n", " \"page_content='Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 
39 A function \\ud835\\udc39[\\u22c5] with parameters \\ud835\\udf19\\\" that takes the node embeddings and adjacency matrix and outputs new node embeddings ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf', 'page': 38, 'date': '04/16/2024\\\\n Tuesday', 'tldr': 'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.', 'title': '20 -- Graph Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_bt7qtymk', 'suggested_readings': 'UDL Chapter 13', 'score': 0.4147438882496012}\",\n", " \"page_content='https://poloclub.github.io/cnn-explainer/ 92 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 91, 'date': '02/27/2024\\\\n Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.30807206912986296}\",\n", " \"page_content='Convolutional networks \\u2022 Networks for images \\u2022 Invariance and equivariance \\u2022 1D convolution \\u2022 Convolutional layers \\u2022 Channels \\u2022 Receptive fields \\u2022 Convolutional network for MNIST 1D 44 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 43, 'date': '02/27/2024\\\\n Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.26420832659802906}\"\n", " ]\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:44:46.84\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"retrieval\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:44:48.223\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"18af26e1-e738-4304-b484-1fd7bd5a26e6\",\n", " \"input\": null,\n", " \"metadata\": {},\n", " \"name\": \"User\",\n", " \"output\": {\n", " \"content\": \"What is inductive bias in CNNs?\"\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:44:48.223\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"user_message\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:44:48.225\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"067c2783-38ff-43e4-866a-abf957415359\",\n", " \"input\": null,\n", " \"metadata\": {},\n", " \"name\": \"AI_Tutor\",\n", " \"output\": {\n", " \"content\": \"Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. 
You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.\"\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:44:48.225\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"assistant_message\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:44:58.148\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"d2b74775-41fc-4ebc-bc40-d8d7cf67015d\",\n", " \"input\": {\n", " \"question\": \"Do transformers have this?\"\n", " },\n", " \"metadata\": {},\n", " \"name\": \"RAG\",\n", " \"output\": {\n", " \"answer\": \"I'm not sure, I don't have that information.\",\n", " \"chat_history\": [\n", " \"content='What is inductive bias in CNNs?'\",\n", " \"content='Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.'\"\n", " ],\n", " \"original_question\": \"Do transformers have this?\",\n", " \"question\": \"Do transformers have this?\",\n", " \"rephrased_question\": \"Do transformers exhibit a similar inductive bias as CNNs?\",\n", " \"source_documents\": [\n", " \"page_content='Transformers \\u2022 Multi-headed Self Attention is just one component of the transformer architecture \\u2022 Let\\u2019s look at a transformer block (or layer) from the encoder 49 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2451855994037282}\",\n", " \"page_content='Transformers \\u2022 Motivation \\u2022 Dot-product self-attention \\u2022 Applying Self-Attention \\u2022 The Transformer Architecture \\u2022 Three Types of NLP Transformer Models \\u2022 Encoder \\u2022 Decoder \\u2022 Encoder-Decoder 55 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 54, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.24314965373956088}\",\n", " \"page_content='Transformers \\u2022 Motivation \\u2022 Dot-product self-attention \\u2022 Applying Self-Attention \\u2022 The Transformer Architecture \\u2022 Three Types of NLP Transformer Models \\u2022 Encoder \\u2022 Decoder 
\\u2022 Encoder-Decoder 10 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 9, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2367633946657962}\"\n", " ]\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:44:56.486\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"retrieval\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:44:58.15\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"2ec55d74-cce5-42d4-827e-375d597cf91c\",\n", " \"input\": null,\n", " \"metadata\": {},\n", " \"name\": \"User\",\n", " \"output\": {\n", " \"content\": \"Do transformers have this?\"\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:44:58.15\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"user_message\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:44:58.151\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"703534b4-981a-43a3-8571-773367be2fa5\",\n", " \"input\": null,\n", " \"metadata\": {},\n", " \"name\": \"AI_Tutor\",\n", " \"output\": {\n", " \"content\": \"I'm not sure, I don't have that information.\"\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:44:58.151\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"assistant_message\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:45:13.1\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"fd58e85c-dbaa-4d06-8a75-a3839e37856d\",\n", " \"input\": {\n", " \"question\": \"cool, suggest a project with transformers to use as my final project\"\n", " },\n", " \"metadata\": {},\n", " \"name\": \"RAG\",\n", " \"output\": {\n", " \"answer\": \"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\",\n", " \"chat_history\": [\n", " \"content='What is inductive bias in CNNs?'\",\n", " \"content='Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. 
You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.'\",\n", " \"content='Do transformers have this?'\",\n", " \"content=\\\"I'm not sure, I don't have that information.\\\"\"\n", " ],\n", " \"original_question\": \"cool, suggest a project with transformers to use as my final project\",\n", " \"question\": \"cool, suggest a project with transformers to use as my final project\",\n", " \"rephrased_question\": \"What is the concept of inductive bias in transformers, and how does it relate to the prioritization of information from neighboring nodes like in CNNs?\",\n", " \"source_documents\": [\n", " \"page_content='Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 39 A function \\ud835\\udc39[\\u22c5] with parameters \\ud835\\udf19\\\" that takes the node embeddings and adjacency matrix and outputs new node embeddings ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf', 'page': 38, 'date': '04/16/2024\\\\n Tuesday', 'tldr': 'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.', 'title': '20 -- Graph Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_bt7qtymk', 'suggested_readings': 'UDL Chapter 13', 'score': 0.3613770850497736}\",\n", " \"page_content='Transformers \\u2022 Multi-headed Self Attention is just one component of the transformer architecture \\u2022 Let\\u2019s look at a transformer block (or layer) from the encoder 49 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2733153333305661}\",\n", " \"page_content='Transformers \\u2022 Multi-headed Self Attention is just one component of the transformer architecture 48 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 47, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.22982265165302218}\"\n", " ]\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:45:10.667\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"retrieval\"\n", " },\n", " {\n", " 
\"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:45:13.102\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"c86489d2-cbf3-475e-867e-d1b20addedf0\",\n", " \"input\": null,\n", " \"metadata\": {},\n", " \"name\": \"User\",\n", " \"output\": {\n", " \"content\": \"cool, suggest a project with transformers to use as my final project\"\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:45:13.102\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"user_message\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:45:13.103\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"75eb694e-4b1b-4161-96ea-9ecec4092f53\",\n", " \"input\": null,\n", " \"metadata\": {},\n", " \"name\": \"AI_Tutor\",\n", " \"output\": {\n", " \"content\": \"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\"\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:45:13.103\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"assistant_message\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:45:26.404\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"4d02c5ad-1bd9-4767-beab-4c724062093e\",\n", " \"input\": {\n", " \"question\": \"Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?\"\n", " },\n", " \"metadata\": {},\n", " \"name\": \"RAG\",\n", " \"output\": {\n", " \"answer\": \"Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!\",\n", " \"chat_history\": [\n", " \"content='What is inductive bias in CNNs?'\",\n", " \"content='Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.'\",\n", " \"content='Do transformers have this?'\",\n", " \"content=\\\"I'm not sure, I don't have that information.\\\"\",\n", " \"content='cool, suggest a project with transformers to use as my final project'\",\n", " \"content=\\\"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\\\"\"\n", " ],\n", " \"original_question\": \"Could you give me a quick rundown on what image captioning is? 
and how transformers are useful for this?\",\n", " \"question\": \"Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?\",\n", " \"rephrased_question\": \"What is image captioning and how are transformers beneficial for this task?\",\n", " \"source_documents\": [\n", " \"page_content='Image captioning 24 UDL \\u2022 E.g. CNN-RNN, LSTM, Transformers ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf', 'page': 23, 'date': '01/18/2024\\\\n Thursday', 'tldr': \\\"We will introduce the topic of deep learning, a bit about it's history, and what impact it has had. Then we'll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\\\", 'title': '01 - Intro to Deep Learning and Course Logistics', 'lecture_recording': None, 'suggested_readings': 'UDL Chapter 1', 'score': 0.552598095386973}\",\n", " \"page_content='Next Time \\u2022 Tokenization and Learned Embeddings \\u2022 Training and Fine-Tuning Transformers \\u2022 Image Transformers \\u2022 Multimodal Transformers \\u2022 \\u2026 76 Link Feedback ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 75, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.34481147697680925}\",\n", " \"page_content='Encoder-Decoder Model \\u2022 Used for machine translation, which is a sequence-to-sequence task 22 https://jalammar.github.io/illustrated-transformer/ ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 21, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.33002046196039614}\"\n", " ]\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:45:23.305\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"retrieval\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:45:26.405\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"3cce51a5-4bcc-42f0-b186-063cb713856a\",\n", " \"input\": null,\n", " \"metadata\": {},\n", " \"name\": \"User\",\n", " \"output\": {\n", " \"content\": \"Could you give me a quick rundown on what image captioning is? 
and how transformers are useful for this?\"\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:45:26.405\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"user_message\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:45:26.407\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"36ae0e9d-540f-45e9-9adf-a596d0886304\",\n", " \"input\": null,\n", " \"metadata\": {},\n", " \"name\": \"AI_Tutor\",\n", " \"output\": {\n", " \"content\": \"Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!\"\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:45:26.407\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"assistant_message\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:45:48.729\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"9e061c1e-ad50-400f-983f-813587508223\",\n", " \"input\": {\n", " \"question\": \"How do I learn the math behind transformers, what are the most important bits?\"\n", " },\n", " \"metadata\": {},\n", " \"name\": \"RAG\",\n", " \"output\": {\n", " \"answer\": \"You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!\",\n", " \"chat_history\": [\n", " \"content='Do transformers have this?'\",\n", " \"content=\\\"I'm not sure, I don't have that information.\\\"\",\n", " \"content='cool, suggest a project with transformers to use as my final project'\",\n", " \"content=\\\"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\\\"\",\n", " \"content='Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?'\",\n", " \"content='Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. 
Good luck with your project!'\"\n", " ],\n", " \"original_question\": \"How do I learn the math behind transformers, what are the most important bits?\",\n", " \"question\": \"How do I learn the math behind transformers, what are the most important bits?\",\n", " \"rephrased_question\": \"What are the key components I should focus on when learning the mathematics behind transformers?\",\n", " \"source_documents\": [\n", " \"page_content='After the break \\u2022 Image Transformers \\u2022 Multimodal Transformers \\u2022 RAG pattern \\u2022 Training and Fine-Tuning Transformers \\u2022 \\u2026 68 Feedback ChatGPT ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 67, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.3366359998900601}\",\n", " \"page_content='UDL Chapter 11 Lecture 02/29/2024 Thursday 11a - Recurrent Neural Networks [slides] [lecture recording] Suggested Readings: UDL Chapter 11 Lecture 03/05/2024 Tuesday 12 - Transformers [slides] [lecture recording] Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer Discussion 03/06/2024 Wednesday Discussion_05 - Deep Dive 2: Deep Learning Modules in Pytorch (CNN, RNN/LSTM, Transformer) Github Link: disc5 Lecture' metadata={'source': 'https://dl4ds.github.io/sp2024/schedule/', 'page': 0, 'score': 0.3100015518546917}\",\n", " \"page_content=\\\"Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer 14 -- Vision & Multimodal Transformers tl;dr: In this lecture we'll cover vision and multimodal transformers as a survey of three papers. [slides] [lecture recording] Suggested Readings: See slides for references 15 -- Improving LLM Perf tl;dr: In this lecture we talk about ways to improve LLM performance short of retraining or finetuning. We cover more sophisticated prompt strategies, retrieval augmentation and cognitive architectures building systems and agents based on LLMs. 
[slides] [lecture recording] Suggested Readings: See slides for references\\\" metadata={'source': 'https://dl4ds.github.io/sp2024/lectures/', 'page': 0, 'score': 0.3078678255019791}\"\n", " ]\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:45:46.877\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"retrieval\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:45:48.731\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"9cd0e108-9097-4372-97d5-bb2624a03548\",\n", " \"input\": null,\n", " \"metadata\": {},\n", " \"name\": \"User\",\n", " \"output\": {\n", " \"content\": \"How do I learn the math behind transformers, what are the most important bits?\"\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:45:48.731\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"user_message\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:45:48.733\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"8c9cf79f-767d-4184-9eeb-baf6e867653e\",\n", " \"input\": null,\n", " \"metadata\": {},\n", " \"name\": \"AI_Tutor\",\n", " \"output\": {\n", " \"content\": \"You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!\"\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:45:48.733\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"assistant_message\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:46:02.781\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"74ed6d18-6c08-4d65-b47a-4d8efbcdfc20\",\n", " \"input\": {\n", " \"question\": \"What is attention?\"\n", " },\n", " \"metadata\": {},\n", " \"name\": \"RAG\",\n", " \"output\": {\n", " \"answer\": \"Attention is a mechanism used in transformers to focus on different parts of the input when processing information. It allows the model to weigh the importance of different elements in the input sequence, enabling it to effectively capture relationships and dependencies. You can find more information about attention in the lecture slides on transformers. Good luck with your studies!\",\n", " \"chat_history\": [\n", " \"content='cool, suggest a project with transformers to use as my final project'\",\n", " \"content=\\\"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\\\"\",\n", " \"content='Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?'\",\n", " \"content='Image captioning is the process of generating textual descriptions for images. 
Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!'\",\n", " \"content='How do I learn the math behind transformers, what are the most important bits?'\",\n", " \"content='You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!'\"\n", " ],\n", " \"original_question\": \"What is attention?\",\n", " \"question\": \"What is attention?\",\n", " \"rephrased_question\": \"What is attention?\",\n", " \"source_documents\": [\n", " \"page_content='Transformers, GPTs and Assistants A. Vaswani et al., \\u201cAttention is All you Need,\\u201d presented at the 31st Conference on Neural Information Processing Systems (NIPS 2017), Long Beach, CA, USA, 2017, p. 11. [Online]. Available: https://arxiv.org/abs/1706.03762 State of GPT, Andrej Karpathy, MS Build Keynote 38 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf', 'page': 37, 'date': '01/18/2024\\\\n Thursday', 'tldr': \\\"We will introduce the topic of deep learning, a bit about it's history, and what impact it has had. Then we'll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\\\", 'title': '01 - Intro to Deep Learning and Course Logistics', 'lecture_recording': None, 'suggested_readings': 'UDL Chapter 1', 'score': 0.40361429640387503}\",\n", " \"page_content='Motivation \\u2022 Need to efficiently process large strings of text \\u2022 Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2435349602286475}\",\n", " \"page_content='Graph Attention 59 Regular graph convolution Graph attention ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf', 'page': 58, 'date': '04/16/2024\\\\n Tuesday', 'tldr': 'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.', 'title': '20 -- Graph Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_bt7qtymk', 'suggested_readings': 'UDL Chapter 13', 'score': 0.22901789972755515}\"\n", " ]\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:46:00.606\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " 
\"type\": \"retrieval\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:46:02.783\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"718cdca2-b453-4b3a-9e0b-fbcdd38c54c7\",\n", " \"input\": null,\n", " \"metadata\": {},\n", " \"name\": \"User\",\n", " \"output\": {\n", " \"content\": \"What is attention?\"\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:46:02.783\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"user_message\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:46:02.785\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"ae559dc2-c069-419e-b0e5-f117861292f8\",\n", " \"input\": null,\n", " \"metadata\": {},\n", " \"name\": \"AI_Tutor\",\n", " \"output\": {\n", " \"content\": \"Attention is a mechanism used in transformers to focus on different parts of the input when processing information. It allows the model to weigh the importance of different elements in the input sequence, enabling it to effectively capture relationships and dependencies. You can find more information about attention in the lecture slides on transformers. Good luck with your studies!\"\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:46:02.785\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"assistant_message\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:46:10.547\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"4ac119fb-86f5-4528-9edc-44cc616de390\",\n", " \"input\": {\n", " \"question\": \"cool thanks, see ya\"\n", " },\n", " \"metadata\": {},\n", " \"name\": \"RAG\",\n", " \"output\": {\n", " \"answer\": \"You're welcome! If you have any more questions in the future, feel free to ask. Goodbye!\",\n", " \"chat_history\": [\n", " \"content='Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?'\",\n", " \"content='Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!'\",\n", " \"content='How do I learn the math behind transformers, what are the most important bits?'\",\n", " \"content='You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!'\",\n", " \"content='What is attention?'\",\n", " \"content='Attention is a mechanism used in transformers to focus on different parts of the input when processing information. It allows the model to weigh the importance of different elements in the input sequence, enabling it to effectively capture relationships and dependencies. You can find more information about attention in the lecture slides on transformers. 
Good luck with your studies!'\"\n", " ],\n", " \"original_question\": \"cool thanks, see ya\",\n", " \"question\": \"cool thanks, see ya\",\n", " \"rephrased_question\": \"No rephrasing needed.\",\n", " \"source_documents\": [\n", " \"page_content='Motivation \\u2022 Need to efficiently process large strings of text \\u2022 Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': -0.015666472239410423}\",\n", " \"page_content='Example 1: univariate regression 61 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf', 'page': 59, 'date': '02/01/2024\\\\n Thursday', 'tldr': 'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. This is a key topic to aid you in applying deep learning models to new types of data.', 'title': '05 - Loss Functions', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_k2t4xjkd', 'suggested_readings': 'UDL Chapter 5', 'score': -0.03567729015020071}\",\n", " \"page_content='Example 1: univariate regression AXBXiclZjZbtw2FEDHXdN0c1 rELwVaoUaApHAM20iXlwKJHWezU0/iN bEcg9JQGsYUJUuUPRN1not+TN+KvY7 +hX9hV5K8jC6l37oAPEw9xul5TEUZB JUeilpX9m3n3vfc/+PDKR1c/uTz 6fvfbFXpGWech3w1Sm+UHACi6F4rta MkPspyzJB8PzhZM3z/jOeFSNWOHmf8 KGxEpEImYbQ8ezv/fzm+Fc/CdJRFU0 O/SAaLcCfbCiOFvxCxAl7tXL+9nzo5 yF1fKk8ovTXFcrfiZaXK1MJhOfjzJf8 kgf3m7Mm+PbzkZv1T7Ut5X9XMRDfXQ8 O7+0uFR/PFpYbgvzvfbTP752feAP0rB MuNKhZEVxuLyU6aOK5VqEk+u+mXBMx aesJgfQlGxhBdHVZ2ziXcDIgMvSnP4p 7RXR9+uUbGkKMZJAGbC9LDAzARd7LDU 0U9HlVBZqbkKm46iUno69cwCeAOR81D LMRYmAsYqxcOGeRMwzJd9RU/D9MkYW pQ+avrzyDhAY+FqvhpWS/ZNJ1muHQ /EyY/XxzrQVoXki3nDSK2YRi4ReDyp Kr4YL2IgOACxyAlIFS+gTZOfIPKWEYU tKgFXzR6BveE9n5CmleYx5KSjvSQaFD LJRx1rjViwlElH2QbF8254BnCdwyrAU OGLozXYzpiaXNTfKTzpCpMDPeQMxXz uguYcsikmVHXUKWUDXsWL9g6zlTJ23 i0qweam4iyNrJu47OaV7UoOvUEWTBJo y7Vh1BloQbyoAlDLclo9hwolnIm5VK KwKsjH7eRp0+85MBO/NUQbXS9dbr0j6 zxjKiAnA1We+BVMh7+pr6dT2LpJzVvu mwEfeEBarW4XlcTOti05gVm1sQs06V8 ik2YJQnp53TMah8oz0Z2gCeCLrsyFi t7SFuoSbFkT9hdgqnkp+eHtxe/56Kha MpeN+UOyCQ0VZeZqyIT/R0MDeITh/QU RvHipRIsHgXrxUgn3d7R0LMcb20TqtY OCUEwKPUaXv4hVt04dwYNEzRWCJh24 ZsJhRY5irqyCRgZvuFh7NhAIZpk2Mwx lGlR5pzc/NB+hkitm9tiLszDqntDlUb o3je4nNaCMjwczvgl1QOU0aDJZ5CWas BylMyRWdLRK7/QcIm5rv56yZui04r56 UbH4wLVqcMQ356vIHXIyYWdSRqC04/ zrYksRz9QVvT7fr2yKqNV9+RrR07XLc pSbvtKN2w71kBPx0zHaTeIRizoStd WOkHrEcvQHbnzuOmahcN1m5K0e5FHp +1wpyba/tHOkGtmjkmpHJhjXyr9JoRF TUXtFNOEx0hsQlhMyq4F/8fKtjksd60 mhMV+IbqaCWBpwCWeQhPCYnMJd802ht VNh7rpVpnMhshsQlh8yBI86yaExZiKs VM8YVmGxCZE8jEeRzSPGZYylwSXpHM sSJkS7k2VD5Mu5IJYGmEehs5OoMRyFS hDtsglgu68wrnzlNoFyu6i3dHe9e0r 
FmqETwNIWucY8f8t5kQU4xXDMciU5E 8jKaAL72OlT5+L0F0QVOckF0djSMaXn lp5Tum/pPqW5peQXQRA9t5T8OgmiM0v PKN2zdI/S0tKS0l1LdymNLI0ofWDpA0 pDS0NK1yxdo1RbSk6k8ESwdIfSoaVDS g8sPaD0haUvKH1k6SNKX1r6ktI3lr6h 9J6l9yhljJK1y1dp5RbSl4dBNGqpau UBpaS35wrVnapzSzNKP0vqX3KR1YSn 4Vw/PMUnK8gQejpZLSx5Y+plRYSn6/B dFTS59SmliaUPrE0ieUvrb0NaUPLX1I aWwpeTcApxNLtym1b4GqgtJnlj6j9NT SU/d7AT5dxsC1MbdsA1uUpamlG5YSn 4pwFHC0hNynoxUe1e7eNtE7muRmnIHa zN+UZvkPFJT7mDt3emiNrk/RWrKh2To 63vTFymQUrjTH8/OL+O3sLSwt7K4/MP inWd35u+utm9or/S+6n3bu9lb7v3Yu9 t71Ov3dnth79+Z6zNfz3wz9vcH3N/z v3VqO/MtHW+7HU+c3/B3l+BUI= Pr(y|f[x, \\u03c6], \\u03c32) = 1 p 2\\u21e1\\u03c32 exp \\\\uf8ff \\u2212(y \\u2212f[x, \\u03c6])2 2\\u03c32 \\\" AW4XiclZjLbtw2FECVPtP05b SoN90INQIkRWLYRvrYFEjsOC879Th+J pZjUBpKw5iZImyZ6LOB3RXdNtP6hf0 M7ptN73UaIbRvfSiAzjD3HPExyUpcRT mUpR6aemvK+8+97H3x49aNrH3/y6W efz13/Yr/MqiLie1Ems+IwZCWXQvE9L bTkh3nBWRpKfhCerhl+cM6LUmRqV49y fpyRIlYRExD6GSO94qbo1+CtLodlCJ J2auVW/5PfhAXLKqXx3VQnhW6Xgly0e J6ZTweB3yYB5LH+ujOxLw5ugNV3Go2 FYNCpEM9PHJ3MLS4lLz8WlhuS0seO2n d3L9q37Qz6Iq5UpHkpXl0fJSro9rVmg RST6+FlQlz1l0yhJ+BEXFUl4e10+xv 4NiPT9OCvgT2m/ib59Rc3SshylIZgp0 4MSMxN0saNKxz8e10LleYqmjQUV9LX mW+S6/dFwSMtR1BgUSGgr340YJAhDVN wLVD8IsrSlKl+Hayub0N6Q54IVfOzqp mO8bjrDcOh+JlxuqT3VktQvNUvOGk kYxlVwi8GRc13wxWcRAcABikROQKV5C nSY/YewvIwrLTwIGHmZD6FzsPx+TqpX mCeSko70kGhRyYcda41YMJVpR9kBxf dv+AZwXcAsQFfhi6M52MmZGk+v03yoi 7QuTQy3UDCV8KYJGHLEpBlR1CVlHBp 1LF+xtZzpk7bxGV509XCRJC1W3QdXdC 8qH7XaSLIgkWYdK0mgiwJN4s+SxlkuS 2fwIBT30TcqlBYFWRh9os7Ladmwhem 8Mc9kvXW69J+s8ZyogJwO4z34KpiHf1 tWxm+9PknDe+KfChP4DJ6l7CimQyrGk jMKo2NqZmkytk0mxBqMguqbpjUPlue gO0ATwpqsKoeK3tNtNCZasCQe3YahFJ fnRncXv+PC4XjLbxvxDsgkVlVXuqsiE /0dFfXg84fUFETx5mUSTB4Fm8jIJ93c 0dazAC9tEmrmDglBMCj1C218kqntNE8 GdzVLUVwiYeuGbCYUmOY67sgkYGb7hQ etYQBEaZDQZYySzsio4ufmh9QyRje3 xUKYh1X3hiqN0L1vcDm7CsrwcDjnl1w eoyGk3yGWaX6rEDJHJopHb4KSg1bzL X7mymfFJ1Wws82vagXzA7VRTxs5MNP B8JsagjUV1wsnHWJYnlaA/qmi3Xt3tW b7z6liztxOG6TUnqbXvpth3uJT3gZ5u O3m4Sj1jUkaiutofUI5ajPajLncdN1y gcrtuUpN5pHp2w52ZaPnHuwOumTkmZ bJvjn2ZDCYhLGoqaqeYpTxB4iSExbTq WvB/rOyYw3LXmoSw2CtFVzMBLPW5xEO YhLA42cJds41hdOhbrpVJvMBMichLD 5iKR71JITFhIqJUzxleY7ESYjkcYDzO KB5zLGUuyQ8I7ljRsiSci2oYpB1JRPA 0hC1NnQ0Bj2QmUINtkEsl3Tlc6Vp9A qVnQV7ka3rukYc1QhSaApS2yx/xgy7 nJQpxiOGa5kpwLZOU0gT3s9KgzPf2Fc U1OcmE8snRE6YWlF5QeWHpAaWEp+UQ xs8tJb9Owvjc0nNK9y3dp7SytKJ0z9I 9SmNLY0ofWvqQ0sjSiNI1S9co1ZaSEy k8ESzdpXRg6YDSQ0sPKX1h6QtKH1v6m NKXlr6k9I2lbyi9b+l9SpmljNJ1S9cp 5ZaSVwdhvGrpKqWhpeS3H+w1S3uU5pb mlD6w9AGlfUvJr2J4nlKjfwYLRUv rE0ieUCkvJ7cwfmbpM0pTS1NKn1r6l NLXlr6m9JGljyhNLCXvBuB0YukOpfYt UF1Sum3pNqVnlp653wvw2TSGroW5ZSv YojSzNKN0w1LySwGOEpaekvNkrNq72v RtE7mvxWrGHazN+PRqkvNYzbiDtXen6 dXk/hSrGR+Qrq/vz16kQErhTn8yt7CM 38LSwv7K4vL3i3e37y7cW23f0F71va +8W56y94P3j3vsdfz9rzI+9P72/vH+3 c+mv91/rf53yfqO1fa70Op/5P/4D+ G/4mg= Pr(y|\\u00b5, \\u03c32) = 1 p 2\\u21e1\\u03c32 exp \\\\uf8ff \\u2212(y \\u2212\\u00b5)2 2\\u03c32 \\\" In this case, just the mean Just learn the mean, \\ud835\\udf07, and assume the variance is fixed,. 63 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf', 'page': 61, 'date': '02/01/2024\\\\n Thursday', 'tldr': 'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. 
This is a key topic to aid you in applying deep learning models to new types of data.', 'title': '05 - Loss Functions', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_k2t4xjkd', 'suggested_readings': 'UDL Chapter 5', 'score': -0.038714982109774976}\"\n", " ]\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:46:08.896\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"retrieval\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:46:10.549\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"4425cdbd-0da9-4d90-80e2-d909d4390189\",\n", " \"input\": null,\n", " \"metadata\": {},\n", " \"name\": \"User\",\n", " \"output\": {\n", " \"content\": \"cool thanks, see ya\"\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:46:10.549\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"user_message\"\n", " },\n", " {\n", " \"attachments\": [],\n", " \"endTime\": \"2024-06-19T09:46:10.55\",\n", " \"error\": null,\n", " \"generation\": null,\n", " \"id\": \"ae5047bf-cdbd-4490-a5fb-beb9cf2f27e5\",\n", " \"input\": null,\n", " \"metadata\": {},\n", " \"name\": \"AI_Tutor\",\n", " \"output\": {\n", " \"content\": \"You're welcome! If you have any more questions in the future, feel free to ask. Goodbye!\"\n", " },\n", " \"parentId\": null,\n", " \"scores\": [],\n", " \"startTime\": \"2024-06-19T09:46:10.55\",\n", " \"tags\": null,\n", " \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n", " \"type\": \"assistant_message\"\n", " }\n", " ],\n", " \"tags\": [\n", " \"FAISS\",\n", " \"gpt-3.5-turbo-1106\"\n", " ]\n", "}]\n", "{'step_id': ['1ab30fe6-99cb-4327-bd7d-e3128b2c1a62', 'd2b74775-41fc-4ebc-bc40-d8d7cf67015d', 'fd58e85c-dbaa-4d06-8a75-a3839e37856d', '4d02c5ad-1bd9-4767-beab-4c724062093e', '9e061c1e-ad50-400f-983f-813587508223', '74ed6d18-6c08-4d65-b47a-4d8efbcdfc20', '4ac119fb-86f5-4528-9edc-44cc616de390'], 'user_input': ['What is inductive bias in CNNs?', 'Do transformers have this?', 'cool, suggest a project with transformers to use as my final project', 'Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?', 'How do I learn the math behind transformers, what are the most important bits?', 'What is attention?', 'cool thanks, see ya'], 'output': ['Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.', \"I'm not sure, I don't have that information.\", \"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\", 'Image captioning is the process of generating textual descriptions for images. 
Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!', 'You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!', 'Attention is a mechanism used in transformers to focus on different parts of the input when processing information. It allows the model to weigh the importance of different elements in the input sequence, enabling it to effectively capture relationships and dependencies. You can find more information about attention in the lecture slides on transformers. Good luck with your studies!', \"You're welcome! If you have any more questions in the future, feel free to ask. Goodbye!\"], 'context': [['page_content=\\'Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 39 A function š¹[ā‹…] with parameters šœ™\" that takes the node embeddings and adjacency matrix and outputs new node embeddings \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf\\', \\'page\\': 38, \\'date\\': \\'04/16/2024\\\\n Tuesday\\', \\'tldr\\': \\'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.\\', \\'title\\': \\'20 -- Graph Neural Networks\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_bt7qtymk\\', \\'suggested_readings\\': \\'UDL Chapter 13\\', \\'score\\': 0.4147438882496012}', \"page_content='https://poloclub.github.io/cnn-explainer/ 92 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 91, 'date': '02/27/2024\\\\n Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.30807206912986296}\", \"page_content='Convolutional networks ā€¢ Networks for images ā€¢ Invariance and equivariance ā€¢ 1D convolution ā€¢ Convolutional layers ā€¢ Channels ā€¢ Receptive fields ā€¢ Convolutional network for MNIST 1D 44 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 43, 'date': '02/27/2024\\\\n Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.26420832659802906}\"], [\"page_content='Transformers ā€¢ Multi-headed Self Attention is just one component of the transformer architecture ā€¢ Letā€™s look at a transformer block (or layer) from the encoder 49 Encoder Decoder 
' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2451855994037282}\", \"page_content='Transformers ā€¢ Motivation ā€¢ Dot-product self-attention ā€¢ Applying Self-Attention ā€¢ The Transformer Architecture ā€¢ Three Types of NLP Transformer Models ā€¢ Encoder ā€¢ Decoder ā€¢ Encoder-Decoder 55 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 54, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.24314965373956088}\", \"page_content='Transformers ā€¢ Motivation ā€¢ Dot-product self-attention ā€¢ Applying Self-Attention ā€¢ The Transformer Architecture ā€¢ Three Types of NLP Transformer Models ā€¢ Encoder ā€¢ Decoder ā€¢ Encoder-Decoder 10 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 9, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2367633946657962}\"], ['page_content=\\'Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 
39 A function š¹[ā‹…] with parameters šœ™\" that takes the node embeddings and adjacency matrix and outputs new node embeddings \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf\\', \\'page\\': 38, \\'date\\': \\'04/16/2024\\\\n Tuesday\\', \\'tldr\\': \\'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.\\', \\'title\\': \\'20 -- Graph Neural Networks\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_bt7qtymk\\', \\'suggested_readings\\': \\'UDL Chapter 13\\', \\'score\\': 0.3613770850497736}', \"page_content='Transformers ā€¢ Multi-headed Self Attention is just one component of the transformer architecture ā€¢ Letā€™s look at a transformer block (or layer) from the encoder 49 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2733153333305661}\", \"page_content='Transformers ā€¢ Multi-headed Self Attention is just one component of the transformer architecture 48 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 47, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.22982265165302218}\"], ['page_content=\\'Image captioning 24 UDL ā€¢ E.g. CNN-RNN, LSTM, Transformers \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf\\', \\'page\\': 23, \\'date\\': \\'01/18/2024\\\\n Thursday\\', \\'tldr\\': \"We will introduce the topic of deep learning, a bit about it\\'s history, and what impact it has had. 
Then we\\'ll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\", \\'title\\': \\'01 - Intro to Deep Learning and Course Logistics\\', \\'lecture_recording\\': None, \\'suggested_readings\\': \\'UDL Chapter 1\\', \\'score\\': 0.552598095386973}', \"page_content='Next Time • Tokenization and Learned Embeddings • Training and Fine-Tuning Transformers • Image Transformers • Multimodal Transformers • … 76 Link Feedback ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 75, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.34481147697680925}\", \"page_content='Encoder-Decoder Model • Used for machine translation, which is a sequence-to-sequence task 22 https://jalammar.github.io/illustrated-transformer/ ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 21, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.33002046196039614}\"], [\"page_content='After the break • Image Transformers • Multimodal Transformers • RAG pattern • Training and Fine-Tuning Transformers • … 68 Feedback ChatGPT ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 67, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.3366359998900601}\", \"page_content='UDL Chapter 11 Lecture 02/29/2024 Thursday 11a - Recurrent Neural Networks [slides] [lecture recording] Suggested Readings: UDL Chapter 11 Lecture 03/05/2024 Tuesday 12 - Transformers [slides] [lecture recording] Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer Discussion 03/06/2024 Wednesday Discussion_05 - Deep Dive 2: Deep Learning Modules in Pytorch (CNN, RNN/LSTM, Transformer) Github Link: disc5 Lecture' metadata={'source': 'https://dl4ds.github.io/sp2024/schedule/', 'page': 0, 'score': 0.3100015518546917}\", 'page_content=\"Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer 14 -- Vision & Multimodal Transformers tl;dr: In this lecture we\\'ll cover vision and multimodal transformers as a survey of three papers. 
[slides] [lecture recording] Suggested Readings: See slides for references 15 -- Improving LLM Perf tl;dr: In this lecture we talk about ways to improve LLM performance short of retraining or finetuning. We cover more sophisticated prompt strategies, retrieval augmentation and cognitive architectures building systems and agents based on LLMs. [slides] [lecture recording] Suggested Readings: See slides for references\" metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/lectures/\\', \\'page\\': 0, \\'score\\': 0.3078678255019791}'], ['page_content=\\'Transformers, GPTs and Assistants A. Vaswani et al., “Attention is All you Need,” presented at the 31st Conference on Neural Information Processing Systems (NIPS 2017), Long Beach, CA, USA, 2017, p. 11. [Online]. Available: https://arxiv.org/abs/1706.03762 State of GPT, Andrej Karpathy, MS Build Keynote 38 \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf\\', \\'page\\': 37, \\'date\\': \\'01/18/2024\\\\n Thursday\\', \\'tldr\\': \"We will introduce the topic of deep learning, a bit about it\\'s history, and what impact it has had. Then we\\'ll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\", \\'title\\': \\'01 - Intro to Deep Learning and Course Logistics\\', \\'lecture_recording\\': None, \\'suggested_readings\\': \\'UDL Chapter 1\\', \\'score\\': 0.40361429640387503}', \"page_content='Motivation • Need to efficiently process large strings of text • Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2435349602286475}\", \"page_content='Graph Attention 59 Regular graph convolution Graph attention ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf', 'page': 58, 'date': '04/16/2024\\\\n Tuesday', 'tldr': 'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.', 'title': '20 -- Graph Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_bt7qtymk', 'suggested_readings': 'UDL Chapter 13', 'score': 0.22901789972755515}\"], [\"page_content='Motivation • Need to efficiently process large strings of text • Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 
'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': -0.015666472239410423}\", \"page_content='Example 1: univariate regression 61 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf', 'page': 59, 'date': '02/01/2024\\\\n Thursday', 'tldr': 'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. This is a key topic to aid you in applying deep learning models to new types of data.', 'title': '05 - Loss Functions', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_k2t4xjkd', 'suggested_readings': 'UDL Chapter 5', 'score': -0.03567729015020071}\", 'page_content=\\'Example 1: univariate regression Pr(y|f[x, φ], σ²) = 1/√(2πσ²) · exp[−(y − f[x, φ])² / (2σ²)] Pr(y|µ, σ²) = 1/√(2πσ²) · exp[−(y − µ)² / (2σ²)] In this case, just the mean Just learn the mean, 𝜇, and assume the variance is fixed. 63 \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf\\', \\'page\\': 61, \\'date\\': \\'02/01/2024\\\\n Thursday\\', \\'tldr\\': \\'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. This is a key topic to aid you in applying deep learning models to new types of data.\\', \\'title\\': \\'05 - Loss Functions\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_k2t4xjkd\\', \\'suggested_readings\\': \\'UDL Chapter 5\\', \\'score\\': -0.038714982109774976}']]}\n",
"Step ID: 1ab30fe6-99cb-4327-bd7d-e3128b2c1a62\n", "User Input: What is inductive bias in CNNs?\n", "Output: Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.\n", "Context: ['page_content=\\'Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. 
As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 39 A function 𝐹[⋅] with parameters 𝜙\" that takes the node embeddings and adjacency matrix and outputs new node embeddings \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf\\', \\'page\\': 38, \\'date\\': \\'04/16/2024\\\\n Tuesday\\', \\'tldr\\': \\'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.\\', \\'title\\': \\'20 -- Graph Neural Networks\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_bt7qtymk\\', \\'suggested_readings\\': \\'UDL Chapter 13\\', \\'score\\': 0.4147438882496012}', \"page_content='https://poloclub.github.io/cnn-explainer/ 92 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 91, 'date': '02/27/2024\\\\n Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.30807206912986296}\", \"page_content='Convolutional networks • Networks for images • Invariance and equivariance • 1D convolution • Convolutional layers • Channels • Receptive fields • Convolutional network for MNIST 1D 44 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 43, 'date': '02/27/2024\\\\n Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.26420832659802906}\"]\n", "Step ID: d2b74775-41fc-4ebc-bc40-d8d7cf67015d\n", "User Input: Do transformers have this?\n", "Output: I'm not sure, I don't have that information.\n", "Context: [\"page_content='Transformers • Multi-headed Self Attention is just one component of the transformer architecture • Let’s look at a transformer block (or layer) from the encoder 49 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2451855994037282}\", \"page_content='Transformers • Motivation • Dot-product self-attention • Applying Self-Attention • The Transformer Architecture • Three Types of NLP Transformer Models • Encoder • Decoder • Encoder-Decoder 55 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 54, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the 
concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.24314965373956088}\", \"page_content='Transformers • Motivation • Dot-product self-attention • Applying Self-Attention • The Transformer Architecture • Three Types of NLP Transformer Models • Encoder • Decoder • Encoder-Decoder 10 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 9, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2367633946657962}\"]\n", "Step ID: fd58e85c-dbaa-4d06-8a75-a3839e37856d\n", "User Input: cool, suggest a project with transformers to use as my final project\n", "Output: You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\n", "Context: ['page_content=\\'Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 
39 A function š¹[ā‹…] with parameters šœ™\" that takes the node embeddings and adjacency matrix and outputs new node embeddings \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf\\', \\'page\\': 38, \\'date\\': \\'04/16/2024\\\\n Tuesday\\', \\'tldr\\': \\'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.\\', \\'title\\': \\'20 -- Graph Neural Networks\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_bt7qtymk\\', \\'suggested_readings\\': \\'UDL Chapter 13\\', \\'score\\': 0.3613770850497736}', \"page_content='Transformers ā€¢ Multi-headed Self Attention is just one component of the transformer architecture ā€¢ Letā€™s look at a transformer block (or layer) from the encoder 49 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2733153333305661}\", \"page_content='Transformers ā€¢ Multi-headed Self Attention is just one component of the transformer architecture 48 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 47, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.22982265165302218}\"]\n", "Step ID: 4d02c5ad-1bd9-4767-beab-4c724062093e\n", "User Input: Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?\n", "Output: Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!\n", "Context: ['page_content=\\'Image captioning 24 UDL ā€¢ E.g. CNN-RNN, LSTM, Transformers \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf\\', \\'page\\': 23, \\'date\\': \\'01/18/2024\\\\n Thursday\\', \\'tldr\\': \"We will introduce the topic of deep learning, a bit about it\\'s history, and what impact it has had. 
Then we\\'ll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\", \\'title\\': \\'01 - Intro to Deep Learning and Course Logistics\\', \\'lecture_recording\\': None, \\'suggested_readings\\': \\'UDL Chapter 1\\', \\'score\\': 0.552598095386973}', \"page_content='Next Time • Tokenization and Learned Embeddings • Training and Fine-Tuning Transformers • Image Transformers • Multimodal Transformers • … 76 Link Feedback ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 75, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.34481147697680925}\", \"page_content='Encoder-Decoder Model • Used for machine translation, which is a sequence-to-sequence task 22 https://jalammar.github.io/illustrated-transformer/ ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 21, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.33002046196039614}\"]\n", "Step ID: 9e061c1e-ad50-400f-983f-813587508223\n", "User Input: How do I learn the math behind transformers, what are the most important bits?\n", "Output: You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!\n", "Context: [\"page_content='After the break • Image Transformers • Multimodal Transformers • RAG pattern • Training and Fine-Tuning Transformers • … 68 Feedback ChatGPT ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 67, 'date': '03/07/2024\\\\n Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. 
We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.3366359998900601}\", \"page_content='UDL Chapter 11 Lecture 02/29/2024 Thursday 11a - Recurrent Neural Networks [slides] [lecture recording] Suggested Readings: UDL Chapter 11 Lecture 03/05/2024 Tuesday 12 - Transformers [slides] [lecture recording] Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer Discussion 03/06/2024 Wednesday Discussion_05 - Deep Dive 2: Deep Learning Modules in Pytorch (CNN, RNN/LSTM, Transformer) Github Link: disc5 Lecture' metadata={'source': 'https://dl4ds.github.io/sp2024/schedule/', 'page': 0, 'score': 0.3100015518546917}\", 'page_content=\"Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer 14 -- Vision & Multimodal Transformers tl;dr: In this lecture we\\'ll cover vision and multimodal transformers as a survey of three papers. [slides] [lecture recording] Suggested Readings: See slides for references 15 -- Improving LLM Perf tl;dr: In this lecture we talk about ways to improve LLM performance short of retraining or finetuning. We cover more sophisticated prompt strategies, retrieval augmentation and cognitive architectures building systems and agents based on LLMs. [slides] [lecture recording] Suggested Readings: See slides for references\" metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/lectures/\\', \\'page\\': 0, \\'score\\': 0.3078678255019791}']\n", "Step ID: 74ed6d18-6c08-4d65-b47a-4d8efbcdfc20\n", "User Input: What is attention?\n", "Output: Attention is a mechanism used in transformers to focus on different parts of the input when processing information. It allows the model to weigh the importance of different elements in the input sequence, enabling it to effectively capture relationships and dependencies. You can find more information about attention in the lecture slides on transformers. Good luck with your studies!\n", "Context: ['page_content=\\'Transformers, GPTs and Assistants A. Vaswani et al., “Attention is All you Need,” presented at the 31st Conference on Neural Information Processing Systems (NIPS 2017), Long Beach, CA, USA, 2017, p. 11. [Online]. Available: https://arxiv.org/abs/1706.03762 State of GPT, Andrej Karpathy, MS Build Keynote 38 \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf\\', \\'page\\': 37, \\'date\\': \\'01/18/2024\\\\n Thursday\\', \\'tldr\\': \"We will introduce the topic of deep learning, a bit about it\\'s history, and what impact it has had. 
Then we\\'ll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\", \\'title\\': \\'01 - Intro to Deep Learning and Course Logistics\\', \\'lecture_recording\\': None, \\'suggested_readings\\': \\'UDL Chapter 1\\', \\'score\\': 0.40361429640387503}', \"page_content='Motivation • Need to efficiently process large strings of text • Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2435349602286475}\", \"page_content='Graph Attention 59 Regular graph convolution Graph attention ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf', 'page': 58, 'date': '04/16/2024\\\\n Tuesday', 'tldr': 'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.', 'title': '20 -- Graph Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_bt7qtymk', 'suggested_readings': 'UDL Chapter 13', 'score': 0.22901789972755515}\"]\n", "Step ID: 4ac119fb-86f5-4528-9edc-44cc616de390\n", "User Input: cool thanks, see ya\n", "Output: You're welcome! If you have any more questions in the future, feel free to ask. Goodbye!\n", "Context: [\"page_content='Motivation • Need to efficiently process large strings of text • Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': -0.015666472239410423}\", \"page_content='Example 1: univariate regression 61 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf', 'page': 59, 'date': '02/01/2024\\\\n Thursday', 'tldr': 'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. 
This is a key topic to aid you in applying deep learning models to new types of data.', 'title': '05 - Loss Functions', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_k2t4xjkd', 'suggested_readings': 'UDL Chapter 5', 'score': -0.03567729015020071}\", 'page_content=\\'Example 1: univariate regression Pr(y|f[x, φ], σ²) = 1/√(2πσ²) · exp[−(y − f[x, φ])² / (2σ²)] Pr(y|µ, σ²) = 1/√(2πσ²) · exp[−(y − µ)² / (2σ²)] In this case, just the mean Just learn the mean, 𝜇, and assume the variance is fixed. 63 \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf\\', \\'page\\': 61, \\'date\\': \\'02/01/2024\\\\n Thursday\\', \\'tldr\\': \\'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. This is a key topic to aid you in applying deep learning models to new types of data.\\', \\'title\\': \\'05 - Loss Functions\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_k2t4xjkd\\', \\'suggested_readings\\': \\'UDL Chapter 5\\', \\'score\\': -0.038714982109774976}']\n" ] } ], "source": [ "import os\n", "\n", "from dotenv import load_dotenv\n", "from literalai import LiteralClient\n", "\n", "from trulens_eval import Select\n", "from trulens_eval.tru_virtual import VirtualApp, VirtualRecord\n", "\n", "# Static metadata describing the app that produced the logs; all of these\n", "# fields are optional.\n", "virtual_app = dict(\n", "    llm=dict(modelname=\"AI Tutor - GPT3 - FAISS\"),\n", "    template=\"information about the template I used in my app\",\n", "    debug=\"all of these fields are completely optional\",\n", ")\n", "\n", "virtual_app = VirtualApp(virtual_app)  # can start with the prior dictionary\n", "virtual_app[Select.RecordCalls.llm.maxtokens] = 1024\n", "\n", "# Virtual components that the replayed calls will be attributed to.\n", "retriever = Select.RecordCalls.retriever\n", "synthesizer = Select.RecordCalls.synthesizer\n", "\n", "virtual_app[retriever] = \"retriever\"\n", "virtual_app[synthesizer] = \"synthesizer\"\n", "\n", "load_dotenv()\n", "\n", "literal_client = LiteralClient()\n", "\n", "threads = literal_client.api.get_threads(first=1).data\n", "print(threads)\n", "\n", "# Pull out only the RAG steps from each logged thread.\n", "rag_steps = []\n", "for thread in threads:\n", "    rag_steps.extend([step for step in thread.steps if step.name == \"RAG\"])\n", "\n", "save_log_dict = {\"step_id\": [], \"user_input\": [], \"output\": [], \"context\": []}\n", "for step in rag_steps:\n", "    save_log_dict[\"step_id\"].append(step.id)\n", "    save_log_dict[\"user_input\"].append(step.input[\"question\"])\n", "    save_log_dict[\"output\"].append(step.output[\"answer\"])\n", "    save_log_dict[\"context\"].append(step.output[\"source_documents\"])\n", "\n",
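"# Optional sanity check (a minimal sketch, not required by TruLens): the four\n", "# lists in save_log_dict are aligned by index, one row per logged RAG step, so\n", "# they must all have the same length before we replay them as virtual records\n", "# below.\n", "assert (\n", "    len(save_log_dict[\"step_id\"])\n", "    == len(save_log_dict[\"user_input\"])\n", "    == len(save_log_dict[\"output\"])\n", "    == len(save_log_dict[\"context\"])\n", "), \"save_log_dict columns must be index-aligned\"\n", "\n",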
"print(save_log_dict)\n", "\n", "records = []\n", "for i in range(len(save_log_dict[\"step_id\"])):\n", " step_id = save_log_dict[\"step_id\"][i]\n", " user_input = save_log_dict[\"user_input\"][i]\n", " output = save_log_dict[\"output\"][i]\n", " context = save_log_dict[\"context\"][i]\n", "\n", " print(f\"Step ID: {step_id}\")\n", " print(f\"User Input: {user_input}\")\n", " print(f\"Output: {output}\")\n", " print(f\"Context: {context}\")\n", "\n", " from trulens_eval.tru_virtual import VirtualRecord\n", "\n", " # The selector for a presumed context retrieval component's call to\n", " # `get_context`. The names are arbitrary but may be useful for readability on\n", " # your end.\n", " context_call = retriever.get_context\n", " generation = synthesizer.generate\n", "\n", " rec = VirtualRecord(\n", " main_input=f\"{user_input}\",\n", " main_output=f\"{output}\",\n", " calls={\n", " context_call: dict(args=[f\"{user_input}\"], rets=[f\"{output}\"]),\n", " generation: dict(\n", " args=[f\"{context}\"],\n", " rets=[f\"{output}\"],\n", " ),\n", " },\n", " )\n", "\n", " records.append(rec)\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "āœ… In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .\n", "āœ… In context_relevance_with_cot_reasons, input context will be set to __record__.app.retriever.get_context.rets[:] .\n", "āœ… In Groundedness, input source will be set to __record__.app.retriever.get_context.rets[:].collect() .\n", "āœ… In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .\n", "āœ… In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .\n", "āœ… In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .\n" ] } ], "source": [ "from trulens_eval.feedback.provider import OpenAI\n", "from trulens_eval.feedback.feedback import Feedback\n", "\n", "# Initialize provider class\n", "provider = OpenAI()\n", "\n", "# Select context to be used in feedback. We select the return values of the\n", "# virtual `get_context` call in the virtual `retriever` component. Names are\n", "# arbitrary except for `rets`.\n", "context = context_call.rets[:]\n", "\n", "# Question/statement relevance between question and each context chunk.\n", "f_context_relevance = (\n", " Feedback(provider.context_relevance_with_cot_reasons)\n", " .on_input()\n", " .on(context)\n", ")\n", "\n", "# Define a groundedness feedback function\n", "f_groundedness = (\n", " Feedback(provider.groundedness_measure_with_cot_reasons, name = \"Groundedness\")\n", " .on(context.collect())\n", " .on_output()\n", ")\n", "\n", "# Question/answer relevance between overall question and answer.\n", "f_qa_relevance = (\n", " Feedback(provider.relevance_with_cot_reasons, name = \"Answer Relevance\")\n", " .on_input_output()\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "šŸ¦‘ Tru initialized with db url sqlite:///default.sqlite .\n", "šŸ›‘ Secret keys may be written to the database. 
See the `database_redact_keys` option of `Tru` to prevent this.\n" ] } ], "source": [ "from trulens_eval.tru_virtual import TruVirtual\n", "\n", "virtual_recorder = TruVirtual(\n", "    app_id=\"AI Tutor - GPT3 - FAISS\",\n", "    app=virtual_app,\n", "    feedbacks=[f_context_relevance, f_groundedness, f_qa_relevance],\n", "    feedback_mode=\"deferred\",  # optional\n", ")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "for record in records:\n", "    virtual_recorder.add_record(record)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Force stopping dashboard ...\n", "Starting dashboard ...\n", "Config file already exists. Skipping writing process.\n", "Credentials file already exists. Skipping writing process.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c3eb563d17e84983ad0d8eca36904ddb", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Dashboard started at http://10.0.0.226:8501 .\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from trulens_eval import Tru\n", "tru = Tru()\n", "\n", "tru.run_dashboard(force=True)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Will keep max of 32 feedback(s) running.\n", "Tasks are spread among max of 128 thread(s).\n", "Will rerun running feedbacks after a minute.\n", "Will rerun failed feedbacks after 5 minutes.\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "df51361ac4664845b804e574497e1b58", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Feedback Status: 80%|######## | 84/105 [00:00