{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
    "s"
   ]
  },
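  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The final cell below prints a raw thread export from the chat logging backend: each thread carries a `steps` list whose entries are typed `retrieval`, `user_message`, or `assistant_message`. As a minimal sketch (assuming the export has already been parsed into a Python list named `threads`, which this notebook does not define), the helper in the next cell walks one thread and summarizes its RAG trace using only the fields visible in that output."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Minimal sketch: summarize one exported thread dict. `threads` is assumed to be\n",
    "# a list shaped like the JSON printed in the final cell's output (an assumption,\n",
    "# not a backend API).\n",
    "def summarize_thread(thread):\n",
    "    print(f\"Thread {thread['id']} ({thread.get('name', 'unnamed')})\")\n",
    "    for step in thread.get(\"steps\", []):\n",
    "        if step[\"type\"] == \"user_message\":\n",
    "            print(\"  USER     :\", step[\"output\"][\"content\"])\n",
    "        elif step[\"type\"] == \"assistant_message\":\n",
    "            print(\"  AI_TUTOR :\", step[\"output\"][\"content\"])\n",
    "        elif step[\"type\"] == \"retrieval\":\n",
    "            docs = step[\"output\"].get(\"source_documents\", [])\n",
    "            rq = step[\"output\"].get(\"rephrased_question\")\n",
    "            print(f\"  RETRIEVAL: {len(docs)} source documents for {rq!r}\")\n",
    "\n",
    "# Example usage once `threads` exists:\n",
    "# summarize_thread(threads[0])"
   ]
  },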
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{\n",
      "    \"createdAt\": \"2024-06-19T09:44:35.578Z\",\n",
      "    \"id\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "    \"metadata\": {},\n",
      "    \"name\": \"TEST\",\n",
      "    \"participant\": {},\n",
      "    \"steps\": [\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:44:48.221\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"1ab30fe6-99cb-4327-bd7d-e3128b2c1a62\",\n",
      "            \"input\": {\n",
      "                \"question\": \"What is inductive bias in CNNs?\"\n",
      "            },\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"RAG\",\n",
      "            \"output\": {\n",
      "                \"answer\": \"Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.\",\n",
      "                \"chat_history\": [],\n",
      "                \"original_question\": \"What is inductive bias in CNNs?\",\n",
      "                \"question\": \"What is inductive bias in CNNs?\",\n",
      "                \"rephrased_question\": \"What is inductive bias in CNNs?\",\n",
      "                \"source_documents\": [\n",
      "                    \"page_content='Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 39 A function \\ud835\\udc39[\\u22c5] with parameters \\ud835\\udf19\\\" that takes the node embeddings and adjacency matrix and outputs new node embeddings ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf', 'page': 38, 'date': '04/16/2024\\\\n     Tuesday', 'tldr': 'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.', 'title': '20 -- Graph Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_bt7qtymk', 'suggested_readings': 'UDL Chapter 13', 'score': 0.4147438882496012}\",\n",
      "                    \"page_content='https://poloclub.github.io/cnn-explainer/ 92 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 91, 'date': '02/27/2024\\\\n     Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.30807206912986296}\",\n",
      "                    \"page_content='Convolutional networks \\u2022 Networks for images \\u2022 Invariance and equivariance \\u2022 1D convolution \\u2022 Convolutional layers \\u2022 Channels \\u2022 Receptive fields \\u2022 Convolutional network for MNIST 1D 44 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 43, 'date': '02/27/2024\\\\n     Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.26420832659802906}\"\n",
      "                ]\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:44:46.84\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"retrieval\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:44:48.223\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"18af26e1-e738-4304-b484-1fd7bd5a26e6\",\n",
      "            \"input\": null,\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"User\",\n",
      "            \"output\": {\n",
      "                \"content\": \"What is inductive bias in CNNs?\"\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:44:48.223\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"user_message\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:44:48.225\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"067c2783-38ff-43e4-866a-abf957415359\",\n",
      "            \"input\": null,\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"AI_Tutor\",\n",
      "            \"output\": {\n",
      "                \"content\": \"Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.\"\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:44:48.225\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"assistant_message\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:44:58.148\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"d2b74775-41fc-4ebc-bc40-d8d7cf67015d\",\n",
      "            \"input\": {\n",
      "                \"question\": \"Do transformers have this?\"\n",
      "            },\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"RAG\",\n",
      "            \"output\": {\n",
      "                \"answer\": \"I'm not sure, I don't have that information.\",\n",
      "                \"chat_history\": [\n",
      "                    \"content='What is inductive bias in CNNs?'\",\n",
      "                    \"content='Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.'\"\n",
      "                ],\n",
      "                \"original_question\": \"Do transformers have this?\",\n",
      "                \"question\": \"Do transformers have this?\",\n",
      "                \"rephrased_question\": \"Do transformers exhibit a similar inductive bias as CNNs?\",\n",
      "                \"source_documents\": [\n",
      "                    \"page_content='Transformers \\u2022 Multi-headed Self Attention is just one component of the transformer architecture \\u2022 Let\\u2019s look at a transformer block (or layer) from the encoder 49 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2451855994037282}\",\n",
      "                    \"page_content='Transformers \\u2022 Motivation \\u2022 Dot-product self-attention \\u2022 Applying Self-Attention \\u2022 The Transformer Architecture \\u2022 Three Types of NLP Transformer Models \\u2022 Encoder \\u2022 Decoder \\u2022 Encoder-Decoder 55 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 54, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.24314965373956088}\",\n",
      "                    \"page_content='Transformers \\u2022 Motivation \\u2022 Dot-product self-attention \\u2022 Applying Self-Attention \\u2022 The Transformer Architecture \\u2022 Three Types of NLP Transformer Models \\u2022 Encoder \\u2022 Decoder \\u2022 Encoder-Decoder 10 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 9, 'date': '03/07/2024\\\\n     Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2367633946657962}\"\n",
      "                ]\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:44:56.486\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"retrieval\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:44:58.15\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"2ec55d74-cce5-42d4-827e-375d597cf91c\",\n",
      "            \"input\": null,\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"User\",\n",
      "            \"output\": {\n",
      "                \"content\": \"Do transformers have this?\"\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:44:58.15\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"user_message\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:44:58.151\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"703534b4-981a-43a3-8571-773367be2fa5\",\n",
      "            \"input\": null,\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"AI_Tutor\",\n",
      "            \"output\": {\n",
      "                \"content\": \"I'm not sure, I don't have that information.\"\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:44:58.151\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"assistant_message\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:45:13.1\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"fd58e85c-dbaa-4d06-8a75-a3839e37856d\",\n",
      "            \"input\": {\n",
      "                \"question\": \"cool, suggest a project with transformers to use as my final project\"\n",
      "            },\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"RAG\",\n",
      "            \"output\": {\n",
      "                \"answer\": \"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\",\n",
      "                \"chat_history\": [\n",
      "                    \"content='What is inductive bias in CNNs?'\",\n",
      "                    \"content='Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.'\",\n",
      "                    \"content='Do transformers have this?'\",\n",
      "                    \"content=\\\"I'm not sure, I don't have that information.\\\"\"\n",
      "                ],\n",
      "                \"original_question\": \"cool, suggest a project with transformers to use as my final project\",\n",
      "                \"question\": \"cool, suggest a project with transformers to use as my final project\",\n",
      "                \"rephrased_question\": \"What is the concept of inductive bias in transformers, and how does it relate to the prioritization of information from neighboring nodes like in CNNs?\",\n",
      "                \"source_documents\": [\n",
      "                    \"page_content='Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 39 A function \\ud835\\udc39[\\u22c5] with parameters \\ud835\\udf19\\\" that takes the node embeddings and adjacency matrix and outputs new node embeddings ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf', 'page': 38, 'date': '04/16/2024\\\\n     Tuesday', 'tldr': 'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.', 'title': '20 -- Graph Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_bt7qtymk', 'suggested_readings': 'UDL Chapter 13', 'score': 0.3613770850497736}\",\n",
      "                    \"page_content='Transformers \\u2022 Multi-headed Self Attention is just one component of the transformer architecture \\u2022 Let\\u2019s look at a transformer block (or layer) from the encoder 49 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2733153333305661}\",\n",
      "                    \"page_content='Transformers \\u2022 Multi-headed Self Attention is just one component of the transformer architecture 48 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 47, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.22982265165302218}\"\n",
      "                ]\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:45:10.667\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"retrieval\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:45:13.102\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"c86489d2-cbf3-475e-867e-d1b20addedf0\",\n",
      "            \"input\": null,\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"User\",\n",
      "            \"output\": {\n",
      "                \"content\": \"cool, suggest a project with transformers to use as my final project\"\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:45:13.102\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"user_message\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:45:13.103\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"75eb694e-4b1b-4161-96ea-9ecec4092f53\",\n",
      "            \"input\": null,\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"AI_Tutor\",\n",
      "            \"output\": {\n",
      "                \"content\": \"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\"\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:45:13.103\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"assistant_message\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:45:26.404\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"4d02c5ad-1bd9-4767-beab-4c724062093e\",\n",
      "            \"input\": {\n",
      "                \"question\": \"Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?\"\n",
      "            },\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"RAG\",\n",
      "            \"output\": {\n",
      "                \"answer\": \"Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!\",\n",
      "                \"chat_history\": [\n",
      "                    \"content='What is inductive bias in CNNs?'\",\n",
      "                    \"content='Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.'\",\n",
      "                    \"content='Do transformers have this?'\",\n",
      "                    \"content=\\\"I'm not sure, I don't have that information.\\\"\",\n",
      "                    \"content='cool, suggest a project with transformers to use as my final project'\",\n",
      "                    \"content=\\\"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\\\"\"\n",
      "                ],\n",
      "                \"original_question\": \"Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?\",\n",
      "                \"question\": \"Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?\",\n",
      "                \"rephrased_question\": \"What is image captioning and how are transformers beneficial for this task?\",\n",
      "                \"source_documents\": [\n",
      "                    \"page_content='Image captioning 24 UDL \\u2022 E.g. CNN-RNN, LSTM, Transformers ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf', 'page': 23, 'date': '01/18/2024\\\\n     Thursday', 'tldr': \\\"We will introduce the topic of deep learning, a bit about it's history, and what impact it has had. Then we'll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\\\", 'title': '01 - Intro to Deep Learning and Course Logistics', 'lecture_recording': None, 'suggested_readings': 'UDL Chapter 1', 'score': 0.552598095386973}\",\n",
      "                    \"page_content='Next Time \\u2022 Tokenization and Learned Embeddings \\u2022 Training and Fine-Tuning Transformers \\u2022 Image Transformers \\u2022 Multimodal Transformers \\u2022 \\u2026 76 Link Feedback ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 75, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.34481147697680925}\",\n",
      "                    \"page_content='Encoder-Decoder Model \\u2022 Used for machine translation, which is a sequence-to-sequence task 22 https://jalammar.github.io/illustrated-transformer/ ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 21, 'date': '03/07/2024\\\\n     Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.33002046196039614}\"\n",
      "                ]\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:45:23.305\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"retrieval\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:45:26.405\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"3cce51a5-4bcc-42f0-b186-063cb713856a\",\n",
      "            \"input\": null,\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"User\",\n",
      "            \"output\": {\n",
      "                \"content\": \"Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?\"\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:45:26.405\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"user_message\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:45:26.407\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"36ae0e9d-540f-45e9-9adf-a596d0886304\",\n",
      "            \"input\": null,\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"AI_Tutor\",\n",
      "            \"output\": {\n",
      "                \"content\": \"Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!\"\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:45:26.407\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"assistant_message\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:45:48.729\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"9e061c1e-ad50-400f-983f-813587508223\",\n",
      "            \"input\": {\n",
      "                \"question\": \"How do I learn the math behind transformers, what are the most important bits?\"\n",
      "            },\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"RAG\",\n",
      "            \"output\": {\n",
      "                \"answer\": \"You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!\",\n",
      "                \"chat_history\": [\n",
      "                    \"content='Do transformers have this?'\",\n",
      "                    \"content=\\\"I'm not sure, I don't have that information.\\\"\",\n",
      "                    \"content='cool, suggest a project with transformers to use as my final project'\",\n",
      "                    \"content=\\\"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\\\"\",\n",
      "                    \"content='Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?'\",\n",
      "                    \"content='Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!'\"\n",
      "                ],\n",
      "                \"original_question\": \"How do I learn the math behind transformers, what are the most important bits?\",\n",
      "                \"question\": \"How do I learn the math behind transformers, what are the most important bits?\",\n",
      "                \"rephrased_question\": \"What are the key components I should focus on when learning the mathematics behind transformers?\",\n",
      "                \"source_documents\": [\n",
      "                    \"page_content='After the break \\u2022 Image Transformers \\u2022 Multimodal Transformers \\u2022 RAG pattern \\u2022 Training and Fine-Tuning Transformers \\u2022 \\u2026 68 Feedback ChatGPT ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 67, 'date': '03/07/2024\\\\n     Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.3366359998900601}\",\n",
      "                    \"page_content='UDL Chapter 11  Lecture 02/29/2024  Thursday  11a - Recurrent Neural Networks     [slides]  [lecture recording]    Suggested Readings: UDL Chapter 11  Lecture 03/05/2024  Tuesday  12 - Transformers     [slides]  [lecture recording]    Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer  Discussion 03/06/2024  Wednesday  Discussion_05 - Deep Dive 2: Deep Learning Modules in Pytorch (CNN, RNN/LSTM, Transformer)  Github Link: disc5 Lecture' metadata={'source': 'https://dl4ds.github.io/sp2024/schedule/', 'page': 0, 'score': 0.3100015518546917}\",\n",
      "                    \"page_content=\\\"Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer   14 -- Vision & Multimodal Transformers tl;dr: In this lecture we'll cover vision and multimodal transformers as a survey of three papers.      [slides]  [lecture recording]     Suggested Readings: See slides for references   15 -- Improving LLM Perf tl;dr: In this lecture we talk about ways to improve LLM performance short of retraining or finetuning. We cover more sophisticated prompt strategies, retrieval augmentation and cognitive architectures building systems and agents based on LLMs.      [slides]  [lecture recording]     Suggested Readings: See slides for references\\\" metadata={'source': 'https://dl4ds.github.io/sp2024/lectures/', 'page': 0, 'score': 0.3078678255019791}\"\n",
      "                ]\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:45:46.877\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"retrieval\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:45:48.731\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"9cd0e108-9097-4372-97d5-bb2624a03548\",\n",
      "            \"input\": null,\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"User\",\n",
      "            \"output\": {\n",
      "                \"content\": \"How do I learn the math behind transformers, what are the most important bits?\"\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:45:48.731\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"user_message\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:45:48.733\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"8c9cf79f-767d-4184-9eeb-baf6e867653e\",\n",
      "            \"input\": null,\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"AI_Tutor\",\n",
      "            \"output\": {\n",
      "                \"content\": \"You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!\"\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:45:48.733\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"assistant_message\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:46:02.781\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"74ed6d18-6c08-4d65-b47a-4d8efbcdfc20\",\n",
      "            \"input\": {\n",
      "                \"question\": \"What is attention?\"\n",
      "            },\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"RAG\",\n",
      "            \"output\": {\n",
      "                \"answer\": \"Attention is a mechanism used in transformers to focus on different parts of the input when processing information. It allows the model to weigh the importance of different elements in the input sequence, enabling it to effectively capture relationships and dependencies. You can find more information about attention in the lecture slides on transformers. Good luck with your studies!\",\n",
      "                \"chat_history\": [\n",
      "                    \"content='cool, suggest a project with transformers to use as my final project'\",\n",
      "                    \"content=\\\"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\\\"\",\n",
      "                    \"content='Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?'\",\n",
      "                    \"content='Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!'\",\n",
      "                    \"content='How do I learn the math behind transformers, what are the most important bits?'\",\n",
      "                    \"content='You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!'\"\n",
      "                ],\n",
      "                \"original_question\": \"What is attention?\",\n",
      "                \"question\": \"What is attention?\",\n",
      "                \"rephrased_question\": \"What is attention?\",\n",
      "                \"source_documents\": [\n",
      "                    \"page_content='Transformers, GPTs and Assistants A. Vaswani et al., \\u201cAttention is All you Need,\\u201d presented at the 31st Conference on Neural Information Processing Systems (NIPS 2017), Long Beach, CA, USA, 2017, p. 11. [Online]. Available: https://arxiv.org/abs/1706.03762 State of GPT, Andrej Karpathy, MS Build Keynote 38 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf', 'page': 37, 'date': '01/18/2024\\\\n     Thursday', 'tldr': \\\"We will introduce the topic of deep learning, a bit about it's history, and what impact it has had. Then we'll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\\\", 'title': '01 - Intro to Deep Learning and Course Logistics', 'lecture_recording': None, 'suggested_readings': 'UDL Chapter 1', 'score': 0.40361429640387503}\",\n",
      "                    \"page_content='Motivation \\u2022 Need to efficiently process large strings of text \\u2022 Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2435349602286475}\",\n",
      "                    \"page_content='Graph Attention 59 Regular graph convolution Graph attention ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf', 'page': 58, 'date': '04/16/2024\\\\n     Tuesday', 'tldr': 'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.', 'title': '20 -- Graph Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_bt7qtymk', 'suggested_readings': 'UDL Chapter 13', 'score': 0.22901789972755515}\"\n",
      "                ]\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:46:00.606\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"retrieval\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:46:02.783\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"718cdca2-b453-4b3a-9e0b-fbcdd38c54c7\",\n",
      "            \"input\": null,\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"User\",\n",
      "            \"output\": {\n",
      "                \"content\": \"What is attention?\"\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:46:02.783\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"user_message\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:46:02.785\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"ae559dc2-c069-419e-b0e5-f117861292f8\",\n",
      "            \"input\": null,\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"AI_Tutor\",\n",
      "            \"output\": {\n",
      "                \"content\": \"Attention is a mechanism used in transformers to focus on different parts of the input when processing information. It allows the model to weigh the importance of different elements in the input sequence, enabling it to effectively capture relationships and dependencies. You can find more information about attention in the lecture slides on transformers. Good luck with your studies!\"\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:46:02.785\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"assistant_message\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:46:10.547\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"4ac119fb-86f5-4528-9edc-44cc616de390\",\n",
      "            \"input\": {\n",
      "                \"question\": \"cool thanks, see ya\"\n",
      "            },\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"RAG\",\n",
      "            \"output\": {\n",
      "                \"answer\": \"You're welcome! If you have any more questions in the future, feel free to ask. Goodbye!\",\n",
      "                \"chat_history\": [\n",
      "                    \"content='Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?'\",\n",
      "                    \"content='Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!'\",\n",
      "                    \"content='How do I learn the math behind transformers, what are the most important bits?'\",\n",
      "                    \"content='You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!'\",\n",
      "                    \"content='What is attention?'\",\n",
      "                    \"content='Attention is a mechanism used in transformers to focus on different parts of the input when processing information. It allows the model to weigh the importance of different elements in the input sequence, enabling it to effectively capture relationships and dependencies. You can find more information about attention in the lecture slides on transformers. Good luck with your studies!'\"\n",
      "                ],\n",
      "                \"original_question\": \"cool thanks, see ya\",\n",
      "                \"question\": \"cool thanks, see ya\",\n",
      "                \"rephrased_question\": \"No rephrasing needed.\",\n",
      "                \"source_documents\": [\n",
      "                    \"page_content='Motivation \\u2022 Need to efficiently process large strings of text \\u2022 Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': -0.015666472239410423}\",\n",
      "                    \"page_content='Example 1: univariate regression 61 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf', 'page': 59, 'date': '02/01/2024\\\\n     Thursday', 'tldr': 'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. This is a key topic to aid you in applying deep learning models to new types of data.', 'title': '05 - Loss Functions', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_k2t4xjkd', 'suggested_readings': 'UDL Chapter 5', 'score': -0.03567729015020071}\",\n",
      "                    \"page_content='Example 1: univariate regression <latexit sha1_ba se64=\\\"VrHufIDtMmytgQ3rO/QqNtGUe l4=\\\">AXBXiclZjZbtw2FEDHXdN0c1 rELwVaoUaApHAM20iXlwKJHWezU0/iN bEcg9JQGsYUJUuUPRN1not+TN+KvY7 +hX9hV5K8jC6l37oAPEw9xul5TEUZB JUeilpX9m3n3vfc/+PDKR1c/uTz 6fvfbFXpGWech3w1Sm+UHACi6F4rta MkPspyzJB8PzhZM3z/jOeFSNWOHmf8 KGxEpEImYbQ8ezv/fzm+Fc/CdJRFU0 O/SAaLcCfbCiOFvxCxAl7tXL+9nzo5 yF1fKk8ovTXFcrfiZaXK1MJhOfjzJf8 kgf3m7Mm+PbzkZv1T7Ut5X9XMRDfXQ8 O7+0uFR/PFpYbgvzvfbTP752feAP0rB MuNKhZEVxuLyU6aOK5VqEk+u+mXBMx aesJgfQlGxhBdHVZ2ziXcDIgMvSnP4p 7RXR9+uUbGkKMZJAGbC9LDAzARd7LDU 0U9HlVBZqbkKm46iUno69cwCeAOR81D LMRYmAsYqxcOGeRMwzJd9RU/D9MkYW pQ+avrzyDhAY+FqvhpWS/ZNJ1muHQ /EyY/XxzrQVoXki3nDSK2YRi4ReDyp Kr4YL2IgOACxyAlIFS+gTZOfIPKWEYU tKgFXzR6BveE9n5CmleYx5KSjvSQaFD LJRx1rjViwlElH2QbF8254BnCdwyrAU OGLozXYzpiaXNTfKTzpCpMDPeQMxXz uguYcsikmVHXUKWUDXsWL9g6zlTJ23 i0qweam4iyNrJu47OaV7UoOvUEWTBJo y7Vh1BloQbyoAlDLclo9hwolnIm5VK KwKsjH7eRp0+85MBO/NUQbXS9dbr0j6 zxjKiAnA1We+BVMh7+pr6dT2LpJzVvu mwEfeEBarW4XlcTOti05gVm1sQs06V8 ik2YJQnp53TMah8oz0Z2gCeCLrsyFi t7SFuoSbFkT9hdgqnkp+eHtxe/56Kha MpeN+UOyCQ0VZeZqyIT/R0MDeITh/QU RvHipRIsHgXrxUgn3d7R0LMcb20TqtY OCUEwKPUaXv4hVt04dwYNEzRWCJh24 ZsJhRY5irqyCRgZvuFh7NhAIZpk2Mwx lGlR5pzc/NB+hkitm9tiLszDqntDlUb o3je4nNaCMjwczvgl1QOU0aDJZ5CWas BylMyRWdLRK7/QcIm5rv56yZui04r56 UbH4wLVqcMQ356vIHXIyYWdSRqC04/ zrYksRz9QVvT7fr2yKqNV9+RrR07XLc pSbvtKN2w71kBPx0zHaTeIRizoStd WOkHrEcvQHbnzuOmahcN1m5K0e5FHp +1wpyba/tHOkGtmjkmpHJhjXyr9JoRF TUXtFNOEx0hsQlhMyq4F/8fKtjksd60 mhMV+IbqaCWBpwCWeQhPCYnMJd802ht VNh7rpVpnMhshsQlh8yBI86yaExZiKs VM8YVmGxCZE8jEeRzSPGZYylwSXpHM sSJkS7k2VD5Mu5IJYGmEehs5OoMRyFS hDtsglgu68wrnzlNoFyu6i3dHe9e0r FmqETwNIWucY8f8t5kQU4xXDMciU5E 8jKaAL72OlT5+L0F0QVOckF0djSMaXn lp5Tum/pPqW5peQXQRA9t5T8OgmiM0v PKN2zdI/S0tKS0l1LdymNLI0ofWDpA0 pDS0NK1yxdo1RbSk6k8ESwdIfSoaVDS g8sPaD0haUvKH1k6SNKX1r6ktI3lr6h 9J6l9yhljJK1y1dp5RbSl4dBNGqpau UBpaS35wrVnapzSzNKP0vqX3KR1YSn 4Vw/PMUnK8gQejpZLSx5Y+plRYSn6/B dFTS59SmliaUPrE0ieUvrb0NaUPLX1I aWwpeTcApxNLtym1b4GqgtJnlj6j9NT SU/d7AT5dxsC1MbdsA1uUpamlG5YSn 4pwFHC0hNynoxUe1e7eNtE7muRmnIHa zN+UZvkPFJT7mDt3emiNrk/RWrKh2To 63vTFymQUrjTH8/OL+O3sLSwt7K4/MP inWd35u+utm9or/S+6n3bu9lb7v3Yu9 t71Ov3dnth79+Z6zNfz3wz9vcH3N/z v3VqO/MtHW+7HU+c3/B3l+BUI=</la texit> Pr(y|f[x, \\u03c6], \\u03c32) = 1 p 2\\u21e1\\u03c32 exp \\\\uf8ff \\u2212(y \\u2212f[x, \\u03c6])2 2\\u03c32 \\\" <latexit sha1_ba se64=\\\"sL5nmFJHMlFiWTdFxaDUKnbUr Jg=\\\">AW4XiclZjLbtw2FECVPtP05b SoN90INQIkRWLYRvrYFEjsOC879Th+J pZjUBpKw5iZImyZ6LOB3RXdNtP6hf0 M7ptN73UaIbRvfSiAzjD3HPExyUpcRT mUpR6aemvK+8+97H3x49aNrH3/y6W efz13/Yr/MqiLie1Ems+IwZCWXQvE9L bTkh3nBWRpKfhCerhl+cM6LUmRqV49y fpyRIlYRExD6GSO94qbo1+CtLodlCJ J2auVW/5PfhAXLKqXx3VQnhW6Xgly0e J6ZTweB3yYB5LH+ujOxLw5ugNV3Go2 FYNCpEM9PHJ3MLS4lLz8WlhuS0seO2n d3L9q37Qz6Iq5UpHkpXl0fJSro9rVmg RST6+FlQlz1l0yhJ+BEXFUl4e10+xv 4NiPT9OCvgT2m/ib59Rc3SshylIZgp0 4MSMxN0saNKxz8e10LleYqmjQUV9LX mW+S6/dFwSMtR1BgUSGgr340YJAhDVN wLVD8IsrSlKl+Hayub0N6Q54IVfOzqp mO8bjrDcOh+JlxuqT3VktQvNUvOGk kYxlVwi8GRc13wxWcRAcABikROQKV5C nSY/YewvIwrLTwIGHmZD6FzsPx+TqpX mCeSko70kGhRyYcda41YMJVpR9kBxf dv+AZwXcAsQFfhi6M52MmZGk+v03yoi 7QuTQy3UDCV8KYJGHLEpBlR1CVlHBp 1LF+xtZzpk7bxGV509XCRJC1W3QdXdC 8qH7XaSLIgkWYdK0mgiwJN4s+SxlkuS 2fwIBT30TcqlBYFWRh9os7Ladmwhem 8Mc9kvXW69J+s8ZyogJwO4z34KpiHf1 tWxm+9PknDe+KfChP4DJ6l7CimQyrGk jMKo2NqZmkytk0mxBqMguqbpjUPlue gO0ATwpqsKoeK3tNtNCZasCQe3YahFJ fnRncXv+PC4XjLbxvxDsgkVlVXuqsiE /0dFfXg84fUFETx5mUSTB4Fm8jIJ93c 0dazAC9tEmrmDglBMCj1C218kqntNE8 GdzVLUVwiYeuGbCYUmOY67sgkYGb7hQ etYQBEaZDQZYySzsio4ufmh9QyRje3 xUKYh1X3hiqN0L1vcDm7CsrwcDjnl1w 
eoyGk3yGWaX6rEDJHJopHb4KSg1bzL X7mymfFJ1Wws82vagXzA7VRTxs5MNP B8JsagjUV1wsnHWJYnlaA/qmi3Xt3tW b7z6liztxOG6TUnqbXvpth3uJT3gZ5u O3m4Sj1jUkaiutofUI5ajPajLncdN1y gcrtuUpN5pHp2w52ZaPnHuwOumTkmZ bJvjn2ZDCYhLGoqaqeYpTxB4iSExbTq WvB/rOyYw3LXmoSw2CtFVzMBLPW5xEO YhLA42cJds41hdOhbrpVJvMBMichLD 5iKR71JITFhIqJUzxleY7ESYjkcYDzO KB5zLGUuyQ8I7ljRsiSci2oYpB1JRPA 0hC1NnQ0Bj2QmUINtkEsl3Tlc6Vp9A qVnQV7ka3rukYc1QhSaApS2yx/xgy7 nJQpxiOGa5kpwLZOU0gT3s9KgzPf2Fc U1OcmE8snRE6YWlF5QeWHpAaWEp+UQ xs8tJb9Owvjc0nNK9y3dp7SytKJ0z9I 9SmNLY0ofWvqQ0sjSiNI1S9co1ZaSEy k8ESzdpXRg6YDSQ0sPKX1h6QtKH1v6m NKXlr6k9I2lbyi9b+l9SpmljNJ1S9cp 5ZaSVwdhvGrpKqWhpeS3H+w1S3uU5pb mlD6w9AGlfUvJr2J4nlKjfwYLRUv rE0ieUCkvJ7cwfmbpM0pTS1NKn1r6l NLXlr6m9JGljyhNLCXvBuB0YukOpfYt UF1Sum3pNqVnlp653wvw2TSGroW5ZSv YojSzNKN0w1LySwGOEpaekvNkrNq72v RtE7mvxWrGHazN+PRqkvNYzbiDtXen6 dXk/hSrGR+Qrq/vz16kQErhTn8yt7CM 38LSwv7K4vL3i3e37y7cW23f0F71va +8W56y94P3j3vsdfz9rzI+9P72/vH+3 c+mv91/rf53yfqO1fa70Op/5P/4D+ G/4mg=</latexit> Pr(y|\\u00b5, \\u03c32) = 1 p 2\\u21e1\\u03c32 exp \\\\uf8ff \\u2212(y \\u2212\\u00b5)2 2\\u03c32 \\\" In this case, just the mean Just learn the mean, \\ud835\\udf07, and assume the variance is fixed,. 63 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf', 'page': 61, 'date': '02/01/2024\\\\n     Thursday', 'tldr': 'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. This is a key topic to aid you in applying deep learning models to new types of data.', 'title': '05 - Loss Functions', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_k2t4xjkd', 'suggested_readings': 'UDL Chapter 5', 'score': -0.038714982109774976}\"\n",
      "                ]\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:46:08.896\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"retrieval\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:46:10.549\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"4425cdbd-0da9-4d90-80e2-d909d4390189\",\n",
      "            \"input\": null,\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"User\",\n",
      "            \"output\": {\n",
      "                \"content\": \"cool thanks, see ya\"\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:46:10.549\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"user_message\"\n",
      "        },\n",
      "        {\n",
      "            \"attachments\": [],\n",
      "            \"endTime\": \"2024-06-19T09:46:10.55\",\n",
      "            \"error\": null,\n",
      "            \"generation\": null,\n",
      "            \"id\": \"ae5047bf-cdbd-4490-a5fb-beb9cf2f27e5\",\n",
      "            \"input\": null,\n",
      "            \"metadata\": {},\n",
      "            \"name\": \"AI_Tutor\",\n",
      "            \"output\": {\n",
      "                \"content\": \"You're welcome! If you have any more questions in the future, feel free to ask. Goodbye!\"\n",
      "            },\n",
      "            \"parentId\": null,\n",
      "            \"scores\": [],\n",
      "            \"startTime\": \"2024-06-19T09:46:10.55\",\n",
      "            \"tags\": null,\n",
      "            \"threadId\": \"7a2c91be-4f47-4f60-8ded-537dd31386df\",\n",
      "            \"type\": \"assistant_message\"\n",
      "        }\n",
      "    ],\n",
      "    \"tags\": [\n",
      "        \"FAISS\",\n",
      "        \"gpt-3.5-turbo-1106\"\n",
      "    ]\n",
      "}]\n",
      "{'step_id': ['1ab30fe6-99cb-4327-bd7d-e3128b2c1a62', 'd2b74775-41fc-4ebc-bc40-d8d7cf67015d', 'fd58e85c-dbaa-4d06-8a75-a3839e37856d', '4d02c5ad-1bd9-4767-beab-4c724062093e', '9e061c1e-ad50-400f-983f-813587508223', '74ed6d18-6c08-4d65-b47a-4d8efbcdfc20', '4ac119fb-86f5-4528-9edc-44cc616de390'], 'user_input': ['What is inductive bias in CNNs?', 'Do transformers have this?', 'cool, suggest a project with transformers to use as my final project', 'Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?', 'How do I learn the math behind transformers, what are the most important bits?', 'What is attention?', 'cool thanks, see ya'], 'output': ['Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.', \"I'm not sure, I don't have that information.\", \"You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\", 'Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!', 'You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!', 'Attention is a mechanism used in transformers to focus on different parts of the input when processing information. It allows the model to weigh the importance of different elements in the input sequence, enabling it to effectively capture relationships and dependencies. You can find more information about attention in the lecture slides on transformers. Good luck with your studies!', \"You're welcome! If you have any more questions in the future, feel free to ask. Goodbye!\"], 'context': [['page_content=\\'Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 
39 A function 𝐹[⋅] with parameters 𝜙\" that takes the node embeddings and adjacency matrix and outputs new node embeddings \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf\\', \\'page\\': 38, \\'date\\': \\'04/16/2024\\\\n     Tuesday\\', \\'tldr\\': \\'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.\\', \\'title\\': \\'20 -- Graph Neural Networks\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_bt7qtymk\\', \\'suggested_readings\\': \\'UDL Chapter 13\\', \\'score\\': 0.4147438882496012}', \"page_content='https://poloclub.github.io/cnn-explainer/ 92 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 91, 'date': '02/27/2024\\\\n     Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.30807206912986296}\", \"page_content='Convolutional networks • Networks for images • Invariance and equivariance • 1D convolution • Convolutional layers • Channels • Receptive fields • Convolutional network for MNIST 1D 44 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 43, 'date': '02/27/2024\\\\n     Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.26420832659802906}\"], [\"page_content='Transformers • Multi-headed Self Attention is just one component of the transformer architecture • Let’s look at a transformer block (or layer) from the encoder 49 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2451855994037282}\", \"page_content='Transformers • Motivation • Dot-product self-attention • Applying Self-Attention • The Transformer Architecture • Three Types of NLP Transformer Models • Encoder • Decoder • Encoder-Decoder 55 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 54, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional 
\\\\nThe Illustrated Transformer', 'score': 0.24314965373956088}\", \"page_content='Transformers • Motivation • Dot-product self-attention • Applying Self-Attention • The Transformer Architecture • Three Types of NLP Transformer Models • Encoder • Decoder • Encoder-Decoder 10 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 9, 'date': '03/07/2024\\\\n     Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2367633946657962}\"], ['page_content=\\'Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 39 A function 𝐹[⋅] with parameters 𝜙\" that takes the node embeddings and adjacency matrix and outputs new node embeddings \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf\\', \\'page\\': 38, \\'date\\': \\'04/16/2024\\\\n     Tuesday\\', \\'tldr\\': \\'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.\\', \\'title\\': \\'20 -- Graph Neural Networks\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_bt7qtymk\\', \\'suggested_readings\\': \\'UDL Chapter 13\\', \\'score\\': 0.3613770850497736}', \"page_content='Transformers • Multi-headed Self Attention is just one component of the transformer architecture • Let’s look at a transformer block (or layer) from the encoder 49 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2733153333305661}\", \"page_content='Transformers • Multi-headed Self Attention is just one component of the transformer architecture 48 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 47, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.22982265165302218}\"], ['page_content=\\'Image 
captioning 24 UDL • E.g. CNN-RNN, LSTM, Transformers \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf\\', \\'page\\': 23, \\'date\\': \\'01/18/2024\\\\n     Thursday\\', \\'tldr\\': \"We will introduce the topic of deep learning, a bit about it\\'s history, and what impact it has had. Then we\\'ll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\", \\'title\\': \\'01 - Intro to Deep Learning and Course Logistics\\', \\'lecture_recording\\': None, \\'suggested_readings\\': \\'UDL Chapter 1\\', \\'score\\': 0.552598095386973}', \"page_content='Next Time • Tokenization and Learned Embeddings • Training and Fine-Tuning Transformers • Image Transformers • Multimodal Transformers • … 76 Link Feedback ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 75, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.34481147697680925}\", \"page_content='Encoder-Decoder Model • Used for machine translation, which is a sequence-to-sequence task 22 https://jalammar.github.io/illustrated-transformer/ ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 21, 'date': '03/07/2024\\\\n     Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.33002046196039614}\"], [\"page_content='After the break • Image Transformers • Multimodal Transformers • RAG pattern • Training and Fine-Tuning Transformers • … 68 Feedback ChatGPT ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 67, 'date': '03/07/2024\\\\n     Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. 
We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.3366359998900601}\", \"page_content='UDL Chapter 11  Lecture 02/29/2024  Thursday  11a - Recurrent Neural Networks     [slides]  [lecture recording]    Suggested Readings: UDL Chapter 11  Lecture 03/05/2024  Tuesday  12 - Transformers     [slides]  [lecture recording]    Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer  Discussion 03/06/2024  Wednesday  Discussion_05 - Deep Dive 2: Deep Learning Modules in Pytorch (CNN, RNN/LSTM, Transformer)  Github Link: disc5 Lecture' metadata={'source': 'https://dl4ds.github.io/sp2024/schedule/', 'page': 0, 'score': 0.3100015518546917}\", 'page_content=\"Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer   14 -- Vision & Multimodal Transformers tl;dr: In this lecture we\\'ll cover vision and multimodal transformers as a survey of three papers.      [slides]  [lecture recording]     Suggested Readings: See slides for references   15 -- Improving LLM Perf tl;dr: In this lecture we talk about ways to improve LLM performance short of retraining or finetuning. We cover more sophisticated prompt strategies, retrieval augmentation and cognitive architectures building systems and agents based on LLMs.      [slides]  [lecture recording]     Suggested Readings: See slides for references\" metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/lectures/\\', \\'page\\': 0, \\'score\\': 0.3078678255019791}'], ['page_content=\\'Transformers, GPTs and Assistants A. Vaswani et al., “Attention is All you Need,” presented at the 31st Conference on Neural Information Processing Systems (NIPS 2017), Long Beach, CA, USA, 2017, p. 11. [Online]. Available: https://arxiv.org/abs/1706.03762 State of GPT, Andrej Karpathy, MS Build Keynote 38 \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf\\', \\'page\\': 37, \\'date\\': \\'01/18/2024\\\\n     Thursday\\', \\'tldr\\': \"We will introduce the topic of deep learning, a bit about it\\'s history, and what impact it has had. 
Then we\\'ll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\", \\'title\\': \\'01 - Intro to Deep Learning and Course Logistics\\', \\'lecture_recording\\': None, \\'suggested_readings\\': \\'UDL Chapter 1\\', \\'score\\': 0.40361429640387503}', \"page_content='Motivation • Need to efficiently process large strings of text • Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2435349602286475}\", \"page_content='Graph Attention 59 Regular graph convolution Graph attention ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf', 'page': 58, 'date': '04/16/2024\\\\n     Tuesday', 'tldr': 'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.', 'title': '20 -- Graph Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_bt7qtymk', 'suggested_readings': 'UDL Chapter 13', 'score': 0.22901789972755515}\"], [\"page_content='Motivation • Need to efficiently process large strings of text • Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': -0.015666472239410423}\", \"page_content='Example 1: univariate regression 61 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf', 'page': 59, 'date': '02/01/2024\\\\n     Thursday', 'tldr': 'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. 
This is a key topic to aid you in applying deep learning models to new types of data.', 'title': '05 - Loss Functions', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_k2t4xjkd', 'suggested_readings': 'UDL Chapter 5', 'score': -0.03567729015020071}\", 'page_content=\\'Example 1: univariate regression. Pr(y|f[x, φ], σ²) = (1/√(2πσ²)) exp[−(y − f[x, φ])² / (2σ²)]. Pr(y|µ, σ²) = (1/√(2πσ²)) exp[−(y − µ)² / (2σ²)]. In this case, just learn the mean, 𝜇, and assume the variance is fixed. 63 \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf\\', \\'page\\': 61, \\'date\\': \\'02/01/2024\\\\n     Thursday\\', \\'tldr\\': \\'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. This is a key topic to aid you in applying deep learning models to new types of data.\\', \\'title\\': \\'05 - Loss Functions\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_k2t4xjkd\\', \\'suggested_readings\\': \\'UDL Chapter 5\\', \\'score\\': -0.038714982109774976}']]}\n",
      "Step ID: 1ab30fe6-99cb-4327-bd7d-e3128b2c1a62\n",
      "User Input: What is inductive bias in CNNs?\n",
      "Output: Inductive bias in CNNs refers to the inherent preference for prioritizing information from neighboring nodes when updating each node in the network. This bias helps the network effectively aggregate information from nearby nodes, leading to more efficient and accurate node embeddings. You can find more information about this topic in the lecture slides on Graph Neural Networks and Convolutional Networks.\n",
      "Context: ['page_content=\\'Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 39 A function 𝐹[⋅] with parameters 𝜙\" that takes the node embeddings and adjacency matrix and outputs new node embeddings \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf\\', \\'page\\': 38, \\'date\\': \\'04/16/2024\\\\n     Tuesday\\', \\'tldr\\': \\'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.\\', \\'title\\': \\'20 -- Graph Neural Networks\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_bt7qtymk\\', \\'suggested_readings\\': \\'UDL Chapter 13\\', \\'score\\': 0.4147438882496012}', \"page_content='https://poloclub.github.io/cnn-explainer/ 92 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 91, 'date': '02/27/2024\\\\n     Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.30807206912986296}\", \"page_content='Convolutional networks • Networks for images • Invariance and equivariance • 1D convolution • Convolutional layers • Channels • Receptive fields • Convolutional network for MNIST 1D 44 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/10_convolutional_networks.pdf', 'page': 43, 'date': '02/27/2024\\\\n     Tuesday', 'tldr': 'We cover 1D and 2D convolutional neural networks along with subsampling and upsampling operations.', 'title': '10 - Convolutional Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_8ejvyib4', 'suggested_readings': 'UDL Chapter 10', 'score': 0.26420832659802906}\"]\n",
      "Step ID: d2b74775-41fc-4ebc-bc40-d8d7cf67015d\n",
      "User Input: Do transformers have this?\n",
      "Output: I'm not sure, I don't have that information.\n",
      "Context: [\"page_content='Transformers • Multi-headed Self Attention is just one component of the transformer architecture • Let’s look at a transformer block (or layer) from the encoder 49 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2451855994037282}\", \"page_content='Transformers • Motivation • Dot-product self-attention • Applying Self-Attention • The Transformer Architecture • Three Types of NLP Transformer Models • Encoder • Decoder • Encoder-Decoder 55 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 54, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.24314965373956088}\", \"page_content='Transformers • Motivation • Dot-product self-attention • Applying Self-Attention • The Transformer Architecture • Three Types of NLP Transformer Models • Encoder • Decoder • Encoder-Decoder 10 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 9, 'date': '03/07/2024\\\\n     Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2367633946657962}\"]\n",
      "Step ID: fd58e85c-dbaa-4d06-8a75-a3839e37856d\n",
      "User Input: cool, suggest a project with transformers to use as my final project\n",
      "Output: You could consider working on a project that explores the use of transformers in natural language processing tasks, such as sentiment analysis, language translation, or text generation. This could be a great opportunity to apply what you've learned about transformers in a real-world context. Good luck with your project!\n",
      "Context: ['page_content=\\'Graph convolutional network These models are convolutional in that they update each node by aggregating information from nearby nodes. As such, they induce a relational inductive bias (i.e., a bias toward prioritizing information from neighbors). 39 A function 𝐹[⋅] with parameters 𝜙\" that takes the node embeddings and adjacency matrix and outputs new node embeddings \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf\\', \\'page\\': 38, \\'date\\': \\'04/16/2024\\\\n     Tuesday\\', \\'tldr\\': \\'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.\\', \\'title\\': \\'20 -- Graph Neural Networks\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_bt7qtymk\\', \\'suggested_readings\\': \\'UDL Chapter 13\\', \\'score\\': 0.3613770850497736}', \"page_content='Transformers • Multi-headed Self Attention is just one component of the transformer architecture • Let’s look at a transformer block (or layer) from the encoder 49 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 48, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2733153333305661}\", \"page_content='Transformers • Multi-headed Self Attention is just one component of the transformer architecture 48 Encoder Decoder ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 47, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.22982265165302218}\"]\n",
      "Step ID: 4d02c5ad-1bd9-4767-beab-4c724062093e\n",
      "User Input: Could you give me a quick rundown on what image captioning is? and how transformers are useful for this?\n",
      "Output: Image captioning is the process of generating textual descriptions for images. Transformers can be useful for this task because they are effective at processing sequential data and can learn to effectively capture the relationships between the visual and textual information in images and their corresponding captions. You can find more information about this in the lecture slides on Image Transformers and Multimodal Transformers. Good luck with your project!\n",
      "Context: ['page_content=\\'Image captioning 24 UDL • E.g. CNN-RNN, LSTM, Transformers \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf\\', \\'page\\': 23, \\'date\\': \\'01/18/2024\\\\n     Thursday\\', \\'tldr\\': \"We will introduce the topic of deep learning, a bit about it\\'s history, and what impact it has had. Then we\\'ll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\", \\'title\\': \\'01 - Intro to Deep Learning and Course Logistics\\', \\'lecture_recording\\': None, \\'suggested_readings\\': \\'UDL Chapter 1\\', \\'score\\': 0.552598095386973}', \"page_content='Next Time • Tokenization and Learned Embeddings • Training and Fine-Tuning Transformers • Image Transformers • Multimodal Transformers • … 76 Link Feedback ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 75, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.34481147697680925}\", \"page_content='Encoder-Decoder Model • Used for machine translation, which is a sequence-to-sequence task 22 https://jalammar.github.io/illustrated-transformer/ ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 21, 'date': '03/07/2024\\\\n     Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.33002046196039614}\"]\n",
      "Step ID: 9e061c1e-ad50-400f-983f-813587508223\n",
      "User Input: How do I learn the math behind transformers, what are the most important bits?\n",
      "Output: You can start by diving into the lecture slides and suggested readings on transformers, particularly Chapter 12 and the Illustrated Transformer. These resources should cover the important mathematical concepts behind transformers and help you understand the key components. Good luck with your studies!\n",
      "Context: [\"page_content='After the break • Image Transformers • Multimodal Transformers • RAG pattern • Training and Fine-Tuning Transformers • … 68 Feedback ChatGPT ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/13_transformers_part2_v2.pdf', 'page': 67, 'date': '03/07/2024\\\\n     Thursday', 'tldr': 'In this lecture we continue to review the transformer architecture. We continue the discussion of decoders and encoder-decoder architectures, then discuss scaling to large contexts and then tokenization and embedding.', 'title': '13 - Transformers Part 2', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_m9hhz175', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.3366359998900601}\", \"page_content='UDL Chapter 11  Lecture 02/29/2024  Thursday  11a - Recurrent Neural Networks     [slides]  [lecture recording]    Suggested Readings: UDL Chapter 11  Lecture 03/05/2024  Tuesday  12 - Transformers     [slides]  [lecture recording]    Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer  Discussion 03/06/2024  Wednesday  Discussion_05 - Deep Dive 2: Deep Learning Modules in Pytorch (CNN, RNN/LSTM, Transformer)  Github Link: disc5 Lecture' metadata={'source': 'https://dl4ds.github.io/sp2024/schedule/', 'page': 0, 'score': 0.3100015518546917}\", 'page_content=\"Suggested Readings: UDL Chapter 12 Optional The Illustrated Transformer   14 -- Vision & Multimodal Transformers tl;dr: In this lecture we\\'ll cover vision and multimodal transformers as a survey of three papers.      [slides]  [lecture recording]     Suggested Readings: See slides for references   15 -- Improving LLM Perf tl;dr: In this lecture we talk about ways to improve LLM performance short of retraining or finetuning. We cover more sophisticated prompt strategies, retrieval augmentation and cognitive architectures building systems and agents based on LLMs.      [slides]  [lecture recording]     Suggested Readings: See slides for references\" metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/lectures/\\', \\'page\\': 0, \\'score\\': 0.3078678255019791}']\n",
      "Step ID: 74ed6d18-6c08-4d65-b47a-4d8efbcdfc20\n",
      "User Input: What is attention?\n",
      "Output: Attention is a mechanism used in transformers to focus on different parts of the input when processing information. It allows the model to weigh the importance of different elements in the input sequence, enabling it to effectively capture relationships and dependencies. You can find more information about attention in the lecture slides on transformers. Good luck with your studies!\n",
      "Context: ['page_content=\\'Transformers, GPTs and Assistants A. Vaswani et al., “Attention is All you Need,” presented at the 31st Conference on Neural Information Processing Systems (NIPS 2017), Long Beach, CA, USA, 2017, p. 11. [Online]. Available: https://arxiv.org/abs/1706.03762 State of GPT, Andrej Karpathy, MS Build Keynote 38 \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/presentations/01_sp2024_v02.pdf\\', \\'page\\': 37, \\'date\\': \\'01/18/2024\\\\n     Thursday\\', \\'tldr\\': \"We will introduce the topic of deep learning, a bit about it\\'s history, and what impact it has had. Then we\\'ll go over the course logistics, the lecture topics, problem sets and the mid-term and final projects.\", \\'title\\': \\'01 - Intro to Deep Learning and Course Logistics\\', \\'lecture_recording\\': None, \\'suggested_readings\\': \\'UDL Chapter 1\\', \\'score\\': 0.40361429640387503}', \"page_content='Motivation • Need to efficiently process large strings of text • Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': 0.2435349602286475}\", \"page_content='Graph Attention 59 Regular graph convolution Graph attention ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/20_graph_neural_networks.pdf', 'page': 58, 'date': '04/16/2024\\\\n     Tuesday', 'tldr': 'In this lecture we introduce graph neural networks, define matrix representations, how to do graph level classification and regression, and how to define graph convolutional network layers.', 'title': '20 -- Graph Neural Networks', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_bt7qtymk', 'suggested_readings': 'UDL Chapter 13', 'score': 0.22901789972755515}\"]\n",
      "Step ID: 4ac119fb-86f5-4528-9edc-44cc616de390\n",
      "User Input: cool thanks, see ya\n",
      "Output: You're welcome! If you have any more questions in the future, feel free to ask. Goodbye!\n",
      "Context: [\"page_content='Motivation • Need to efficiently process large strings of text • Need to relate words across fairly long context lengths Self-Attention addresses these problems ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/12_transformers_v3_export.pdf', 'page': 16, 'date': '03/05/2024\\\\n     Tuesday', 'tldr': 'In this lecture we cover the transformer architecture, starting with the motivation that required a new type of model, the concept and implementation of self-attention and then the full transformer architecture for encoder, decoder and encoder-decoder type models.', 'title': '12 - Transformers', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_r59pkzbo', 'suggested_readings': 'UDL Chapter 12\\\\n\\\\n\\\\nOptional \\\\nThe Illustrated Transformer', 'score': -0.015666472239410423}\", \"page_content='Example 1: univariate regression 61 ' metadata={'source': 'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf', 'page': 59, 'date': '02/01/2024\\\\n     Thursday', 'tldr': 'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. This is a key topic to aid you in applying deep learning models to new types of data.', 'title': '05 - Loss Functions', 'lecture_recording': 'https://mymedia.bu.edu/media/t/1_k2t4xjkd', 'suggested_readings': 'UDL Chapter 5', 'score': -0.03567729015020071}\", 'page_content=\\'Example 1: univariate regression <latexit sha1_ba se64=\"VrHufIDtMmytgQ3rO/QqNtGUe l4=\">AXBXiclZjZbtw2FEDHXdN0c1 rELwVaoUaApHAM20iXlwKJHWezU0/iN bEcg9JQGsYUJUuUPRN1not+TN+KvY7 +hX9hV5K8jC6l37oAPEw9xul5TEUZB JUeilpX9m3n3vfc/+PDKR1c/uTz 6fvfbFXpGWech3w1Sm+UHACi6F4rta MkPspyzJB8PzhZM3z/jOeFSNWOHmf8 KGxEpEImYbQ8ezv/fzm+Fc/CdJRFU0 O/SAaLcCfbCiOFvxCxAl7tXL+9nzo5 yF1fKk8ovTXFcrfiZaXK1MJhOfjzJf8 kgf3m7Mm+PbzkZv1T7Ut5X9XMRDfXQ8 O7+0uFR/PFpYbgvzvfbTP752feAP0rB MuNKhZEVxuLyU6aOK5VqEk+u+mXBMx aesJgfQlGxhBdHVZ2ziXcDIgMvSnP4p 7RXR9+uUbGkKMZJAGbC9LDAzARd7LDU 0U9HlVBZqbkKm46iUno69cwCeAOR81D LMRYmAsYqxcOGeRMwzJd9RU/D9MkYW pQ+avrzyDhAY+FqvhpWS/ZNJ1muHQ /EyY/XxzrQVoXki3nDSK2YRi4ReDyp Kr4YL2IgOACxyAlIFS+gTZOfIPKWEYU tKgFXzR6BveE9n5CmleYx5KSjvSQaFD LJRx1rjViwlElH2QbF8254BnCdwyrAU OGLozXYzpiaXNTfKTzpCpMDPeQMxXz uguYcsikmVHXUKWUDXsWL9g6zlTJ23 i0qweam4iyNrJu47OaV7UoOvUEWTBJo y7Vh1BloQbyoAlDLclo9hwolnIm5VK KwKsjH7eRp0+85MBO/NUQbXS9dbr0j6 zxjKiAnA1We+BVMh7+pr6dT2LpJzVvu mwEfeEBarW4XlcTOti05gVm1sQs06V8 ik2YJQnp53TMah8oz0Z2gCeCLrsyFi t7SFuoSbFkT9hdgqnkp+eHtxe/56Kha MpeN+UOyCQ0VZeZqyIT/R0MDeITh/QU RvHipRIsHgXrxUgn3d7R0LMcb20TqtY OCUEwKPUaXv4hVt04dwYNEzRWCJh24 ZsJhRY5irqyCRgZvuFh7NhAIZpk2Mwx lGlR5pzc/NB+hkitm9tiLszDqntDlUb o3je4nNaCMjwczvgl1QOU0aDJZ5CWas BylMyRWdLRK7/QcIm5rv56yZui04r56 UbH4wLVqcMQ356vIHXIyYWdSRqC04/ zrYksRz9QVvT7fr2yKqNV9+RrR07XLc pSbvtKN2w71kBPx0zHaTeIRizoStd WOkHrEcvQHbnzuOmahcN1m5K0e5FHp +1wpyba/tHOkGtmjkmpHJhjXyr9JoRF TUXtFNOEx0hsQlhMyq4F/8fKtjksd60 mhMV+IbqaCWBpwCWeQhPCYnMJd802ht VNh7rpVpnMhshsQlh8yBI86yaExZiKs VM8YVmGxCZE8jEeRzSPGZYylwSXpHM sSJkS7k2VD5Mu5IJYGmEehs5OoMRyFS hDtsglgu68wrnzlNoFyu6i3dHe9e0r FmqETwNIWucY8f8t5kQU4xXDMciU5E 8jKaAL72OlT5+L0F0QVOckF0djSMaXn lp5Tum/pPqW5peQXQRA9t5T8OgmiM0v PKN2zdI/S0tKS0l1LdymNLI0ofWDpA0 pDS0NK1yxdo1RbSk6k8ESwdIfSoaVDS g8sPaD0haUvKH1k6SNKX1r6ktI3lr6h 
9J6l9yhljJK1y1dp5RbSl4dBNGqpau UBpaS35wrVnapzSzNKP0vqX3KR1YSn 4Vw/PMUnK8gQejpZLSx5Y+plRYSn6/B dFTS59SmliaUPrE0ieUvrb0NaUPLX1I aWwpeTcApxNLtym1b4GqgtJnlj6j9NT SU/d7AT5dxsC1MbdsA1uUpamlG5YSn 4pwFHC0hNynoxUe1e7eNtE7muRmnIHa zN+UZvkPFJT7mDt3emiNrk/RWrKh2To 63vTFymQUrjTH8/OL+O3sLSwt7K4/MP inWd35u+utm9or/S+6n3bu9lb7v3Yu9 t71Ov3dnth79+Z6zNfz3wz9vcH3N/z v3VqO/MtHW+7HU+c3/B3l+BUI=</la texit> Pr(y|f[x, φ], σ2) = 1 p 2⇡σ2 exp \\\\uf8ff −(y −f[x, φ])2 2σ2 \" <latexit sha1_ba se64=\"sL5nmFJHMlFiWTdFxaDUKnbUr Jg=\">AW4XiclZjLbtw2FECVPtP05b SoN90INQIkRWLYRvrYFEjsOC879Th+J pZjUBpKw5iZImyZ6LOB3RXdNtP6hf0 M7ptN73UaIbRvfSiAzjD3HPExyUpcRT mUpR6aemvK+8+97H3x49aNrH3/y6W efz13/Yr/MqiLie1Ems+IwZCWXQvE9L bTkh3nBWRpKfhCerhl+cM6LUmRqV49y fpyRIlYRExD6GSO94qbo1+CtLodlCJ J2auVW/5PfhAXLKqXx3VQnhW6Xgly0e J6ZTweB3yYB5LH+ujOxLw5ugNV3Go2 FYNCpEM9PHJ3MLS4lLz8WlhuS0seO2n d3L9q37Qz6Iq5UpHkpXl0fJSro9rVmg RST6+FlQlz1l0yhJ+BEXFUl4e10+xv 4NiPT9OCvgT2m/ib59Rc3SshylIZgp0 4MSMxN0saNKxz8e10LleYqmjQUV9LX mW+S6/dFwSMtR1BgUSGgr340YJAhDVN wLVD8IsrSlKl+Hayub0N6Q54IVfOzqp mO8bjrDcOh+JlxuqT3VktQvNUvOGk kYxlVwi8GRc13wxWcRAcABikROQKV5C nSY/YewvIwrLTwIGHmZD6FzsPx+TqpX mCeSko70kGhRyYcda41YMJVpR9kBxf dv+AZwXcAsQFfhi6M52MmZGk+v03yoi 7QuTQy3UDCV8KYJGHLEpBlR1CVlHBp 1LF+xtZzpk7bxGV509XCRJC1W3QdXdC 8qH7XaSLIgkWYdK0mgiwJN4s+SxlkuS 2fwIBT30TcqlBYFWRh9os7Ladmwhem 8Mc9kvXW69J+s8ZyogJwO4z34KpiHf1 tWxm+9PknDe+KfChP4DJ6l7CimQyrGk jMKo2NqZmkytk0mxBqMguqbpjUPlue gO0ATwpqsKoeK3tNtNCZasCQe3YahFJ fnRncXv+PC4XjLbxvxDsgkVlVXuqsiE /0dFfXg84fUFETx5mUSTB4Fm8jIJ93c 0dazAC9tEmrmDglBMCj1C218kqntNE8 GdzVLUVwiYeuGbCYUmOY67sgkYGb7hQ etYQBEaZDQZYySzsio4ufmh9QyRje3 xUKYh1X3hiqN0L1vcDm7CsrwcDjnl1w eoyGk3yGWaX6rEDJHJopHb4KSg1bzL X7mymfFJ1Wws82vagXzA7VRTxs5MNP B8JsagjUV1wsnHWJYnlaA/qmi3Xt3tW b7z6liztxOG6TUnqbXvpth3uJT3gZ5u O3m4Sj1jUkaiutofUI5ajPajLncdN1y gcrtuUpN5pHp2w52ZaPnHuwOumTkmZ bJvjn2ZDCYhLGoqaqeYpTxB4iSExbTq WvB/rOyYw3LXmoSw2CtFVzMBLPW5xEO YhLA42cJds41hdOhbrpVJvMBMichLD 5iKR71JITFhIqJUzxleY7ESYjkcYDzO KB5zLGUuyQ8I7ljRsiSci2oYpB1JRPA 0hC1NnQ0Bj2QmUINtkEsl3Tlc6Vp9A qVnQV7ka3rukYc1QhSaApS2yx/xgy7 nJQpxiOGa5kpwLZOU0gT3s9KgzPf2Fc U1OcmE8snRE6YWlF5QeWHpAaWEp+UQ xs8tJb9Owvjc0nNK9y3dp7SytKJ0z9I 9SmNLY0ofWvqQ0sjSiNI1S9co1ZaSEy k8ESzdpXRg6YDSQ0sPKX1h6QtKH1v6m NKXlr6k9I2lbyi9b+l9SpmljNJ1S9cp 5ZaSVwdhvGrpKqWhpeS3H+w1S3uU5pb mlD6w9AGlfUvJr2J4nlKjfwYLRUv rE0ieUCkvJ7cwfmbpM0pTS1NKn1r6l NLXlr6m9JGljyhNLCXvBuB0YukOpfYt UF1Sum3pNqVnlp653wvw2TSGroW5ZSv YojSzNKN0w1LySwGOEpaekvNkrNq72v RtE7mvxWrGHazN+PRqkvNYzbiDtXen6 dXk/hSrGR+Qrq/vz16kQErhTn8yt7CM 38LSwv7K4vL3i3e37y7cW23f0F71va +8W56y94P3j3vsdfz9rzI+9P72/vH+3 c+mv91/rf53yfqO1fa70Op/5P/4D+ G/4mg=</latexit> Pr(y|µ, σ2) = 1 p 2⇡σ2 exp \\\\uf8ff −(y −µ)2 2σ2 \" In this case, just the mean Just learn the mean, 𝜇, and assume the variance is fixed,. 63 \\' metadata={\\'source\\': \\'https://dl4ds.github.io/sp2024/static_files/lectures/05_loss_functions_v2.pdf\\', \\'page\\': 61, \\'date\\': \\'02/01/2024\\\\n     Thursday\\', \\'tldr\\': \\'We reconsider loss functions as a measure of how well the data fits to parametric probability distribution. We show that for univariate gaussian distributions we arrive back at least squares loss. We then introduce the notion of maximum likelihood and see how we can use that to define loss functions for many types data distributions. We cover some examples and then show how to generalize. 
This is a key topic to aid you in applying deep learning models to new types of data.\\', \\'title\\': \\'05 - Loss Functions\\', \\'lecture_recording\\': \\'https://mymedia.bu.edu/media/t/1_k2t4xjkd\\', \\'suggested_readings\\': \\'UDL Chapter 5\\', \\'score\\': -0.038714982109774976}']\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "from dotenv import load_dotenv\n",
    "from literalai import LiteralClient\n",
    "\n",
    "virtual_app = dict(\n",
    "    llm=dict(modelname=\"AI Tutor - GPT3 - FAISS\"),\n",
    "    template=\"information about the template I used in my app\",\n",
    "    debug=\"all of these fields are completely optional\",\n",
    ")\n",
    "from trulens_eval import Select\n",
    "from trulens_eval.tru_virtual import VirtualApp\n",
    "\n",
    "virtual_app = VirtualApp(virtual_app)  # can start with the prior dictionary\n",
    "virtual_app[Select.RecordCalls.llm.maxtokens] = 1024\n",
    "\n",
    "from trulens_eval import Select\n",
    "\n",
    "retriever = Select.RecordCalls.retriever\n",
    "synthesizer = Select.RecordCalls.synthesizer\n",
    "\n",
    "virtual_app[retriever] = \"retriever\"\n",
    "virtual_app[synthesizer] = \"synthesizer\"\n",
    "\n",
    "\n",
    "load_dotenv()\n",
    "\n",
    "literal_client = LiteralClient()\n",
    "\n",
    "threads = literal_client.api.get_threads(first=1).data\n",
    "print(threads)\n",
    "\n",
    "rag_steps = []\n",
    "for thread in threads:\n",
    "    rag_steps.extend([step for step in thread.steps if step.name == \"RAG\"])\n",
    "\n",
    "save_log_dict = {\"step_id\": [], \"user_input\": [], \"output\": [], \"context\": []}\n",
    "for step in rag_steps:\n",
    "    save_log_dict[\"step_id\"].append(step.id)\n",
    "    save_log_dict[\"user_input\"].append(step.input[\"question\"])\n",
    "    save_log_dict[\"output\"].append(step.output[\"answer\"])\n",
    "    save_log_dict[\"context\"].append(step.output[\"source_documents\"])\n",
    "\n",
    "print(save_log_dict)\n",
    "\n",
    "records = []\n",
    "for i in range(len(save_log_dict[\"step_id\"])):\n",
    "    step_id = save_log_dict[\"step_id\"][i]\n",
    "    user_input = save_log_dict[\"user_input\"][i]\n",
    "    output = save_log_dict[\"output\"][i]\n",
    "    context = save_log_dict[\"context\"][i]\n",
    "\n",
    "    print(f\"Step ID: {step_id}\")\n",
    "    print(f\"User Input: {user_input}\")\n",
    "    print(f\"Output: {output}\")\n",
    "    print(f\"Context: {context}\")\n",
    "\n",
    "    from trulens_eval.tru_virtual import VirtualRecord\n",
    "\n",
    "    # The selector for a presumed context retrieval component's call to\n",
    "    # `get_context`. The names are arbitrary but may be useful for readability on\n",
    "    # your end.\n",
    "    context_call = retriever.get_context\n",
    "    generation = synthesizer.generate\n",
    "\n",
    "    rec = VirtualRecord(\n",
    "        main_input=f\"{user_input}\",\n",
    "        main_output=f\"{output}\",\n",
    "        calls={\n",
    "            context_call: dict(args=[f\"{user_input}\"], rets=[f\"{output}\"]),\n",
    "            generation: dict(\n",
    "                args=[f\"{context}\"],\n",
    "                rets=[f\"{output}\"],\n",
    "            ),\n",
    "        },\n",
    "    )\n",
    "\n",
    "    records.append(rec)\n"
   ]
  },
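  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Optionally, persist the extracted logs to disk so the TruLens ingestion can be re-run without querying the Literal AI API again. A minimal sketch using only the standard library; the `rag_log.json` filename is an arbitrary choice, not part of either API."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# Snapshot the extracted step ids, inputs, outputs and contexts so the\n",
    "# evaluation below can be repeated offline. The filename is arbitrary.\n",
    "with open(\"rag_log.json\", \"w\") as f:\n",
    "    json.dump(save_log_dict, f, indent=2)\n",
    "\n",
    "# To reload later:\n",
    "# with open(\"rag_log.json\") as f:\n",
    "#     save_log_dict = json.load(f)"
   ]
  },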
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .\n",
      "✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.retriever.get_context.rets[:] .\n",
      "✅ In Groundedness, input source will be set to __record__.app.retriever.get_context.rets[:].collect() .\n",
      "✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .\n",
      "✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .\n",
      "✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .\n"
     ]
    }
   ],
   "source": [
    "from trulens_eval.feedback.provider import OpenAI\n",
    "from trulens_eval.feedback.feedback import Feedback\n",
    "\n",
    "# Initialize provider class\n",
    "provider = OpenAI()\n",
    "\n",
    "# Select context to be used in feedback. We select the return values of the\n",
    "# virtual `get_context` call in the virtual `retriever` component. Names are\n",
    "# arbitrary except for `rets`.\n",
    "context = context_call.rets[:]\n",
    "\n",
    "# Question/statement relevance between question and each context chunk.\n",
    "f_context_relevance = (\n",
    "    Feedback(provider.context_relevance_with_cot_reasons)\n",
    "    .on_input()\n",
    "    .on(context)\n",
    ")\n",
    "\n",
    "# Define a groundedness feedback function\n",
    "f_groundedness = (\n",
    "    Feedback(provider.groundedness_measure_with_cot_reasons, name = \"Groundedness\")\n",
    "    .on(context.collect())\n",
    "    .on_output()\n",
    ")\n",
    "\n",
    "# Question/answer relevance between overall question and answer.\n",
    "f_qa_relevance = (\n",
    "    Feedback(provider.relevance_with_cot_reasons, name = \"Answer Relevance\")\n",
    "    .on_input_output()\n",
    ")"
   ]
  },
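  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Before wiring the feedback functions into a recorder, one of them can optionally be smoke-tested against a single virtual record. A minimal sketch, assuming `Feedback.run` in your `trulens_eval` version accepts the virtual app and record directly:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional smoke test: score answer relevance on the first virtual record.\n",
    "if records:\n",
    "    result = f_qa_relevance.run(app=virtual_app, record=records[0])\n",
    "    print(result.result)  # relevance score in [0, 1]"
   ]
  },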
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "🦑 Tru initialized with db url sqlite:///default.sqlite .\n",
      "🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.\n"
     ]
    }
   ],
   "source": [
    "from trulens_eval.tru_virtual import TruVirtual\n",
    "\n",
    "virtual_recorder = TruVirtual(\n",
    "    app_id=\"AI Tutor - GPT3 - FAISS\",\n",
    "    app=virtual_app,\n",
    "    feedbacks=[f_context_relevance, f_groundedness, f_qa_relevance],\n",
    "    feedback_mode = \"deferred\" # optional\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "for record in records:\n",
    "    virtual_recorder.add_record(record)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Force stopping dashboard ...\n",
      "Starting dashboard ...\n",
      "Config file already exists. Skipping writing process.\n",
      "Credentials file already exists. Skipping writing process.\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c3eb563d17e84983ad0d8eca36904ddb",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dashboard started at http://10.0.0.226:8501 .\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from trulens_eval import Tru\n",
    "tru = Tru()\n",
    "\n",
    "tru.run_dashboard(force=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Will keep max of 32 feedback(s) running.\n",
      "Tasks are spread among max of 128 thread(s).\n",
      "Will rerun running feedbacks after a minute.\n",
      "Will rerun failed feedbacks after 5 minutes.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<Thread(Thread-8 (runloop), started daemon 13681946624)>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "df51361ac4664845b804e574497e1b58",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Feedback Status:  80%|########  | 84/105 [00:00<?, ?feedbacks/s, DONE=84, NONE=21]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b27d0ca707e8414692191c246324466c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Done Runs: 0runs [00:00, ?runs/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d8345150aecc47fe9f37cc84f53fbb33",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Waiting for Runs: 0runs [00:00, ?runs/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "tru.start_evaluator()\n",
    "\n",
    "# tru.stop_evaluator() # stop if needed"
   ]
  }
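  ,
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Once the deferred evaluator has worked through the queue, the scores can also be pulled back as a DataFrame rather than only browsed in the dashboard. A minimal sketch, assuming the `app_id` used above and the default `input`/`output` column names `trulens_eval` emits:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Retrieve logged records plus their feedback score columns for this app.\n",
    "records_df, feedback_cols = tru.get_records_and_feedback(\n",
    "    app_ids=[\"AI Tutor - GPT3 - FAISS\"]\n",
    ")\n",
    "records_df[[\"input\", \"output\"] + feedback_cols].head()"
   ]
  }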
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "devpy11",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.1.-1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}