from manim_imports_ext import *
from _2024.transformers.helpers import *
from _2024.transformers.embedding import *


# Intro chapter


class GPTInitials(InteractiveScene):
    """Expand the initials "GPT" into the full name, then highlight each word in turn."""

    def construct(self):
        # Write name
        frame = self.frame
        name_str = "Generative Pre-trained Transformer"
        name = Text(name_str, font_size=72)
        name.to_edge(UP)
        name.save_state()
        frame.move_to(name).shift(DOWN)
        words = VGroup(name[word][0] for word in name_str.split(" "))
        initials = Text("GPT")
        initials.replace(name, dim_to_match=1)
        t_target = initials["T"][0].generate_target()
        t_target.shift(3 * RIGHT)
        words[0].next_to(initials["P"], LEFT, aligned_edge=DOWN)
        words[1].next_to(t_target, LEFT, aligned_edge=DOWN)

        morty = Mortimer(mode='plain').flip()
        morty.next_to(initials, DL).shift(0.5 * DOWN)
        morty.body.insert_n_curves(100)
        self.add(morty)

        def letter_anim(letters, point):
            # Collapse every letter (invisible) onto `point`, then restore
            # them one after another to their saved positions.
            for letter in letters:
                letter.save_state()
                letter.set_opacity(0)
                letter.move_to(point)
            return LaggedStart(
                (Restore(letter) for letter in letters),
                lag_ratio=0.05,
                time_span=(0.25, 0.75)
            )

        self.play(
            LaggedStartMap(FadeIn, initials, scale=2, lag_ratio=0.25, run_time=1),
            morty.change("raise_right_hand", initials),
        )
        self.play(Blink(morty))
        self.play(
            ReplacementTransform(initials[0], words[0][0]),
            letter_anim(words[0][1:], initials[0].get_center()),
            morty.animate.look_at(words[0]),
            run_time=1
        )
        self.wait(0.5)
        self.play(
            words[0].animate.next_to(words[1], LEFT, aligned_edge=DOWN),
            Transform(initials[2], t_target),
            ReplacementTransform(initials[1], words[1][0]),
            letter_anim(words[1][1:], initials[1].get_center()),
            morty.change("well", words[1]),
            run_time=1
        )
        self.remove(initials)
        self.wait(0.5)
        self.play(
            Transform(name[:-len(words[2])], name.saved_state[:-len(words[2])]),
            ReplacementTransform(initials[2], words[2][0]),
            letter_anim(words[2][1:], initials[2].get_center()),
            morty.animate.look_at(words[2])
        )
        self.add(name)
        self.play(Blink(morty))
        self.wait()

        # Set up T structure
        h_line = Line(LEFT, RIGHT).set_width(FRAME_WIDTH)
        h_line.next_to(words, DOWN).set_x(0)
        v_lines = Line(UP, DOWN).set_height(FRAME_HEIGHT).replicate(2)
        v_lines.arrange(RIGHT, buff=FRAME_WIDTH / 3)
        v_lines.next_to(h_line, DOWN, buff=0)
        t_lines = VGroup(h_line, *v_lines)
        t_lines.set_stroke(GREY_B, 1)

        # Go through each word
        words.target = words.generate_target()
        words.target[0].set_fill(YELLOW)
        words.target[1:].set_fill(WHITE, 0.5, border_width=0)
        offset = FRAME_WIDTH / 3
        words.target[0].set_x(-offset)
        words.target[1].set_x(0)
        words.target[2].set_x(offset)

        line = Underline(words.target[0])
        line.set_stroke(YELLOW)

        self.play(LaggedStart(
            MoveToTarget(words),
            ShowCreation(line),
            frame.animate.center(),
            morty.change("thinking", words.target[0]).set_opacity(0),
            Write(t_lines, stroke_width=2),
            FlashAround(words.target[0].copy())
        ))
        self.remove(morty)
        self.wait()

        for i in [1, 2]:
            words.target = words.generate_target()
            words.target.set_fill(WHITE, 0.5, border_width=0)
            words.target[i].set_fill(YELLOW, 1, border_width=0.5)
            self.play(
                line.animate.become(Underline(words[i])).set_stroke(YELLOW).set_anim_args(run_time=0.75),
                FlashAround(words[i]),
                MoveToTarget(words),
            )
            self.wait()

        # Isolate just Transformer
        self.play(
            words[2].animate.set_x(0).set_color(WHITE).shift(0.25 * UP),
            line.animate.set_x(0).set_color(WHITE).set_width(6).shift(0.25 * UP),
            FadeOut(words[0], LEFT),
            FadeOut(words[1], 3 * LEFT),
            Uncreate(t_lines, lag_ratio=0),
        )
        self.wait()


class DifferentUsesOfModel(InteractiveScene):
    """Show the word "model" in two sentences, with arrows from context words and per-word vectors."""

    def construct(self):
        # Set up sentences
        sentences = VGroup(
            Text("A machine learning model ..."),
            Text("A fashion model ..."),
        )
        images = Group(
            NeuralNetwork([8, 6, 6, 8]),
            ImageMobject("Zoolander"),
        )
        for sent, image, sign in zip(sentences, images, [-1, 1]):
            sent.set_y(-2)
            sent.set_x(sign * FRAME_WIDTH / 4)
            image.set_width(4)
            image.next_to(sent, UP, buff=0.5)
        images[0].match_y(images[1])

        sentences[0]["model"].set_color(BLUE)
        sentences[1]["model"].set_color(YELLOW)

        # Put word in context
        word = Text("model", font_size=72)
        word.to_edge(UP, buff=0.25)

        self.play(FadeIn(word, UP))
        self.wait()
        self.play(
            FadeTransform(word.copy(), sentences[0]["model"]),
            LaggedStart(
                Write(sentences[0]),
                Write(images[0], lag_ratio=0.01, stroke_width=0.5),
                lag_ratio=0.5,
                run_time=2
            )
        )
        self.wait()
        self.play(
            FadeTransform(word.copy(), sentences[1]["model"]),
            LaggedStart(
                Write(sentences[1]),
                FadeIn(images[1], shift=0.5 * UP, scale=1.25),
                lag_ratio=0.2,
                run_time=2
            )
        )
        self.wait()

        # Show relevance
        s0, s1 = sentences
        path_arc = -0.65 * PI
        left_arrows = VGroup(
            Arrow(
                s0[word].get_top(),
                s0["model"].get_top(),
                path_arc=path_arc
            )
            for word in ["machine", "learning"]
        )
        right_arrow = Arrow(
            s1["fashion"].get_top(),
            s1["model"].get_top(),
            path_arc=path_arc
        )
        left_arrows[0].set_stroke(TEAL, opacity=0.9)
        left_arrows[1].set_stroke(TEAL_D, opacity=0.75)
        right_arrow.set_stroke(TEAL_C, opacity=0.8)

        self.play(
            LaggedStartMap(ShowCreation, left_arrows, lag_ratio=0.5),
            self.frame.animate.move_to(DOWN),
            images.animate.shift(UP),
        )
        self.play(ShowCreation(right_arrow))
        self.wait()

        # Show word vectors
        words = VGroup(
            *(s0[word] for word in s0.get_text().split(" ")[:-1]),
            *(s1[word] for word in s1.get_text().split(" ")[:-1]),
        )
        vectors = VGroup(
            NumericEmbedding().set_height(2).next_to(word, DOWN, buff=0.2)
            for word in words
        )

        self.play(
            LaggedStartMap(FadeIn, vectors, shift=0.25 * DOWN, lag_ratio=0.25, run_time=3)
        )
        self.play(
            LaggedStartMap(RandomizeMatrixEntries, vectors)
        )
        self.wait()


class BigMatrixMultiplication(InteractiveScene):
    """Display a weight matrix beside a vector and run show_matrix_vector_product on them."""

    mat_dims = (12, 12)
    random_seed = 9

    def construct(self):
        # Test
        matrix = WeightMatrix(shape=self.mat_dims)
        matrix.set_width(FRAME_WIDTH - 4)
        matrix.to_edge(LEFT, buff=0.5)
        vector = NumericEmbedding(length=self.mat_dims[0])
        vector.match_height(matrix)
        vector.next_to(matrix, RIGHT)

        self.add(matrix)
        self.add(vector)
        show_matrix_vector_product(self, matrix, vector)
        self.wait()


class LongListOFQuestions(InteractiveScene):
    """Connect the entries of a word's vector to a column of neurons, each labeled with a question."""

    def construct(self):
        # Add word and vector
        word = Text("Queen")
        arrow = Vector(0.75 * RIGHT)
        vector = NumericEmbedding(length=12)
        vector.set_height(5)
        word_group = VGroup(word, arrow, vector)
        word_group.arrange(RIGHT, buff=0.15)
        word_group.to_edge(LEFT, buff=2.0)
        self.add(word_group)

        # Add neurons and questions
        questions = VGroup(map(Text, [
            "Is it English?",
            "Is it a noun?",
            "Does it refer to a person?",
            "Is it an amount?",
            "Is A tone assertive",
            "Is it a piece of a bigger word?",
            "Is it part of a quote?",
            "Is it part of a lie?",
        ]))
        questions.scale(0.75)
        n_questions = len(questions)
        neurons = VGroup(Circle(radius=0.2) for n in range(n_questions))
        neurons.add(Tex(R"\vdots", font_size=72))
        neurons.arrange_in_grid(n_questions, 1, buff_ratio=0.5)
        neurons.set_height(6)
        neurons.set_stroke(WHITE, 1)
        neurons.next_to(vector, RIGHT, buff=3.0)

        values = [0.9, 0.8, 0.85, 0.1, 0.5, 0.05, 0.2, 0.02]
        # zip stops at the shortest input, so the trailing \vdots gets no fill or question
        for neuron, question, value in zip(neurons, questions, values):
            neuron.set_fill(WHITE, value)
            question.next_to(neuron, RIGHT)

        # Add connections
        connections = VGroup(
            VGroup(
                Line(
                    elem.get_right(),
                    neuron.get_center(),
                    buff=neuron.get_width() / 2
                ).set_stroke(
                    color=value_to_color(random.uniform(-10, 10)),
                    width=2 * random.random()
                )
                for elem in vector.get_entries()
            )
            for neuron in neurons[:-1]
        )

        # Animate
        lag_ratio = 0.3
        self.play(
            LaggedStart(
                (ShowCreation(line_group, lag_ratio=0) for line_group in connections),
                lag_ratio=lag_ratio,
            ),
            LaggedStartMap(FadeIn, neurons, lag_ratio=lag_ratio),
            LaggedStartMap(FadeIn, questions, lag_ratio=lag_ratio),
            run_time=4
        )
        self.wait()


class ChatBotIcon(InteractiveScene):
    """Draw the ChatBot SVG at the right edge with a thick arrow pointing at it."""

    def construct(self):
        # Add bot
        bot = SVGMobject("ChatBot")
        bot.set_fill(GREY_B)
        bot[0].set_stroke(WHITE, 3)
        bot.set_height(3)
        bot.to_edge(RIGHT)

        arrow = Vector(
            1.5 * RIGHT,
            max_tip_length_to_length_ratio=0.4,
            max_width_to_length_ratio=9.0,
        )
        arrow.set_stroke(width=20)
        arrow.next_to(bot, LEFT).match_y(bot[0])

        self.play(
            ShowCreation(arrow),
            Write(bot),
        )
        self.wait()


class GamePlan(InteractiveScene):
    """Step through the video icons chapter by chapter, then swap them for thumbnails."""

    screen_opacity = 0.0

    def construct(self):
        # Set up icons
        self.add(FullScreenRectangle())
        videos = VideoIcon().get_grid(7, 1, buff_ratio=0.3)
        videos.set_fill(BLUE_B)
        videos.set_height(6.5)
        videos.to_corner(UL)
        column_x = videos.get_x()

        nn_vids = videos[:4]
        tr_vids = videos[4:]
        tr_vids.save_state()
        tr_vids.scale(1.25)
        tr_vids.space_out_submobjects(1.25)
        tr_vids.set_y(0).to_edge(LEFT)

        def highlight_video(video, group=videos):
            # Nudge `video` to the right at full opacity; dim the rest of `group`.
            for vid in group:
                vid.target = vid.generate_target()
                if vid is video:
                    vid.target.set_x(column_x + 0.5)
                    vid.target.set_opacity(1)
                else:
                    vid.target.set_x(column_x)
                    vid.target.set_opacity(0.5)
            return LaggedStartMap(MoveToTarget, group, lag_ratio=0.01, run_time=1)

        self.add(tr_vids)

        # Here now
        here_arrow = Vector(0.75 * LEFT, stroke_width=10)
        here_arrow.set_color(RED).next_to(tr_vids[0], RIGHT)
        here_words = Text("You are\nhere")
        here_words.next_to(here_arrow, RIGHT)
        here_words.set_color(RED)
        here_group = VGroup(here_arrow, here_words)

        self.play(
            highlight_video(tr_vids[0], tr_vids),
            MaintainPositionRelativeTo(here_group, tr_vids[0]),
            VFadeIn(here_group),
        )
        self.wait()

        # First chapter
        curly = self.get_curly_brace(tr_vids[0])
        topics = VGroup(
            Text("Beginning"),
            Text("Ending"),
            Text("Background material"),
            Text("Premise of deep learning"),
            Text("Word embeddings"),
            Text("Dot products"),
            Text("Softmax"),
        )
        topics.arrange(DOWN, buff=0.5, aligned_edge=LEFT)
        topics.next_to(curly.get_corner(UR), DR, buff=0.25)
        for topic in topics[-4:]:
            topic.scale(0.8, about_edge=LEFT)
            topic.shift(0.5 * RIGHT)
            dot = Dot(color=WHITE)
            dot.next_to(topic, LEFT)
            topic.add(dot)

        screen = ScreenRectangle()
        screen.set_fill(BLACK, 1)
        screen.set_stroke(WHITE, 2)
        screen.set_opacity(self.screen_opacity)
        screen.set_height(5)
        screen.next_to(topics[0], DOWN, aligned_edge=LEFT)

        self.play(
            FadeOut(here_group),
            ShowCreation(curly),
            FadeIn(screen, RIGHT),
            FadeInFromPoint(topics[0], here_group.get_center()),
        )
        self.wait()
        self.play(
            topics[0].animate.set_opacity(0.5),
            FadeIn(topics[1]),
            screen.animate.next_to(topics[1], DOWN, aligned_edge=LEFT)
        )
        self.wait()
        self.play(
            screen.animate.scale(0.5, about_edge=DR).to_edge(RIGHT),
            topics[1].animate.set_opacity(0.5),
            LaggedStartMap(FadeIn, topics[2:], shift=0.1 * DOWN, lag_ratio=0.5)
        )
        self.wait()

        # Second chapter
        new_curly = self.get_curly_brace(tr_vids[1].copy().shift(0.5 * RIGHT))
        screen.target = screen.generate_target()
        screen.target.set_height(5)
        screen.target.next_to(curly, RIGHT)
        att_title = Text("Attention")
        att_title.next_to(screen.target, UP, aligned_edge=LEFT)

        self.play(
            highlight_video(tr_vids[1], tr_vids),
            curly.animate.become(new_curly),
            FadeOut(topics),
            MoveToTarget(screen),
            FadeInFromPoint(att_title, tr_vids[1].get_center()),
        )
        self.wait()

        # Third chapter
        new_curly = self.get_curly_brace(tr_vids[2].copy().shift(0.5 * RIGHT))
        chapter3_topics = Text(
            "MLPs, Training, Positional encodings, ..."
        )
        chapter3_topics.next_to(screen, UP, aligned_edge=LEFT)

        self.play(
            highlight_video(tr_vids[2], tr_vids),
            curly.animate.become(new_curly),
            FadeOut(att_title),
            FadeIn(chapter3_topics, lag_ratio=0.1, time_span=(1, 3)),
        )
        self.wait()

        # Show earlier chapters
        prev_thumbnails = Group(
            ImageMobject(f"nn{k}_thumbnail.png")
            for k in range(1, 5)
        )
        prev_thumbnails.arrange(RIGHT, buff=1.0)
        prev_thumbnails.set_width(FRAME_WIDTH - 2)
        prev_thumbnails.move_to(2 * UP)

        # NOTE(review): machine-specific absolute path; this only resolves on the author's setup.
        tn_dir = "/Users/grant/3Blue1Brown Dropbox/3Blue1Brown/videos/2024/transformers/Thumbnails/"
        new_thumbnails = Group(
            ImageMobject(os.path.join(tn_dir, f"Chapter{n}"))
            for n in range(5, 8)
        )
        for tn1, tn2 in zip(prev_thumbnails, new_thumbnails):
            tn2.replace(tn1, stretch=True)
            tn2.next_to(tn1, DOWN, buff=1.0)

        chapter_titles = VGroup(
            Text(f"Chapter {k}", font_size=30)
            for k in range(1, 8)
        )
        for title, rect in zip(chapter_titles, (*prev_thumbnails, *new_thumbnails)):
            title.next_to(rect, UP, buff=0.2, aligned_edge=LEFT)

        tr_rect = SurroundingRectangle(
            Group(new_thumbnails, chapter_titles[4:]),
            buff=0.25
        )
        tr_rect.set_stroke(BLUE, 2)
        tr_label = Text("Transformers")
        tr_label.next_to(tr_rect, DOWN)

        self.play(
            FadeOut(curly),
            FadeOut(screen),
            FadeOut(chapter3_topics),
            LaggedStartMap(FadeIn, chapter_titles),
            FadeIn(prev_thumbnails, shift=0.5 * UP, lag_ratio=0.25),
            *(
                FadeTransform(vid, tn)
                for vid, tn in zip(tr_vids, new_thumbnails)
            ),
        )
        self.play(
            ShowCreation(tr_rect),
            FadeIn(tr_label),
        )
        self.wait()

    def get_curly_brace(self, video, width=2.0, height=6.5, buff=0.1):
        """Return two cubic Bezier curves fanning out from the right of `video`.

        Both curves start `buff` to the right of the video's right edge and
        end `width` further right, at y = +height/2 and y = -height/2.
        """
        start = video.get_right() + buff * RIGHT
        top_point = np.array([start[0] + width, 0.5 * height, 0])
        low_point = np.array([start[0] + width, -0.5 * height, 0])
        result = VGroup(
            CubicBezier(
                start,
                start + width * RIGHT,
                point + width * LEFT,
                point,
            )
            for point in [top_point, low_point]
        )
        result.set_stroke(GREY_A, 2)
        return result


class SkipAhead(TeacherStudentsScene):
    """Teacher and students exchange a couple of mode changes."""

    def construct(self):
        self.remove(self.background)
        morty = self.teacher
        self.play(
            morty.change("hesitant", self.students),
            self.change_students("confused", "pondering", "pondering", look_at=self.screen),
        )
        self.wait(2)
        self.play(self.change_students("confused", "tease", "well", look_at=morty.eyes))
        self.wait(5)


class SeaOfNumbersUnderlay(TeacherStudentsScene):
    """Sequence of pi creature reactions and glances around the frame."""

    def construct(self):
        # Test
        morty = self.teacher
        stds = self.students
        for pi in self.pi_creatures:
            pi.body.insert_n_curves(100)

        self.play(
            morty.change("pleading"),
            self.change_students("surprised", "horrified", "droopy")
        )
        self.look_at(3 * LEFT + 2 * UP)
        self.look_at(3 * RIGHT + 2 * UP)
        self.look_at(3 * LEFT + 2 * UP)

        self.play(
            morty.change("raise_right_hand", self.screen),
            self.change_students("hesitant", "pondering", "maybe", look_at=self.screen)
        )
        self.wait(2)
        self.play(self.change_students("erm", "pondering", "confused", look_at=self.screen))
        self.wait(2)
        self.look_at(5 * RIGHT + 2 * UP)
        self.play(self.change_students("hesitant", "pondering", "hesitant", look_at=5 * RIGHT + 2 * UP))
        self.wait(3)

        self.play(
            morty.change("well"),
            self.change_students("pondering", "pondering", "erm", look_at=self.screen)
        )
        self.wait(3)
        self.play(
            morty.change("raise_left_hand", look_at=5 * RIGHT + 3 * UP),
            self.change_students("tease", "thinking", "pondering", look_at=5 * RIGHT + 3 * UP)
        )
        self.wait(8)


class Outdated(TeacherStudentsScene):
    """Teacher holds up a GPT-3 label; a student asks whether it is outdated."""

    def construct(self):
        # Add label
        text = Text("GPT-3", font="Consolas", font_size=72)
        openai_logo = SVGMobject("OpenAI.svg")
        openai_logo.set_fill(WHITE)
        openai_logo.set_height(2.0 * text.get_height())
        gpt3_label = VGroup(openai_logo, text)
        gpt3_label.arrange(RIGHT)
        gpt3_label.scale(0.75)
        param_count = Text("175B Parameters")
        param_count.set_color(BLUE)
        param_count.next_to(gpt3_label, DOWN, aligned_edge=LEFT)
        gpt3_label.add(param_count)

        gpt3_label.move_to(self.hold_up_spot, DOWN)

        morty = self.teacher
        morty.body.insert_n_curves(100)
        self.play(
            morty.change("raise_right_hand"),
            FadeIn(gpt3_label, UP),
        )
        self.play(self.change_students("raise_left_hand", "hesitant", "sassy"))
        self.play(
            self.students[0].says(TexText("Isn't that outdated?"))
        )
        self.wait(3)


class ConvolutionComment(InteractiveScene):
    """Mortimer with a speech bubble contrasting convolutions with matrix-vector products."""

    def construct(self):
        # Test
        morty = Mortimer()
        morty.to_corner(DR)
        # NOTE(review): the literal is kept byte-for-byte as found; its internal
        # line breaks look like they were lost upstream -- confirm intended wrapping.
        bubble = morty.get_bubble(Text(""" In other models, the weighted sums can be grouped differently, e.g. as convolutions, but for Transformers it's always matrix-vector multiplication. 
""", font_size=36, alignment="LEFT"), bubble_type=SpeechBubble)

        self.add(bubble)
        self.play(morty.change("speaking"))
        for x in range(2):
            self.play(Blink(morty))
            self.wait()


class ConfusionAtScreen(TeacherStudentsScene):
    """Students look at the screen, then at the teacher."""

    def construct(self):
        self.play(
            self.teacher.change("well"),
            self.change_students("maybe", "confused", "concentrating", look_at=self.screen)
        )
        self.wait(2)
        self.play(
            self.teacher.change("tease"),
            self.change_students("hesitant", "plain", "erm", look_at=self.teacher.eyes)
        )
        self.wait(3)


class HoldUpExample(TeacherStudentsScene):
    """Teacher raises a hand while students look up and to the right."""

    def construct(self):
        self.background.set_fill(opacity=0.0)
        self.teacher.body.insert_n_curves(100)
        self.play(
            self.teacher.change("raise_right_hand"),
            self.change_students("happy", "hooray", "well", look_at=4 * UR)
        )
        self.wait(5)


class ReactToWordVectors(InteractiveScene):
    """Mortimer makes a remark about search; Randolph answers with a thought bubble of dots."""

    def construct(self):
        # Test
        morty = Mortimer().flip()
        randy = Randolph().flip()
        morty, randy = pis = VGroup(morty, randy)
        pis.arrange(RIGHT, buff=2.0)
        pis.to_edge(DOWN)
        randy.make_eye_contact(morty)
        self.add(pis)

        self.play(
            PiCreatureSays(
                morty, "This is how search\nworks you know!",
                target_mode="hooray",
                content_introduction_class=FadeIn,
                content_introduction_kwargs=dict(lag_ratio=0.1),
            ),
            randy.change("guilty"),
        )
        self.play(Blink(randy))
        self.wait()

        dots = Text(".....", font_size=120)
        dots[:1].set_opacity(0)
        dots[-1:].set_opacity(0)
        self.play(
            morty.debubble(),
            PiCreatureBubbleIntroduction(
                randy, dots,
                target_mode="confused",
                bubble_type=ThoughtBubble,
            ),
            morty.change("tease", look_at=6 * LEFT),
        )
        self.play(Blink(morty))
        self.wait()


class DimensionComparrison(InteractiveScene):
    """Split the frame into two titled columns: "3d vectors" and "Word vectors"."""

    def construct(self):
        titles = VGroup(
            Text("3d vectors"),
            Text("Word vectors"),
        )
        titles.scale(1.5)
        for title, vect in zip(titles, [LEFT, RIGHT]):
            title.move_to(vect * FRAME_WIDTH / 4)
            title.to_edge(UP, buff=MED_SMALL_BUFF)

        h_line = Line(LEFT, RIGHT)
        h_line.set_width(FRAME_WIDTH)
        h_line.next_to(titles, DOWN)
        h_line.set_x(0)
        v_line = Line(UP, DOWN)
        v_line.set_height(FRAME_HEIGHT)
        lines = VGroup(h_line, v_line)
        lines.set_stroke(GREY_B, 2)

        self.play(
            ShowCreation(lines, lag_ratio=0.5),
            LaggedStartMap(Write, titles, lag_ratio=0.5)
        )
        self.wait()


class AtLeastKindOf(TeacherStudentsScene):
    """Teacher says "...kind of" and the students react."""

    def construct(self):
        # Test
        morty = self.teacher
        stds = self.students
        self.play(
            morty.says("...kind of", mode="hesitant"),
            self.change_students("hesitant", "sassy", "erm", look_at=self.screen)
        )
        self.wait(3)
        self.play(
            self.change_students("sassy", "hesitant", "hesitant", look_at=morty.eyes),
            morty.change("guilty"),
        )
        self.wait(4)


class NetworkEndAnnotation(InteractiveScene):
    """Spotlight regions of the "NetworkEnd" image one after another using a cut-out overlay."""

    opacity = 0.5

    def construct(self):
        im = ImageMobject("NetworkEnd")
        im.set_height(FRAME_HEIGHT)
        self.add(im)

        # word by word
        prof = Text("Professor").set_height(0.25).move_to(np.array([4.77, 3.36, 0.]))
        hp = Text("Harry Potter").set_height(0.33).move_to(np.array([-5.58, 3.33, 0.]))
        lf = Text("least favourite").set_height(0.26).move_to(np.array([1.39, 3.35, 0]))
        snape = Rectangle(3.5, 0.3).move_to(np.array([5.0, 1.11, 0]))

        def get_inverse_rect(mob):
            # Full-screen rectangle with the box around `mob` appended as a
            # second, reversed path.
            big_rect = FullScreenFadeRectangle()
            big_rect.scale(1.1)
            lil_rect = SurroundingRectangle(mob)
            big_rect.start_new_path(lil_rect.get_points()[-1])
            big_rect.append_points(lil_rect.get_points()[-2::-1])
            big_rect.set_stroke(WHITE, 1)
            big_rect.set_fill(BLACK, self.opacity)
            return big_rect

        rects = VGroup(map(get_inverse_rect, [prof, hp, lf, snape]))

        rect = rects[0].copy()
        self.play(FadeIn(rect))
        self.wait()
        for rect2 in rects[1:]:
            self.play(Transform(rect, rect2))
            self.wait()
        self.play(FadeOut(rect))


class LowTempHighTempContrast(InteractiveScene):
    """Split the frame into two titled columns: "Temp = 0" and "Temp = 5"."""

    def construct(self):
        # Test
        titles = VGroup(
            Text("Temp = 0", font_size=72).set_x(-FRAME_WIDTH / 4),
            Text("Temp = 5", font_size=72).set_x(FRAME_WIDTH / 4),
        )
        titles.to_edge(UP, buff=0.25)
        h_line = Line(LEFT, RIGHT).set_width(FRAME_WIDTH)
        h_line.next_to(titles, DOWN, buff=0.1)
        v_line = Line(UP, DOWN).set_height(FRAME_HEIGHT)
        lines = VGroup(h_line, v_line)
        lines.set_stroke(GREY_B, 2)

        self.play(
            LaggedStartMap(FadeIn, titles, shift=0.25 * UP, lag_ratio=0.25),
            LaggedStartMap(Write, lines, lag_ratio=0.5),
            run_time=1
        )
        self.wait()


class Intuitions(TeacherStudentsScene):
    """Teacher holds up a list of four topics, each then marked with a green checkmark."""

    def construct(self):
        # Add words
        words = VGroup(
            Text("Structure of Deep Learning"),
            Text("Word embeddings"),
            Text("Dot products"),
            Text("Softmax"),
        )
        words.arrange(DOWN, buff=0.5, aligned_edge=LEFT)
        words.move_to(self.hold_up_spot, DOWN)
        checks = VGroup(
            Checkmark(font_size=72).next_to(word, LEFT)
            for word in words
        )
        checks.set_color(GREEN)

        morty = self.teacher
        self.play(
            LaggedStartMap(FadeIn, words, shift=UP, lag_ratio=0.1),
            morty.change("raise_right_hand"),
            self.change_students("thinking", "pondering", "well", look_at=words),
            run_time=1,
        )
        self.play(
            LaggedStartMap(Write, checks, lag_ratio=0.25, stroke_color=GREEN),
        )
        for pi in self.students:
            pi.body.insert_n_curves(100)
        self.play(
            self.change_students("tease", "thinking", "well")
        )
        self.wait(4)


class PiGesturingAtEarlyView(PiCreatureScene):
    """A grey-brown pi creature in the lower right cycles through five modes."""

    def construct(self):
        morty = self.pi_creature.flip()
        morty.to_corner(DR)
        morty.shift(0.5 * LEFT)
        morty.set_color(GREY_BROWN)
        morty.body.insert_n_curves(100)
        for mode in ["raise_right_hand", "well", "gracious", "well", "tease"]:
            self.play(morty.change(mode, ORIGIN + 2 * random.random() * UP))
            self.wait(3)


class EndScreen(PatreonEndScreen):
    """Alias subclass of the imported PatreonEndScreen; adds nothing."""
    pass


# Attention chapter


class HighlightAttentionTitle(TeacherStudentsScene):
    """Pull the paper title off the "AttentionPaper" image, isolate "Attention", then show a sentence about it."""

    def construct(self):
        # Add image
        im = ImageMobject("AttentionPaper")
        im.set_height(FRAME_HEIGHT)
        title = Text("Attention is All You Need")
        title.set_height(0.219)
        title.move_to(np.array([-0.037, 3.28, 0.0]))
        title.set_fill(BLACK, 1)

        self.clear()
        self.background.set_opacity(0)
        self.add(self.background, im)
        self.wait()

        morty = self.teacher
        for pi in self.pi_creatures:
            pi.body.insert_n_curves(100)
        self.play(
            im.animate.set_opacity(0.1),
            title.animate.set_fill(WHITE).scale(2).next_to(morty, UP, MED_LARGE_BUFF).to_edge(RIGHT),
            LaggedStartMap(VFadeIn, self.pi_creatures),
            morty.change("raise_right_hand"),
            self.change_students("pondering", "well", "thinking", look_at=self.hold_up_spot)
        )
        self.wait()

        # # Small transition
        # alt_title = Text("Attention is all\nyou need")
        # alt_title.move_to(4.68 * LEFT)
        # self.play(
        #     # TransformMatchingStrings(title, alt_title, run_time=1),
        #     FadeTransformPieces(title, alt_title, run_time=1),
        #     FadeOut(im, scale=2),
        #     self.change_students("pondering", "pondering", "pondering", look_at=alt_title),
        # )
        # self.wait()

        # Highlight attention
        att = title["Attention"][0]
        rest = title["is All You Need"][0]
        self.play(
            FlashAround(att, run_time=2),
            att.animate.set_color(YELLOW),
        )
        self.wait(2)
        self.play(
            att.animate.center().to_edge(UP),
            FadeOut(rest, DR),
            FadeOut(im, scale=1.5),
            self.background.animate.set_opacity(0.75),
            morty.change("tease", 3 * UP),
            self.change_students(None, None, "pondering", look_at=3 * UP)
        )
        self.look_at(3 * UL)
        self.wait()
        self.look_at(3 * UR)
        self.wait(2)

        # Key property
        sentence = Text("What makes Attention powerful is that it's parallelizable")
        sentence.move_to(UP)
        sent_att = sentence["Attention"]
        sent_par = sentence["parallelizable"]
        sent_att.set_opacity(0)
        sent_par.set_opacity(0)
        par_box = SurroundingRectangle(sent_par, buff=0)
        par_box.stretch(1.2, 1, about_edge=DOWN)
        par_box.set_stroke(width=0)
        par_box.set_fill(RED, 0.2)
        par_line = Underline(sent_par, stretch_factor=1)
        par_line.set_stroke(RED, 2)

        self.play(
            att.animate.replace(sentence["Attention"]),
            FadeIn(sentence, lag_ratio=0.1),
            morty.change("raise_right_hand", sentence),
            self.change_students("sassy", "confused", "pondering", look_at=sentence)
        )
        self.play(
            ShowCreation(par_line),
            morty.animate.look_at(par_line),
            FadeIn(par_box),
            self.change_students("pondering", look_at=par_line),
        )
        self.wait(5)


class ThinkOfMoreExamples(TeacherStudentsScene):
    """Students show confusion, then one asks what attention is supposed to do."""

    def construct(self):
        # Show general confusion
        morty = self.teacher
        for pi in self.pi_creatures:
            pi.body.insert_n_curves(100)
        self.play(
            morty.change("raise_right_hand"),
            self.change_students("confused", "maybe", "confused", look_at=3 * UP, run_time=2, lag_ratio=0.25),
        )
        self.wait(2)
        self.play(morty.change("guilty"))
        self.play(
            self.change_students("confused", "pleading", "concentrating", look_at=3 * UP, run_time=2, lag_ratio=0.25)
        )
        self.wait(3)
        self.play(
            self.change_students("maybe", "confused", "dejected", look_at=morty.eyes, lag_ratio=0),
            morty.change("well")
        )
        self.wait(2)

        # Ask about the goal
        self.wait()
        self.play(LaggedStart(
            self.students[2].says("What is attention\nsupposed to do?"),
            self.students[0].change("maybe"),
            self.students[1].change("pondering"),
            morty.change("tease"),
            lag_ratio=0.1
        ))
        self.wait(5)


class SimplerExample(TeacherStudentsScene):
    """Teacher says to take a simpler example."""

    def construct(self):
        self.play(
            self.change_students("pondering", "thinking", "pondering", look_at=self.screen)
        )
        self.play(
            self.teacher.says("Take a simpler\nexample"),
            self.change_students("pondering", look_at=self.teacher.eyes)
        )
        self.play(self.change_students("thinking", "well", "tease"))
        self.wait(6)


class NotQuiteTrue(InteractiveScene):
    """Mortimer says the previous statement is not quite true."""

    def construct(self):
        morty = Mortimer()
        morty.to_corner(DR)
        self.play(
            morty.says("Actually, that's not\nquite true!"),
            run_time=1
        )
        for x in range(2):
            self.play(Blink(morty))
            self.wait()


class ThisIsMadeUp(TeacherStudentsScene):
    """Teacher notes that the example is made up."""

    def construct(self):
        for pi in self.students:
            pi.change_mode("pondering").look_at(self.screen)
        self.play(
            self.teacher.says("This is a made-up\nmotivating example"),
            self.change_students("pondering", look_at=self.teacher.eyes)
        )
        self.play(self.change_students("well", "sassy", "guilty", look_at=self.teacher.eyes))
        self.wait(4)


class AskAboutOtherEmbeddings(TeacherStudentsScene):
    """A student asks what W_Q does to the non-nouns; the teacher shrugs."""

    def construct(self):
        # Test
        self.play(
            self.students[1].says(
                TexText(R"What does $W_Q$ do \\ to the non-nouns?"),
                mode="raise_left_hand"
            ),
            self.teacher.change("guilty"),
        )
        self.play(
            self.change_students("confused", None, "pondering", look_at=self.screen)
        )
        self.wait()
        self.play(self.teacher.change("shruggie"))
        self.play(
            self.change_students("sassy", "maybe", "sassy"),
        )
        self.wait(3)


class ShoutSoftmax(TeacherStudentsScene):
    """The third student says "Softmax!" while the others change mode."""

    def construct(self):
        self.play(LaggedStart(
            self.students[0].change("happy"),
            self.students[1].change("hooray"),
            self.students[2].says("Softmax!", mode="surprised", bubble_config=dict(buff=0.5, direction=LEFT)),
            self.teacher.change("well")
        ))
        self.wait(5)


class LeftArcSmaller(InteractiveScene):
    """Draw a single red arrow from RIGHT to LEFT along a half-turn arc."""

    def construct(self):
        # Test
        arrow = Arrow(RIGHT, LEFT, path_arc=1.0 * PI, stroke_color=RED, stroke_width=8)
        self.play(ShowCreation(arrow))
        self.wait()


class SetThemToZero(TeacherStudentsScene):
    """A student suggests setting entries to 0; the teacher replies they would not be normalized."""

    def construct(self):
        # Test
        self.play(
            self.students[0].says("Set them to 0?", mode="maybe"),
            self.students[1].change("pondering", look_at=self.screen),
            # Was a second animation on students[1]; the third student was never animated.
            self.students[2].change("pondering", look_at=self.screen),
        )
        self.wait()
        self.play(
            self.teacher.says("Then they wouldn't\nbe normalized", mode="tease"),
        )
        self.wait(3)


class CalledMasking(TeacherStudentsScene):
    """Teacher says this is called "masking"."""

    def construct(self):
        self.play(
            self.teacher.says(TexText(R"This is called\\``masking''")),
            self.change_students(
                "pondering", "confused", "erm",
                look_at=self.screen,
            )
        )
        self.wait(5)


class ReferenceLargerContextTechnologies(InteractiveScene):
    """Fade a list of attention-variant names in, then out."""

    def construct(self):
        # Test
        words = VGroup(
            Text("Sparse Attention Mechanisms"),
            Text("Blockwise Attention"),
            Text("Linformer"),
            Text("Reformer"),
            Text("Ring attention"),
            Text("Longformer"),
            Text("Adaptive Attention Span"),
            Tex(R"\vdots")
        )
        words.arrange(DOWN, aligned_edge=LEFT, buff=MED_LARGE_BUFF)
        words[-1].shift(0.5 * RIGHT)

        self.play(
            LaggedStartMap(FadeIn, words, shift=0.5 * DOWN, lag_ratio=0.5, run_time=4)
        )
        self.wait()
        self.play(
            LaggedStartMap(FadeOut, words, shift=RIGHT, lag_ratio=0.1)
        )
        self.wait()


class AskAboutCrossAttention(TeacherStudentsScene):
    """The middle student asks about cross-attention."""

    def construct(self):
        stds = self.students
        self.play(
            stds[0].change("hesitant", look_at=stds[1].eyes),
            stds[1].says("What about\ncross-attention?", bubble_config=dict(buff=0.5), mode="raise_left_hand"),
            stds[2].change("pondering", look_at=stds[1].eyes),
            self.teacher.change("well", look_at=stds[1].eyes)
        )
        self.wait(5)


class SelfVsCrossFrames(InteractiveScene):
    """Two side-by-side screens titled "Self-attention" and "Cross-attention"."""

    def construct(self):
        # Add screens
        self.add(FullScreenRectangle())
        screens = ScreenRectangle().replicate(2)
        screens.set_fill(BLACK, 1)
        screens.set_stroke(WHITE, 2)
        screens.set_height(0.45 * FRAME_HEIGHT)
        screens.arrange(RIGHT, buff=0.5)
        self.add(screens)

        # Add titles
        titles = VGroup(
            Text("Self-attention", font_size=60),
            Text("Cross-attention", font_size=60),
        )
        for title, screen in zip(titles, screens):
            title.next_to(screen, UP, buff=MED_LARGE_BUFF)

        self.play(Write(titles[0]))
        self.wait()
        self.play(TransformMatchingStrings(titles[0].copy(), titles[1]))
        self.wait()


class OngoingTranscription(InteractiveScene):
    """Add a phrase word by word, waiting 0.1 per element of each word."""

    def construct(self):
        phrase = Text("or maybe audio input of speech, and an ongoing transcription")
        words = break_into_words(phrase)
        for word in words:
            self.add(word)
            self.wait(0.1 * len(word))
        self.wait()


class ReferenceStraightforwardValueMatrix(TeacherStudentsScene):
    """Teacher raises a hand, then turns hesitant, with students looking up-right."""

    def construct(self):
        morty = self.teacher
        morty.body.insert_n_curves(100)
        self.play(
            morty.change("raise_right_hand"),
            self.change_students("happy", "well", "tease", look_at=3 * UR)
        )
        self.wait(3)
        self.play(
            morty.change("hesitant"),
            self.change_students("erm", "hesitant", "guilty", look_at=3 * UR)
        )
        self.wait(5)


class SeekingMatchedParameters(TeacherStudentsScene):
    """Teacher holds up "# Value params = (# Query params) + (# Key params)"."""

    def construct(self):
        # Test
        for pi in self.pi_creatures:
            pi.body.insert_n_curves(100)
        equation = VGroup(
            Text("# Value params").set_color(RED),
            Tex("=", font_size=72).rotate(PI / 2),
            Text("(# Query params) + (# Key params)"),
        )
        equation[2].scale(0.75)
        equation[2]["# Query params"].set_color(YELLOW)
        equation[2]["# Key params"].set_color(TEAL)
        equation.arrange(DOWN, buff=MED_LARGE_BUFF)
        equation.move_to(self.hold_up_spot, DOWN)

        self.play(
            self.teacher.change("raise_right_hand", equation),
            FadeIn(equation, UP),
            self.change_students("erm", "confused", "sassy", look_at=equation),
        )
        self.wait(2)
        self.play(
            self.change_students("pondering", "confused", "hesitant", look_at=self.screen)
        )
        self.wait(4)
        self.play(
            self.change_students("erm", "confused", "sassy", look_at=equation)
        )
        self.wait(4)


class HeadName(InteractiveScene):
    """Write "One head of attention" and highlight the word "head"."""

    def construct(self):
        # Test
        title = Text("One head of attention", font_size=72)
        title.to_edge(UP)
        head = title["head"][0]

        self.play(
            Write(title, run_time=1)
        )
        self.play(
            FlashAround(head, time_width=2, run_time=2),
            head.animate.set_color(YELLOW),
        )
        self.wait()


class DInputAndOutputOfValue(InteractiveScene):
    """Show "d_input" and "d_output" labels, each with the value 12,288 and an arrow."""

    def construct(self):
        # Test
        d_embed = 12_288
        in_label, out_label = [
            VGroup(Text(text), Integer(d_embed))
            for text in ["d_input", "d_output"]
        ]
        for label, shift in [(in_label, LEFT), (out_label, RIGHT)]:
            label.arrange(DOWN)
            label.scale(0.65)
            label.next_to(ORIGIN, UP, buff=LARGE_BUFF)
            label.shift(1.0 * shift)
            arrow = Arrow(label, 0.5 * shift)
            label.add(arrow)

        self.play(FadeIn(in_label, lag_ratio=0.1))
        self.wait()
        self.play(FadeIn(out_label, lag_ratio=0.1))
        self.wait()


class NowRepeatManyTimes(TeacherStudentsScene):
    """Teacher says to do that about 10,000 times."""

    def construct(self):
        # Test
        self.play(
            self.change_students("pondering", "pondering", "pondering", look_at=self.screen),
        )
        self.wait()
        self.play(
            self.teacher.says("Now do that about\n10,000 times"),
            self.change_students("droopy", "erm", "well", look_at=self.teacher.eyes)
        )
        self.wait(5)


class ALotToHoldInYouHead(TeacherStudentsScene):
    """Teacher says it's a lot to hold in your head."""

    def construct(self):
        self.play(
            self.teacher.says("It's a lot to\nhold in your head!", mode="surprised"),
            self.change_students("confused", "erm", "dejected", look_at=self.screen),
        )
        self.wait(5)


class ReactToMHSA(TeacherStudentsScene):
    """Two rounds of student reactions while the teacher looks hesitant."""

    def construct(self):
        self.play(
            self.teacher.change("hesitant"),
            self.change_students("sad", "confused", "dejected", look_at=self.screen)
        )
        self.wait(3)
        self.play(
            self.change_students("guilty", "maybe", "erm")
        )
        self.wait(3)


class AskAboutOutput(TeacherStudentsScene):
    """The middle student asks about the Output matrix."""

    random_seed = 3

    def construct(self):
        morty = self.teacher
        stds = self.students
        self.play(
            stds[0].change("hesitant", look_at=stds[1].eyes),
            stds[1].says("What about the\nOutput matrix?", mode="raise_left_hand"),
            stds[2].change("hesitant", look_at=stds[1].eyes),
        )
        self.play(
            morty.change("concentrating")
        )
        self.play(Blink(morty))
        self.wait(5)


class OneThirdOfWhatYouNeed(InteractiveScene):
    """Cross out "All" in the paper title and replace it with "About 1/3 of What"."""

    def construct(self):
        # Test
        self.add(FullScreenRectangle().fix_in_frame())
        title = Text("Attention is All You Need", font_size=72)
        all_word = title["All"][0]
        cross = Line(all_word.get_left(), all_word.get_right())
        cross.set_stroke(RED, 8)
        correction = Text("About 1/3 of What", font_size=60)
        correction.set_color(RED)
        correction.next_to(all_word, UP, MED_LARGE_BUFF)
        lines = VGroup(
            CubicBezier(
                all_word.get_corner(UP + v),
                all_word.get_corner(UP + v) + 0.5 * UP,
                correction.get_corner(DOWN + v) + 0.5 * DOWN,
                correction.get_corner(DOWN + v),
            )
            for v in [LEFT, RIGHT]
        )
        lines.set_stroke(RED, 2)

        self.add(title)
        self.wait()
        self.add(all_word, cross)
        self.play(ShowCreation(cross), all_word.animate.set_fill(opacity=0.5))
        self.play(
            FadeTransform(all_word.copy(), correction),
            ShowCreation(lines, lag_ratio=0),
        )
        self.wait()
        self.play(self.frame.animate.set_y(-3.75).set_height(11), run_time=2)
        self.wait()


class MoreResourcesBelow(InteractiveScene):
    """Show "More resources below" with three downward arrows and Mortimer looking down."""

    def construct(self):
        # Test
        self.add(FullScreenRectangle())
        words = Text("More resources below", font_size=72)
        words.move_to(UP)
        arrows = Vector(1.5 * DOWN, stroke_width=10).get_grid(1, 3, buff=1.5)
        arrows.next_to(words, DOWN, buff=MED_LARGE_BUFF)
        morty = Mortimer()
        morty.body.insert_n_curves(100)
        morty.to_corner(DR)

        self.add(words)
        self.play(
            LaggedStartMap(GrowArrow, arrows, lag_ratio=0.5),
            morty.change("thinking", look_at=4 * DOWN)
        )
        self.play(Blink(morty))
        self.wait()


class PatreonEndScreen(EndScreen):
    """Rebinds the name PatreonEndScreen to an empty subclass of EndScreen."""
    pass


# MLP Chapter


class HowAndWhere(TeacherStudentsScene):
    """Students ask "How?" and "Where?"."""

    def construct(self):
        # Test
        self.remove(self.background)
        self.play(
            self.students[1].says("How?", mode="raise_left_hand", look_at=self.screen),
            self.teacher.change("tease"),
            self.students[2].change("pondering", look_at=self.screen),
        )
        self.play(
            self.students[0].says("Where?", mode="maybe", look_at=self.screen),
        )
        self.wait(3)


class IntroducingMLPs(TeacherStudentsScene):
    """Contrast "Computation" (checkmark) with "Interpretation" (warning sign)."""

    def construct(self):
        # Look at screen
        morty = self.teacher
        screen = self.screen
        self.play(
            morty.change("raise_right_hand", screen),
            self.change_students("pondering", "confused", "pondering", look_at=screen),
        )
        self.wait(2)

        # Computation vs. interpretation
        words = VGroup(Text("Computation"), Text("Interpretation"))
        words.scale(1.5)
        words.arrange(DOWN, buff=1.0, aligned_edge=LEFT)
        words.to_corner(UR).shift(LEFT)
        check = Checkmark()
        check.match_height(words[0])
        check.next_to(words[0], RIGHT)
        check.set_color(GREEN)
        warning = SVGMobject("warning")
        warning.set_color(RED)
        warning.match_height(words[1])
        warning.next_to(words[1], RIGHT)

        self.play(
            FadeIn(words[0], UP),
            self.change_students("tease", "happy", "thinking", look_at=words[0]),
            morty.change("raise_left_hand", words[0]),
        )
        self.play(Write(check, stroke_color=GREEN))
        self.wait(3)
        self.play(
            FadeIn(words[1], UP),
            # words[1] was previously passed positionally, unlike every other
            # change_students call in this file.
            self.change_students("erm", "confused", "pondering", look_at=words[1]),
            morty.change("maybe", words[1])
        )
        self.play(Write(warning, stroke_color=RED))
        self.wait(5)


class ReferenceFactStorage(TeacherStudentsScene):
    """Teacher holds up the words "Store a fact", then lets them fade."""

    def construct(self):
        # Look at screen
        morty = self.teacher
        screen = self.screen
        self.play(
            morty.change("raise_right_hand", screen),
            self.change_students("pondering", "confused", "pondering", look_at=screen),
        )
        self.wait(4)

        # Hold up words
        words = Text("Store a fact", font_size=72)
        words.next_to(morty, UP, LARGE_BUFF).shift_onto_screen()

        self.play(
            morty.change("raise_left_hand"),
            FadeIn(words, UP),
            self.change_students("erm", "maybe", "sassy", look_at=morty.eyes),
        )
        self.look_at(self.screen)
        self.wait(3)
        self.play(morty.change("tease"))
        self.wait(2)

        # Relax
        self.play(
            FadeOut(words, DOWN),
            self.change_students("pondering", "tease", "happy", look_at=self.screen),
        )
        self.wait(3)
class LookingAtPreview(TeacherStudentsScene):
    """Students react to a thought bubble pinned to the third student."""

    def construct(self):
        # Test
        bubble = ThoughtBubble(filler_shape=(5, 2.5))
        bubble.flip()
        bubble.pin_to(self.students[2])
        bubble.to_edge(LEFT)
        self.play(
            self.teacher.change("raise_right_hand"),
            self.change_students("pondering", "confused", "pondering", look_at=bubble),
        )
        self.play(FadeIn(bubble, lag_ratio=0.1))
        self.play(self.teacher.change("tease"))
        self.wait(2)
        self.play(self.change_students("erm", "pondering", "thinking", look_at=bubble))
        self.wait(3)


class EmbeddingLabel(InteractiveScene):
    """Brace a ghost vector, label it "Embedding", then count up to 12288."""

    def construct(self):
        # Background
        # NOTE(review): absolute path on the author's machine. The image is
        # sized but never added (the add call below is commented out);
        # presumably constructing it still needs the file to exist -- confirm.
        bg = ImageMobject("/Users/grant/3Blue1Brown Dropbox/3Blue1Brown/videos/2024/transformers/images/EmbeddingStill.jpg")
        bg.set_height(FRAME_HEIGHT)
        # self.add(bg)

        # Label
        ghost_vect = Rectangle()
        ghost_vect.set_shape(0.8, 4)
        ghost_vect.move_to([4.25, -1.0, 0])
        brace = Brace(ghost_vect, LEFT)
        name = brace.get_text("Embedding")
        length = Integer(12288)
        length.next_to(brace, LEFT, buff=0.5).shift(0.25 * UP)
        numbers_label = Text("Numbers")
        numbers_label.next_to(length, DOWN)
        gpt3_label = Text("(Length in GPT-3)", font_size=24)
        gpt3_label.next_to(length, UP, buff=1.0)
        gpt3_label.set_color(YELLOW)
        arrow = Arrow(gpt3_label.get_bottom(), length.get_top(), buff=0.1)

        self.play(
            GrowFromCenter(brace),
            Write(name)
        )
        self.wait()
        self.play(
            FadeTransform(name, numbers_label),
            CountInFrom(length, 0, run_time=1.5),
        )
        self.play(
            FadeIn(gpt3_label, lag_ratio=0.1),
            GrowFromCenter(arrow),
        )
        self.wait(1.5)


class ThatWhichDoesntKillHeader(InteractiveScene):
    """Static header sentence with the last "you" boxed and braced."""

    def construct(self):
        # Test
        words = Text("That which does not kill you only makes you")
        words.to_edge(UP)
        rect = SurroundingRectangle(words["you"][-1], buff=0.1)
        rect.set_stroke(BLUE, 3)
        rect.set_fill(BLUE, 0.5)
        # The arrow is built but not added to the scene below.
        arrow = Arrow(rect.get_bottom(), rect.get_bottom() + 2 * DL)
        arrow.match_color(rect)
        brace = Brace(rect, DOWN, buff=0.1)
        self.add(rect, words, brace)


class QuickAttentionDescription(InteractiveScene):
    """Mortimer says "Incorporate context", then blinks twice."""

    def construct(self):
        # To be added standing on an Attention block
        morty = Mortimer(height=2)
        morty.move_to(DOWN + LEFT)
        morty.flip()
        self.play(morty.says("Incorporate context", look_at=4 * DOWN))
        for _ in range(2):
            self.play(Blink(morty))
            self.wait(2)


class QuickMLPDescription(InteractiveScene):
    """A grey Mortimer says "More computation", then blinks twice."""

    def construct(self):
        # To be added standing on an MLP block
        morty = Mortimer(height=2, color=GREY_C)
        morty.move_to(DOWN + RIGHT)
        self.play(morty.says("More\ncomputation", mode="maybe", look_at=4 * DOWN))
        for _ in range(2):
            self.play(Blink(morty))
            self.wait(2)


class ContrastBetweenSimpleComputationDifficultInterpretation(InteractiveScene):
    """Mortimer raises "Computation" on one side, then "Interpretation" on the other."""

    def construct(self):
        morty = Mortimer(height=2).to_edge(DOWN, buff=1.0)
        morty.body.insert_n_curves(100)
        items = VGroup(
            VGroup(Text("Computation"), Checkmark().set_height(0.5).set_color(GREEN)),
            VGroup(Text("Interpretation"), SVGMobject("warning").set_color(RED).set_height(0.5)),
        )
        for item, vect in zip(items, [LEFT, RIGHT]):
            item.scale(0.75)
            item.arrange(RIGHT)
            item.next_to(morty, UP + vect, buff=0.5)
            item.shift(-1.0 * vect * RIGHT)

        self.play(
            morty.change("raise_right_hand", items[0]),
            FadeIn(items[0], UP)
        )
        self.play(Blink(morty))
        self.wait()
        self.play(
            morty.change("raise_left_hand", items[1]),
            FadeIn(items[1], UP),
            items[0].animate.fade(0.5),
        )
        self.play(Blink(morty))
        self.wait()


class AmbientChangingDots(InteractiveScene):
    """A 20 x 30 grid of dots whose fill opacities oscillate over time."""

    def construct(self):
        # Test
        dots = Dot().get_grid(20, 30)
        dots.set_height(8)
        dots.set_fill(opacity=0.5)
        # Each dot gets its own random phase and frequency.
        dots.phases = np.random.uniform(0, TAU, len(dots))
        dots.freqs = np.random.uniform(0.3, 0.8, len(dots))

        def update_dots(dots):
            # Opacity follows cos^2 of the scene time, so it stays in [0, 1].
            for dot, phase, freq in zip(dots, dots.phases, dots.freqs):
                dot.set_fill(opacity=np.cos(phase + freq * self.time)**2)
            return dots

        dots.add_updater(update_dots)
        self.add(dots)
        self.wait(30)


class MakeSomeAssumptions(TeacherStudentsScene):
    """The teacher announces "We need a few assumptions"."""

    def construct(self):
        # Test
        self.play(
            self.teacher.says("We need a\nfew assumptions"),
            self.change_students("pondering", "sassy", "tease", look_at=self.screen)
        )
        self.play(self.teacher.change("raise_right_hand"))
        self.wait(6)


class WhatAboutBiggerThanOne(TeacherStudentsScene):
    """A student asks "And if it's bigger than 1?" and the teacher deflects."""

    def construct(self):
        # Test
        self.screen.set_x(0)
        self.play(
            self.students[0].change("pondering", self.screen),
            self.students[1].says("And if it's\nbigger than 1?", mode="sassy", bubble_direction=RIGHT),
            self.students[2].change("erm", self.screen),
            self.teacher.change("guilty"),
        )
        self.wait(2)
        self.play(
            # self.teacher.says("Don't worry\nabout it", mode="maybe")
            self.teacher.change("maybe")
        )
        self.play(
            self.change_students("hesitant", "sassy", "angry")
        )
        self.wait(3)


class HighlightRect(InteractiveScene):
    """Draw a single maroon-stroked rectangle."""

    def construct(self):
        rect = Rectangle(1, 3)
        rect.scale(0.5)
        rect.set_stroke(MAROON_B, 3)
        self.play(ShowCreation(rect))
        self.wait()


class AskWhy(TeacherStudentsScene):
    """The third student asks "Um, Why?"."""

    def construct(self):
        self.remove(self.background)
        # Test
        for pi in self.pi_creatures:
            pi.body.insert_n_curves(100)
        self.play(
            self.teacher.change("guilty"),
            self.students[2].says(Text("Um, Why?", font_size=72), mode="maybe", look_at=self.screen),
            self.students[0].change("confused", self.screen),
            self.students[1].change("erm", self.screen),
        )
        self.wait()
        self.play(self.teacher.change("tease"))
        self.wait(2)


class GPT3Logo(InteractiveScene):
    """Show the OpenAI logo next to a "GPT-3" label at the top edge."""

    def construct(self):
        # Test
        gpt3_label = Text("GPT-3", font="Consolas", font_size=72)
        openai_logo = SVGMobject("OpenAI.svg")
        openai_logo.set_fill(WHITE)
        openai_logo.set_height(2.0 * gpt3_label.get_height())
        title = VGroup(openai_logo, gpt3_label)
        title.arrange(RIGHT)
        title.to_edge(UP)

        self.play(
            FadeIn(gpt3_label, lag_ratio=0.1),
            Write(openai_logo, stroke_color=BLUE, stroke_width=0.5),
        )
        self.wait()


class AndGate(InteractiveScene):
    """Write the "and_gate" SVG with an "AND Gate" label to its right."""

    def construct(self):
        # Test
        gate = SVGMobject("and_gate")
        gate.set_fill(WHITE).set_stroke(width=0)
        name = Text("AND\nGate", font_size=96, alignment="LEFT")
        name.next_to(gate, RIGHT, LARGE_BUFF)
        self.play(
            Write(gate),
            FadeIn(name, lag_ratio=0.1, time_span=(0, 2)),
            run_time=3
        )
        self.wait()


class MJFactsAsVectorSum(InteractiveScene):
    """Stack fact vectors vertically, joining them one at a time with plus signs."""

    def construct(self):
        # Test
        facts = VGroup(
            Tex(Rf"\overrightarrow{{\text{{{fact}}}}}")
            for fact in [
                "Basketball",
                "Chicago Bulls",
                "Number 23",
                "Born 1963",
            ]
        )
        facts.add(Tex(R"\vdots"))
        facts.arrange(DOWN, buff=0.75)
        colors = ["#F88158", "#CE1141", YELLOW, GREY, WHITE]
        for fact, color in zip(facts, colors):
            fact.set_color(color)
        # One plus sign halfway between each pair of neighbouring facts.
        plusses = Tex(R"+").replicate(len(facts) - 1)
        for f1, f2, plus in zip(facts, facts[1:], plusses):
            plus.move_to(midpoint(f1.get_bottom(), f2.get_top()))

        self.add(facts[0])
        for fact, plus in zip(facts[1:], plusses):
            self.play(
                FadeIn(fact, shift=0.5 * DOWN),
                Write(plus),
                run_time=1,
            )
        self.wait()


class AskAboutBias(TeacherStudentsScene):
    """The third student asks "What's that bias doing?"."""

    def construct(self):
        for pi in self.pi_creatures:
            pi.body.insert_n_curves(100)
        self.play(
            self.students[0].change("erm", look_at=self.screen),
            self.students[1].change("confused", look_at=self.screen),
            self.students[2].says("What's that\nbias doing?", look_at=self.screen, bubble_direction=LEFT),
        )
        self.play(
            self.teacher.change("maybe")
        )
        self.wait(4)
        self.play(
            self.change_students("sassy", "maybe", "pondering", look_at=self.screen)
        )
        self.wait(4)


class ThatsIt(TeacherStudentsScene):
    """The teacher says "That's it!", then drops the bubble."""

    def construct(self):
        for pi in self.pi_creatures:
            pi.body.insert_n_curves(100)
        self.play(
            self.teacher.says("That's it!", mode="hooray", look_at=self.students),
            self.change_students("happy", "thinking", "well", look_at=self.screen)
        )
        self.wait()
        self.play(self.teacher.debubble(mode="raise_right_hand", look_at=self.screen))
        self.wait()
        self.play(
            self.change_students("thinking", "tease", "happy", look_at=self.screen)
        )
        self.wait(3)


class AddTwoMatrixSizes(InteractiveScene):
    """Draw a blue rectangle with the integer 2 * 4 * 12288**2 above it."""

    def construct(self):
        # Test
        rect = Rectangle(3.0, 1.0)
        rect.set_stroke(BLUE, 3)
        total = Integer(2 * 4 * (12288**2))
        total.set_color(BLUE)
        total.next_to(rect, UP)

        self.play(
            ShowCreation(rect),
            Write(total)
        )
        self.wait()


class ReflectOnTwoThings(TeacherStudentsScene):
    """List two reflection points: a row of dials and a rotating clump of vectors."""

    def construct(self):
        # Initial reactions
        morty = self.teacher
        screen = self.screen
        stds = self.students

        morty.change_mode("raise_right_hand").look_at(self.screen)
        for std in stds:
            std.change_mode("happy")

        self.play(
            self.change_students("pondering", "thinking", "happy", look_at=screen)
        )
        self.wait(2)

        # Reflection points
        points = VGroup(
            Text("Two points of reflection"),
            Text("1."),
            Text("2."),
        )
        points[0].add(Underline(points[0], buff=-0.05))
        points[0].scale(1.25)
        points[0].set_color(YELLOW)

        dials = VGroup(Dial(initial_value=random.random()) for _ in range(10))
        dials.set_height(0.5)
        dials.arrange(RIGHT)
        dials.set_flat_stroke(True)
        # The second-to-last dial is swapped for an ellipsis.
        dials[-2].become(Tex(R"\dots").replace(dials[-2], dim_to_match=0))
        dials.next_to(points[1], RIGHT)
        points[1].add(dials)

        vectors = self.get_vectors()
        points[2].add(vectors)

        points.arrange(DOWN, aligned_edge=LEFT, buff=0.5)
        points.to_edge(UP)
        points[1:].shift(MED_SMALL_BUFF * RIGHT + 0.5 * DOWN)

        self.play(
            morty.change("tease", points[0]),
            self.change_students("erm", "plain", "hesitant", look_at=points[0]),
            Write(points[0], stroke_color=YELLOW_B),
        )
        self.wait(2)
        self.play(
            Write(points[1][:2]),
            LaggedStartMap(FadeIn, dials, lag_ratio=0.25),
            self.change_students("tease", "plain", "erm", look_at=points[1]),
            morty.change("raise_right_hand", points[1]),
        )
        self.wait(2)

        # Show vector clump
        self.play(
            VFadeIn(points[2]),
            Rotate(vectors, PI, axis=UP, run_time=8),
            self.change_students("confused", "hesitant", "erm", look_at=points[2]),
            morty.change("surprised", points[2]),
        )
        self.wait(3)

    def get_vectors(self):
        """Return a group with one vector per distinct dodecahedron face anchor.

        Each vector ends at an anchor point of a ``Dodecahedron`` face and
        gets a random bright color. The group is rotated 25 degrees about
        ``UR`` and scaled to height 1.5 before being returned.
        """
        dodec = Dodecahedron()
        vectors = VGroup()
        for face in dodec:
            for vert in face.get_anchors():
                # Faces share anchor points; skip any already used as an end.
                already_used = any(
                    np.isclose(vert, v.get_end()).all()
                    for v in vectors
                )
                if not already_used:
                    vect = Vector(vert)
                    vect.set_color(random_bright_color(hue_range=(0.5, 0.7)))
                    vect.always.set_perpendicular_to_camera(self.frame)
                    vectors.add(vect)
        vectors.rotate(25 * DEGREES, axis=UR)
        vectors.set_height(1.5)
        return vectors


class RotatingVectors(ReflectOnTwoThings):
    """Rotate the vector clump from ``get_vectors`` through a full turn."""

    def construct(self):
        self.clear()
        # Test
        vectors = self.get_vectors()
        vectors.set_height(4)
        self.play(
            Rotate(vectors, TAU, axis=UP, run_time=25, rate_func=linear),
        )


class AskIfThisIsReal(TeacherStudentsScene):
    """A student asks whether this is how ChatGPT stores facts; the teacher hedges."""

    def construct(self):
        morty = self.teacher
        stds = self.students

        # Test
        self.play(
            stds[0].says("Is this how ChatGPT stores facts?"),
            morty.change("guilty"),
        )
        self.look_at(self.screen)
        self.wait()
        self.play(
            self.change_students("speaking", "pondering", "skeptical", look_at=self.screen)
        )
        self.wait(2)
        self.play(
            morty.says("Almost certainly\nnot quite...", mode="maybe"),
            stds[1].change("angry"),
            stds[2].change("erm"),
        )
        self.wait(4)


class SingleNeuronVsMultiple(InteractiveScene):
    """Compare one lit middle-layer neuron with a copy where many are lit."""

    def construct(self):
        # Add network
        radius = 0.1
        layers = VGroup(
            Dot(radius=radius).get_grid(n, 1, buff=radius)
            for n in [8, 16, 8]
        )
        layers.arrange(RIGHT, buff=2.0)
        layers.set_stroke(WHITE, 1)
        for layer in layers:
            for dot in layer:
                dot.set_fill(opacity=random.random())
        # NOTE(review): copies this method's locals into module globals;
        # looks like a workaround for interactive sessions -- confirm before
        # removing.
        globals().update(locals())
        connections = VGroup(
            get_network_connections(layers[i], layers[i + 1])
            for i in (0, 1)
        )
        network = VGroup(layers, connections)
        network.set_height(5)
        network.center()

        self.add(network)

        # Show first neuron light up
        rect = SurroundingRectangle(layers[1][0])
        name = Text("Michael Jordan")
        name.next_to(rect, UP, SMALL_BUFF)
        name.save_state()
        # Letters start invisible on the first-layer dots; Restore below
        # brings them back to the saved position above the rectangle.
        for letter, dot in zip(*make_even(name, layers[0])):
            letter.move_to(dot)
            letter.set_opacity(0)
        thick_connections = connections.copy()
        for group in thick_connections:
            for line in group:
                line.set_stroke(width=2 * line.get_stroke_width(), opacity=1)
                line.insert_n_curves(20)
        self.play(
            LaggedStartMap(
                VShowPassingFlash,
                thick_connections[0],
                lag_ratio=1 / len(thick_connections[0]),
                time_width=2.0,
            ),
            layers[1][0].animate.set_fill(opacity=1),
            layers[1][1:].animate.set_fill(opacity=0),
            Restore(name, lag_ratio=0.05),
            run_time=2
        )
        self.play(ShowCreation(rect))
        self.wait()

        network.add(rect, name)

        # Split the image
        network_copy = network.copy()
        for dot in network_copy[0][1]:
            dot.set_fill(opacity=random.random())
        network_copy.to_edge(RIGHT)
        # In the copy the rectangle surrounds the whole middle layer.
        network_copy[-2].become(SurroundingRectangle(network_copy[0][1]))

        v_line = Line(UP, DOWN).set_height(FRAME_HEIGHT)
        v_line.set_stroke(WHITE, 2)

        check = Checkmark().set_fill(GREEN).scale(2)
        ex = Exmark().set_fill(RED).scale(2)
        check.next_to(network_copy[-1], RIGHT)
        ex.move_to(check).shift(0.5 * FRAME_WIDTH * LEFT)

        self.play(
            network.animate.to_edge(LEFT),
            TransformFromCopy(network, network_copy),
            ShowCreation(v_line),
        )
        self.play(LaggedStart(
            Write(ex, stroke_color=RED),
            Write(check, stroke_color=GREEN),
            lag_ratio=0.5
        ))
        self.wait()


class WriteSuperposition(InteractiveScene):
    """Fade in the word "Superposition" with a teal outline flash."""

    def construct(self):
        # Test
        word = Text("Superposition", font_size=120)
        outline = word.copy()
        outline.set_stroke(TEAL, 3)
        outline.set_fill(opacity=0)
        self.play(
            FadeIn(word, lag_ratio=0.1),
            LaggedStartMap(
                VShowPassingFlash,
                outline,
                time_width=2,
                run_time=5,
                lag_ratio=0.01
            )
        )
        self.wait()


class JohnsonLindenstraussName(InteractiveScene):
    """Show the lemma's name followed by a right-arrow symbol."""

    def construct(self):
        # Test
        text = VGroup(
            Text("Johnson–Lindenstrauss\nLemma"),
            Tex(R"\Rightarrow", font_size=120),
        )
        text[0].set_color(RED_B)
        text.arrange(RIGHT, buff=MED_LARGE_BUFF)
        self.play(
            FadeIn(text[0], lag_ratio=0.1),
            Write(text[1], run_time=1)
        )


class ContrastGPTDimensionSizes(InteractiveScene):
    """Reveal GPT-2, GPT-3 and GPT-4 in turn, each with a "Model dim" label."""

    def construct(self):
        # Setup
        # The logo is loaded and styled but not added anywhere below.
        openai_logo = SVGMobject("OpenAI.svg")
        openai_logo.set_fill(WHITE)
        openai_logo.set_height(1.0)

        model_names = VGroup(
            Text("GPT-2"),
            Text("GPT-3"),
            Text("GPT-4"),
        )
        model_names.scale(1.25)
        model_names.arrange(RIGHT, buff=2.0)
        model_names.set_color(GREY_A)
        arrows = VGroup(
            Arrow(n1, n2, buff=0.25)
            for n1, n2 in zip(model_names, model_names[1:])
        )
        dim_counts = VGroup(
            Text(f"Model dim: {dim}", font_size=36)
            for dim in ["768", "12,288", "???"]
        )
        for model, count in zip(model_names, dim_counts):
            count.next_to(model, DOWN)

        # An invisible leading arrow gives the first model an arrow too, so
        # the three groups zip together one-to-one.
        arrows.add_to_back(Arrow().set_opacity(0))
        for name, count, arrow in zip(model_names, dim_counts, arrows):
            self.play(
                FadeIn(name),
                FadeIn(count, 0.5 * DOWN),
                GrowArrow(arrow)
            )
            self.wait()


class ReferenceSAP(TeacherStudentsScene):
    """A student asks how to test this; the teacher cites Sparse Autoencoder research."""

    def construct(self):
        morty = self.teacher
        stds = self.students
        self.background.scale(2)
        self.frame.scale(1.25, about_edge=DR)

        # Test
        bubble = stds[0].get_bubble("How would you\ntest this?", bubble_type=SpeechBubble)
        bubble.shift(0.5 * LEFT)
        self.play(LaggedStart(
            FadeIn(bubble, lag_ratio=0.1),
            stds[0].change("raise_left_hand"),
            stds[1].change("confused"),
            stds[2].change("maybe"),
            morty.change("tease")
        ))
        self.wait(2)
        self.play(
            morty.says(TexText(R"There's nice\\research using\\Sparse Autoencoder"), mode="hooray")
        )
        self.play(
            self.change_students(None, "pondering", "erm", look_at=morty.bubble),
        )
        self.wait()
        self.look_at(self.screen)
        self.wait(5)


class DetailsNotDiscussed(InteractiveScene):
    """List details not discussed, then enlarge the "Training" entry."""

    def construct(self):
        # Test
        title = Text("Details not discussed")
        title.add(Underline(title))
        title.scale(1.25)
        title.set_color(RED)
        title.to_edge(UP).to_edge(RIGHT, buff=0)

        details = VGroup(
            Text("Tokenization"),
            Text("Positional encoding"),
            Text("Layer normalization"),
            Text("Batch normalization"),
            Text("Training"),
        )
        dots = Dot().get_grid(len(details), 1, buff=0.75)
        dots.next_to(title, DOWN, buff=MED_LARGE_BUFF).shift(2 * LEFT)
        for detail, dot in zip(details, dots):
            detail.next_to(dot, RIGHT)
            # The bullet becomes submobject 0 of its detail.
            detail.add_to_back(dot)
        vdots = Tex(R"\vdots")
        vdots.next_to(details, DOWN, MED_LARGE_BUFF).shift(LEFT)
        details.add(vdots)
        details.set_color(GREY_A)

        self.add(title)
        self.play(
            LaggedStartMap(FadeIn, details, shift=0.25 * DOWN, lag_ratio=0.5),
            run_time=4
        )
        self.wait()

        # Highlight training
        self.play(
            # details[-2] is "Training" (the vdots were appended last);
            # [1:] skips its bullet dot.
            details[-2][1:].animate.scale(2, about_edge=LEFT).set_color(WHITE),
            details[-1].animate.shift(0.1 * DOWN),
            details[:-2].animate.set_opacity(0.5).scale(0.9, about_edge=UL),
        )
        self.wait()


class TriPanelWithPi(InteractiveScene):
    """Mortimer reacts beneath a three-panel divider drawn in the top half."""

    def construct(self):
        vlines = Line(UP, DOWN).replicate(2)
        vlines.set_height(FRAME_HEIGHT / 2)
        vlines.arrange(RIGHT, buff=FRAME_WIDTH / 3)
        vlines.to_edge(UP, buff=0)
        hline = Line(LEFT, RIGHT).set_width(FRAME_WIDTH)
        hline.move_to(vlines, DOWN)
        lines = VGroup(*vlines, hline)
        lines.set_stroke(WHITE, 2)
        self.add(lines)

        # Test
        morty = Mortimer(mode="happy")
        morty.to_corner(DR).shift(3 * LEFT)
        self.play(morty.change("tease", 4 * UL))
        self.play(Blink(morty))
        self.wait()
        self.play(morty.change("coin_flip_2", 3 * UP))
        self.play(Blink(morty))
        self.wait()
        self.play(morty.change("hooray", 5 * UR).set_anim_args(path_arc=10 * DEGREES))
        self.play(Blink(morty))
        self.wait()


class WriteRLHF(InteractiveScene):
    """Spell out RLHF on four lines, then dim all but each line's first letter."""

    def construct(self):
        words = VGroup(
            Text("Reinforcement"),
            Text("Learning with"),
            Text("Human"),
            Text("Feedback"),
        )
        words.scale(1.5)
        words.arrange(DOWN, aligned_edge=LEFT)
        words.set_color(GREY_A)
        self.play(LaggedStartMap(FadeIn, words, shift=0.25 * DOWN, lag_ratio=0.25))
        self.play(*(
            word[1:].animate.set_opacity(0.75)
            for word in words
        ))
        self.wait()