Some refactors

- Split out `_TexParser` class
- Replace `math_mode` parameter with `tex_environment`
- Fix the bug that braces following an even number of backslashes aren't matched
2025-11-14 08:17:43 +00:00 · 2021-12-13 21:01:27 +08:00 · 2021-12-13 21:01:27 +08:00 · 0de303d5e0
commit 0de303d5e0
parent 155839bde9
1 changed files with 210 additions and 197 deletions
--- a/manimlib/mobject/svg/mtex_mobject.py
+++ b/manimlib/mobject/svg/mtex_mobject.py
@@ -18,6 +18,10 @@ SCALE_FACTOR_PER_FONT_POINT = 0.001
 tex_hash_to_mob_map = {}


+def _get_neighbouring_pairs(iterable):
+    return list(adjacent_pairs(iterable))[:-1]
+
+
 class _LabelledTex(SVGMobject):
    CONFIG = {
        "height": None,
@@ -65,6 +69,185 @@ class _TexSpan(object):
        ]) + ")"


+class _TexParser(object):
+    def __init__(self, mtex):
+        self.tex_string = mtex.tex_string
+        strings_to_break_up = remove_list_redundancies([
+            *mtex.isolate, *mtex.tex_to_color_map.keys(), mtex.tex_string
+        ])
+        if "" in strings_to_break_up:
+            strings_to_break_up.remove("")
+        unbreakable_commands = mtex.unbreakable_commands
+
+        self.tex_spans_dict = {}
+        self.current_label = 0
+        self.break_up_by_braces()
+        self.break_up_by_scripts()
+        self.break_up_by_additional_strings(strings_to_break_up)
+        self.merge_unbreakable_commands(unbreakable_commands)
+        self.analyse_containing_labels()
+
+    @staticmethod
+    def label_to_color_tuple(n):
+        # Get a unique color different from black,
+        # or the svg file will not include the color information.
+        rgb = n + 1
+        rg, b = divmod(rgb, 256)
+        r, g = divmod(rg, 256)
+        return r, g, b
+
+    @staticmethod
+    def contains(span_0, span_1):
+        return span_0[0] <= span_1[0] and span_1[1] <= span_0[1]
+
+    def add_tex_span(self, span_tuple, script_type=0, label=-1):
+        if script_type == 0:
+            # Should be additionally labelled.
+            label = self.current_label
+            self.current_label += 1
+
+        tex_span = _TexSpan(script_type, label)
+        self.tex_spans_dict[span_tuple] = tex_span
+
+    def break_up_by_braces(self):
+        tex_string = self.tex_string
+        span_tuples = []
+        left_brace_indices = []
+        for match_obj in re.finditer(r"(\\*)(\{|\})", tex_string):
+            # Braces following even numbers of backslashes are counted.
+            if len(match_obj.group(1)) % 2 == 1:
+                continue
+            if match_obj.group(2) == "{":
+                left_brace_index = match_obj.span(2)[0]
+                left_brace_indices.append(left_brace_index)
+            else:
+                left_brace_index = left_brace_indices.pop()
+                right_brace_index = match_obj.span(2)[1]
+                span_tuples.append((left_brace_index, right_brace_index))
+        if left_brace_indices:
+            self.raise_tex_parsing_error()
+
+        self.paired_braces_tuples = span_tuples
+        for span_tuple in span_tuples:
+            self.add_tex_span(span_tuple)
+
+    def break_up_by_scripts(self):
+        tex_string = self.tex_string
+        brace_indices_dict = dict(self.tex_spans_dict.keys())
+        for match_obj in re.finditer(r"((?<!\\)(_|\^)\s*)|(\s+(_|\^)\s*)", tex_string):
+            script_type = 1 if "_" in match_obj.group() else 2
+            token_begin, token_end = match_obj.span()
+            if token_end in brace_indices_dict:
+                content_span = (token_end, brace_indices_dict[token_end])
+            else:
+                content_match_obj = re.match(r"\w|\\[a-zA-Z]+", tex_string[token_end:])
+                if not content_match_obj:
+                    self.raise_tex_parsing_error()
+                content_span = tuple([
+                    index + token_end for index in content_match_obj.span()
+                ])
+                self.add_tex_span(content_span)
+            label = self.tex_spans_dict[content_span].label
+            self.add_tex_span(
+                (token_begin, content_span[1]),
+                script_type=script_type,
+                label=label
+            )
+
+    def break_up_by_additional_strings(self, strings_to_break_up):
+        tex_string = self.tex_string
+        all_span_tuples = []
+        for string in strings_to_break_up:
+            # Only matches non-crossing strings.
+            for match_obj in re.finditer(re.escape(string), tex_string):
+                all_span_tuples.append(match_obj.span())
+
+        script_spans_dict = dict([
+            span_tuple[::-1]
+            for span_tuple, tex_span in self.tex_spans_dict.items()
+            if tex_span.script_type != 0
+        ])
+        for span_begin, span_end in all_span_tuples:
+            if span_end in script_spans_dict.values():
+                # Deconstruct spans with subscripts & superscripts.
+                while span_end in script_spans_dict:
+                    span_end = script_spans_dict[span_end]
+                if span_begin >= span_end:
+                    continue
+            span_tuple = (span_begin, span_end)
+            if span_tuple not in self.tex_spans_dict:
+                self.add_tex_span(span_tuple)
+
+    def merge_unbreakable_commands(self, unbreakable_commands):
+        tex_string = self.tex_string
+        command_merge_spans = []
+        brace_indices_dict = dict(self.paired_braces_tuples)
+        # Braces preceded by `unbreakable_commands` shouldn't be marked.
+        for command in unbreakable_commands:
+            for match_obj in re.finditer(re.escape(command), tex_string):
+                merge_begin_index = match_obj.span()[1]
+                merge_end_index = merge_begin_index
+                if merge_end_index not in brace_indices_dict:
+                    continue
+                while merge_end_index in brace_indices_dict:
+                    merge_end_index = brace_indices_dict[merge_end_index]
+                command_merge_spans.append((merge_begin_index, merge_end_index))
+
+        self.tex_spans_dict = {
+            span_tuple: tex_span
+            for span_tuple, tex_span in self.tex_spans_dict.items()
+            if all([
+                not _TexParser.contains(merge_span, span_tuple)
+                for merge_span in command_merge_spans
+            ])
+        }
+
+    def analyse_containing_labels(self):
+        for span_0, tex_span_0 in self.tex_spans_dict.items():
+            if tex_span_0.script_type != 0:
+                continue
+            for span_1, tex_span_1 in self.tex_spans_dict.items():
+                if _TexParser.contains(span_1, span_0):
+                    tex_span_1.containing_labels.append(tex_span_0.label)
+
+    def get_labelled_expression(self):
+        tex_string = self.tex_string
+        if not self.tex_spans_dict:
+            return tex_string
+
+        indices_with_labels = sorted([
+            (span_tuple[i], i, span_tuple[1 - i], tex_span.label)
+            for span_tuple, tex_span in self.tex_spans_dict.items()
+            if tex_span.script_type == 0
+            for i in range(2)
+        ], key=lambda t: (t[0], -t[1], -t[2]))
+        # Add one more item to ensure all the substrings are joined.
+        indices_with_labels.append((len(tex_string), 0, 0, 0))
+
+        result = tex_string[: indices_with_labels[0][0]]
+        index_with_label_pairs = _get_neighbouring_pairs(indices_with_labels)
+        for index_with_label, next_index_with_label in index_with_label_pairs:
+            index, flag, _, label = index_with_label
+            next_index, *_ = next_index_with_label
+            # Adding one more pair of braces will help maintain the glyphs of tex file...
+            if flag == 0:
+                color_tuple = _TexParser.label_to_color_tuple(label)
+                result += "".join([
+                    "{{",
+                    "\\color[RGB]",
+                    "{",
+                    ",".join(map(str, color_tuple)),
+                    "}"
+                ])
+            else:
+                result += "}}"
+            result += tex_string[index : next_index]
+        return result
+
+    def raise_tex_parsing_error(self):
+        raise ValueError(f"Failed to parse tex: \"{self.tex_string}\"")
+
+
 class MTex(VMobject):
    CONFIG = {
        "fill_opacity": 1.0,
@@ -74,7 +257,7 @@ class MTex(VMobject):
        "height": None,
        "organize_left_to_right": False,
        "alignment": "\\centering",
-        "math_mode": True,
+        "tex_environment": "align*",
        "isolate": [],
        "unbreakable_commands": ["\\begin", "\\end"],
        "tex_to_color_map": {},
@@ -82,14 +265,13 @@ class MTex(VMobject):

    def __init__(self, tex_string, **kwargs):
        super().__init__(**kwargs)
-        tex_string = tex_string.strip("\n")
-        # Prevent from passing an empty string.
-        if not tex_string:
-            tex_string = "\\quad"
-        self.tex_string = tex_string
-        self.parse_tex()
+        self.tex_string = MTex.modify_tex_string(tex_string)

-        full_tex = self.get_tex_file_body()
+        tex_parser = _TexParser(self)
+        self.tex_spans_dict = tex_parser.tex_spans_dict
+
+        new_tex = tex_parser.get_labelled_expression()
+        full_tex = self.get_tex_file_body(new_tex)
        hash_val = hash(full_tex)
        if hash_val not in tex_hash_to_mob_map:
            with display_during_execution(f"Writing \"{tex_string}\""):
@@ -111,160 +293,22 @@ class MTex(VMobject):
            self.organize_submobjects_left_to_right()

    @staticmethod
-    def label_to_color_tuple(n):
-        # Get a unique color different from black,
-        # or the svg file will not include the color information.
-        rgb = n + 1
-        rg, b = divmod(rgb, 256)
-        r, g = divmod(rg, 256)
-        return r, g, b
+    def modify_tex_string(tex_string):
+        result = tex_string.strip("\n")
+        # Prevent from passing an empty string.
+        if not result:
+            result = "\\quad"
+        return result

-    @staticmethod
-    def get_neighbouring_pairs(iterable):
-        return list(adjacent_pairs(iterable))[:-1]
-
-    @staticmethod
-    def contains(span_0, span_1):
-        return span_0[0] <= span_1[0] and span_1[1] <= span_0[1]
-
-    def add_tex_span(self, span_tuple, script_type=0, label=-1):
-        if script_type == 0:
-            # Should be additionally labelled.
-            label = self.current_label
-            self.current_label += 1
-
-        tex_span = _TexSpan(script_type, label)
-        self.tex_spans_dict[span_tuple] = tex_span
-
-    def parse_tex(self):
-        self.tex_spans_dict = {}
-        self.current_label = 0
-        self.break_up_by_braces()
-        self.break_up_by_scripts()
-        self.break_up_by_additional_strings()
-        self.merge_unbreakable_strings()
-        self.analyse_containing_labels()
-
-    def break_up_by_braces(self):
-        tex_string = self.tex_string
-        span_tuples = []
-        left_brace_indices = []
-        for match_obj in re.finditer(r"(?<!\\)(\{|\})", tex_string):
-            if match_obj.group() == "{":
-                left_brace_index = match_obj.span()[0]
-                left_brace_indices.append(left_brace_index)
-            else:
-                left_brace_index = left_brace_indices.pop()
-                right_brace_index = match_obj.span()[1]
-                span_tuples.append((left_brace_index, right_brace_index))
-        if left_brace_indices:
-            self.raise_tex_parsing_error()
-
-        self.paired_braces_tuples = span_tuples
-        for span_tuple in span_tuples:
-            self.add_tex_span(span_tuple)
-
-    def break_up_by_scripts(self):
-        tex_string = self.tex_string
-        brace_indices_dict = dict(self.tex_spans_dict.keys())
-        script_spans = []
-        for match_obj in re.finditer(r"((?<!\\)(_|\^)\s*)|(\s+(_|\^)\s*)", tex_string):
-            script_type = 1 if "_" in match_obj.group() else 2
-            token_begin, token_end = match_obj.span()
-            if token_end in brace_indices_dict:
-                content_span = (token_end, brace_indices_dict[token_end])
-            else:
-                content_match_obj = re.match(r"\w|\\[a-zA-Z]+", tex_string[token_end:])
-                if not content_match_obj:
-                    self.raise_tex_parsing_error()
-                content_span = tuple([
-                    index + token_end for index in content_match_obj.span()
-                ])
-                self.add_tex_span(content_span)
-            subscript_span = (token_begin, content_span[1])
-            script_spans.append(subscript_span)
-            label = self.tex_spans_dict[content_span].label
-            self.add_tex_span(
-                subscript_span,
-                script_type=script_type,
-                label=label
-            )
-        self.script_spans = script_spans
-
-    def break_up_by_additional_strings(self):
-        additional_strings_to_break_up = remove_list_redundancies([
-            *self.isolate, *self.tex_to_color_map.keys(), self.tex_string
-        ])
-        if "" in additional_strings_to_break_up:
-            additional_strings_to_break_up.remove("")
-        if not additional_strings_to_break_up:
-            return
-
-        tex_string = self.tex_string
-        all_span_tuples = []
-        for string in additional_strings_to_break_up:
-            # Only matches non-crossing strings.
-            for match_obj in re.finditer(re.escape(string), tex_string):
-                all_span_tuples.append(match_obj.span())
-
-        script_spans_dict = dict([
-            span_tuple[::-1]
-            for span_tuple in self.script_spans
-        ])
-        for span_begin, span_end in all_span_tuples:
-            if span_end in script_spans_dict.values():
-                # Deconstruct spans with subscripts & superscripts.
-                while span_end in script_spans_dict:
-                    span_end = script_spans_dict[span_end]
-                if span_begin >= span_end:
-                    continue
-            span_tuple = (span_begin, span_end)
-            if span_tuple not in self.tex_spans_dict:
-                self.add_tex_span(span_tuple)
-
-    def merge_unbreakable_strings(self):
-        tex_string = self.tex_string
-        command_merge_spans = []
-        brace_indices_dict = dict(self.paired_braces_tuples)
-        # Braces leading by `unbreakable_commands` shouldn't be marked.
-        for command in self.unbreakable_commands:
-            for match_obj in re.finditer(re.escape(command), tex_string):
-                merge_begin_index = match_obj.span()[1]
-                merge_end_index = merge_begin_index
-                if merge_end_index not in brace_indices_dict:
-                    continue
-                while merge_end_index in brace_indices_dict:
-                    merge_end_index = brace_indices_dict[merge_end_index]
-                command_merge_spans.append((merge_begin_index, merge_end_index))
-
-        if not command_merge_spans:
-            return
-        self.tex_spans_dict = {
-            span_tuple: tex_span
-            for span_tuple, tex_span in self.tex_spans_dict.items()
-            if all([
-                not MTex.contains(merge_span, span_tuple)
-                for merge_span in command_merge_spans
+    def get_tex_file_body(self, new_tex):
+        if self.tex_environment:
+            new_tex = "\n".join([
+                f"\\begin{{{self.tex_environment}}}",
+                new_tex,
+                f"\\end{{{self.tex_environment}}}"
            ])
-        }
-
-    def analyse_containing_labels(self):
-        for span_0, tex_span_0 in self.tex_spans_dict.items():
-            if tex_span_0.script_type != 0:
-                continue
-            for span_1, tex_span_1 in self.tex_spans_dict.items():
-                if MTex.contains(span_1, span_0):
-                    tex_span_1.containing_labels.append(tex_span_0.label)
-
-    def raise_tex_parsing_error(self):
-        raise ValueError(f"Failed to parse tex: \"{self.tex_string}\"")
-
-    def get_tex_file_body(self):
-        new_tex = self.get_modified_expression()
-
-        if self.math_mode:
-            new_tex = "\n".join(["\\begin{align*}", new_tex, "\\end{align*}"])
-        new_tex = "\n".join([self.alignment, new_tex])
+        if self.alignment:
+            new_tex = "\n".join([self.alignment, new_tex])

        tex_config = get_tex_config()
        return tex_config["tex_body"].replace(
@@ -272,40 +316,6 @@ class MTex(VMobject):
            new_tex
        )

-    def get_modified_expression(self):
-        tex_string = self.tex_string
-        if not self.tex_spans_dict:
-            return tex_string
-
-        indices_with_labels = sorted([
-            (span_tuple[i], i, span_tuple[1 - i], tex_span.label)
-            for span_tuple, tex_span in self.tex_spans_dict.items()
-            if tex_span.script_type == 0
-            for i in range(2)
-        ], key=lambda t: (t[0], -t[1], -t[2]))
-        # Add one more item to ensure all the substrings are joined.
-        indices_with_labels.append((len(tex_string), 0, 0, 0))
-
-        result = tex_string[: indices_with_labels[0][0]]
-        index_with_label_pairs = MTex.get_neighbouring_pairs(indices_with_labels)
-        for index_with_label, next_index_with_label in index_with_label_pairs:
-            index, flag, _, label = index_with_label
-            next_index, *_ = next_index_with_label
-            # Adding one more pair of braces will help maintain the glyghs of tex file...
-            if flag == 0:
-                color_tuple = MTex.label_to_color_tuple(label)
-                result += "".join([
-                    "{{",
-                    "\\color[RGB]",
-                    "{",
-                    ",".join(map(str, color_tuple)),
-                    "}"
-                ])
-            else:
-                result += "}}"
-            result += tex_string[index : next_index]
-        return result
-
    def build_submobjects(self):
        if not self.submobjects:
            return
@@ -339,10 +349,11 @@ class MTex(VMobject):
        # This function sorts the submobjects of scripts in the order of tex given.
        index_and_span_list = sorted([
            (index, span_tuple)
-            for span_tuple in self.script_spans
+            for span_tuple, tex_span in self.tex_spans_dict.items()
+            if tex_span.script_type != 0
            for index in span_tuple
        ])
-        index_and_span_pair = MTex.get_neighbouring_pairs(index_and_span_list)
+        index_and_span_pair = _get_neighbouring_pairs(index_and_span_list)
        for index_and_span_0, index_and_span_1 in index_and_span_pair:
            index_0, span_tuple_0 = index_and_span_0
            index_1, span_tuple_1 = index_and_span_1
@@ -497,12 +508,14 @@ class MTex(VMobject):
    def print_tex_strings_of_submobjects(self):
        # For debugging
        # Work with `index_labels()`
+        print("\n")
        print(f"Submobjects of \"{self.get_tex()}\":")
        for i, submob in enumerate(self.submobjects):
            print(f"{i}: \"{submob.get_tex()}\"")
+        print("\n")


 class MTexText(MTex):
    CONFIG = {
-        "math_mode": False,
+        "tex_environment": None,
    }