Some refactors

- Split out `_TexParser` class
- Replace `math_mode` parameter with `tex_environment`
- Fix the bug where braces following an even number of backslashes aren't matched
This commit is contained in:
Michael W 2021-12-13 21:01:27 +08:00 committed by GitHub
parent 155839bde9
commit 0de303d5e0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -18,6 +18,10 @@ SCALE_FACTOR_PER_FONT_POINT = 0.001
tex_hash_to_mob_map = {}
def _get_neighbouring_pairs(iterable):
    """Return the pairs produced by ``adjacent_pairs`` without the last one.

    NOTE(review): presumably the dropped pair is the wrap-around
    (last, first) pair emitted by ``adjacent_pairs`` - confirm against
    that helper.
    """
    all_pairs = [*adjacent_pairs(iterable)]
    return all_pairs[:-1]
class _LabelledTex(SVGMobject):
CONFIG = {
"height": None,
@ -65,6 +69,185 @@ class _TexSpan(object):
]) + ")"
class _TexParser(object):
def __init__(self, mtex):
self.tex_string = mtex.tex_string
strings_to_break_up = remove_list_redundancies([
*mtex.isolate, *mtex.tex_to_color_map.keys(), mtex.tex_string
])
if "" in strings_to_break_up:
strings_to_break_up.remove("")
unbreakable_commands = mtex.unbreakable_commands
self.tex_spans_dict = {}
self.current_label = 0
self.break_up_by_braces()
self.break_up_by_scripts()
self.break_up_by_additional_strings(strings_to_break_up)
self.merge_unbreakable_commands(unbreakable_commands)
self.analyse_containing_labels()
@staticmethod
def label_to_color_tuple(n):
# Get a unique color different from black,
# or the svg file will not include the color information.
rgb = n + 1
rg, b = divmod(rgb, 256)
r, g = divmod(rg, 256)
return r, g, b
@staticmethod
def contains(span_0, span_1):
return span_0[0] <= span_1[0] and span_1[1] <= span_0[1]
def add_tex_span(self, span_tuple, script_type=0, label=-1):
if script_type == 0:
# Should be additionally labelled.
label = self.current_label
self.current_label += 1
tex_span = _TexSpan(script_type, label)
self.tex_spans_dict[span_tuple] = tex_span
def break_up_by_braces(self):
tex_string = self.tex_string
span_tuples = []
left_brace_indices = []
for match_obj in re.finditer(r"(\\*)(\{|\})", tex_string):
# Braces following even numbers of backslashes are counted.
if len(match_obj.group(1)) % 2 == 1:
continue
if match_obj.group(2) == "{":
left_brace_index = match_obj.span(2)[0]
left_brace_indices.append(left_brace_index)
else:
left_brace_index = left_brace_indices.pop()
right_brace_index = match_obj.span(2)[1]
span_tuples.append((left_brace_index, right_brace_index))
if left_brace_indices:
self.raise_tex_parsing_error()
self.paired_braces_tuples = span_tuples
for span_tuple in span_tuples:
self.add_tex_span(span_tuple)
def break_up_by_scripts(self):
tex_string = self.tex_string
brace_indices_dict = dict(self.tex_spans_dict.keys())
for match_obj in re.finditer(r"((?<!\\)(_|\^)\s*)|(\s+(_|\^)\s*)", tex_string):
script_type = 1 if "_" in match_obj.group() else 2
token_begin, token_end = match_obj.span()
if token_end in brace_indices_dict:
content_span = (token_end, brace_indices_dict[token_end])
else:
content_match_obj = re.match(r"\w|\\[a-zA-Z]+", tex_string[token_end:])
if not content_match_obj:
self.raise_tex_parsing_error()
content_span = tuple([
index + token_end for index in content_match_obj.span()
])
self.add_tex_span(content_span)
label = self.tex_spans_dict[content_span].label
self.add_tex_span(
(token_begin, content_span[1]),
script_type=script_type,
label=label
)
def break_up_by_additional_strings(self, strings_to_break_up):
tex_string = self.tex_string
all_span_tuples = []
for string in strings_to_break_up:
# Only matches non-crossing strings.
for match_obj in re.finditer(re.escape(string), tex_string):
all_span_tuples.append(match_obj.span())
script_spans_dict = dict([
span_tuple[::-1]
for span_tuple, tex_span in self.tex_spans_dict.items()
if tex_span.script_type != 0
])
for span_begin, span_end in all_span_tuples:
if span_end in script_spans_dict.values():
# Deconstruct spans with subscripts & superscripts.
while span_end in script_spans_dict:
span_end = script_spans_dict[span_end]
if span_begin >= span_end:
continue
span_tuple = (span_begin, span_end)
if span_tuple not in self.tex_spans_dict:
self.add_tex_span(span_tuple)
def merge_unbreakable_commands(self, unbreakable_commands):
tex_string = self.tex_string
command_merge_spans = []
brace_indices_dict = dict(self.paired_braces_tuples)
# Braces leading by `unbreakable_commands` shouldn't be marked.
for command in unbreakable_commands:
for match_obj in re.finditer(re.escape(command), tex_string):
merge_begin_index = match_obj.span()[1]
merge_end_index = merge_begin_index
if merge_end_index not in brace_indices_dict:
continue
while merge_end_index in brace_indices_dict:
merge_end_index = brace_indices_dict[merge_end_index]
command_merge_spans.append((merge_begin_index, merge_end_index))
self.tex_spans_dict = {
span_tuple: tex_span
for span_tuple, tex_span in self.tex_spans_dict.items()
if all([
not _TexParser.contains(merge_span, span_tuple)
for merge_span in command_merge_spans
])
}
def analyse_containing_labels(self):
for span_0, tex_span_0 in self.tex_spans_dict.items():
if tex_span_0.script_type != 0:
continue
for span_1, tex_span_1 in self.tex_spans_dict.items():
if _TexParser.contains(span_1, span_0):
tex_span_1.containing_labels.append(tex_span_0.label)
def get_labelled_expression(self):
tex_string = self.tex_string
if not self.tex_spans_dict:
return tex_string
indices_with_labels = sorted([
(span_tuple[i], i, span_tuple[1 - i], tex_span.label)
for span_tuple, tex_span in self.tex_spans_dict.items()
if tex_span.script_type == 0
for i in range(2)
], key=lambda t: (t[0], -t[1], -t[2]))
# Add one more item to ensure all the substrings are joined.
indices_with_labels.append((len(tex_string), 0, 0, 0))
result = tex_string[: indices_with_labels[0][0]]
index_with_label_pairs = _get_neighbouring_pairs(indices_with_labels)
for index_with_label, next_index_with_label in index_with_label_pairs:
index, flag, _, label = index_with_label
next_index, *_ = next_index_with_label
# Adding one more pair of braces will help maintain the glyghs of tex file...
if flag == 0:
color_tuple = _TexParser.label_to_color_tuple(label)
result += "".join([
"{{",
"\\color[RGB]",
"{",
",".join(map(str, color_tuple)),
"}"
])
else:
result += "}}"
result += tex_string[index : next_index]
return result
def raise_tex_parsing_error(self):
raise ValueError(f"Failed to parse tex: \"{self.tex_string}\"")
class MTex(VMobject):
CONFIG = {
"fill_opacity": 1.0,
@ -74,7 +257,7 @@ class MTex(VMobject):
"height": None,
"organize_left_to_right": False,
"alignment": "\\centering",
"math_mode": True,
"tex_environment": "align*",
"isolate": [],
"unbreakable_commands": ["\\begin", "\\end"],
"tex_to_color_map": {},
@ -82,14 +265,13 @@ class MTex(VMobject):
def __init__(self, tex_string, **kwargs):
super().__init__(**kwargs)
tex_string = tex_string.strip("\n")
# Prevent from passing an empty string.
if not tex_string:
tex_string = "\\quad"
self.tex_string = tex_string
self.parse_tex()
self.tex_string = MTex.modify_tex_string(tex_string)
full_tex = self.get_tex_file_body()
tex_parser = _TexParser(self)
self.tex_spans_dict = tex_parser.tex_spans_dict
new_tex = tex_parser.get_labelled_expression()
full_tex = self.get_tex_file_body(new_tex)
hash_val = hash(full_tex)
if hash_val not in tex_hash_to_mob_map:
with display_during_execution(f"Writing \"{tex_string}\""):
@ -111,160 +293,22 @@ class MTex(VMobject):
self.organize_submobjects_left_to_right()
@staticmethod
def label_to_color_tuple(n):
    """Return the ``(r, g, b)`` base-256 digits of ``n + 1``."""
    # The offset keeps label 0 away from black; otherwise the svg file
    # will not include the color information.
    shifted = n + 1
    blue = shifted % 256
    upper = shifted // 256
    green = upper % 256
    red = upper // 256
    return red, green, blue
def modify_tex_string(tex_string):
    """Strip surrounding newlines; fall back to ``\\quad`` if nothing is left."""
    stripped = tex_string.strip("\n")
    # An empty string must not be passed on, so substitute a spacing command.
    return stripped if stripped else "\\quad"
@staticmethod
def get_neighbouring_pairs(iterable):
    """Return the output of ``adjacent_pairs`` as a list minus its last pair."""
    every_pair = [*adjacent_pairs(iterable)]
    return every_pair[:-1]
@staticmethod
def contains(span_0, span_1):
    """Tell whether ``span_1`` lies inside ``span_0`` (shared bounds allowed)."""
    if span_0[0] <= span_1[0]:
        return span_1[1] <= span_0[1]
    return False
def add_tex_span(self, span_tuple, script_type=0, label=-1):
    """Store a ``_TexSpan`` for ``span_tuple`` in ``tex_spans_dict``.

    A span with ``script_type == 0`` ignores ``label`` and takes the next
    unused label, advancing ``current_label``.
    """
    if script_type == 0:
        # Should be additionally labelled.
        label = self.current_label
        self.current_label = label + 1
    self.tex_spans_dict[span_tuple] = _TexSpan(script_type, label)
def parse_tex(self):
    """Reset the span bookkeeping and run each parsing pass in order."""
    self.tex_spans_dict = {}
    self.current_label = 0
    # Later passes read the spans produced by earlier ones.
    pass_names = (
        "break_up_by_braces",
        "break_up_by_scripts",
        "break_up_by_additional_strings",
        "merge_unbreakable_strings",
        "analyse_containing_labels",
    )
    for pass_name in pass_names:
        getattr(self, pass_name)()
def break_up_by_braces(self):
tex_string = self.tex_string
span_tuples = []
left_brace_indices = []
for match_obj in re.finditer(r"(?<!\\)(\{|\})", tex_string):
if match_obj.group() == "{":
left_brace_index = match_obj.span()[0]
left_brace_indices.append(left_brace_index)
else:
left_brace_index = left_brace_indices.pop()
right_brace_index = match_obj.span()[1]
span_tuples.append((left_brace_index, right_brace_index))
if left_brace_indices:
self.raise_tex_parsing_error()
self.paired_braces_tuples = span_tuples
for span_tuple in span_tuples:
self.add_tex_span(span_tuple)
def break_up_by_scripts(self):
tex_string = self.tex_string
brace_indices_dict = dict(self.tex_spans_dict.keys())
script_spans = []
for match_obj in re.finditer(r"((?<!\\)(_|\^)\s*)|(\s+(_|\^)\s*)", tex_string):
script_type = 1 if "_" in match_obj.group() else 2
token_begin, token_end = match_obj.span()
if token_end in brace_indices_dict:
content_span = (token_end, brace_indices_dict[token_end])
else:
content_match_obj = re.match(r"\w|\\[a-zA-Z]+", tex_string[token_end:])
if not content_match_obj:
self.raise_tex_parsing_error()
content_span = tuple([
index + token_end for index in content_match_obj.span()
])
self.add_tex_span(content_span)
subscript_span = (token_begin, content_span[1])
script_spans.append(subscript_span)
label = self.tex_spans_dict[content_span].label
self.add_tex_span(
subscript_span,
script_type=script_type,
label=label
)
self.script_spans = script_spans
def break_up_by_additional_strings(self):
    """Add a span for every occurrence of the strings to be isolated.

    The candidates are ``isolate``, the keys of ``tex_to_color_map`` and
    the whole tex string, with duplicates and the empty string removed.
    """
    candidates = remove_list_redundancies([
        *self.isolate, *self.tex_to_color_map.keys(), self.tex_string
    ])
    if "" in candidates:
        candidates.remove("")
    if not candidates:
        return

    source = self.tex_string
    # Only matches non-crossing strings.
    found_spans = [
        hit.span()
        for candidate in candidates
        for hit in re.finditer(re.escape(candidate), source)
    ]

    # Maps the end index of each script span to its begin index.
    script_begin_by_end = {end: begin for begin, end in self.script_spans}
    for begin, end in found_spans:
        if end in script_begin_by_end.values():
            # Deconstruct spans with subscripts & superscripts.
            while end in script_begin_by_end:
                end = script_begin_by_end[end]
            if begin >= end:
                continue
        if (begin, end) not in self.tex_spans_dict:
            self.add_tex_span((begin, end))
def merge_unbreakable_strings(self):
tex_string = self.tex_string
command_merge_spans = []
brace_indices_dict = dict(self.paired_braces_tuples)
# Braces leading by `unbreakable_commands` shouldn't be marked.
for command in self.unbreakable_commands:
for match_obj in re.finditer(re.escape(command), tex_string):
merge_begin_index = match_obj.span()[1]
merge_end_index = merge_begin_index
if merge_end_index not in brace_indices_dict:
continue
while merge_end_index in brace_indices_dict:
merge_end_index = brace_indices_dict[merge_end_index]
command_merge_spans.append((merge_begin_index, merge_end_index))
if not command_merge_spans:
return
self.tex_spans_dict = {
span_tuple: tex_span
for span_tuple, tex_span in self.tex_spans_dict.items()
if all([
not MTex.contains(merge_span, span_tuple)
for merge_span in command_merge_spans
def get_tex_file_body(self, new_tex):
if self.tex_environment:
new_tex = "\n".join([
f"\\begin{{{self.tex_environment}}}",
new_tex,
f"\\end{{{self.tex_environment}}}"
])
}
def analyse_containing_labels(self):
for span_0, tex_span_0 in self.tex_spans_dict.items():
if tex_span_0.script_type != 0:
continue
for span_1, tex_span_1 in self.tex_spans_dict.items():
if MTex.contains(span_1, span_0):
tex_span_1.containing_labels.append(tex_span_0.label)
def raise_tex_parsing_error(self):
raise ValueError(f"Failed to parse tex: \"{self.tex_string}\"")
def get_tex_file_body(self):
new_tex = self.get_modified_expression()
if self.math_mode:
new_tex = "\n".join(["\\begin{align*}", new_tex, "\\end{align*}"])
new_tex = "\n".join([self.alignment, new_tex])
if self.alignment:
new_tex = "\n".join([self.alignment, new_tex])
tex_config = get_tex_config()
return tex_config["tex_body"].replace(
@ -272,40 +316,6 @@ class MTex(VMobject):
new_tex
)
def get_modified_expression(self):
tex_string = self.tex_string
if not self.tex_spans_dict:
return tex_string
indices_with_labels = sorted([
(span_tuple[i], i, span_tuple[1 - i], tex_span.label)
for span_tuple, tex_span in self.tex_spans_dict.items()
if tex_span.script_type == 0
for i in range(2)
], key=lambda t: (t[0], -t[1], -t[2]))
# Add one more item to ensure all the substrings are joined.
indices_with_labels.append((len(tex_string), 0, 0, 0))
result = tex_string[: indices_with_labels[0][0]]
index_with_label_pairs = MTex.get_neighbouring_pairs(indices_with_labels)
for index_with_label, next_index_with_label in index_with_label_pairs:
index, flag, _, label = index_with_label
next_index, *_ = next_index_with_label
# Adding one more pair of braces will help maintain the glyghs of tex file...
if flag == 0:
color_tuple = MTex.label_to_color_tuple(label)
result += "".join([
"{{",
"\\color[RGB]",
"{",
",".join(map(str, color_tuple)),
"}"
])
else:
result += "}}"
result += tex_string[index : next_index]
return result
def build_submobjects(self):
if not self.submobjects:
return
@ -339,10 +349,11 @@ class MTex(VMobject):
# This function sorts the submobjects of scripts in the order of tex given.
index_and_span_list = sorted([
(index, span_tuple)
for span_tuple in self.script_spans
for span_tuple, tex_span in self.tex_spans_dict.items()
if tex_span.script_type != 0
for index in span_tuple
])
index_and_span_pair = MTex.get_neighbouring_pairs(index_and_span_list)
index_and_span_pair = _get_neighbouring_pairs(index_and_span_list)
for index_and_span_0, index_and_span_1 in index_and_span_pair:
index_0, span_tuple_0 = index_and_span_0
index_1, span_tuple_1 = index_and_span_1
@ -497,12 +508,14 @@ class MTex(VMobject):
def print_tex_strings_of_submobjects(self):
    """Print the tex of this mobject, then of each submobject by index."""
    # For debugging
    # Work with `index_labels()`
    print("\n")
    own_tex = self.get_tex()
    print(f"Submobjects of \"{own_tex}\":")
    for position, part in enumerate(self.submobjects):
        part_tex = part.get_tex()
        print(f"{position}: \"{part_tex}\"")
    print("\n")
class MTexText(MTex):
# MTex subclass that overrides CONFIG so that no tex environment is set.
# NOTE(review): this excerpt lists both `math_mode` and `tex_environment`;
# presumably `math_mode` is the superseded entry and only
# `tex_environment` should remain - confirm against the actual file.
CONFIG = {
"math_mode": False,
"tex_environment": None,
}