[WIP] Refactor LabelledString and relevant classes

This commit is contained in:
YishiMichael 2022-05-05 23:03:02 +08:00
parent 511a3aab3d
commit 642602155d
No known key found for this signature in database
GPG key ID: EC615C0C5A86BC80
3 changed files with 325 additions and 332 deletions

View file

@ -11,7 +11,6 @@ from manimlib.mobject.types.vectorized_mobject import VGroup
from manimlib.utils.color import color_to_rgb from manimlib.utils.color import color_to_rgb
from manimlib.utils.color import rgb_to_hex from manimlib.utils.color import rgb_to_hex
from manimlib.utils.config_ops import digest_config from manimlib.utils.config_ops import digest_config
from manimlib.utils.iterables import remove_list_redundancies
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
@ -92,14 +91,14 @@ class LabelledString(SVGMobject, ABC):
)) ))
if unrecognized_colors: if unrecognized_colors:
log.warning( log.warning(
"Unrecognized color label(s) detected (%s, etc). " "Unrecognized color labels detected (%s, etc). "
"Skip the labelling process.", "Skip the labelling process.",
self.int_to_hex(unrecognized_colors[0]) self.int_to_hex(unrecognized_colors[0])
) )
submob_color_ints = [0] * num_submobjects submob_color_ints = [0] * num_submobjects
#TODO: remove this # Rearrange colors so that the n-th submobject from the left
#if self.sort_labelled_submobs: # is labelled by the n-th submobject of `labelled_svg` from the left.
submob_indices = sorted( submob_indices = sorted(
range(num_submobjects), range(num_submobjects),
key=lambda index: tuple( key=lambda index: tuple(
@ -122,11 +121,6 @@ class LabelledString(SVGMobject, ABC):
for submob, color_int in zip(self.submobjects, submob_color_ints): for submob, color_int in zip(self.submobjects, submob_color_ints):
submob.label = color_int - 1 submob.label = color_int - 1
#@property
#@abstractmethod
#def sort_labelled_submobs(self) -> bool:
# return False
# Toolkits # Toolkits
def get_substr(self, span: Span) -> str: def get_substr(self, span: Span) -> str:
@ -176,20 +170,8 @@ class LabelledString(SVGMobject, ABC):
if spans is None: if spans is None:
raise TypeError(f"Invalid selector: '{sel}'") raise TypeError(f"Invalid selector: '{sel}'")
result.extend(spans) result.extend(spans)
#return sorted(filter(
# lambda span: span[0] < span[1],
# self.remove_redundancies(result)
#))
return result return result
@staticmethod
def chain(*iterables: Iterable[T]) -> list[T]:
return list(it.chain(*iterables))
@staticmethod
def remove_redundancies(vals: Sequence[T]) -> list[T]:
return remove_list_redundancies(vals)
@staticmethod @staticmethod
def get_neighbouring_pairs(vals: Sequence[T]) -> list[tuple[T, T]]: def get_neighbouring_pairs(vals: Sequence[T]) -> list[tuple[T, T]]:
return list(zip(vals[:-1], vals[1:])) return list(zip(vals[:-1], vals[1:]))
@ -254,7 +236,7 @@ class LabelledString(SVGMobject, ABC):
for piece_span in self.get_complement_spans(span, repl_spans) for piece_span in self.get_complement_spans(span, repl_spans)
] ]
repl_strs = [*repl_strs, ""] repl_strs = [*repl_strs, ""]
return "".join(self.chain(*zip(pieces, repl_strs))) return "".join(it.chain(*zip(pieces, repl_strs)))
@staticmethod @staticmethod
def color_to_hex(color: ManimColor) -> str: def color_to_hex(color: ManimColor) -> str:
@ -268,26 +250,58 @@ class LabelledString(SVGMobject, ABC):
def int_to_hex(rgb_int: int) -> str: def int_to_hex(rgb_int: int) -> str:
return f"#{rgb_int:06x}".upper() return f"#{rgb_int:06x}".upper()
@staticmethod
@abstractmethod
def get_tag_string_pair(
attr_dict: dict[str, str], label_hex: str | None
) -> tuple[str, str]:
return ("", "")
# Parsing # Parsing
def parse(self) -> None: def parse(self) -> None:
begin_cmd_spans, end_cmd_spans, cmd_spans = self.get_command_spans() begin_cmd_spans, end_cmd_spans, other_cmd_spans = self.get_cmd_spans()
cmd_span_items = sorted(it.chain(
cmd_span_items = sorted(self.chain(
[(begin_cmd_span, 1) for begin_cmd_span in begin_cmd_spans], [(begin_cmd_span, 1) for begin_cmd_span in begin_cmd_spans],
[(end_cmd_span, -1) for end_cmd_span in end_cmd_spans], [(end_cmd_span, -1) for end_cmd_span in end_cmd_spans],
[(cmd_span, 0) for cmd_span in cmd_spans], [(cmd_span, 0) for cmd_span in other_cmd_spans],
), key=lambda t: t[0]) ), key=lambda t: t[0])
self.cmd_span_items = cmd_span_items cmd_spans = [span for span, _ in cmd_span_items]
flags = [flag for _, flag in cmd_span_items]
cmd_span_pairs = [] specified_items = self.get_specified_items(
self.get_cmd_span_pairs(cmd_span_items)
)
split_items = [
(span, attr_dict)
for specified_span, attr_dict in specified_items
for span in self.split_span_by_levels(
specified_span, cmd_spans, flags
)
]
self.specified_spans = [span for span, _ in specified_items]
self.labelled_spans = [span for span, _ in split_items]
self.check_overlapping()
cmd_repl_items_for_content = [
(span, self.get_repl_substr_for_content(self.get_substr(span)))
for span in cmd_spans
]
self.cmd_repl_items_for_matching = [
(span, self.get_repl_substr_for_matching(self.get_substr(span)))
for span in cmd_spans
]
self.original_content = self.get_content(
cmd_repl_items_for_content, split_items, is_labelled=False
)
self.labelled_content = self.get_content(
cmd_repl_items_for_content, split_items, is_labelled=True
)
@abstractmethod
def get_cmd_spans(self) -> tuple[list[Span], list[Span], list[Span]]:
return [], [], []
@staticmethod
def get_cmd_span_pairs(
cmd_span_items: list[tuple[Span, int]]
) -> list[tuple[Span, Span]]:
result = []
begin_cmd_spans_stack = [] begin_cmd_spans_stack = []
for cmd_span, flag in cmd_span_items: for cmd_span, flag in cmd_span_items:
if flag == 1: if flag == 1:
@ -296,25 +310,56 @@ class LabelledString(SVGMobject, ABC):
if not begin_cmd_spans_stack: if not begin_cmd_spans_stack:
raise ValueError("Missing '{' inserted") raise ValueError("Missing '{' inserted")
begin_cmd_span = begin_cmd_spans_stack.pop() begin_cmd_span = begin_cmd_spans_stack.pop()
cmd_span_pairs.append((begin_cmd_span, cmd_span)) result.append((begin_cmd_span, cmd_span))
if begin_cmd_spans_stack: if begin_cmd_spans_stack:
raise ValueError("Missing '}' inserted") raise ValueError("Missing '}' inserted")
return result
specified_items = self.get_specified_items(cmd_span_pairs) @abstractmethod
split_items = [ def get_specified_items(
(span, attr_dict) self, cmd_span_pairs: list[tuple[Span, Span]]
for specified_span, attr_dict in specified_items ) -> list[tuple[Span, dict[str, str]]]:
for span in self.split_span_by_levels(specified_span)[0] return []
]
command_repl_items = [ def split_span_by_levels(
(span, self.get_replaced_substr(self.get_substr(span), flag)) self, arbitrary_span: Span, cmd_spans: list[Span], flags: list[int]
for span, flag in cmd_span_items ) -> list[Span]:
] cmd_range = (
self.command_repl_items = command_repl_items sum([
arbitrary_span[0] > interval_begin
for interval_begin, _ in cmd_spans
]),
sum([
arbitrary_span[1] >= interval_end
for _, interval_end in cmd_spans
])
)
complement_spans = self.get_complement_spans(
self.full_span, cmd_spans
)
adjusted_span = (
max(arbitrary_span[0], complement_spans[cmd_range[0]][0]),
min(arbitrary_span[1], complement_spans[cmd_range[1]][1])
)
if adjusted_span[0] > adjusted_span[1]:
return []
self.specified_spans = [span for span, _ in specified_items] upward_cmd_spans = []
labelled_spans = [span for span, _ in split_items] downward_cmd_spans = []
for cmd_span, flag in list(zip(cmd_spans, flags))[slice(*cmd_range)]:
if flag == 1:
upward_cmd_spans.append(cmd_span)
elif flag == -1:
if upward_cmd_spans:
upward_cmd_spans.pop()
else:
downward_cmd_spans.append(cmd_span)
return self.get_complement_spans(
adjusted_span, downward_cmd_spans + upward_cmd_spans
)
def check_overlapping(self) -> None:
labelled_spans = self.labelled_spans
if len(labelled_spans) >= 16777216: if len(labelled_spans) >= 16777216:
raise ValueError("Cannot handle that many substrings") raise ValueError("Cannot handle that many substrings")
for span_0, span_1 in it.product(labelled_spans, repeat=2): for span_0, span_1 in it.product(labelled_spans, repeat=2):
@ -324,92 +369,73 @@ class LabelledString(SVGMobject, ABC):
"Partially overlapping substrings detected: " "Partially overlapping substrings detected: "
f"'{self.get_substr(span_0)}' and '{self.get_substr(span_1)}'" f"'{self.get_substr(span_0)}' and '{self.get_substr(span_1)}'"
) )
self.labelled_spans = labelled_spans
self.original_content, self.labelled_content = ( @abstractmethod
self.get_full_content_string(self.replace_string( def get_repl_substr_for_content(self, substr: str) -> str:
self.full_span, self.chain( return ""
command_repl_items,
[ @abstractmethod
((index, index), inserted_str) def get_repl_substr_for_matching(self, substr: str) -> str:
for index, inserted_str in self.sort_obj_pairs_by_spans([ return ""
(span, self.get_tag_string_pair(
@staticmethod
@abstractmethod
def get_cmd_str_pair(
attr_dict: dict[str, str], label_hex: str | None
) -> tuple[str, str]:
return "", ""
@abstractmethod
def get_content_prefix_and_suffix(
self, is_labelled: bool
) -> tuple[str, str]:
return "", ""
def get_content(
self, cmd_repl_items_for_content: list[Span, str],
split_items: list[tuple[Span, dict[str, str]]], is_labelled: bool
) -> str:
inserted_str_pairs = [
(span, self.get_cmd_str_pair(
attr_dict, attr_dict,
label_hex=self.int_to_hex(label + 1) if is_labelled else None label_hex=self.int_to_hex(label + 1) if is_labelled else None
)) ))
for label, (span, attr_dict) in enumerate(split_items) for label, (span, attr_dict) in enumerate(split_items)
])
] ]
repl_items = cmd_repl_items_for_content + [
((index, index), inserted_str)
for index, inserted_str in self.sort_obj_pairs_by_spans(
inserted_str_pairs
) )
), is_labelled=is_labelled)
for is_labelled in (False, True)
)
def split_span_by_levels(
self, arbitrary_span: Span
) -> tuple[list[Span], int, int]:
interval_span_items = self.cmd_span_items
interval_spans = [span for span, _ in interval_span_items]
interval_range = (
sum([
arbitrary_span[0] > interval_begin
for interval_begin, _ in interval_spans
]),
sum([
arbitrary_span[1] >= interval_end
for _, interval_end in interval_spans
])
)
complement_spans = self.get_complement_spans(self.full_span, interval_spans)
adjusted_span = (
max(arbitrary_span[0], complement_spans[interval_range[0]][0]),
min(arbitrary_span[1], complement_spans[interval_range[1]][1])
)
if adjusted_span[0] > adjusted_span[1]:
return [], 0, 0
upwards_stack = []
downwards_stack = []
for interval_index in range(*interval_range):
_, level_shift = interval_span_items[interval_index]
if level_shift == 1:
upwards_stack.append(interval_index)
elif level_shift == -1:
if upwards_stack:
upwards_stack.pop()
else:
downwards_stack.append(interval_index)
covered_interval_spans = [
interval_spans[piece_index]
for piece_index in self.chain(downwards_stack, upwards_stack)
] ]
result = self.get_complement_spans(adjusted_span, covered_interval_spans) prefix, suffix = self.get_content_prefix_and_suffix(is_labelled)
return result, len(downwards_stack), len(upwards_stack) return "".join([
prefix,
@abstractmethod self.replace_string(self.full_span, repl_items),
def get_command_spans(self) -> tuple[list[Span], list[Span], list[Span]]: suffix
return [], [], [] ])
@abstractmethod
def get_specified_items(
self, cmd_span_pairs: list[tuple[Span, Span]]
) -> list[tuple[Span, dict[str, str]]]:
return []
@abstractmethod
def get_replaced_substr(self, substr: str, flag: int) -> str:
return ""
@abstractmethod
def get_full_content_string(self, content_string: str, is_labelled: bool) -> str:
return ""
# Selector # Selector
@abstractmethod def get_submob_indices_list_by_span(
def get_cleaned_substr(self, span: Span) -> str: self, arbitrary_span: Span
return "" ) -> list[int]:
return [
submob_index
for submob_index, label in enumerate(self.labels)
if label != -1 and self.span_contains(
arbitrary_span, self.labelled_spans[label]
)
]
def get_specified_part_items(self) -> list[tuple[str, list[int]]]:
return [
(
self.get_substr(span),
self.get_submob_indices_list_by_span(span)
)
for span in self.specified_spans
]
def get_group_part_items(self) -> list[tuple[str, list[int]]]: def get_group_part_items(self) -> list[tuple[str, list[int]]]:
if not self.labels: if not self.labels:
@ -436,7 +462,13 @@ class LabelledString(SVGMobject, ABC):
) )
] ]
group_substrs = [ group_substrs = [
self.get_cleaned_substr(span) if span[0] < span[1] else "" re.sub(r"\s+", "", self.replace_string(
span, [
(cmd_span, repl_str)
for cmd_span, repl_str in self.cmd_repl_items_for_matching
if self.span_contains(span, cmd_span)
]
))
for span in self.get_complement_spans( for span in self.get_complement_spans(
(ordered_spans[0][0], ordered_spans[-1][1]), interval_spans (ordered_spans[0][0], ordered_spans[-1][1]), interval_spans
) )
@ -447,26 +479,6 @@ class LabelledString(SVGMobject, ABC):
] ]
return list(zip(group_substrs, submob_indices_lists)) return list(zip(group_substrs, submob_indices_lists))
def get_submob_indices_list_by_span(
self, arbitrary_span: Span
) -> list[int]:
return [
submob_index
for submob_index, label in enumerate(self.labels)
if label != -1 and self.span_contains(
arbitrary_span, self.labelled_spans[label]
)
]
def get_specified_part_items(self) -> list[tuple[str, list[int]]]:
return [
(
self.get_substr(span),
self.get_submob_indices_list_by_span(span)
)
for span in self.specified_spans
]
def get_submob_indices_lists_by_selector( def get_submob_indices_lists_by_selector(
self, selector: Selector self, selector: Selector
) -> list[list[int]]: ) -> list[list[int]]:

View file

@ -73,42 +73,21 @@ class MTex(LabelledString):
file_path = tex_to_svg_file(full_tex) file_path = tex_to_svg_file(full_tex)
return file_path return file_path
#@property
#def sort_labelled_submobs(self) -> bool:
# return False
# Toolkits
@staticmethod
def get_color_command_str(rgb_hex: str) -> str:
rgb = MTex.hex_to_int(rgb_hex)
rg, b = divmod(rgb, 256)
r, g = divmod(rg, 256)
return f"\\color[RGB]{{{r}, {g}, {b}}}"
@staticmethod
def get_tag_string_pair(
attr_dict: dict[str, str], label_hex: str | None
) -> tuple[str, str]:
if label_hex is None:
return ("", "")
return ("{{" + MTex.get_color_command_str(label_hex), "}}")
# Parsing # Parsing
def get_command_spans(self) -> tuple[list[Span], list[Span], list[Span]]: def get_cmd_spans(self) -> tuple[list[Span], list[Span], list[Span]]:
cmd_spans = self.find_spans(r"\\(?:[a-zA-Z]+|\s|\S)") backslash_spans = self.find_spans(r"\\(?:[a-zA-Z]+|\s|\S)")
begin_cmd_spans = [ def find_unescaped_spans(pattern):
span return list(filter(
for span in self.find_spans("{") lambda span: (span[0] - 1, span[1]) not in backslash_spans,
if (span[0] - 1, span[1]) not in cmd_spans self.find_spans(pattern)
] ))
end_cmd_spans = [
span return (
for span in self.find_spans("}") find_unescaped_spans(r"{"),
if (span[0] - 1, span[1]) not in cmd_spans find_unescaped_spans(r"}"),
] backslash_spans + find_unescaped_spans(r"[_^]")
return begin_cmd_spans, end_cmd_spans, cmd_spans )
def get_specified_items( def get_specified_items(
self, cmd_span_pairs: list[tuple[Span, Span]] self, cmd_span_pairs: list[tuple[Span, Span]]
@ -117,8 +96,8 @@ class MTex(LabelledString):
(span_begin, span_end) (span_begin, span_end)
for (_, span_begin), (span_end, _) in cmd_span_pairs for (_, span_begin), (span_end, _) in cmd_span_pairs
] ]
specified_spans = self.chain( specified_spans = [
[ *[
cmd_content_spans[range_begin] cmd_content_spans[range_begin]
for _, (range_begin, range_end) in self.compress_neighbours([ for _, (range_begin, range_end) in self.compress_neighbours([
(span_begin + index, span_end - index) (span_begin + index, span_end - index)
@ -128,77 +107,57 @@ class MTex(LabelledString):
]) ])
if range_end - range_begin >= 2 if range_end - range_begin >= 2
], ],
[ *[
span span
for selector in self.tex_to_color_map for selector in self.tex_to_color_map
for span in self.find_spans_by_selector(selector) for span in self.find_spans_by_selector(selector)
], ],
self.find_spans_by_selector(self.isolate) *self.find_spans_by_selector(self.isolate)
) ]
return [(span, {}) for span in specified_spans] return [(span, {}) for span in specified_spans]
def get_replaced_substr(self, substr: str, flag: int) -> str: def get_repl_substr_for_content(self, substr: str) -> str:
return substr return substr
def get_full_content_string(self, content_string: str, is_labelled: bool) -> str: def get_repl_substr_for_matching(self, substr: str) -> str:
result = content_string return substr if substr.startswith("\\") else ""
@staticmethod
def get_color_cmd_str(rgb_hex: str) -> str:
rgb = MTex.hex_to_int(rgb_hex)
rg, b = divmod(rgb, 256)
r, g = divmod(rg, 256)
return f"\\color[RGB]{{{r}, {g}, {b}}}"
@staticmethod
def get_cmd_str_pair(
attr_dict: dict[str, str], label_hex: str | None
) -> tuple[str, str]:
if label_hex is None:
return "", ""
return "{{" + MTex.get_color_cmd_str(label_hex), "}}"
def get_content_prefix_and_suffix(
self, is_labelled: bool
) -> tuple[str, str]:
prefix_lines = []
suffix_lines = []
if not is_labelled:
prefix_lines.append(self.get_color_cmd_str(self.base_color_hex))
if self.alignment:
prefix_lines.append(self.alignment)
if self.tex_environment: if self.tex_environment:
if isinstance(self.tex_environment, str): if isinstance(self.tex_environment, str):
prefix = f"\\begin{{{self.tex_environment}}}" env_prefix = f"\\begin{{{self.tex_environment}}}"
suffix = f"\\end{{{self.tex_environment}}}" env_suffix = f"\\end{{{self.tex_environment}}}"
else: else:
prefix, suffix = self.tex_environment env_prefix, env_suffix = self.tex_environment
result = "\n".join([prefix, result, suffix]) prefix_lines.append(env_prefix)
if self.alignment: suffix_lines.append(env_suffix)
result = "\n".join([self.alignment, result]) return (
"".join([line + "\n" for line in prefix_lines]),
if not is_labelled: "".join(["\n" + line for line in suffix_lines])
result = "\n".join([ )
self.get_color_command_str(self.base_color_hex),
result
])
return result
# Selector
def get_cleaned_substr(self, span: Span) -> str:
backslash_indices = [
index for index, _ in self.find_spans(r"\\[\s\S]")
]
ignored_indices = [
index
for index, _ in self.find_spans(r"[\s_^{}]")
if index - 1 not in backslash_indices
]
span_begin, span_end = span
while span_begin in ignored_indices:
span_begin += 1
while span_end - 1 in ignored_indices:
span_end -= 1
shrinked_span = (span_begin, span_end)
whitespace_repl_items = []
for whitespace_span in self.find_spans(r"\s+"):
if not self.span_contains(shrinked_span, whitespace_span):
continue
if whitespace_span[0] - 1 in backslash_indices:
whitespace_span = (whitespace_span[0] + 1, whitespace_span[1])
if all(
self.get_substr((index, index + 1)).isalpha()
for index in (whitespace_span[0] - 1, whitespace_span[1])
):
replaced_substr = " "
else:
replaced_substr = ""
whitespace_repl_items.append((whitespace_span, replaced_substr))
_, unclosed_right_braces, unclosed_left_braces = self.split_span_by_levels(shrinked_span)
return "".join([
unclosed_right_braces * "{",
self.replace_string(shrinked_span, whitespace_repl_items),
unclosed_left_braces * "}"
])
# Method alias # Method alias

View file

@ -71,15 +71,8 @@ MARKUP_TAG_CONVERSION_DICT = {
"tt": {"font_family": "monospace"}, "tt": {"font_family": "monospace"},
"u": {"underline": "single"}, "u": {"underline": "single"},
} }
# See https://gitlab.gnome.org/GNOME/glib/-/blob/main/glib/gmarkup.c XML_ENTITIES = ("&lt;", "&gt;", "&amp;", "&quot;", "&apos;")
# Line 629, 2204 XML_ENTITY_CHARS = "<>&\"'"
XML_ENTITIES = (
("<", "&lt;"),
(">", "&gt;"),
("&", "&amp;"),
("\"", "&quot;"),
("'", "&apos;")
)
# Temporary handler # Temporary handler
@ -240,68 +233,50 @@ class MarkupText(LabelledString):
f"{validate_error}" f"{validate_error}"
) )
#@property
#def sort_labelled_submobs(self) -> bool:
# return True
# Toolkits
@staticmethod
def get_tag_string_pair(
attr_dict: dict[str, str], label_hex: str | None
) -> tuple[str, str]:
if label_hex is not None:
converted_attr_dict = {"foreground": label_hex}
for key, val in attr_dict.items():
substitute_key = MARKUP_COLOR_KEYS_DICT.get(key.lower(), None)
if substitute_key is None:
converted_attr_dict[key] = val
elif substitute_key:
converted_attr_dict[key] = "black"
#else:
# converted_attr_dict[key] = "black"
else:
converted_attr_dict = attr_dict.copy()
attrs_str = " ".join([
f"{key}='{val}'"
for key, val in converted_attr_dict.items()
])
return (f"<span {attrs_str}>", "</span>")
def get_global_attr_dict(self) -> dict[str, str]:
result = {
"foreground": self.base_color_hex,
"font_family": self.font,
"font_style": self.slant,
"font_weight": self.weight,
"font_size": str(self.font_size * 1024),
}
# `line_height` attribute is supported since Pango 1.50.
pango_version = manimpango.pango_version()
if tuple(map(int, pango_version.split("."))) < (1, 50):
if self.lsh is not None:
log.warning(
"Pango version %s found (< 1.50), "
"unable to set `line_height` attribute",
pango_version
)
else:
line_spacing_scale = self.lsh or DEFAULT_LINE_SPACING_SCALE
result["line_height"] = str(((line_spacing_scale) + 1) * 0.6)
return result
# Parsing # Parsing
def get_command_spans(self) -> tuple[list[Span], list[Span], list[Span]]: def get_cmd_spans(self) -> tuple[list[Span], list[Span], list[Span]]:
begin_cmd_spans = self.find_spans(
r"<\w+\s*(?:\w+\s*\=\s*(['\x22])[\s\S]*?\1\s*)*>"
)
end_cmd_spans = self.find_spans(r"</\w+\s*>")
if not self.is_markup: if not self.is_markup:
return [], [], self.find_spans(r"[<>&\x22']")
# See https://gitlab.gnome.org/GNOME/glib/-/blob/main/glib/gmarkup.c
string = self.string
cmd_spans = [] cmd_spans = []
cmd_pattern = re.compile(r"""
&[\s\S]*?; # entity & character reference
|</?\w+(?:\s*\w+\s*\=\s*(['"])[\s\S]*?\1)*/?> # tag
|<\?[\s\S]*?\?>|<\?> # instruction
|<!--[\s\S]*?-->|<!---?> # comment
|<!\[CDATA\[[\s\S]*?\]\]> # cdata
|<!DOCTYPE # doctype (require balancing groups)
|[>"'] # characters to escape
""", re.X)
match_obj = cmd_pattern.search(string)
while match_obj:
span_begin, span_end = match_obj.span()
if match_obj.group() == "<!DOCTYPE":
balance = 1
while balance != 0:
angle_match_obj = re.compile(r"[<>]").search(
string, pos=span_end
)
balance += {"<": 1, ">": -1}[angle_match_obj.group()]
span_end = angle_match_obj.end()
cmd_spans.append((span_begin, span_end))
match_obj = cmd_pattern.search(string, pos=span_end)
begin_cmd_spans = []
end_cmd_spans = []
other_cmd_spans = []
for cmd_span in cmd_spans:
substr = self.get_substr(cmd_span)
if re.fullmatch(r"<\w[\s\S]*[^/]>", substr):
begin_cmd_spans.append(cmd_span)
elif substr.startswith("</"):
end_cmd_spans.append(cmd_span)
else: else:
cmd_spans = self.find_spans(r"&[\s\S]*?;") # TODO other_cmd_spans.append(cmd_span)
return begin_cmd_spans, end_cmd_spans, cmd_spans return begin_cmd_spans, end_cmd_spans, other_cmd_spans
def get_specified_items( def get_specified_items(
self, cmd_span_pairs: list[tuple[Span, Span]] self, cmd_span_pairs: list[tuple[Span, Span]]
@ -323,8 +298,6 @@ class MarkupText(LabelledString):
) )
return [ return [
(self.full_span, self.get_global_attr_dict()),
(self.full_span, self.global_config),
*internal_items, *internal_items,
*[ *[
(span, {key: val}) (span, {key: val})
@ -341,34 +314,83 @@ class MarkupText(LabelledString):
(span, local_config) (span, local_config)
for selector, local_config in self.local_configs.items() for selector, local_config in self.local_configs.items()
for span in self.find_spans_by_selector(selector) for span in self.find_spans_by_selector(selector)
],
*[
(span, {})
for span in self.find_spans_by_selector(self.isolate)
] ]
] ]
def get_replaced_substr(self, substr: str, flag: int) -> str: def get_repl_substr_for_content(self, substr: str) -> str:
if flag: if substr.startswith("<") and substr.endswith(">"):
return "" return ""
return dict(XML_ENTITIES).get(substr, substr) if substr in XML_ENTITY_CHARS:
return XML_ENTITIES[XML_ENTITY_CHARS.index(substr)]
return substr
def get_full_content_string(self, content_string: str, is_labelled: bool) -> str: def get_repl_substr_for_matching(self, substr: str) -> str:
return content_string if substr.startswith("<") and substr.endswith(">"):
return ""
if substr in XML_ENTITIES:
return XML_ENTITY_CHARS[XML_ENTITIES.index(substr)]
if substr.startswith("&#") and substr.endswith(";"):
if substr.startswith("&#x"):
char_reference = int(substr[3:-1], 16)
else:
char_reference = int(substr[2:-1], 10)
return chr(char_reference)
return substr
# Selector @staticmethod
def get_cmd_str_pair(
attr_dict: dict[str, str], label_hex: str | None
) -> tuple[str, str]:
if label_hex is not None:
converted_attr_dict = {"foreground": label_hex}
for key, val in attr_dict.items():
substitute_key = MARKUP_COLOR_KEYS_DICT.get(key.lower(), None)
if substitute_key is None:
converted_attr_dict[key] = val
elif substitute_key:
converted_attr_dict[key] = "black"
else:
converted_attr_dict = attr_dict.copy()
attrs_str = " ".join([
f"{key}='{val}'"
for key, val in converted_attr_dict.items()
])
return f"<span {attrs_str}>", "</span>"
def get_cleaned_substr(self, span: Span) -> str: def get_content_prefix_and_suffix(
filtered_repl_items = [] self, is_labelled: bool
entity_to_char_dict = { ) -> tuple[str, str]:
entity: char global_attr_dict = {
for char, entity in XML_ENTITIES "foreground": self.base_color_hex,
"font_family": self.font,
"font_style": self.slant,
"font_weight": self.weight,
"font_size": str(self.font_size * 1024),
} }
for cmd_span, replaced_substr in self.command_repl_items: global_attr_dict.update(self.global_config)
if not self.span_contains(span, cmd_span): # `line_height` attribute is supported since Pango 1.50.
continue pango_version = manimpango.pango_version()
if re.fullmatch(r"&[\s\S]*;", replaced_substr): if tuple(map(int, pango_version.split("."))) < (1, 50):
if replaced_substr in entity_to_char_dict: if self.lsh is not None:
replaced_substr = entity_to_char_dict[replaced_substr] log.warning(
filtered_repl_items.append((cmd_span, replaced_substr)) "Pango version %s found (< 1.50), "
"unable to set `line_height` attribute",
pango_version
)
else:
line_spacing_scale = self.lsh or DEFAULT_LINE_SPACING_SCALE
global_attr_dict["line_height"] = str(
((line_spacing_scale) + 1) * 0.6
)
return self.replace_string(span, filtered_repl_items).strip() # TODO return self.get_cmd_str_pair(
global_attr_dict,
label_hex=self.int_to_hex(0) if is_labelled else None
)
# Method alias # Method alias