3b1b-manim/manimlib/mobject/svg/string_mobject.py
2023-01-11 14:20:20 -08:00

586 lines
21 KiB
Python

from __future__ import annotations
from abc import ABC, abstractmethod
import itertools as it
import re
from scipy.optimize import linear_sum_assignment
from scipy.spatial.distance import cdist
from manimlib.constants import WHITE
from manimlib.logger import log
from manimlib.mobject.svg.svg_mobject import SVGMobject
from manimlib.mobject.types.vectorized_mobject import VMobject
from manimlib.mobject.types.vectorized_mobject import VGroup
from manimlib.utils.color import color_to_hex
from manimlib.utils.color import hex_to_int
from manimlib.utils.color import int_to_hex
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from typing import Callable
from manimlib.typing import ManimColor, Span, Selector
class StringMobject(SVGMobject, ABC):
"""
An abstract base class for `Tex` and `MarkupText`
This class aims to optimize the logic of "slicing submobjects
via substrings". This could be much clearer and more user-friendly
than slicing through numerical indices explicitly.
Users are expected to specify substrings in `isolate` parameter
if they want to do anything with their corresponding submobjects.
`isolate` parameter can be either a string, a `re.Pattern` object,
or a 2-tuple containing integers or None, or a collection of the above.
Note, substrings specified cannot *partly* overlap with each other.
Each instance of `StringMobject` may generate 2 svg files.
The additional one is generated with some color commands inserted,
so that each submobject of the original `SVGMobject` will be labelled
by the color of its paired submobject from the additional `SVGMobject`.
"""
height = None
def __init__(
self,
string: str,
fill_color: ManimColor = WHITE,
stroke_color: ManimColor = WHITE,
stroke_width: float = 0,
base_color: ManimColor = WHITE,
isolate: Selector = (),
protect: Selector = (),
# When set to true, only the labelled svg is
# rendered, and its contents are used directly
# for the body of this String Mobject
use_labelled_svg: bool = False,
**kwargs
):
self.string = string
self.base_color = base_color or WHITE
self.isolate = isolate
self.protect = protect
self.use_labelled_svg = use_labelled_svg
self.parse()
super().__init__(
stroke_color=stroke_color,
fill_color=fill_color,
stroke_width=stroke_width,
**kwargs
)
self.labels = [submob.label for submob in self.submobjects]
def get_file_path(self, is_labelled: bool = False) -> str:
is_labelled = is_labelled or self.use_labelled_svg
return self.get_file_path_by_content(self.get_content(is_labelled))
@abstractmethod
def get_file_path_by_content(self, content: str) -> str:
return ""
def assign_labels_by_color(self, mobjects: list[VMobject]) -> None:
"""
Assuming each mobject in the list `mobjects` has a fill color
meant to represent a numerical label, this assigns those
those numerical labels to each mobject as an attribute
"""
labels_count = len(self.labelled_spans)
if labels_count == 1:
for mob in mobjects:
mob.label = 0
return
unrecognizable_colors = []
for mob in mobjects:
label = hex_to_int(color_to_hex(mob.get_fill_color()))
if label >= labels_count:
unrecognizable_colors.append(label)
label = 0
mob.label = label
if unrecognizable_colors:
log.warning(
"Unrecognizable color labels detected (%s). " + \
"The result could be unexpected.",
", ".join(
int_to_hex(color)
for color in unrecognizable_colors
)
)
def mobjects_from_file(self, file_path: str) -> list[VMobject]:
submobs = super().mobjects_from_file(file_path)
if self.use_labelled_svg:
# This means submobjects are colored according to spans
self.assign_labels_by_color(submobs)
return submobs
# Otherwise, submobs are not colored, so generate a new list
# of submobject which are and use those for labels
unlabelled_submobs = submobs
labelled_content = self.get_content(is_labelled=True)
labelled_file = self.get_file_path_by_content(labelled_content)
labelled_submobs = super().mobjects_from_file(labelled_file)
self.labelled_submobs = labelled_submobs
self.unlabelled_submobs = unlabelled_submobs
self.assign_labels_by_color(labelled_submobs)
self.rearrange_submobjects_by_positions(labelled_submobs, unlabelled_submobs)
for usm, lsm in zip(unlabelled_submobs, labelled_submobs):
usm.label = lsm.label
if len(unlabelled_submobs) != len(labelled_submobs):
log.warning(
"Cannot align submobjects of the labelled svg " + \
"to the original svg. Skip the labelling process."
)
for usm in unlabelled_submobs:
usm.label = 0
return unlabelled_submobs
return unlabelled_submobs
def rearrange_submobjects_by_positions(
self, labelled_submobs: list[VMobject], unlabelled_submobs: list[VMobject],
) -> None:
"""
Rearrange `labeleled_submobjects` so that each submobject
is labelled by the nearest one of `unlabelled_submobs`.
The correctness cannot be ensured, since the svg may
change significantly after inserting color commands.
"""
if len(labelled_submobs) == 0:
return
labelled_svg = VGroup(*labelled_submobs)
labelled_svg.replace(VGroup(*unlabelled_submobs))
distance_matrix = cdist(
[submob.get_center() for submob in unlabelled_submobs],
[submob.get_center() for submob in labelled_submobs]
)
_, indices = linear_sum_assignment(distance_matrix)
labelled_submobs[:] = [labelled_submobs[index] for index in indices]
# Toolkits
def find_spans_by_selector(self, selector: Selector) -> list[Span]:
def find_spans_by_single_selector(sel):
if isinstance(sel, str):
return [
match_obj.span()
for match_obj in re.finditer(re.escape(sel), self.string)
]
if isinstance(sel, re.Pattern):
return [
match_obj.span()
for match_obj in sel.finditer(self.string)
]
if isinstance(sel, tuple) and len(sel) == 2 and all(
isinstance(index, int) or index is None
for index in sel
):
l = len(self.string)
span = tuple(
default_index if index is None else
min(index, l) if index >= 0 else max(index + l, 0)
for index, default_index in zip(sel, (0, l))
)
return [span]
return None
result = find_spans_by_single_selector(selector)
if result is None:
result = []
for sel in selector:
spans = find_spans_by_single_selector(sel)
if spans is None:
raise TypeError(f"Invalid selector: '{sel}'")
result.extend(spans)
return list(filter(lambda span: span[0] <= span[1], result))
@staticmethod
def span_contains(span_0: Span, span_1: Span) -> bool:
return span_0[0] <= span_1[0] and span_0[1] >= span_1[1]
# Parsing
def parse(self) -> None:
def get_substr(span: Span) -> str:
return self.string[slice(*span)]
configured_items = self.get_configured_items()
isolated_spans = self.find_spans_by_selector(self.isolate)
protected_spans = self.find_spans_by_selector(self.protect)
command_matches = self.get_command_matches(self.string)
def get_key(category, i, flag):
def get_span_by_category(category, i):
if category == 0:
return configured_items[i][0]
if category == 1:
return isolated_spans[i]
if category == 2:
return protected_spans[i]
return command_matches[i].span()
index, paired_index = get_span_by_category(category, i)[::flag]
return (
index,
flag * (2 if index != paired_index else -1),
-paired_index,
flag * category,
flag * i
)
index_items = sorted([
(category, i, flag)
for category, item_length in enumerate((
len(configured_items),
len(isolated_spans),
len(protected_spans),
len(command_matches)
))
for i in range(item_length)
for flag in (1, -1)
], key=lambda t: get_key(*t))
inserted_items = []
labelled_items = []
overlapping_spans = []
level_mismatched_spans = []
label = 1
protect_level = 0
bracket_stack = [0]
bracket_count = 0
open_command_stack = []
open_stack = []
for category, i, flag in index_items:
if category >= 2:
protect_level += flag
if flag == 1 or category == 2:
continue
inserted_items.append((i, 0))
command_match = command_matches[i]
command_flag = self.get_command_flag(command_match)
if command_flag == 1:
bracket_count += 1
bracket_stack.append(bracket_count)
open_command_stack.append((len(inserted_items), i))
continue
if command_flag == 0:
continue
pos, i_ = open_command_stack.pop()
bracket_stack.pop()
open_command_match = command_matches[i_]
attr_dict = self.get_attr_dict_from_command_pair(
open_command_match, command_match
)
if attr_dict is None:
continue
span = (open_command_match.end(), command_match.start())
labelled_items.append((span, attr_dict))
inserted_items.insert(pos, (label, 1))
inserted_items.insert(-1, (label, -1))
label += 1
continue
if flag == 1:
open_stack.append((
len(inserted_items), category, i,
protect_level, bracket_stack.copy()
))
continue
span, attr_dict = configured_items[i] \
if category == 0 else (isolated_spans[i], {})
pos, category_, i_, protect_level_, bracket_stack_ \
= open_stack.pop()
if category_ != category or i_ != i:
overlapping_spans.append(span)
continue
if protect_level_ or protect_level:
continue
if bracket_stack_ != bracket_stack:
level_mismatched_spans.append(span)
continue
labelled_items.append((span, attr_dict))
inserted_items.insert(pos, (label, 1))
inserted_items.append((label, -1))
label += 1
labelled_items.insert(0, ((0, len(self.string)), {}))
inserted_items.insert(0, (0, 1))
inserted_items.append((0, -1))
if overlapping_spans:
log.warning(
"Partly overlapping substrings detected: %s",
", ".join(
f"'{get_substr(span)}'"
for span in overlapping_spans
)
)
if level_mismatched_spans:
log.warning(
"Cannot handle substrings: %s",
", ".join(
f"'{get_substr(span)}'"
for span in level_mismatched_spans
)
)
def reconstruct_string(
start_item: tuple[int, int],
end_item: tuple[int, int],
command_replace_func: Callable[[re.Match], str],
command_insert_func: Callable[[int, int, dict[str, str]], str]
) -> str:
def get_edge_item(i: int, flag: int) -> tuple[Span, str]:
if flag == 0:
match_obj = command_matches[i]
return (
match_obj.span(),
command_replace_func(match_obj)
)
span, attr_dict = labelled_items[i]
index = span[flag < 0]
return (
(index, index),
command_insert_func(i, flag, attr_dict)
)
items = [
get_edge_item(i, flag)
for i, flag in inserted_items[slice(
inserted_items.index(start_item),
inserted_items.index(end_item) + 1
)]
]
pieces = [
get_substr((start, end))
for start, end in zip(
[interval_end for (_, interval_end), _ in items[:-1]],
[interval_start for (interval_start, _), _ in items[1:]]
)
]
interval_pieces = [piece for _, piece in items[1:-1]]
return "".join(it.chain(*zip(pieces, (*interval_pieces, ""))))
self.labelled_spans = [span for span, _ in labelled_items]
self.reconstruct_string = reconstruct_string
def get_content(self, is_labelled: bool) -> str:
content = self.reconstruct_string(
(0, 1), (0, -1),
self.replace_for_content,
lambda label, flag, attr_dict: self.get_command_string(
attr_dict,
is_end=flag < 0,
label_hex=int_to_hex(label) if is_labelled else None
)
)
prefix, suffix = self.get_content_prefix_and_suffix(
is_labelled=is_labelled
)
return "".join((prefix, content, suffix))
@staticmethod
@abstractmethod
def get_command_matches(string: str) -> list[re.Match]:
return []
@staticmethod
@abstractmethod
def get_command_flag(match_obj: re.Match) -> int:
return 0
@staticmethod
@abstractmethod
def replace_for_content(match_obj: re.Match) -> str:
return ""
@staticmethod
@abstractmethod
def replace_for_matching(match_obj: re.Match) -> str:
return ""
@staticmethod
@abstractmethod
def get_attr_dict_from_command_pair(
open_command: re.Match, close_command: re.Match,
) -> dict[str, str] | None:
return None
@abstractmethod
def get_configured_items(self) -> list[tuple[Span, dict[str, str]]]:
return []
@staticmethod
@abstractmethod
def get_command_string(
attr_dict: dict[str, str], is_end: bool, label_hex: str | None
) -> str:
return ""
@abstractmethod
def get_content_prefix_and_suffix(
self, is_labelled: bool
) -> tuple[str, str]:
return "", ""
# Selector
def get_submob_indices_list_by_span(
self, arbitrary_span: Span
) -> list[int]:
return [
submob_index
for submob_index, label in enumerate(self.labels)
if self.span_contains(arbitrary_span, self.labelled_spans[label])
]
def get_specified_part_items(self) -> list[tuple[str, list[int]]]:
return [
(
self.string[slice(*span)],
self.get_submob_indices_list_by_span(span)
)
for span in self.labelled_spans[1:]
]
def get_specified_substrings(self) -> list[str]:
substrs = [
self.string[slice(*span)]
for span in self.labelled_spans[1:]
]
# Use dict.fromkeys to remove duplicates while retaining order
return list(dict.fromkeys(substrs).keys())
def get_group_part_items(self) -> list[tuple[str, list[int]]]:
if not self.labels:
return []
def get_neighbouring_pairs(vals):
return list(zip(vals[:-1], vals[1:]))
range_lens, group_labels = zip(*(
(len(list(grouper)), val)
for val, grouper in it.groupby(self.labels)
))
submob_indices_lists = [
list(range(*submob_range))
for submob_range in get_neighbouring_pairs(
[0, *it.accumulate(range_lens)]
)
]
labelled_spans = self.labelled_spans
start_items = [
(group_labels[0], 1),
*(
(curr_label, 1)
if self.span_contains(
labelled_spans[prev_label], labelled_spans[curr_label]
)
else (prev_label, -1)
for prev_label, curr_label in get_neighbouring_pairs(
group_labels
)
)
]
end_items = [
*(
(curr_label, -1)
if self.span_contains(
labelled_spans[next_label], labelled_spans[curr_label]
)
else (next_label, 1)
for curr_label, next_label in get_neighbouring_pairs(
group_labels
)
),
(group_labels[-1], -1)
]
group_substrs = [
re.sub(r"\s+", "", self.reconstruct_string(
start_item, end_item,
self.replace_for_matching,
lambda label, flag, attr_dict: ""
))
for start_item, end_item in zip(start_items, end_items)
]
return list(zip(group_substrs, submob_indices_lists))
def get_submob_indices_lists_by_selector(
self, selector: Selector
) -> list[list[int]]:
return list(filter(
lambda indices_list: indices_list,
[
self.get_submob_indices_list_by_span(span)
for span in self.find_spans_by_selector(selector)
]
))
def build_parts_from_indices_lists(
self, indices_lists: list[list[int]]
) -> VGroup:
return VGroup(*(
VGroup(*(
self.submobjects[submob_index]
for submob_index in indices_list
))
for indices_list in indices_lists
))
def build_groups(self) -> VGroup:
return self.build_parts_from_indices_lists([
indices_list
for _, indices_list in self.get_group_part_items()
])
def select_parts(self, selector: Selector) -> VGroup:
specified_substrings = self.get_specified_substrings()
if isinstance(selector, (str, re.Pattern)) and selector not in specified_substrings:
return self.select_unisolated_substring(selector)
indices_list = self.get_submob_indices_lists_by_selector(selector)
return self.build_parts_from_indices_lists(indices_list)
def __getitem__(self, value: int | slice | Selector) -> VMobject:
if isinstance(value, (int, slice)):
return super().__getitem__(value)
return self.select_parts(value)
def select_part(self, selector: Selector, index: int = 0) -> VMobject:
return self.select_parts(selector)[index]
def substr_to_path_count(self, substr: str) -> int:
return len(re.sub(r"\s", "", substr))
def get_symbol_substrings(self):
return list(re.sub(r"\s", "", self.string))
def select_unisolated_substring(self, pattern: str | re.Pattern) -> VGroup:
if isinstance(pattern, str):
pattern = re.compile(re.escape(pattern))
result = []
for match in re.finditer(pattern, self.string):
index = match.start()
start = self.substr_to_path_count(self.string[:index])
substr = match.group()
end = start + self.substr_to_path_count(substr)
result.append(self[start:end])
return VGroup(*result)
def set_parts_color(self, selector: Selector, color: ManimColor):
self.select_parts(selector).set_color(color)
return self
def set_parts_color_by_dict(self, color_map: dict[Selector, ManimColor]):
for selector, color in color_map.items():
self.set_parts_color(selector, color)
return self
def get_string(self) -> str:
return self.string