[WIP] Remove comments

This commit is contained in:
YishiMichael 2022-05-04 22:18:19 +08:00
parent 1cb7401141
commit 511a3aab3d
No known key found for this signature in database
GPG key ID: EC615C0C5A86BC80
4 changed files with 55 additions and 1625 deletions

View file

@ -213,10 +213,7 @@ class AddTextWordByWord(ShowIncreasingSubsets):
def __init__(self, string_mobject, **kwargs):
assert isinstance(string_mobject, LabelledString)
grouped_mobject = string_mobject.build_parts_from_indices_lists([
indices_list
for _, indices_list in string_mobject.get_group_part_items()
])
grouped_mobject = string_mobject.build_groups()
digest_config(self, kwargs)
if self.run_time is None:
self.run_time = self.time_per_word * len(grouped_mobject)

View file

@ -60,10 +60,6 @@ class LabelledString(SVGMobject, ABC):
self.full_span = (0, len(self.string))
self.parse()
super().__init__(**kwargs)
#self.labelled_submobject_items = [
# (submob.label, submob)
# for submob in self.submobjects
#]
self.labels = [submob.label for submob in self.submobjects]
def get_file_path(self) -> str:
@ -102,8 +98,8 @@ class LabelledString(SVGMobject, ABC):
)
submob_color_ints = [0] * num_submobjects
#TODO: remove this
#if self.sort_labelled_submobs:
# TODO: remove this
submob_indices = sorted(
range(num_submobjects),
key=lambda index: tuple(
@ -136,20 +132,12 @@ class LabelledString(SVGMobject, ABC):
def get_substr(self, span: Span) -> str:
    """Return the part of ``self.string`` selected by ``span``.

    ``span`` is unpacked directly into ``slice``, so a ``(begin, end)``
    pair yields ``self.string[begin:end]``.
    """
    window = slice(*span)
    return self.string[window]
#def match(self, pattern: str | re.Pattern, **kwargs) -> re.Pattern | None:
# if isinstance(pattern, str):
# pattern = re.compile(pattern)
# return re.compile(pattern).match(self.string, **kwargs)
def find_spans(self, pattern: str) -> list[Span]:
    """Return the ``(start, end)`` span of every match of ``pattern``.

    Matches are searched for in ``self.string`` and reported in the
    order the regex engine yields them (left to right, non-overlapping).
    """
    matches = re.compile(pattern).finditer(self.string)
    return [found.span() for found in matches]
#def find_indices(self, pattern: str | re.Pattern, **kwargs) -> list[int]:
# return [index for index, _ in self.find_spans(pattern, **kwargs)]
def find_spans_by_selector(self, selector: Selector) -> list[Span]:
def find_spans_by_single_selector(sel):
if isinstance(sel, str):
@ -241,229 +229,6 @@ class LabelledString(SVGMobject, ABC):
def span_contains(span_0: Span, span_1: Span) -> bool:
    """Tell whether ``span_1`` lies entirely inside ``span_0``.

    Shared endpoints count as contained, so a span contains itself.
    """
    # Reject as soon as the inner span starts before the outer one.
    if span_1[0] < span_0[0]:
        return False
    return span_1[1] <= span_0[1]
#def get_level_interval_spans(
# self,
# tag_span_pairs: list[tuple[Span, Span]],
# entity_spans: list[Span]
#) -> list[tuple[Span, int]]:
# return sorted(self.chain(
# [(begin_cmd_span, 1) for begin_cmd_span, _ in tag_span_pairs],
# [(end_cmd_span, -1) for _, end_cmd_span in tag_span_pairs],
# [(entity_span, 0) for entity_span in entity_spans],
# ), key=lambda t: t[0])
# #piece_spans = self.get_complement_spans(self.full_span, [
# # interval_span for interval_span, _ in level_interval_spans
# #])
# #piece_levels = [0, *it.accumulate([tag for _, tag in level_interval_spans])]
# #return piece_spans, piece_levels
def split_span_by_levels(
    self, arbitrary_span: Span
) -> tuple[list[Span], int, int]:
    """Cut ``arbitrary_span`` into pieces around unbalanced command spans.

    Reads ``self.cmd_span_items`` (pairs of ``(span, level_shift)``) and
    ``self.full_span``.  Both ends of ``arbitrary_span`` are first clipped
    against the spans returned by ``get_complement_spans`` for the command
    spans, then the command items falling inside the clipped range are
    scanned: a shift of ``1`` is pushed, a shift of ``-1`` cancels the most
    recent pending ``1`` or, if none is pending, is recorded on its own.
    The clipped span is finally split at every command span that stayed
    unbalanced.

    NOTE(review): a shift of ``1`` presumably marks an opening command and
    ``-1`` a closing one -- confirm against ``parse``.

    Returns a tuple ``(pieces, unmatched_closing, unmatched_opening)``;
    ``([], 0, 0)`` when the clipped span begins after it ends.
    """
    cmd_items = self.cmd_span_items
    cmd_spans = [cmd_span for cmd_span, _ in cmd_items]
    query_begin = arbitrary_span[0]
    query_end = arbitrary_span[1]

    # Range of command items lying between the two ends of the query span.
    first_index = sum(
        query_begin > cmd_begin for cmd_begin, _ in cmd_spans
    )
    stop_index = sum(
        query_end >= cmd_end for _, cmd_end in cmd_spans
    )

    gaps = self.get_complement_spans(self.full_span, cmd_spans)
    clipped_begin = max(query_begin, gaps[first_index][0])
    clipped_end = min(query_end, gaps[stop_index][1])
    if clipped_begin > clipped_end:
        return [], 0, 0
    clipped_span = (clipped_begin, clipped_end)

    pending_opens = []
    unmatched_closes = []
    for index in range(first_index, stop_index):
        _, shift = cmd_items[index]
        if shift == 1:
            pending_opens.append(index)
        elif shift == -1 and pending_opens:
            pending_opens.pop()
        elif shift == -1:
            unmatched_closes.append(index)

    unbalanced_spans = [
        cmd_spans[index]
        for index in self.chain(unmatched_closes, pending_opens)
    ]
    pieces = self.get_complement_spans(clipped_span, unbalanced_spans)
    return pieces, len(unmatched_closes), len(pending_opens)
#if interval_index_begin > 0:
# span_begin = max(span_begin, level_interval_spans[interval_index_begin - 1][1])
#if interval_index_end < len(level_interval_spans):
# span_end = min(span_end, level_interval_spans[interval_index_end][0])
#universal_span = (span_begin, span_end)
#print(universal_span, self.get_complement_spans(universal_span, interval_spans))
#print(self.get_complement_spans(adjusted_span, interval_spans))
#span_begins = [
# level_interval_spans[piece_index][0][1]
# for piece_index in split_piece_indices
#]
#span_begins[0] = max(arbitrary_span[0], span_begins[0])
#span_ends = [
# level_interval_spans[piece_index - 1][0][1]
# for piece_index in split_piece_indices[1:]
#]
#span_ends[-1] = min(arbitrary_span[1], span_ends[-1])
#return list(zip(span_begins, span_ends))
#lowest_level_indices = [
# piece_index
# for piece_index, piece_level in enumerate(piece_levels)
# if left_piece_index <= piece_index <= right_piece_index
# and piece_level == lowest_level
#]
#left_lowest_index = min(lowest_level_indices)
#right_lowest_index = max(lowest_level_indices)
#while right_lowest_index != right_piece_index:
#left_parallel_index = max(
# piece_index
# for piece_index, piece_level in enumerate(piece_levels)
# if left_piece_index <= piece_index <= right_piece_index
# and piece_level == piece_levels[left_piece_index]
#)
#right_parallel_index = min(
# piece_index
# for piece_index, piece_level in enumerate(piece_levels)
# if left_piece_index <= piece_index <= right_piece_index
# and piece_level == piece_levels[right_piece_index]
#)
#result.append((
# piece_spans[left_lowest_index][0],
# piece_spans[right_lowest_index][1]
#))
#lowest_piece_indices = [
# piece_index
# for piece_index, piece_level in enumerate(
# )
#]
#adjusted_span_begin = max(span_begin, piece_spans[begin_piece_index][0]) ##
#adjusted_span_end = min(span_end, piece_spans[end_piece_index][1]) ##
#begin_level_mismatch = piece_levels[begin_piece_index] - lowest_level
#end_level_mismatch = piece_levels[end_piece_index] - lowest_level
#if begin_level_mismatch:
# span_begin = piece_spans[max([
# index
# for index, piece_level in enumerate(piece_levels)
# if piece_level == lowest_level and index < begin_piece_index
# ])][1]
# begin_level_mismatch = 0
#if end_level_mismatch:
# span_end = piece_spans[min([
# index
# for index, piece_level in enumerate(piece_levels)
# if piece_level == lowest_level and index > end_piece_index
# ])][0]
# end_level_mismatch = 0
@staticmethod
def get_complement_spans(
universal_span: Span, interval_spans: list[Span]
@ -477,7 +242,7 @@ class LabelledString(SVGMobject, ABC):
(*span_ends, universal_span[1])
))
def replace_string(self, span: Span, repl_items: list[Span, str]): # TODO: need `span` attr?
def replace_string(self, span: Span, repl_items: list[Span, str]):
if not repl_items:
return self.get_substr(span)
@ -491,23 +256,6 @@ class LabelledString(SVGMobject, ABC):
repl_strs = [*repl_strs, ""]
return "".join(self.chain(*zip(pieces, repl_strs)))
#def get_replaced_string(
# self,
# inserted_string_pairs: list[tuple[Span, tuple[str, str]]],
# repl_items: list[tuple[Span, str]]
#) -> str:
# all_repl_items = self.chain(
# repl_items,
# [
# ((index, index), inserted_string)
# for index, inserted_string
# in self.sort_inserted_strings_from_pairs(
# inserted_string_pairs
# )
# ]
# )
# return self.replace_string(self.full_span, all_repl_items)
@staticmethod
def color_to_hex(color: ManimColor) -> str:
    """Convert ``color`` to a hex string.

    The colour is passed through ``color_to_rgb`` and the result is
    handed to ``rgb_to_hex``.
    """
    rgb = color_to_rgb(color)
    return rgb_to_hex(rgb)
@ -527,66 +275,8 @@ class LabelledString(SVGMobject, ABC):
) -> tuple[str, str]:
return ("", "")
#def get_color_tag_str(self, rgb_int: int, is_begin_tag: bool) -> str:
# return self.get_tag_str({
# "foreground": self.int_to_hex(rgb_int)
# }, escape_color_keys=False, is_begin_tag=is_begin_tag)
# Parsing
#@abstractmethod
#def get_command_spans(self) -> list[Span]:
# return []
# #return [
# # self.match(r"\\(?:[a-zA-Z]+|.)", pos=index).span()
# # for index in self.backslash_indices
# #]
#@abstractmethod
#@staticmethod
#def get_command_repl_dict() -> dict[str | re.Pattern, str]:
# return {}
#@abstractmethod
#def parse_setup(self) -> None:
# return
#@abstractmethod
#def get_command_repl_items(self) -> list[tuple[Span, str]]:
# return []
# #result = []
# #for cmd_span in self.command_spans:
# # cmd_str = self.get_substr(cmd_span)
# # if
# # repl_str = self.command_repl_dict.get(cmd_str, cmd_str)
# # result.append((cmd_span, repl_str))
# #return result
#def span_cuts_at_entity(self, span: Span) -> bool:
# return any([
# entity_begin < index < entity_end
# for index in span
# for entity_begin, entity_end in self.command_repl_items
# ])
#@abstractmethod
#def get_all_specified_items(self) -> list[tuple[Span, dict[str, str]]]:
# return []
#def get_specified_items(self) -> list[tuple[Span, dict[str, str]]]:
# return [
# (span, attr_dict)
# for span, attr_dict in self.get_all_specified_items()
# if not any([
# entity_begin < index < entity_end
# for index in span
# for entity_begin, entity_end in self.command_repl_items
# ])
# ]
#def get_specified_spans(self) -> list[Span]:
# return [span for span, _ in self.specified_items]
def parse(self) -> None:
begin_cmd_spans, end_cmd_spans, cmd_spans = self.get_command_spans()
@ -611,105 +301,11 @@ class LabelledString(SVGMobject, ABC):
raise ValueError("Missing '}' inserted")
specified_items = self.get_specified_items(cmd_span_pairs)
#entity_spans = self.get_entity_spans()
#self.entity_spans = entity_spans
#tag_span_pairs, internal_items = self.get_internal_items()
#self.level_interval_spans = self.get_level_interval_spans(
# tag_span_pairs, self.entity_spans
#)
#self.level_interval_spans = [
# level_interval_span
# for level_interval_span, _ in level_interval_span_items
#]
#self.level_shifts = [
# level_shift
# for _, level_shift in level_interval_span_items
#] # TODO
#self.tag_content_spans = [
# (content_begin, content_end)
# for (_, content_begin), (content_end, _) in tag_span_pairs
#]
#self.tag_spans = self.chain(*tag_span_pairs)
#specified_items = self.chain(
# self.get_specified_items(cmd_span_pairs)
# internal_items,
# self.get_external_items(),
# [
# (span, {})
# for span in self.find_spans_by_selector(self.isolate)
# ]
#)
#print(f"\n{specified_items=}\n")
#specified_spans =
split_items = [
(span, attr_dict)
for specified_span, attr_dict in specified_items
for span in self.split_span_by_levels(specified_span)[0]
]
#print([self.get_substr(span) for span, _ in specified_items])
#print([self.get_substr(span) for span, _ in split_items])
#print(f"\n{split_items=}\n")
#labelled_spans = [span for span, _ in split_items]
#labelled_spans = self.get_labelled_spans(split_spans)
#if len(labelled_spans) >= 16777216:
# raise ValueError("Cannot handle that many substrings")
#content_strings = []
#for is_labelled in (False, True):
#
# content_strings.append(content_string)
#inserted_str_pairs = self.chain(
# [
# (span, (
# self.get_tag_str(attr_dict, escape_color_keys=True, is_begin_tag=True),
# self.get_tag_str(attr_dict, escape_color_keys=True, is_begin_tag=False)
# ))
# for span, attr_dict in split_items
# ],
# [
# (span, (
# self.get_color_tag_str(label + 1, is_begin_tag=True),
# self.get_color_tag_str(label + 1, is_begin_tag=False)
# ))
# for span, attr_dict in split_items
# ]
#)
#decorated_strings = [
# self.replace_string(self.full_span, [
# (span, str_pair[flag])
# for span, str_pair in command_repl_items
# ])
# for flag in range(2)
#]
#full_content_strings = {}
#for is_labelled in (False, True):
# inserted_str_pairs = [
# (span, self.get_tag_string_pair(
# attr_dict,
# rgb_hex=self.int_to_hex(label + 1) if is_labelled else None
# ))
# for label, (span, attr_dict) in enumerate(split_items)
# ]
# repl_items = self.chain(
# command_repl_items,
# [
# ((index, index), inserted_str)
# for index, inserted_str
# in self.sort_obj_pairs_by_spans(inserted_str_pairs)
# ]
# )
# content_string = self.replace_string(
# self.full_span, repl_items
# )
# full_content_string = self.get_full_content_string(content_string)
# #full_content_strings[is_labelled] = full_content_string
command_repl_items = [
(span, self.get_replaced_substr(self.get_substr(span), flag))
@ -718,14 +314,17 @@ class LabelledString(SVGMobject, ABC):
self.command_repl_items = command_repl_items
self.specified_spans = [span for span, _ in specified_items]
self.labelled_spans = [span for span, _ in split_items]
for span_0, span_1 in it.product(self.labelled_spans, repeat=2):
labelled_spans = [span for span, _ in split_items]
if len(labelled_spans) >= 16777216:
raise ValueError("Cannot handle that many substrings")
for span_0, span_1 in it.product(labelled_spans, repeat=2):
if not span_0[0] < span_1[0] < span_0[1] < span_1[1]:
continue
raise ValueError(
"Partially overlapping substrings detected: "
f"'{self.get_substr(span_0)}' and '{self.get_substr(span_1)}'"
)
self.labelled_spans = labelled_spans
self.original_content, self.labelled_content = (
self.get_full_content_string(self.replace_string(
@ -745,162 +344,59 @@ class LabelledString(SVGMobject, ABC):
), is_labelled=is_labelled)
for is_labelled in (False, True)
)
print(self.original_content)
print()
print(self.labelled_content)
def split_span_by_levels(
    self, arbitrary_span: Span
) -> tuple[list[Span], int, int]:
    """Cut ``arbitrary_span`` into pieces around unbalanced command spans.

    Reads ``self.cmd_span_items`` (pairs of ``(span, level_shift)``) and
    ``self.full_span``.  Both ends of ``arbitrary_span`` are first clipped
    against the spans returned by ``get_complement_spans`` for the command
    spans, then the command items falling inside the clipped range are
    scanned: a shift of ``1`` is pushed, a shift of ``-1`` cancels the most
    recent pending ``1`` or, if none is pending, is recorded on its own.
    The clipped span is finally split at every command span that stayed
    unbalanced.

    NOTE(review): a shift of ``1`` presumably marks an opening command and
    ``-1`` a closing one -- confirm against ``parse``.

    Returns a tuple ``(pieces, unmatched_closing, unmatched_opening)``;
    ``([], 0, 0)`` when the clipped span begins after it ends.
    """
    cmd_items = self.cmd_span_items
    cmd_spans = [cmd_span for cmd_span, _ in cmd_items]
    query_begin = arbitrary_span[0]
    query_end = arbitrary_span[1]

    # Range of command items lying between the two ends of the query span.
    first_index = sum(
        query_begin > cmd_begin for cmd_begin, _ in cmd_spans
    )
    stop_index = sum(
        query_end >= cmd_end for _, cmd_end in cmd_spans
    )

    gaps = self.get_complement_spans(self.full_span, cmd_spans)
    clipped_begin = max(query_begin, gaps[first_index][0])
    clipped_end = min(query_end, gaps[stop_index][1])
    if clipped_begin > clipped_end:
        return [], 0, 0
    clipped_span = (clipped_begin, clipped_end)

    pending_opens = []
    unmatched_closes = []
    for index in range(first_index, stop_index):
        _, shift = cmd_items[index]
        if shift == 1:
            pending_opens.append(index)
        elif shift == -1 and pending_opens:
            pending_opens.pop()
        elif shift == -1:
            unmatched_closes.append(index)

    unbalanced_spans = [
        cmd_spans[index]
        for index in self.chain(unmatched_closes, pending_opens)
    ]
    pieces = self.get_complement_spans(clipped_span, unbalanced_spans)
    return pieces, len(unmatched_closes), len(pending_opens)
@abstractmethod
def get_command_spans(self) -> tuple[list[Span], list[Span], list[Span]]:
    """Abstract hook returning three lists of command spans.

    ``parse`` unpacks the result as
    ``begin_cmd_spans, end_cmd_spans, cmd_spans``.  This base version
    reports no commands at all.
    """
    begin_cmd_spans: list[Span] = []
    end_cmd_spans: list[Span] = []
    cmd_spans: list[Span] = []
    return begin_cmd_spans, end_cmd_spans, cmd_spans
#@abstractmethod
#def get_entity_spans(self) -> list[Span]:
# return []
#@abstractmethod
#def get_internal_items(
# self
#) -> tuple[list[tuple[Span, Span]], list[tuple[Span, dict[str, str]]]]:
# return [], []
@abstractmethod
def get_specified_items(
    self, cmd_span_pairs: list[tuple[Span, Span]]
) -> list[tuple[Span, dict[str, str]]]:
    """Abstract hook returning ``(span, attr_dict)`` items.

    ``parse`` calls it with ``cmd_span_pairs`` and iterates the result
    as ``(specified_span, attr_dict)`` pairs.  This base version
    specifies nothing.
    """
    specified_items: list[tuple[Span, dict[str, str]]] = []
    return specified_items
#@abstractmethod
#def get_spans_from_items(self, specified_items: list[tuple[Span, dict[str, str]]]) -> list[Span]:
# return []
#def split_span(self, arbitrary_span: Span) -> list[Span]:
# span_begin, span_end = arbitrary_span
# # TODO: improve algorithm
# span_begin += sum([
# entity_end - span_begin
# for entity_begin, entity_end in self.entity_spans
# if entity_begin < span_begin < entity_end
# ])
# span_end -= sum([
# span_end - entity_begin
# for entity_begin, entity_end in self.entity_spans
# if entity_begin < span_end < entity_end
# ])
# if span_begin >= span_end:
# return []
# adjusted_span = (span_begin, span_end)
# result = []
# span_choices = list(filter(
# lambda span: span[0] < span[1] and self.span_contains(
# adjusted_span, span
# ),
# self.tag_content_spans
# ))
# while span_choices:
# chosen_span = min(span_choices, key=lambda t: (t[0], -t[1]))
# result.append(chosen_span)
# span_choices = list(filter(
# lambda span: chosen_span[1] <= span[0],
# span_choices
# ))
# result.extend(self.chain(*[
# self.get_complement_spans(span, sorted([
# (max(tag_span[0], span[0]), min(tag_span[1], span[1]))
# for tag_span in self.tag_spans
# if tag_span[0] < span[1] and span[0] < tag_span[1]
# ]))
# for span in self.get_complement_spans(adjusted_span, result)
# ]))
# return list(filter(lambda span: span[0] < span[1], result))
#@abstractmethod
#def get_split_items(self, specified_items: list[T]) -> list[T]:
# return []
#@abstractmethod
#def get_labelled_spans(self, split_spans: list[Span]) -> list[Span]:
# return []
#@abstractmethod
#def get_predefined_inserted_str_items(
# self, split_items: list[T]
#) -> list[tuple[Span, tuple[str, str]]]:
# return []
#def check_overlapping(self) -> None:
#for span_0, span_1 in it.product(self.specified_spans, self.bracket_content_spans):
# if not any(
# span_0[0] < span_1[0] <= span_0[1] <= span_1[1],
# span_1[0] <= span_0[0] <= span_1[1] < span_0[1]
# ):
# continue
# raise ValueError(
# f"Invalid substring detected: '{self.get_substr(span_0)}'"
# )
# TODO: test bracket_content_spans
#@abstractmethod
#def get_inserted_string_pairs(
# self, is_labelled: bool
#) -> list[tuple[Span, tuple[str, str]]]:
# return []
#@abstractmethod
#def get_labelled_spans(self) -> list[Span]:
# return []
#def get_decorated_string(
# self, is_labelled: bool, replace_commands: bool
#) -> str:
# inserted_string_pairs = [
# (indices, str_pair)
# for indices, str_pair in self.get_inserted_string_pairs(
# is_labelled=is_labelled
# )
# if not any(
# cmd_begin < index < cmd_end
# for index in indices
# for (cmd_begin, cmd_end), _ in self.command_repl_items
# )
# ]
# repl_items = [
# ((index, index), inserted_string)
# for index, inserted_string
# in self.sort_inserted_strings_from_pairs(
# inserted_string_pairs
# )
# ]
# if replace_commands:
# repl_items.extend(self.command_repl_items)
# return self.get_replaced_substr(self.full_span, repl_items)
#@abstractmethod
#def get_additional_inserted_str_pairs(
# self
#) -> list[tuple[Span, tuple[str, str]]]:
# return []
@abstractmethod
def get_replaced_substr(self, substr: str, flag: int) -> str:
    """Abstract hook giving the replacement text for a command substring.

    ``parse`` calls it with the substring of a command span and a flag.
    This base version always answers with the empty string.

    NOTE(review): the meaning of ``flag`` is not visible here -- confirm
    against the subclasses.
    """
    replacement = ""
    return replacement
@ -909,9 +405,6 @@ class LabelledString(SVGMobject, ABC):
def get_full_content_string(self, content_string: str, is_labelled: bool) -> str:
    """Hook that wraps ``content_string`` into the full content string.

    ``parse`` calls it once with ``is_labelled=False`` and once with
    ``is_labelled=True``.  This base version ignores both arguments and
    answers with the empty string.
    """
    full_content = ""
    return full_content
#def get_content(self, is_labelled: bool) -> str:
# return self.content_strings[int(is_labelled)]
# Selector
@abstractmethod
@ -996,13 +489,11 @@ class LabelledString(SVGMobject, ABC):
for indices_list in indices_lists
])
#def select_part_by_span(self, arbitrary_span: Span) -> VGroup:
# return VGroup(*[
# self.labelled_submobject_items[submob_index]
# for submob_index in self.get_submob_indices_list_by_span(
# arbitrary_span
# )
# ])
def build_groups(self) -> VGroup:
    """Build one part per entry of ``get_group_part_items``.

    Only the indices list of each ``(_, indices_list)`` item is used;
    the collected lists are handed to ``build_parts_from_indices_lists``
    and its result is returned unchanged.
    """
    group_items = self.get_group_part_items()
    indices_lists = [indices for _, indices in group_items]
    return self.build_parts_from_indices_lists(indices_lists)
def select_parts(self, selector: Selector) -> VGroup:
return self.build_parts_from_indices_lists(

View file

@ -31,16 +31,6 @@ if TYPE_CHECKING:
SCALE_FACTOR_PER_FONT_POINT = 0.001
#TEX_COLOR_COMMANDS_DICT = {
# "\\color": (1, False),
# "\\textcolor": (1, False),
# "\\pagecolor": (1, True),
# "\\colorbox": (1, True),
# "\\fcolorbox": (2, True),
#}
#TEX_COLOR_COMMAND_SUFFIX = "replaced"
class MTex(LabelledString):
CONFIG = {
"font_size": 48,
@ -104,97 +94,8 @@ class MTex(LabelledString):
return ("", "")
return ("{{" + MTex.get_color_command_str(label_hex), "}}")
#@staticmethod
#def shrink_span(span: Span, skippable_indices: list[int]) -> Span:
# span_begin, span_end = span
# while span_begin in skippable_indices:
# span_begin += 1
# while span_end - 1 in skippable_indices:
# span_end -= 1
# return (span_begin, span_end)
# Parsing
#def parse(self) -> None: # TODO
#command_spans = self.find_spans(r"\\(?:[a-zA-Z]+|.)")
#specified_spans = self.chain(
# inner_content_spans,
# *[
# self.find_spans_by_selector(selector)
# for selector in self.tex_to_color_map.keys()
# ],
# self.find_spans_by_selector(self.isolate)
#)
#print(specified_spans)
#label_span_list = self.remove_redundancies(self.chain(*[
# self.split_span(span)
# for span in specified_spans
#]))
#print(label_span_list)
#for span in all_specified_spans:
# adjusted_span, _, _ = self.adjust_span(span, align_level=True)
# if adjusted_span[0] > adjusted_span[1]:
# continue
# specified_spans.append(adjusted_span)
#reversed_script_spans_dict = {
# span_end: span_begin
# for span_begin, _, span_end in script_items
#}
#label_span_list = [
# (content_begin, span_end)
# for _, content_begin, span_end in script_items
#]
#for span_begin, span_end in specified_spans:
# while span_end in reversed_script_spans_dict:
# span_end = reversed_script_spans_dict[span_end]
# if span_begin >= span_end:
# continue
# shrinked_span = (span_begin, span_end)
# if shrinked_span in label_span_list:
# continue
# label_span_list.append(shrinked_span)
#inserted_str_items = [
# (span, (
# ("{{", "{{" + self.get_color_command_str(label + 1)),
# ("}}", "}}"),
# ))
# for label, span in enumerate(label_span_list)
#]
#command_repl_items = [
# ((index, index), str_pair)
# for index, str_pair in self.sort_obj_pairs_by_spans(inserted_str_items)
#]
#for cmd_span in command_spans:
# cmd_str = self.get_substr(cmd_span)
# if cmd_str not in TEX_COLOR_COMMANDS_DICT:
# continue
# repl_str = f"{cmd_str}{TEX_COLOR_COMMAND_SUFFIX}"
# command_repl_items.append((cmd_span, (cmd_str, repl_str)))
#print(decorated_strings)
#return specified_spans, label_span_list, decorated_strings
#self.command_spans = self.find_spans(r"\\(?:[a-zA-Z]+|.)")
#self.ignorable_indices = self.get_ignorable_indices()
#self.brace_content_spans = self.get_brace_content_spans()
#self.command_repl_items = self.get_command_repl_items()
##self.backslash_indices = self.get_backslash_indices()
#self.ignorable_indices = self.get_ignorable_indices()
##self.script_items = self.get_script_items()
##self.script_char_indices = self.get_script_char_indices()
##self.script_content_spans = self.get_script_content_spans()
##self.script_spans = self.get_script_spans()
#self.specified_spans = self.get_specified_spans()
##super().parse()
#self.label_span_list = self.get_label_span_list()
def get_command_spans(self) -> tuple[list[Span], list[Span], list[Span]]:
cmd_spans = self.find_spans(r"\\(?:[a-zA-Z]+|\s|\S)")
begin_cmd_spans = [
@ -209,85 +110,6 @@ class MTex(LabelledString):
]
return begin_cmd_spans, end_cmd_spans, cmd_spans
#def get_entity_spans(self) -> list[Span]:
# return self.find_spans(r"\\(?:[a-zA-Z]+|.)")
#def get_internal_items(
# self
#) -> tuple[list[tuple[Span, Span]], list[tuple[Span, dict[str, str]]]]:
# command_spans = self.entity_spans
# brace_span_pairs = []
# brace_begin_spans_stack = []
# for span in self.find_spans(r"[{}]"):
# char_index = span[0]
# if (char_index - 1, char_index + 1) in command_spans:
# continue
# if self.get_substr(span) == "{":
# brace_begin_spans_stack.append(span)
# else:
# if not brace_begin_spans_stack:
# raise ValueError("Missing '{' inserted")
# brace_span = brace_begin_spans_stack.pop()
# brace_span_pairs.append((brace_span, span))
# if brace_begin_spans_stack:
# raise ValueError("Missing '}' inserted")
#tag_span_pairs = brace_span_pairs.copy()
#script_entity_dict = dict(self.chain(
# [
# (span_begin, span_end)
# for (span_begin, _), (_, span_end) in brace_span_pairs
# ],
# command_spans
#))
#script_additional_brace_spans = [
# (char_index + 1, script_entity_dict.get(
# script_begin, script_begin + 1
# ))
# for char_index, script_begin in self.find_spans(r"[_^]\s*(?=.)")
# if (char_index - 1, char_index + 1) not in command_spans
#]
#for char_index, script_begin in self.find_spans(r"[_^]\s*(?=.)"):
# if (char_index - 1, char_index + 1) in command_spans:
# continue
# script_end = script_entity_dict.get(script_begin, script_begin + 1)
# tag_span_pairs.append(
# ((char_index, char_index + 1), (script_end, script_end))
# )
# script_additional_brace_spans.append((char_index + 1, script_end))
#tag_span_pairs = self.chain(
# brace_span_pairs,
# [
# ((script_begin - 1, script_begin), (script_end, script_end))
# for script_begin, script_end in script_additional_brace_spans
# ]
#)
#brace_content_spans = [
# (span_begin, span_end)
# for (_, span_begin), (span_end, _) in brace_span_pairs
#]
#internal_items = [
# (brace_content_spans[range_begin], {})
# for _, (range_begin, range_end) in self.compress_neighbours([
# (span_begin + index, span_end - index)
# for index, (span_begin, span_end) in enumerate(
# brace_content_spans
# )
# ])
# if range_end - range_begin >= 2
#]
##self.script_additional_brace_spans = script_additional_brace_spans
#return brace_span_pairs, internal_items
#def get_external_items(self) -> list[tuple[Span, dict[str, str]]]:
# return [
# (span, {})
# for selector in self.tex_to_color_map
# for span in self.find_spans_by_selector(selector)
# ]
def get_specified_items(
self, cmd_span_pairs: list[tuple[Span, Span]]
) -> list[tuple[Span, dict[str, str]]]:
@ -315,292 +137,8 @@ class MTex(LabelledString):
)
return [(span, {}) for span in specified_spans]
#def get_label_span_list(self, split_spans: list[Span]) -> list[Span]:
# return split_spans.copy()
#def get_spans_from_items(self, specified_items: list[Span]) -> list[Span]:
# return specified_items
#def get_split_items(self, specified_items: list[Span]) -> list[Span]:
# return self.remove_redundancies(self.chain(*[
# self.split_span(span)
# for span in specified_items
# ]))
#def get_label_span_list(self, split_spans: list[Span]) -> list[Span]:
# return split_spans
#def get_additional_inserted_str_pairs(
# self
#) -> list[tuple[Span, tuple[str, str]]]:
# return [
# (span, ("{", "}"))
# for span in self.script_additional_brace_spans
# ]
#def get_command_repl_items(self) -> list[Span, str]:
# return []
#if not is_labelled:
# return []
#result = []
#command_spans = self.entity_spans # TODO
#for cmd_span in command_spans:
# cmd_str = self.get_substr(cmd_span)
# if cmd_str not in TEX_COLOR_COMMANDS_DICT:
# continue
# repl_str = f"{cmd_str}{TEX_COLOR_COMMAND_SUFFIX}"
# result.append((cmd_span, repl_str))
#return result
#def get_predefined_inserted_str_items(
# self, split_items: list[Span]
#) -> list[tuple[Span, tuple[str, str]]]:
# return []
#def get_ignorable_indices(self) -> list[int]:
# return self.chain(
# [
# index
# for index, _ in self.find_spans(r"\s")
# ],
# [
# index
# for index, _ in self.find_spans(r"[_^{}]")
# if (index - 1, index + 1) not in self.command_spans
# ],
# )
#def get_bracket_content_spans(self) -> list[Span]:
# span_begins = []
# span_ends = []
# span_begins_stack = []
# for match_obj in re.finditer(r"[{}]", self.string):
# index = match_obj.start()
# if (index - 1, index + 1) in command_spans:
# continue
# if match_obj.group() == "{":
# span_begins_stack.append(index + 1)
# else:
# if not span_begins_stack:
# raise ValueError("Missing '{' inserted")
# span_begins.append(span_begins_stack.pop())
# span_ends.append(index)
# if span_begins_stack:
# raise ValueError("Missing '}' inserted")
# return list(zip(span_begins, span_ends))
#def get_command_repl_items(self) -> list[tuple[Span, str]]:
# result = []
# for cmd_span in self.command_spans:
# cmd_str = self.get_substr(cmd_span)
# if cmd_str in TEX_COLOR_COMMANDS_DICT:
# repl_str = f"{cmd_str}{TEX_COLOR_COMMAND_SUFFIX}"
# else:
# repl_str = cmd_str
# result.append((cmd_span, repl_str))
# return result
#def get_specified_spans(self) -> list[Span]:
# # Match paired double braces (`{{...}}`).
# sorted_content_spans = sorted(
# self.bracket_content_spans, key=lambda t: t[1]
# )
# inner_content_spans = [
# sorted_content_spans[range_begin]
# for _, (range_begin, range_end) in self.compress_neighbours([
# (span_begin + index, span_end - index)
# for index, (span_begin, span_end) in enumerate(
# sorted_content_spans
# )
# ])
# if range_end - range_begin >= 2
# ]
# #inner_content_spans = [
# # (span_begin + 1, span_end - 1)
# # for span_begin, span_end in inner_brace_spans
# # if span_end - span_begin > 2
# #]
# return self.remove_redundancies(self.chain(
# inner_content_spans,
# *[
# self.find_spans_by_selector(selector)
# for selector in self.tex_to_color_map.keys()
# ],
# self.find_spans_by_selector(self.isolate)
# ))
# #return list(filter(
# # lambda span: not any([
# # entity_begin < index < entity_end
# # for index in span
# # for entity_begin, entity_end in self.command_spans
# # ]),
# # result
# #))
#def get_label_span_list(self) -> tuple[list[int], list[Span]]:
# script_entity_dict = dict(self.chain(
# [
# (span_begin - 1, span_end + 1)
# for span_begin, span_end in self.bracket_content_spans
# ],
# self.command_spans
# ))
# script_items = []
# for match_obj in re.finditer(r"\s*([_^])\s*(?=.)", self.string):
# char_index = match_obj.start(1)
# if (char_index - 1, char_index + 1) in self.command_spans:
# continue
# span_begin, content_begin = match_obj.span()
# span_end = script_entity_dict.get(span_begin, content_begin + 1)
# script_items.append(
# (span_begin, char_index, content_begin, span_end)
# )
# reversed_script_spans_dict = {
# span_end: span_begin
# for span_begin, _, _, span_end in script_items
# }
# ignorable_indices = self.chain(
# [index for index, _ in self.find_spans(r"\s")],
# [char_index for _, char_index, _, _ in script_items]
# )
# result = [
# (content_begin, span_end)
# for _, _, content_begin, span_end in script_items
# ]
# for span in self.specified_spans:
# span_begin, span_end = self.shrink_span(span, ignorable_indices)
# while span_end in reversed_script_spans_dict:
# span_end = reversed_script_spans_dict[span_end]
# if span_begin >= span_end:
# continue
# shrinked_span = (span_begin, span_end)
# if shrinked_span in result:
# continue
# result.append(shrinked_span)
# return result
#def get_command_spans(self) -> list[Span]:
# return self.find_spans()
#def get_command_repl_items(self) -> list[Span]:
# return [
# (span, self.get_substr(span))
# for span in self.find_spans(r"\\(?:[a-zA-Z]+|.)")
# ]
#def get_command_spans(self) -> list[Span]:
# return self.find_spans(r"\\(?:[a-zA-Z]+|.)")
#return [
# self.match(r"\\(?:[a-zA-Z]+|.)", pos=index).span()
# for index in self.backslash_indices
#]
#@staticmethod
#def get_command_repl_dict() -> dict[str | re.Pattern, str]:
# return {
# cmd_name: f"{cmd_name}replaced"
# for cmd_name in TEX_COLOR_COMMANDS_DICT
# }
#def get_backslash_indices(self) -> list[int]:
# # The latter of `\\` doesn't count.
# return self.find_indices(r"\\.")
#def get_unescaped_char_indices(self, char: str) -> list[int]:
# return list(filter(
# lambda index: index - 1 not in self.backslash_indices,
# self.find_indices(re.escape(char))
# ))
#def get_script_items(self) -> list[tuple[int, int, int, int]]:
# script_entity_dict = dict(self.chain(
# self.brace_spans,
# self.command_spans
# ))
# result = []
# for match_obj in re.finditer(r"\s*([_^])\s*(?=.)", self.string):
# char_index = match_obj.start(1)
# if char_index - 1 in self.backslash_indices:
# continue
# span_begin, content_begin = match_obj.span()
# span_end = script_entity_dict.get(span_begin, content_begin + 1)
# result.append((span_begin, char_index, content_begin, span_end))
# return result
#def get_script_char_indices(self) -> list[int]:
# return self.chain(*[
# self.get_unescaped_char_indices(char)
# for char in "_^"
# ])
#def get_script_content_spans(self) -> list[Span]:
# result = []
# script_entity_dict = dict(self.chain(
# self.brace_spans,
# self.command_spans
# ))
# for index in self.script_char_indices:
# span_begin = self.match(r"\s*", pos=index + 1).end()
# if span_begin in script_entity_dict.keys():
# span_end = script_entity_dict[span_begin]
# else:
# match_obj = self.match(r".", pos=span_begin)
# if match_obj is None:
# continue
# span_end = match_obj.end()
# result.append((span_begin, span_end))
# return result
#def get_script_spans(self) -> list[Span]:
# return [
# (
# self.match(r"[\s\S]*?(\s*)$", endpos=index).start(1),
# script_content_span[1]
# )
# for index, script_content_span in zip(
# self.script_char_indices, self.script_content_spans
# )
# ]
#def get_command_repl_items(self) -> list[tuple[Span, str]]:
# result = []
# brace_spans_dict = dict(self.brace_spans)
# brace_begins = list(brace_spans_dict.keys())
# for cmd_span in self.command_spans:
# cmd_name = self.get_substr(cmd_span)
# if cmd_name not in TEX_COLOR_COMMANDS_DICT:
# continue
# n_braces, substitute_cmd = TEX_COLOR_COMMANDS_DICT[cmd_name]
# span_begin, span_end = cmd_span
# for _ in range(n_braces):
# span_end = brace_spans_dict[min(filter(
# lambda index: index >= span_end,
# brace_begins
# ))]
# if substitute_cmd:
# repl_str = cmd_name + n_braces * "{black}"
# else:
# repl_str = ""
# result.append(((span_begin, span_end), repl_str))
# return result
#def get_inserted_string_pairs(
# self, is_labelled: bool
#) -> list[tuple[Span, tuple[str, str]]]:
# if not is_labelled:
# return []
# return [
# (span, (
# "{{" + self.get_color_command_str(label + 1),
# "}}"
# ))
# for label, span in enumerate(self.label_span_list)
# ]
def get_replaced_substr(self, substr: str, flag: int) -> str:
    """Return the text that should stand in for ``substr``.

    The current implementation is a pass-through: ``substr`` is returned
    unchanged and ``flag`` is not consulted.

    Args:
        substr: The substring to be replaced.
        flag: Currently ignored.
            NOTE(review): presumably selects between the plain and the
            labelled rendering pass -- confirm against the caller.

    Returns:
        ``substr`` itself, unmodified.
    """
    # A second, unreachable ``return substr`` used to follow this line;
    # it was dead code and has been dropped.
    return substr  # TODO: replace color commands
def get_full_content_string(self, content_string: str, is_labelled: bool) -> str:
result = content_string
@ -615,24 +153,6 @@ class MTex(LabelledString):
if self.alignment:
result = "\n".join([self.alignment, result])
#if is_labelled:
# occurred_commands = [
# # TODO
# self.get_substr(span) for span in self.entity_spans
# ]
# newcommand_lines = [
# "".join([
# f"\\newcommand{cmd_name}{TEX_COLOR_COMMAND_SUFFIX}",
# f"[{n_braces + 1}][]",
# "{",
# cmd_name + "{black}" * n_braces if substitute_cmd else "",
# "}"
# ])
# for cmd_name, (n_braces, substitute_cmd)
# in TEX_COLOR_COMMANDS_DICT.items()
# if cmd_name in occurred_commands
# ]
# result = "\n".join([*newcommand_lines, result])
if not is_labelled:
result = "\n".join([
self.get_color_command_str(self.base_color_hex),
@ -646,12 +166,6 @@ class MTex(LabelledString):
backslash_indices = [
index for index, _ in self.find_spans(r"\\[\s\S]")
]
#ignored_spans = [
# ignored_span
# for ignored_span in self.find_spans(r"[\s_^{}]+")
# if ignored_span[0] - 1 not in backslash_indices
#]
#shrinked_span, _ = self.adjust_span(span, ignored_spans)
ignored_indices = [
index
for index, _ in self.find_spans(r"[\s_^{}]")
@ -663,11 +177,6 @@ class MTex(LabelledString):
while span_end - 1 in ignored_indices:
span_end -= 1
shrinked_span = (span_begin, span_end)
#if span_begin >= span_end:
# return ""
#shrinked_span = (span_begin, span_end)
_, unclosed_right_braces, unclosed_left_braces = self.split_span_by_levels(shrinked_span)
whitespace_repl_items = []
for whitespace_span in self.find_spans(r"\s+"):
@ -684,95 +193,13 @@ class MTex(LabelledString):
replaced_substr = ""
whitespace_repl_items.append((whitespace_span, replaced_substr))
_, unclosed_right_braces, unclosed_left_braces = self.split_span_by_levels(shrinked_span)
return "".join([
unclosed_right_braces * "{",
self.replace_string(shrinked_span, whitespace_repl_items),
unclosed_left_braces * "}"
])
#interval_spans = [
# span
# if span[0] - 1 not in backslash_indices
# else (span[0] + 1, span[1])
# for span in self.find_spans(r"[\s_^{}]+")
#]
#adjusted_span, _ = self.adjust_span(span, interval_spans)
#if adjusted_span[0] >= adjusted_span[1]:
# return ""
#left_brace_indices = list(filter(
# lambda index: self.get_substr((index, index + 1)) == "{",
# ignored_indices
#))
#right_brace_indices = list(filter(
# lambda index: self.get_substr((index, index + 1)) == "}",
# ignored_indices
#))
#unclosed_left_braces = 0
#unclosed_right_braces = 0
#for index in range(*adjusted_span):
# if index in left_brace_indices:
# unclosed_left_braces += 1
# elif index in right_brace_indices:
# if unclosed_left_braces == 0:
# unclosed_right_braces += 1
# else:
# unclosed_left_braces -= 1
#adjusted_span, unclosed_left_braces, unclosed_right_braces \
# = self.adjust_span(span, align_level=False)
#print(self.get_substr(span), "".join([
# unclosed_right_braces * "{",
# self.get_substr(shrinked_span),
# unclosed_left_braces * "}"
#]))
#result = "".join([
# unclosed_right_braces * "{",
# self.get_substr(shrinked_span),
# unclosed_left_braces * "}"
#])
#return re.sub(r"\s+", " ", result)
#return (span_begin, span_end)
#return self.get_substr(span) # TODO: test
#left_brace_indices = [
# span_begin - 1
# for span_begin, _ in self.brace_content_spans
#]
#right_brace_indices = [
# span_end
# for _, span_end in self.brace_content_spans
#]
#skippable_indices = self.chain(
# self.ignorable_indices,
# #self.script_char_indices,
# left_brace_indices,
# right_brace_indices
#)
#shrinked_span = self.shrink_span(span, skippable_indices)
##if shrinked_span[0] >= shrinked_span[1]:
## return ""
## Balance braces.
#unclosed_left_braces = 0
#unclosed_right_braces = 0
#for index in range(*shrinked_span):
# if index in left_brace_indices:
# unclosed_left_braces += 1
# elif index in right_brace_indices:
# if unclosed_left_braces == 0:
# unclosed_right_braces += 1
# else:
# unclosed_left_braces -= 1
##adjusted_span, unclosed_left_braces, unclosed_right_braces \
## = self.adjust_span(span, align_level=False)
#return "".join([
# unclosed_right_braces * "{",
# self.get_substr(shrinked_span),
# unclosed_left_braces * "}"
#])
# Method alias
def get_parts_by_tex(self, selector: Selector) -> VGroup:

View file

@ -240,14 +240,6 @@ class MarkupText(LabelledString):
f"{validate_error}"
)
#def parse(self) -> None:
# #self.global_attr_dict = self.get_global_attr_dict()
# #self.items_from_markup = self.get_items_from_markup()
# #self.tag_spans = self.get_tag_spans()
# ##self.items_from_markup = self.get_items_from_markup()
# #self.specified_items = self.get_specified_items()
# super().parse()
#@property
#def sort_labelled_submobs(self) -> bool:
# return True
@ -300,176 +292,6 @@ class MarkupText(LabelledString):
# Parsing
#def parse(self) -> None:
# self.bracket_content_spans, self.command_repl_items \
# = self.get_items_from_markup()
# #self.bracket_content_spans = [
# # span for span, _ in items_from_markup
# #]
# #specified_items = self.get_specified_items()
# #self.command_repl_items = self.get_command_repl_items()
# #self.specified_spans = self.remove_redundancies([
# # span for span, _ in specified_items
# #])
# #self.label_span_list = self.get_label_span_list()
# #self.predefined_items = [
# # (self.full_span, self.get_global_attr_dict()),
# # (self.full_span, self.global_config),
# # *specified_items
# #]
#def parse(self) -> None: # TODO: type
# if not self.is_markup:
# return [], [], [
# (span, (escaped, escaped))
# for char, escaped in XML_ENTITIES
# for span in self.find_spans(re.escape(char))
# ]
#self.entity_spans = self.find_spans(r"&[\s\S]*?;")
#tag_spans = [span for span, _ in command_repl_items]
#begin_tag_spans = [
# begin_tag_span for begin_tag_span, _, _ in markup_tag_items
#]
#end_tag_spans = [
# end_tag_span for _, end_tag_span, _ in markup_tag_items
#]
#tag_spans = self.chain(begin_tag_spans, end_tag_spans)
#command_repl_items = [
# (tag_span, "") for tag_span in tag_spans
#]
#self.chain(
# [
# (begin_tag_span, (
# f"<span {self.get_attr_dict_str(attr_dict, escape_color_keys=False)}>",
# f"<span {self.get_attr_dict_str(attr_dict, escape_color_keys=True)}>"
# ))
# for begin_tag_span, _, attr_dict in markup_tag_items
# ],
# [
# (end_tag_span, ("</span>", "</span>"))
# for _, end_tag_span, _ in markup_tag_items
# ]
#)
#self.piece_spans, self.piece_levels = self.init_piece_items(
# begin_tag_spans, end_tag_spans, self.find_spans(r"&[\s\S]*?;")
#)
#command_repl_items.extend([
# (span, (self.get_substr(span), self.get_substr(span)))
# for span in self.find_spans(r"&[\s\S]*?;")
#])
# Needed in plain text
#specified_items = self.chain(
# [
# ((span_begin, span_end), attr_dict)
# for (_, span_begin), (span_end, _), attr_dict
# in markup_tag_items
# ],
# self.get_specified_items()
#)
#specified_spans = self.remove_redundancies([
# span for span, _ in specified_items
#])
#specified_items = []
#for span, attr_dict in all_specified_items:
# for
# adjusted_span, _, _ = self.adjust_span(span, align_level=True)
# if adjusted_span[0] > adjusted_span[1]:
# continue
# specified_items.append(adjusted_span, attr_dict)
#predefined_items = [
# (self.full_span, self.get_global_attr_dict()),
# (self.full_span, self.global_config),
# *split_items
#]
#inserted_str_items = self.chain(
# [
# (span, (
# (
# f"<span {self.get_attr_dict_str(attr_dict, escape_color_keys=False)}>",
# f"<span {self.get_attr_dict_str(attr_dict, escape_color_keys=True)}>"
# ),
# ("</span>", "</span>")
# ))
# for span, attr_dict in predefined_items
# ],
# [
# (span, (
# ("<span >", f"<span {self.get_color_command_str(label + 1)}>"),
# ("</span>", "</span>"),
# ))
# for label, span in enumerate(label_span_list)
# ]
#)
#command_repl_items = self.chain(
# [
# (tag_span, ("", "")) for tag_span in self.tag_spans
# ],
# [
# ((index, index), str_pair)
# for index, str_pair in self.sort_obj_pairs_by_spans(inserted_str_items)
# ]
#)
#decorated_strings = [
# self.get_replaced_substr(self.full_span, [
# (span, str_pair[flag])
# for span, str_pair in command_repl_items
# ])
# for flag in range(2)
#]
#return specified_spans, label_span_list, decorated_strings
#if is_labelled:
# attr_dict_items = self.chain(
# [
# (span, {
# key:
# "black" if key.lower() in MARKUP_COLOR_KEYS else val
# for key, val in attr_dict.items()
# })
# for span, attr_dict in self.predefined_items
# ],
# [
# (span, {"foreground": self.int_to_hex(label + 1)})
# for label, span in enumerate(self.label_span_list)
# ]
# )
#else:
# attr_dict_items = self.chain(
# self.predefined_items,
# [
# (span, {})
# for span in self.label_span_list
# ]
# )
#return [
# (span, (
# f"<span {self.get_attr_dict_str(attr_dict)}>",
# "</span>"
# ))
# for span, attr_dict in attr_dict_items
#]
#inserted_string_pairs = [
# (indices, str_pair)
# for indices, str_pair in self.get_inserted_string_pairs(
# is_labelled=is_labelled
# )
# if not any(
# cmd_begin < index < cmd_end
# for index in indices
# for (cmd_begin, cmd_end), _ in self.command_repl_items
# )
#]
#return bracket_content_spans, label_span_list, command_repl_items
def get_command_spans(self) -> tuple[list[Span], list[Span], list[Span]]:
begin_cmd_spans = self.find_spans(
r"<\w+\s*(?:\w+\s*\=\s*(['\x22])[\s\S]*?\1\s*)*>"
@ -481,80 +303,6 @@ class MarkupText(LabelledString):
cmd_spans = self.find_spans(r"&[\s\S]*?;") # TODO
return begin_cmd_spans, end_cmd_spans, cmd_spans
#def get_entity_spans(self) -> list[Span]:
# if not self.is_markup:
# return []
# return self.find_spans(r"&[\s\S]*?;")
#def get_internal_items(
# self
#) -> tuple[list[tuple[Span, Span]], list[tuple[Span, dict[str, str]]]]:
# if not self.is_markup:
# return [], []
# tag_pattern = r"<(/?)(\w+)\s*((\w+\s*\=\s*(['\x22])[\s\S]*?\5\s*)*)>"
# attr_pattern = r"(\w+)\s*\=\s*(['\x22])([\s\S]*?)\2"
# begin_match_obj_stack = []
# markup_tag_items = []
# for match_obj in re.finditer(tag_pattern, self.string):
# if not match_obj.group(1):
# begin_match_obj_stack.append(match_obj)
# continue
# begin_match_obj = begin_match_obj_stack.pop()
# tag_name = begin_match_obj.group(2)
# if tag_name == "span":
# attr_dict = {
# attr_match_obj.group(1): attr_match_obj.group(3)
# for attr_match_obj in re.finditer(
# attr_pattern, begin_match_obj.group(3)
# )
# }
# else:
# attr_dict = MARKUP_TAG_CONVERSION_DICT.get(tag_name, {})
# markup_tag_items.append(
# (begin_match_obj.span(), match_obj.span(), attr_dict)
# )
# tag_span_pairs = [
# (tag_begin_span, tag_end_span)
# for tag_begin_span, tag_end_span, _ in markup_tag_items
# ]
# internal_items = [
# ((span_begin, span_end), attr_dict)
# for (_, span_begin), (span_end, _), attr_dict in markup_tag_items
# ]
# return tag_span_pairs, internal_items
#def get_external_items(self) -> list[tuple[Span, dict[str, str]]]:
# return [
# (self.full_span, self.get_global_attr_dict()),
# (self.full_span, self.global_config),
# *[
# (span, {key: val})
# for t2x_dict, key in (
# (self.t2c, "foreground"),
# (self.t2f, "font_family"),
# (self.t2s, "font_style"),
# (self.t2w, "font_weight")
# )
# for selector, val in t2x_dict.items()
# for span in self.find_spans_by_selector(selector)
# ],
# *[
# (span, local_config)
# for selector, local_config in self.local_configs.items()
# for span in self.find_spans_by_selector(selector)
# ]
# ]
#if self.split_words:
# # For backward compatibility
# result.extend([
# (span, {})
# for pattern in (r"[a-zA-Z]+", r"\S+")
# for span in self.find_spans(pattern)
# ])
#return result
def get_specified_items(
self, cmd_span_pairs: list[tuple[Span, Span]]
) -> list[tuple[Span, dict[str, str]]]:
@ -596,233 +344,6 @@ class MarkupText(LabelledString):
]
]
#def get_label_span_list(self, split_spans: list[Span]) -> list[Span]:
#def get_spans_from_items(
# self, specified_items: list[tuple[Span, dict[str, str]]]
#) -> list[Span]:
# return [span for span, _ in specified_items]
#def get_split_items(
# self, specified_items: list[tuple[Span, dict[str, str]]]
#) -> list[tuple[Span, dict[str, str]]]:
# return [
# (span, attr_dict)
# for specified_span, attr_dict in specified_items
# for span in self.split_span(specified_span)
# ]
#def get_label_span_list(self, split_spans: list[Span]) -> list[Span]:
# interval_spans = sorted(self.chain(
# self.tag_spans,
# [
# (index, index)
# for span in split_spans
# for index in span
# ]
# ))
# text_spans = self.get_complement_spans(self.full_span, interval_spans)
# if self.is_markup:
# pattern = r"[0-9a-zA-Z]+|(?:&[\s\S]*?;|[^0-9a-zA-Z\s])+"
# else:
# pattern = r"[0-9a-zA-Z]+|[^0-9a-zA-Z\s]+"
# return self.chain(*[
# self.find_spans(pattern, pos=span_begin, endpos=span_end)
# for span_begin, span_end in text_spans
# ])
#def get_additional_inserted_str_pairs(
# self
#) -> list[tuple[Span, tuple[str, str]]]:
# return []
#def get_command_repl_items(self) -> list[Span, str]:
# result = [
# (tag_span, "") for tag_span in self.tag_spans # TODO
# ]
# if not self.is_markup:
# result.extend([
# (span, escaped)
# for char, escaped in XML_ENTITIES
# for span in self.find_spans(re.escape(char))
# ])
# return result
#def get_predefined_inserted_str_items(
# self, split_items: list[tuple[Span, dict[str, str]]]
#) -> list[tuple[Span, tuple[str, str]]]:
# predefined_items = [
# (self.full_span, self.get_global_attr_dict()),
# (self.full_span, self.global_config),
# *split_items
# ]
# return [
# (span, (
# (
# self.get_tag_str(attr_dict, escape_color_keys=False, is_begin_tag=True),
# self.get_tag_str(attr_dict, escape_color_keys=True, is_begin_tag=True)
# ),
# (
# self.get_tag_str(attr_dict, escape_color_keys=False, is_begin_tag=False),
# self.get_tag_str(attr_dict, escape_color_keys=True, is_begin_tag=False)
# )
# ))
# for span, attr_dict in predefined_items
# ]
#def get_full_content_string(self, replaced_string: str) -> str:
# return replaced_string
#def get_tag_spans(self) -> list[Span]:
# return self.chain(
# (begin_tag_span, end_tag_span)
# for begin_tag_span, end_tag_span, _ in self.items_from_markup
# )
#def get_items_from_markup(self) -> list[tuple[Span, dict[str, str]]]:
# return [
# ((span_begin, span_end), attr_dict)
# for (_, span_begin), (span_end, _), attr_dict
# in self.items_from_markup
# if span_begin < span_end
# ]
#def get_command_repl_items(self) -> list[tuple[Span, str]]:
# result = [
# (tag_span, "")
# for tag_span in self.tag_spans
# ]
# if self.is_markup:
# result.extend([
# (span, self.get_substr(span))
# for span in self.find_spans(r"&[\s\S]*?;")
# ])
# else:
# result.extend([
# (span, escaped)
# for char, escaped in (
# ("&", "&amp;"),
# (">", "&gt;"),
# ("<", "&lt;")
# )
# for span in self.find_spans(re.escape(char))
# ])
# return result
#def get_command_spans(self) -> list[Span]:
# result = self.tag_spans.copy()
# if self.is_markup:
# result.extend(self.find_spans(r"&[\s\S]*?;"))
# else:
# result.extend(self.find_spans(r"[&<>]"))
# return result
#@staticmethod
#def get_command_repl_dict() -> dict[str | re.Pattern, str]:
# return {
# re.compile(r"<.*>"): "",
# "&": "&amp;",
# "<": "&lt;",
# ">": "&gt;"
# }
# #result = [
# # (tag_span, "") for tag_span in self.tag_spans
# #]
# #if self.is_markup:
# # result.extend([
# # (span, self.get_substr(span))
# # for span in self.find_spans(r"&[\s\S]*?;")
# # ])
# #else:
# # result.extend([
# # (span, escaped)
# # for char, escaped in (
# # ("&", "&amp;"),
# # (">", "&gt;"),
# # ("<", "&lt;")
# # )
# # for span in self.find_spans(re.escape(char))
# # ])
# #return result
#entity_spans = self.tag_spans.copy()
#if self.is_markup:
# entity_spans.extend(self.find_spans(r"&[\s\S]*?;"))
#return [
# (span, attr_dict)
# for span, attr_dict in result
# if not self.span_cuts_at_entity(span)
# #if not any([
# # entity_begin < index < entity_end
# # for index in span
# # for entity_begin, entity_end in entity_spans
# #])
#]
#def get_specified_spans(self) -> list[Span]:
# return self.remove_redundancies([
# span for span, _ in self.specified_items
# ])
#def get_label_span_list(self) -> list[Span]:
# interval_spans = sorted(self.chain(
# self.tag_spans,
# [
# (index, index)
# for span in self.specified_spans
# for index in span
# ]
# ))
# text_spans = self.get_complement_spans(interval_spans, self.full_span)
# if self.is_markup:
# pattern = r"[0-9a-zA-Z]+|(?:&[\s\S]*?;|[^0-9a-zA-Z\s])+"
# else:
# pattern = r"[0-9a-zA-Z]+|[^0-9a-zA-Z\s]+"
# return self.chain(*[
# self.find_spans(pattern, pos=span_begin, endpos=span_end)
# for span_begin, span_end in text_spans
# ])
#def get_inserted_string_pairs(
# self, is_labelled: bool
#) -> list[tuple[Span, tuple[str, str]]]:
# #predefined_items = [
# # (self.full_span, self.global_attr_dict),
# # (self.full_span, self.global_config),
# # *self.specified_items
# #]
# if is_labelled:
# attr_dict_items = self.chain(
# [
# (span, {
# key:
# "black" if key.lower() in MARKUP_COLOR_KEYS else val
# for key, val in attr_dict.items()
# })
# for span, attr_dict in self.predefined_items
# ],
# [
# (span, {"foreground": self.int_to_hex(label + 1)})
# for label, span in enumerate(self.label_span_list)
# ]
# )
# else:
# attr_dict_items = self.chain(
# self.predefined_items,
# [
# (span, {})
# for span in self.label_span_list
# ]
# )
# return [
# (span, (
# f"<span {self.get_attr_dict_str(attr_dict)}>",
# "</span>"
# ))
# for span, attr_dict in attr_dict_items
# ]
def get_replaced_substr(self, substr: str, flag: int) -> str:
if flag:
return ""
@ -847,13 +368,7 @@ class MarkupText(LabelledString):
replaced_substr = entity_to_char_dict[replaced_substr]
filtered_repl_items.append((cmd_span, replaced_substr))
return self.replace_string(span, filtered_repl_items).strip() # TODO: test
#repl_items = [
# (cmd_span, repl_str)
# for cmd_span, (repl_str, _) in self.command_repl_items
# if self.span_contains(span, cmd_span)
#]
#return self.get_replaced_substr(span, repl_items).strip()
return self.replace_string(span, filtered_repl_items).strip() # TODO
# Method alias