Improve algorithm

This commit is contained in:
YishiMichael 2022-03-31 18:08:10 +08:00
parent d5ab9a91c4
commit dc816c9f8d
No known key found for this signature in database
GPG key ID: EC615C0C5A86BC80

View file

@ -137,6 +137,8 @@ class MarkupText(LabelledString):
self.full2short(kwargs) self.full2short(kwargs)
digest_config(self, kwargs) digest_config(self, kwargs)
if not self.font:
self.font = get_customization()["style"]["font"]
if self.is_markup: if self.is_markup:
validate_error = MarkupUtils.validate(text) validate_error = MarkupUtils.validate(text)
if validate_error: if validate_error:
@ -236,19 +238,19 @@ class MarkupText(LabelledString):
def pre_parse(self) -> None:
    """Run the parent pre-parse step, then cache the attribute tables."""
    super().pre_parse()
    # Order matters: the markup dicts are derived from the tag items, and
    # the predefined dicts combine the three tables computed before them.
    for cache_name in (
        "tag_items_from_markup",
        "global_dict_from_config",
        "local_dicts_from_markup",
        "local_dicts_from_config",
        "predefined_attr_dicts",
    ):
        getter = getattr(self, f"get_{cache_name}")
        setattr(self, cache_name, getter())
# Toolkits # Toolkits
@staticmethod
def get_attr_dict_str(attr_dict: dict[str, str]) -> str:
    """Render ``attr_dict`` as space-separated ``key='value'`` pairs.

    Pairs appear in the dict's iteration order; an empty dict gives an
    empty string. Values are inserted verbatim, without any escaping.
    """
    rendered_pairs = []
    for name, setting in attr_dict.items():
        rendered_pairs.append(f"{name}='{setting}'")
    return " ".join(rendered_pairs)
@staticmethod @staticmethod
@ -273,20 +275,12 @@ class MarkupText(LabelledString):
return MarkupText.get_end_tag_str() return MarkupText.get_end_tag_str()
@staticmethod @staticmethod
def convert_attr_key(key: str) -> str: def merge_attr_dicts(
return SPAN_ATTR_KEY_CONVERSION[key.lower()] attr_dict_items: list[Span, str, typing.Any]
@staticmethod
def convert_attr_val(val: typing.Any) -> str:
return str(val).lower()
@staticmethod
def merge_attr_items(
attr_items: list[Span, str, str]
) -> list[tuple[Span, dict[str, str]]]: ) -> list[tuple[Span, dict[str, str]]]:
index_seq = [0] index_seq = [0]
attr_dict_list = [{}] attr_dict_list = [{}]
for span, key, value in attr_items: for span, attr_dict in attr_dict_items:
if span[0] >= span[1]: if span[0] >= span[1]:
continue continue
region_indices = [ region_indices = [
@ -304,51 +298,25 @@ class MarkupText(LabelledString):
region_indices[flag] += 1 region_indices[flag] += 1
if flag == 0: if flag == 0:
region_indices[1] += 1 region_indices[1] += 1
if not key: for key, val in attr_dict.items():
continue if not key:
for attr_dict in attr_dict_list[slice(*region_indices)]: continue
attr_dict[key] = value for mid_dict in attr_dict_list[slice(*region_indices)]:
mid_dict[key] = val
return list(zip( return list(zip(
MarkupText.get_neighbouring_pairs(index_seq), attr_dict_list[:-1] MarkupText.get_neighbouring_pairs(index_seq), attr_dict_list[:-1]
)) ))
# Pre-parsing # Pre-parsing
def get_global_items_from_config(self) -> list[str, str]:
global_attr_dict = {
"line_height": (
(self.lsh or DEFAULT_LINE_SPACING_SCALE) + 1
) * 0.6,
"font_family": self.font or get_customization()["style"]["font"],
"font_size": self.font_size * 1024,
"font_style": self.slant,
"font_weight": self.weight
}
global_attr_dict = {
k: v
for k, v in global_attr_dict.items()
if v is not None
}
result = list(it.chain(
global_attr_dict.items(),
self.global_config.items()
))
return [
(
self.convert_attr_key(key),
self.convert_attr_val(val)
)
for key, val in result
]
def get_tag_items_from_markup( def get_tag_items_from_markup(
self self
) -> list[tuple[Span, Span, dict[str, str]]]: ) -> list[tuple[Span, Span, dict[str, str]]]:
if not self.is_markup: if not self.is_markup:
return [] return []
tag_pattern = r"""<(/?)(\w+)\s*((\w+\s*\=\s*('.*?'|".*?")\s*)*)>""" tag_pattern = r"""<(/?)(\w+)\s*((?:\w+\s*\=\s*(['"]).*?\4\s*)*)>"""
attr_pattern = r"""(\w+)\s*\=\s*(?:(?:'(.*?)')|(?:"(.*?)"))""" attr_pattern = r"""(\w+)\s*\=\s*(['"])(.*?)\2"""
begin_match_obj_stack = [] begin_match_obj_stack = []
match_obj_pairs = [] match_obj_pairs = []
for match_obj in re.finditer(tag_pattern, self.string): for match_obj in re.finditer(tag_pattern, self.string):
@ -370,7 +338,7 @@ class MarkupText(LabelledString):
raise ValueError("Attributes shan't exist in ending tags") raise ValueError("Attributes shan't exist in ending tags")
if tag_name == "span": if tag_name == "span":
attr_dict = { attr_dict = {
match.group(1): match.group(2) or match.group(3) match.group(1): match.group(3)
for match in re.finditer( for match in re.finditer(
attr_pattern, begin_match_obj.group(3) attr_pattern, begin_match_obj.group(3)
) )
@ -389,21 +357,33 @@ class MarkupText(LabelledString):
) )
return result return result
def get_global_dict_from_config(self) -> dict[str, typing.Any]:
    """Build the attribute dict that applies to the whole string.

    The built-in defaults are derived from ``lsh``, ``font``,
    ``font_size``, ``slant`` and ``weight``; entries of
    ``self.global_config`` are applied afterwards and therefore win on
    key clashes.

    Defaults whose value is ``None`` are omitted, so an unset style is
    not later stringified into the literal text ``"None"``. Values coming
    from ``self.global_config`` are passed through untouched.
    """
    defaults = {
        "line_height": (
            (self.lsh or DEFAULT_LINE_SPACING_SCALE) + 1
        ) * 0.6,
        "font_family": self.font,
        "font_size": self.font_size * 1024,
        "font_style": self.slant,
        "font_weight": self.weight
    }
    result = {
        key: val
        for key, val in defaults.items()
        if val is not None
    }
    result.update(self.global_config)
    return result
def get_local_dicts_from_markup(
    self
) -> list[tuple[Span, dict[str, str]]]:
    """Return ``(span, attr_dict)`` pairs for every parsed markup tag pair.

    Each span runs from the first index of the begin-tag span to the last
    index of the end-tag span. The result is ordered by span.
    """
    # Sort on the span only: dicts are not orderable, so letting tuple
    # comparison fall through to the attr dict would raise TypeError
    # whenever two spans compare equal.
    return sorted(
        (
            ((begin_tag_span[0], end_tag_span[1]), attr_dict)
            for begin_tag_span, end_tag_span, attr_dict
            in self.tag_items_from_markup
        ),
        key=lambda span_and_dict: span_and_dict[0]
    )
def get_local_items_from_config(self) -> list[tuple[Span, str, str]]: def get_local_dicts_from_config(
result = [ self
(span, key, val) ) -> list[Span, dict[str, typing.Any]]:
return [
(span, {key: val})
for t2x_dict, key in ( for t2x_dict, key in (
(self.t2c, "foreground"), (self.t2c, "foreground"),
(self.t2f, "font_family"), (self.t2f, "font_family"),
@ -413,29 +393,24 @@ class MarkupText(LabelledString):
for substr, val in t2x_dict.items() for substr, val in t2x_dict.items()
for span in self.find_substr(substr) for span in self.find_substr(substr)
] + [ ] + [
(span, key, val) (span, local_config)
for substr, local_config in self.local_configs.items() for substr, local_config in self.local_configs.items()
for span in self.find_substr(substr) for span in self.find_substr(substr)
for key, val in local_config.items()
]
return [
(
span,
self.convert_attr_key(key),
self.convert_attr_val(val)
)
for span, key, val in result
] ]
def get_predefined_attr_dicts(self) -> list[tuple[Span, dict[str, str]]]:
    """Collect every predefined ``(span, attr_dict)`` pair in normalised form.

    The global dict (attached to ``self.full_span``) comes first, followed
    by the dicts from markup and then the dicts from config. Each key is
    lower-cased and mapped through ``SPAN_ATTR_KEY_CONVERSION``; each value
    is converted with ``str``.

    Raises:
        KeyError: if a lower-cased key is missing from
            ``SPAN_ATTR_KEY_CONVERSION``.
    """
    attr_dict_items = [
        (self.full_span, self.global_dict_from_config),
        *self.local_dicts_from_markup,
        *self.local_dicts_from_config
    ]
    return [
        (span, {
            SPAN_ATTR_KEY_CONVERSION[key.lower()]: str(val)
            for key, val in attr_dict.items()
        })
        for span, attr_dict in attr_dict_items
    ]
# Parsing # Parsing
@ -460,13 +435,13 @@ class MarkupText(LabelledString):
def get_internal_specified_spans(self) -> list[Span]:
    """Return the span of each entry in ``self.local_dicts_from_markup``."""
    spans = []
    for tag_span, _attrs in self.local_dicts_from_markup:
        spans.append(tag_span)
    return spans
def get_external_specified_spans(self) -> list[Span]:
    """Return the span of each entry in ``self.local_dicts_from_config``."""
    # Entries are (span, attr_dict) pairs; unpacking three values here
    # would raise ValueError as soon as the list is non-empty.
    return [
        config_span
        for config_span, _ in self.local_dicts_from_config
    ]
def get_label_span_list(self) -> list[Span]: def get_label_span_list(self) -> list[Span]:
@ -493,18 +468,20 @@ class MarkupText(LabelledString):
def get_inserted_string_pairs( def get_inserted_string_pairs(
self, use_plain_file: bool self, use_plain_file: bool
) -> list[tuple[Span, tuple[str, str]]]: ) -> list[tuple[Span, tuple[str, str]]]:
attr_items = self.predefined_items.copy()
if not use_plain_file: if not use_plain_file:
attr_items = [ attr_dict_items = [
(span, key, WHITE if key in COLOR_RELATED_KEYS else val) (span, {
for span, key, val in attr_items key: WHITE if key in COLOR_RELATED_KEYS else val
for key, val in attr_dict.items()
})
for span, attr_dict in self.predefined_attr_dicts
] + [ ] + [
(span, "foreground", self.rgb_int_to_hex(label)) (span, {"foreground": self.rgb_int_to_hex(label)})
for label, span in enumerate(self.label_span_list) for label, span in enumerate(self.label_span_list)
] ]
else: else:
attr_items += [ attr_dict_items = self.predefined_attr_dicts + [
(span, "", "") (span, {})
for span in self.label_span_list for span in self.label_span_list
] ]
return [ return [
@ -512,7 +489,7 @@ class MarkupText(LabelledString):
self.get_begin_tag_str(attr_dict), self.get_begin_tag_str(attr_dict),
self.get_end_tag_str() self.get_end_tag_str()
)) ))
for span, attr_dict in self.merge_attr_items(attr_items) for span, attr_dict in self.merge_attr_dicts(attr_dict_items)
] ]
def get_other_repl_items( def get_other_repl_items(
@ -523,7 +500,8 @@ class MarkupText(LabelledString):
def get_has_predefined_colors(self) -> bool:
    """Tell whether any predefined attribute key is colour-related.

    Returns True when at least one key of any dict in
    ``self.predefined_attr_dicts`` is in ``COLOR_RELATED_KEYS``.
    """
    # A generator lets any() stop at the first hit instead of building
    # the full list of booleans first.
    return any(
        key in COLOR_RELATED_KEYS
        for _, attr_dict in self.predefined_attr_dicts
        for key in attr_dict
    )
# Method alias # Method alias