mirror of
https://github.com/3b1b/manim.git
synced 2025-11-14 02:27:46 +00:00
Improve num_tex_symbols
This commit is contained in:
parent
53f19b6620
commit
1feae23566
2 changed files with 205 additions and 222 deletions
|
|
@ -1,57 +1,41 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from functools import lru_cache
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from typing import List, Tuple
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def get_pattern_symbol_count_pairs() -> List[Tuple[str, int]]:
    """
    Build the ordered list of (regular expression, symbol count) pairs
    used to estimate how many symbols a string of tex produces.

    The list is meant to be applied in order, removing each match
    before moving on to the next pattern:

    1. begin / end / phantom together with their bracketed argument,
       counted as zero symbols.
    2. One alternation per distinct count found in TEX_TO_SYMBOL_COUNT.
    3. A catch-all for any remaining command, counted as one symbol.

    The result is cached, so every caller receives the same list object
    and should treat it as read-only.
    """
    from manimlib.utils.tex_to_symbol_count import TEX_TO_SYMBOL_COUNT

    def command_regex(command: str) -> str:
        # Table keys may be written with or without the leading backslash
        name = command[1:] if command.startswith("\\") else command
        regex = r"\\" + re.escape(name)
        # A command made of letters ends at the first non-letter, so \l
        # must not match the start of \lim, nor \big the start of \bigskip
        if name.isascii() and name.isalpha():
            regex += r"(?![a-zA-Z])"
        return regex

    # Gather all keys of the table, grouped by common value
    count_to_tex_list = dict()
    for command, num in TEX_TO_SYMBOL_COUNT.items():
        count_to_tex_list.setdefault(num, []).append(command)

    # Account for patterns like \begin{align} and \phantom{thing}
    # which, together with the bracketed content, account for zero paths.
    # Deliberately put this first in the list
    pattern_symbol_count_pairs = [
        (
            "|".join(
                r"\\" + s + r"\{[^\\}]+\}"
                for s in ["begin", "end", "phantom"]
            ),
            0,
        )
    ]

    # Associate each count with a regular expression that will find
    # any tex command known to produce that many symbols
    for count, tex_list in count_to_tex_list.items():
        pattern = "|".join(command_regex(s) for s in tex_list)
        pattern_symbol_count_pairs.append((pattern, count))

    # Assume all other expressions of the form \thing are drawn with one path
    # Deliberately put this last in the list
    pattern_symbol_count_pairs.append((r"\\[a-zA-Z]+", 1))

    return pattern_symbol_count_pairs
|
||||
from manimlib.utils.tex_to_symbol_count import TEX_TO_SYMBOL_COUNT
|
||||
|
||||
|
||||
def num_tex_symbols(tex: str, symbol_counts: "dict[str, int] | None" = None) -> int:
    """
    This function attempts to estimate the number of symbols that
    a given string of tex would produce.

    tex: the tex source to analyse.
    symbol_counts: optional mapping from a command, written with its
        leading backslash, to the number of symbols it produces.
        Defaults to TEX_TO_SYMBOL_COUNT. Commands missing from the
        mapping are assumed to produce one symbol.

    Returns the estimated number of symbols.

    Warning, it may not behave perfectly
    """
    if symbol_counts is None:
        symbol_counts = TEX_TO_SYMBOL_COUNT

    # First, remove patterns like \begin{align}, \phantom{thing},
    # \begin{array}{cc}, etc. The lookahead keeps longer command names
    # that merely start with one of these words intact.
    environment_pattern = (
        r"\\(?:begin|end|phantom)(?![a-zA-Z])"
        r"(?:\{\w+\})?(?:\{\w+\})?(?:\[\w+\])?"
    )
    tex = re.sub(environment_pattern, " ", tex)

    # Progressively count the symbols associated with certain tex commands,
    # and remove those commands from the string, adding the number of symbols
    # that command creates
    total = 0

    # Start with the special case \sqrt[number]
    indexed_root = r"\\sqrt\[[0-9]+\]"
    for substr in re.findall(indexed_root, tex):
        total += len(substr) - 5  # e.g. \sqrt[3] is 3 symbols
    tex = re.sub(indexed_root, " ", tex)

    # A command is either a backslash followed by a run of letters, or a
    # backslash followed by exactly one of the spacing characters. Matching
    # a single punctuation character (rather than a run mixed with letters)
    # keeps e.g. the comma in "\pi," available for the final count.
    general_command = r"\\(?:[a-zA-Z]+|[!,\-/:;<>])"
    for substr in re.findall(general_command, tex):
        total += symbol_counts.get(substr, 1)
    # Remove every command in one regex pass, so that deleting \sin
    # cannot leave behind the "h" of \sinh
    tex = re.sub(general_command, " ", tex)

    # Count remaining characters
    ignored = "^{} \n\t_$\\&"
    total += sum(c not in ignored for c in tex)
    return total
|
||||
|
|
|
|||
|
|
@ -1,182 +1,181 @@
|
|||
# Number of symbols produced by specific tex commands. Keys are written
# with their leading backslash, which is the form num_tex_symbols looks up.
# Commands that are absent from this table are treated by num_tex_symbols
# as producing a single symbol.
TEX_TO_SYMBOL_COUNT = {
    R"\!": 0,
    R"\,": 0,
    R"\-": 0,
    R"\/": 0,
    R"\:": 0,
    R"\;": 0,
    R"\>": 0,
    R"\aa": 0,
    R"\AA": 0,
    R"\ae": 0,
    R"\AE": 0,
    R"\arccos": 6,
    R"\arcsin": 6,
    R"\arctan": 6,
    R"\arg": 3,
    R"\author": 0,
    R"\bf": 0,
    R"\bibliography": 0,
    R"\bibliographystyle": 0,
    R"\big": 0,
    R"\Big": 0,
    R"\bigodot": 4,
    R"\bigoplus": 5,
    R"\bigskip": 0,
    R"\bmod": 3,
    R"\boldmath": 0,
    R"\bottomfraction": 2,
    R"\bowtie": 2,
    R"\cal": 0,
    R"\cdots": 3,
    R"\centering": 0,
    R"\cite": 2,
    R"\cong": 2,
    R"\contentsline": 0,
    R"\cos": 3,
    R"\cosh": 4,
    R"\cot": 3,
    R"\coth": 4,
    R"\csc": 3,
    R"\date": 0,
    R"\dblfloatpagefraction": 2,
    R"\dbltopfraction": 2,
    R"\ddots": 3,
    R"\deg": 3,
    R"\det": 3,
    R"\dim": 3,
    R"\displaystyle": 0,
    R"\div": 2,
    R"\doteq": 2,
    R"\dotfill": 0,
    R"\emph": 0,
    R"\exp": 3,
    R"\fbox": 4,
    R"\floatpagefraction": 2,
    R"\flushbottom": 0,
    R"\footnotesize": 0,
    R"\footnotetext": 0,
    R"\frame": 2,
    R"\framebox": 4,
    R"\fussy": 0,
    R"\gcd": 3,
    R"\ghost": 0,
    R"\glossary": 0,
    R"\hfill": 0,
    R"\hom": 3,
    R"\hookleftarrow": 2,
    R"\hookrightarrow": 2,
    R"\hrulefill": 0,
    R"\huge": 0,
    R"\Huge": 0,
    R"\hyphenation": 0,
    R"\iff": 2,
    R"\Im": 2,
    R"\index": 0,
    R"\inf": 3,
    R"\it": 0,
    R"\ker": 3,
    R"\l": 0,
    R"\L": 0,
    R"\label": 0,
    R"\large": 0,
    R"\Large": 0,
    R"\LARGE": 0,
    R"\ldots": 3,
    R"\lefteqn": 0,
    R"\left": 0,
    R"\lg": 2,
    R"\lim": 3,
    R"\liminf": 6,
    R"\limsup": 6,
    R"\linebreak": 0,
    R"\ln": 2,
    R"\log": 3,
    R"\longleftarrow": 2,
    R"\Longleftarrow": 2,
    R"\longleftrightarrow": 2,
    R"\Longleftrightarrow": 2,
    R"\longmapsto": 3,
    R"\longrightarrow": 2,
    R"\Longrightarrow": 2,
    R"\makebox": 0,
    R"\mapsto": 2,
    R"\markright": 0,
    R"\max": 3,
    R"\mbox": 0,
    R"\medskip": 0,
    R"\min": 3,
    R"\mit": 0,
    R"\models": 2,
    R"\ne": 2,
    R"\neq": 2,
    R"\newline": 0,
    R"\noindent": 0,
    R"\nolinebreak": 0,
    R"\nonumber": 0,
    R"\nopagebreak": 0,
    R"\normalmarginpar": 0,
    R"\normalsize": 0,
    R"\notin": 2,
    R"\o": 0,
    R"\O": 0,
    R"\obeycr": 0,
    R"\oe": 0,
    R"\OE": 0,
    R"\overbrace": 4,
    R"\pagebreak": 0,
    R"\pagenumbering": 0,
    R"\pageref": 2,
    R"\pmod": 5,
    R"\Pr": 2,
    R"\protect": 0,
    R"\qquad": 0,
    R"\quad": 0,
    R"\raggedbottom": 0,
    R"\raggedleft": 0,
    R"\raggedright": 0,
    R"\Re": 2,
    R"\ref": 2,
    R"\restorecr": 0,
    R"\reversemarginpar": 0,
    R"\right": 0,
    R"\rm": 0,
    R"\sc": 0,
    R"\scriptscriptstyle": 0,
    R"\scriptsize": 0,
    R"\scriptstyle": 0,
    R"\sec": 3,
    R"\sf": 0,
    R"\shortstack": 0,
    R"\sin": 3,
    R"\sinh": 4,
    R"\sl": 0,
    R"\sloppy": 0,
    R"\small": 0,
    R"\Small": 0,
    R"\smallskip": 0,
    R"\sqrt": 2,
    R"\ss": 0,
    R"\sup": 3,
    R"\tan": 3,
    R"\tanh": 4,
    R"\textbf": 0,
    R"\textfraction": 2,
    R"\textstyle": 0,
    R"\thicklines": 0,
    R"\thinlines": 0,
    R"\thinspace": 0,
    R"\tiny": 0,
    R"\title": 0,
    R"\today": 15,
    R"\topfraction": 2,
    R"\tt": 0,
    R"\typeout": 0,
    R"\unboldmath": 0,
    R"\underbrace": 6,
    R"\underline": 0,
    R"\value": 0,
    R"\vdots": 3,
    R"\vline": 0,
}
|
||||
Loading…
Add table
Reference in a new issue