vt: move UCS tables to the "shipped" form

Use the "shipped" mechanism to copy pre-generated tables to the build
tree by default. If GENERATE_UCS_TABLES=1 then they are generated at
build time instead. If GENERATE_UCS_TABLES=2 then
gen_ucs_recompose_table.py is invoked with --full.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Suggested-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20250417184849.475581-15-nico@fluxnic.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
Nicolas Pitre 2025-04-17 14:45:16 -04:00 committed by Greg Kroah-Hartman
parent d8f81c82b1
commit c2d2c5c0d6
5 changed files with 45 additions and 10 deletions

View file

@ -11,7 +11,8 @@ obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o \
ucs.o
# Files generated that shall be removed upon make clean
clean-files := consolemap_deftbl.c defkeymap.c
clean-files := consolemap_deftbl.c defkeymap.c \
ucs_width_table.h ucs_recompose_table.h
hostprogs += conmakehash
@ -34,3 +35,27 @@ $(obj)/defkeymap.c: $(obj)/%.c: $(src)/%.map
loadkeys --mktable --unicode $< > $@
endif
# ucs.c includes both UCS tables, so ucs.o must be rebuilt when either changes.
$(obj)/ucs.o: $(src)/ucs.c $(obj)/ucs_width_table.h $(obj)/ucs_recompose_table.h

# You may uncomment one of these to have the UCS tables be regenerated
# during the build process. By default the _shipped versions are used.
#
# Any non-empty value enables regeneration; the value 2 additionally
# invokes gen_ucs_recompose_table.py with --full.
#
# Do not append a trailing comment to the assignment: make keeps the
# whitespace preceding '#' as part of the value.
#
#GENERATE_UCS_TABLES := 1
#GENERATE_UCS_TABLES := 2

ifdef GENERATE_UCS_TABLES

$(obj)/ucs_width_table.h: $(src)/gen_ucs_width_table.py
	$(PYTHON3) $< -o $@

# $(strip) makes the comparison tolerant of stray whitespace in the value,
# so --full is not silently dropped.
ifeq ($(strip $(GENERATE_UCS_TABLES)),2)
gen_recomp_arg := --full
else
gen_recomp_arg :=
endif

$(obj)/ucs_recompose_table.h: $(src)/gen_ucs_recompose_table.py
	$(PYTHON3) $< -o $@ $(gen_recomp_arg)

endif

View file

@ -19,8 +19,8 @@ import textwrap
from pathlib import Path
this_file = Path(__file__).name
# Output file name
out_file = "ucs_recompose_table.h"
# Default output file name
DEFAULT_OUT_FILE = "ucs_recompose_table.h"
common_recompose_description = "most commonly used Latin, Greek, and Cyrillic recomposition pairs only"
COMMON_RECOMPOSITION_PAIRS = [
@ -165,7 +165,7 @@ def validate_common_pairs(full_list):
print(error_msg)
raise ValueError(error_msg)
def generate_recomposition_table(use_full_list=False):
def generate_recomposition_table(use_full_list=False, out_file=DEFAULT_OUT_FILE):
"""Generate the recomposition C table."""
# Collect all recomposition pairs for validation
@ -250,6 +250,8 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Generate Unicode recomposition table")
parser.add_argument("--full", action="store_true",
help="Generate a full recomposition table (default: common pairs only)")
parser.add_argument("-o", "--output", dest="output_file", default=DEFAULT_OUT_FILE,
help=f"Output file name (default: {DEFAULT_OUT_FILE})")
args = parser.parse_args()
generate_recomposition_table(use_full_list=args.full)
generate_recomposition_table(use_full_list=args.full, out_file=args.output_file)

View file

@ -5,13 +5,14 @@
import unicodedata
import sys
import argparse
# This script's file name
from pathlib import Path
this_file = Path(__file__).name
# Output file name
out_file = "ucs_width_table.h"
# Default output file name
DEFAULT_OUT_FILE = "ucs_width_table.h"
# --- Global Constants for Width Assignments ---
@ -185,13 +186,14 @@ def create_width_tables():
return zero_width_ranges, double_width_ranges
def write_tables(zero_width_ranges, double_width_ranges):
def write_tables(zero_width_ranges, double_width_ranges, out_file=DEFAULT_OUT_FILE):
"""
Write the generated tables to C header file.
Args:
zero_width_ranges: List of (start, end) ranges for zero-width characters
double_width_ranges: List of (start, end) ranges for double-width characters
out_file: Output file name (default: DEFAULT_OUT_FILE)
"""
# Function to split ranges into BMP (16-bit) and non-BMP (above 16-bit)
@ -286,14 +288,20 @@ static const struct ucs_interval32 ucs_double_width_non_bmp_ranges[] = {
f.write("};\n")
if __name__ == "__main__":
# Parse command line arguments
parser = argparse.ArgumentParser(description="Generate Unicode width tables")
parser.add_argument("-o", "--output", dest="output_file", default=DEFAULT_OUT_FILE,
help=f"Output file name (default: {DEFAULT_OUT_FILE})")
args = parser.parse_args()
# Write tables to header file
zero_width_ranges, double_width_ranges = create_width_tables()
write_tables(zero_width_ranges, double_width_ranges)
write_tables(zero_width_ranges, double_width_ranges, out_file=args.output_file)
# Print summary
zero_width_count = sum(end - start + 1 for start, end in zero_width_ranges)
double_width_count = sum(end - start + 1 for start, end in double_width_ranges)
print(f"Generated {out_file} with:")
print(f"Generated {args.output_file} with:")
print(f"- {len(zero_width_ranges)} zero-width ranges covering ~{zero_width_count} code points")
print(f"- {len(double_width_ranges)} double-width ranges covering ~{double_width_count} code points")
print(f"- Unicode Version: {unicodedata.unidata_version}")