From 57de87b14690497057866970b524124340759d9c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 21 Mar 2025 20:20:12 +0200 Subject: [PATCH 001/105] serial: 8250_ni: Switch to use uart_read_port_properties() Since we have now a common helper to read port properties use it instead of sparse home grown solution. Signed-off-by: Andy Shevchenko Tested-by: Chaitanya Vadrevu Reviewed-by: Chaitanya Vadrevu Link: https://lore.kernel.org/r/20250321182119.454507-2-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_ni.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/tty/serial/8250/8250_ni.c b/drivers/tty/serial/8250/8250_ni.c index b10a42d2ad63..03e838f440be 100644 --- a/drivers/tty/serial/8250/8250_ni.c +++ b/drivers/tty/serial/8250/8250_ni.c @@ -285,7 +285,6 @@ static int ni16550_probe(struct platform_device *pdev) const char *portmode; bool rs232_property; int ret; - int irq; data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) @@ -293,10 +292,6 @@ static int ni16550_probe(struct platform_device *pdev) spin_lock_init(&uart.port.lock); - irq = platform_get_irq(pdev, 0); - if (irq < 0) - return irq; - ret = ni16550_get_regs(pdev, &uart.port); if (ret < 0) return ret; @@ -307,10 +302,7 @@ static int ni16550_probe(struct platform_device *pdev) info = device_get_match_data(dev); uart.port.dev = dev; - uart.port.irq = irq; - uart.port.irqflags = IRQF_SHARED; - uart.port.flags = UPF_SHARE_IRQ | UPF_BOOT_AUTOCONF - | UPF_FIXED_PORT | UPF_FIXED_TYPE; + uart.port.flags = UPF_BOOT_AUTOCONF | UPF_FIXED_PORT | UPF_FIXED_TYPE; uart.port.startup = ni16550_port_startup; uart.port.shutdown = ni16550_port_shutdown; @@ -332,12 +324,16 @@ static int ni16550_probe(struct platform_device *pdev) /* * Declaration of the base clock frequency can come from one of: * - static declaration in this driver (for older ACPI IDs) - * - a "clock-frquency" ACPI + * - a "clock-frequency" ACPI 
*/ if (info->uartclk) uart.port.uartclk = info->uartclk; - if (device_property_read_u32(dev, "clock-frequency", - &uart.port.uartclk)) { + + ret = uart_read_port_properties(&uart.port); + if (ret) + return ret; + + if (!uart.port.uartclk) { data->clk = devm_clk_get_enabled(dev, NULL); if (!IS_ERR(data->clk)) uart.port.uartclk = clk_get_rate(data->clk); From 9b4a192adf428198fb676a75e9bb95d26904ae44 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 21 Mar 2025 20:20:13 +0200 Subject: [PATCH 002/105] serial: 8250_ni: Remove duplicate mapping UPF_IOREMAP is for serial core to map the resource on behalf of the driver. No need to perform this explicitly in the driver. Signed-off-by: Andy Shevchenko Tested-by: Chaitanya Vadrevu Reviewed-by: Chaitanya Vadrevu Link: https://lore.kernel.org/r/20250321182119.454507-3-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_ni.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/tty/serial/8250/8250_ni.c b/drivers/tty/serial/8250/8250_ni.c index 03e838f440be..562f7f29e209 100644 --- a/drivers/tty/serial/8250/8250_ni.c +++ b/drivers/tty/serial/8250/8250_ni.c @@ -239,11 +239,6 @@ static int ni16550_get_regs(struct platform_device *pdev, port->mapsize = resource_size(regs); port->flags |= UPF_IOREMAP; - port->membase = devm_ioremap(&pdev->dev, port->mapbase, - port->mapsize); - if (!port->membase) - return -ENOMEM; - return 0; } From b4694a76180b71cc4040c3c9ab75ea0058b6cc3a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 21 Mar 2025 20:20:14 +0200 Subject: [PATCH 003/105] serial: 8250_ni: Switch to use platform_get_mem_or_io() Switch to use new platform_get_mem_or_io() instead of home grown analogue. 
Signed-off-by: Andy Shevchenko Tested-by: Chaitanya Vadrevu Reviewed-by: Chaitanya Vadrevu Link: https://lore.kernel.org/r/20250321182119.454507-4-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_ni.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/tty/serial/8250/8250_ni.c b/drivers/tty/serial/8250/8250_ni.c index 562f7f29e209..2dc510c0a5ef 100644 --- a/drivers/tty/serial/8250/8250_ni.c +++ b/drivers/tty/serial/8250/8250_ni.c @@ -224,26 +224,26 @@ static int ni16550_get_regs(struct platform_device *pdev, { struct resource *regs; - regs = platform_get_resource(pdev, IORESOURCE_IO, 0); - if (regs) { + regs = platform_get_mem_or_io(pdev, 0); + if (!regs) + return dev_err_probe(&pdev->dev, -EINVAL, "no registers defined\n"); + + switch (resource_type(regs)) { + case IORESOURCE_IO: port->iotype = UPIO_PORT; port->iobase = regs->start; return 0; - } - - regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (regs) { + case IORESOURCE_MEM: port->iotype = UPIO_MEM; port->mapbase = regs->start; port->mapsize = resource_size(regs); port->flags |= UPF_IOREMAP; return 0; + default: + return -EINVAL; } - - dev_err(&pdev->dev, "no registers defined\n"); - return -EINVAL; } /* From 38dbd9517d5872b2cafb576656fb214b48b1e893 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 21 Mar 2025 20:20:15 +0200 Subject: [PATCH 004/105] serial: 8250_ni: Remove unneeded conditionals It doesn't matter if the properties are supplied or not in the struct ni16550_device_info as default in any case is 0. Hence there is no need to check for them being set. 
Signed-off-by: Andy Shevchenko Tested-by: Chaitanya Vadrevu Reviewed-by: Chaitanya Vadrevu Link: https://lore.kernel.org/r/20250321182119.454507-5-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_ni.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/tty/serial/8250/8250_ni.c b/drivers/tty/serial/8250/8250_ni.c index 2dc510c0a5ef..8bb8bb7bb4f2 100644 --- a/drivers/tty/serial/8250/8250_ni.c +++ b/drivers/tty/serial/8250/8250_ni.c @@ -275,7 +275,7 @@ static int ni16550_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct uart_8250_port uart = {}; unsigned int txfifosz, rxfifosz; - unsigned int prescaler = 0; + unsigned int prescaler; struct ni16550_data *data; const char *portmode; bool rs232_property; @@ -321,8 +321,7 @@ static int ni16550_probe(struct platform_device *pdev) * - static declaration in this driver (for older ACPI IDs) * - a "clock-frequency" ACPI */ - if (info->uartclk) - uart.port.uartclk = info->uartclk; + uart.port.uartclk = info->uartclk; ret = uart_read_port_properties(&uart.port); if (ret) @@ -340,11 +339,9 @@ static int ni16550_probe(struct platform_device *pdev) goto err; } - if (info->prescaler) - prescaler = info->prescaler; + prescaler = info->prescaler; device_property_read_u32(dev, "clock-prescaler", &prescaler); - - if (prescaler != 0) { + if (prescaler) { uart.port.set_mctrl = ni16550_set_mctrl; ni16550_config_prescaler(&uart, (u8)prescaler); } From 2e4899740ebbbddcbf57b54198c20f3843543aa7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 21 Mar 2025 20:20:16 +0200 Subject: [PATCH 005/105] serial: 8250_ni: use serial_port_in()/serial_port_out() helpers There are serial_port_in()/serial_port_out() helpers to be used instead of direct p->serial_in()/p->serial_out(). Use them in various 8250 drivers. 
Signed-off-by: Andy Shevchenko Tested-by: Chaitanya Vadrevu Reviewed-by: Chaitanya Vadrevu Link: https://lore.kernel.org/r/20250321182119.454507-6-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_ni.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_ni.c b/drivers/tty/serial/8250/8250_ni.c index 8bb8bb7bb4f2..15bee1b7dc2a 100644 --- a/drivers/tty/serial/8250/8250_ni.c +++ b/drivers/tty/serial/8250/8250_ni.c @@ -90,10 +90,10 @@ static int ni16550_disable_transceivers(struct uart_port *port) { u8 pcr; - pcr = port->serial_in(port, NI16550_PCR_OFFSET); + pcr = serial_port_in(port, NI16550_PCR_OFFSET); pcr &= ~NI16550_PCR_TXVR_ENABLE_BIT; dev_dbg(port->dev, "disable transceivers: write pcr: 0x%02x\n", pcr); - port->serial_out(port, NI16550_PCR_OFFSET, pcr); + serial_port_out(port, NI16550_PCR_OFFSET, pcr); return 0; } @@ -105,7 +105,7 @@ static int ni16550_rs485_config(struct uart_port *port, struct uart_8250_port *up = container_of(port, struct uart_8250_port, port); u8 pcr; - pcr = serial_in(up, NI16550_PCR_OFFSET); + pcr = serial_port_in(port, NI16550_PCR_OFFSET); pcr &= ~NI16550_PCR_WIRE_MODE_MASK; if ((rs485->flags & SER_RS485_MODE_RS422) || @@ -120,7 +120,7 @@ static int ni16550_rs485_config(struct uart_port *port, } dev_dbg(port->dev, "config rs485: write pcr: 0x%02x, acr: %02x\n", pcr, up->acr); - serial_out(up, NI16550_PCR_OFFSET, pcr); + serial_port_out(port, NI16550_PCR_OFFSET, pcr); serial_icr_write(up, UART_ACR, up->acr); return 0; From 030df0ef7cec3232122626b642b5642cf5677fc0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 21 Mar 2025 20:20:17 +0200 Subject: [PATCH 006/105] serial: 8250_ni: Switch to use dev_err_probe() Switch to use dev_err_probe() to simplify the error path and unify a message template. 
Signed-off-by: Andy Shevchenko Tested-by: Chaitanya Vadrevu Reviewed-by: Chaitanya Vadrevu Link: https://lore.kernel.org/r/20250321182119.454507-7-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_ni.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/tty/serial/8250/8250_ni.c b/drivers/tty/serial/8250/8250_ni.c index 15bee1b7dc2a..c66bfc56838e 100644 --- a/drivers/tty/serial/8250/8250_ni.c +++ b/drivers/tty/serial/8250/8250_ni.c @@ -333,11 +333,8 @@ static int ni16550_probe(struct platform_device *pdev) uart.port.uartclk = clk_get_rate(data->clk); } - if (!uart.port.uartclk) { - dev_err(dev, "unable to determine clock frequency!\n"); - ret = -ENODEV; - goto err; - } + if (!uart.port.uartclk) + return dev_err_probe(dev, -ENODEV, "unable to determine clock frequency!\n"); prescaler = info->prescaler; device_property_read_u32(dev, "clock-prescaler", &prescaler); @@ -381,14 +378,11 @@ static int ni16550_probe(struct platform_device *pdev) ret = serial8250_register_8250_port(&uart); if (ret < 0) - goto err; + return ret; data->line = ret; platform_set_drvdata(pdev, data); return 0; - -err: - return ret; } static void ni16550_remove(struct platform_device *pdev) From 753a55f559085864dd6770240dca03d90bbd48d4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 21 Mar 2025 20:20:18 +0200 Subject: [PATCH 007/105] serial: 8250_ni: Tidy up ACPI ID table Tidy up ACPI ID table: - drop ACPI_PTR() and hence replace acpi.h with mod_devicetable.h et al. - drop comma in the terminator entry With that done, extend compile test coverage. 
Signed-off-by: Andy Shevchenko Tested-by: Chaitanya Vadrevu Reviewed-by: Chaitanya Vadrevu Link: https://lore.kernel.org/r/20250321182119.454507-8-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_ni.c | 15 +++++++++------ drivers/tty/serial/8250/Kconfig | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/tty/serial/8250/8250_ni.c b/drivers/tty/serial/8250/8250_ni.c index c66bfc56838e..b0e44fb00b3a 100644 --- a/drivers/tty/serial/8250/8250_ni.c +++ b/drivers/tty/serial/8250/8250_ni.c @@ -10,14 +10,18 @@ * Copyright 2012-2023 National Instruments Corporation */ -#include #include +#include +#include #include #include #include +#include #include +#include #include -#include +#include +#include #include "8250.h" @@ -392,7 +396,6 @@ static void ni16550_remove(struct platform_device *pdev) serial8250_unregister_port(data->line); } -#ifdef CONFIG_ACPI /* NI 16550 RS-485 Interface */ static const struct ni16550_device_info nic7750 = { .uartclk = 33333333, @@ -417,20 +420,20 @@ static const struct ni16550_device_info nic7a69 = { .uartclk = 29629629, .prescaler = 0x09, }; + static const struct acpi_device_id ni16550_acpi_match[] = { { "NIC7750", (kernel_ulong_t)&nic7750 }, { "NIC7772", (kernel_ulong_t)&nic7772 }, { "NIC792B", (kernel_ulong_t)&nic792b }, { "NIC7A69", (kernel_ulong_t)&nic7a69 }, - { }, + { } }; MODULE_DEVICE_TABLE(acpi, ni16550_acpi_match); -#endif static struct platform_driver ni16550_driver = { .driver = { .name = "ni16550", - .acpi_match_table = ACPI_PTR(ni16550_acpi_match), + .acpi_match_table = ni16550_acpi_match, }, .probe = ni16550_probe, .remove = ni16550_remove, diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index bd3d636ff962..f64ef0819cd4 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -572,7 +572,7 @@ config SERIAL_8250_BCM7271 config SERIAL_8250_NI tristate "NI 16550 based serial port" depends on 
SERIAL_8250 - depends on (X86 && ACPI) || COMPILE_TEST + depends on X86 || COMPILE_TEST help This driver supports the integrated serial ports on National Instruments (NI) controller hardware. This is required for all NI From a0003b9d7948b01279ea3cbc8b1f3aad71e9fcdd Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Tue, 1 Apr 2025 16:03:37 +0800 Subject: [PATCH 008/105] serial: lantiq: Remove unnecessary print function dev_err() Function dev_err() is redundant because platform_get_irq() already prints an error. Signed-off-by: Chen Ni Acked-by: Mukesh Kumar Savaliya Link: https://lore.kernel.org/r/20250401080337.2187400-1-nichen@iscas.ac.cn Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/lantiq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/tty/serial/lantiq.c b/drivers/tty/serial/lantiq.c index 58a3ab030d67..62cd9e0bb377 100644 --- a/drivers/tty/serial/lantiq.c +++ b/drivers/tty/serial/lantiq.c @@ -773,10 +773,8 @@ static int fetch_irq_intel(struct device *dev, struct ltq_uart_port *ltq_port) int ret; ret = platform_get_irq(to_platform_device(dev), 0); - if (ret < 0) { - dev_err(dev, "failed to fetch IRQ for serial port\n"); + if (ret < 0) return ret; - } ltq_port->common_irq = ret; port->irq = ret; From 6bd697b5fc39fd24e2aa418c7b7d14469f550a93 Mon Sep 17 00:00:00 2001 From: Jakub Lewalski Date: Mon, 31 Mar 2025 18:06:19 +0200 Subject: [PATCH 009/105] tty: serial: uartlite: register uart driver in init When two instances of uart devices are probing, a concurrency race can occur. If one thread calls uart_register_driver function, which first allocates and assigns memory to 'uart_state' member of uart_driver structure, the other instance can bypass uart driver registration and call ulite_assign. This calls uart_add_one_port, which expects the uart driver to be fully initialized. 
This leads to a kernel panic due to a null pointer dereference: [ 8.143581] BUG: kernel NULL pointer dereference, address: 00000000000002b8 [ 8.156982] #PF: supervisor write access in kernel mode [ 8.156984] #PF: error_code(0x0002) - not-present page [ 8.156986] PGD 0 P4D 0 ... [ 8.180668] RIP: 0010:mutex_lock+0x19/0x30 [ 8.188624] Call Trace: [ 8.188629] ? __die_body.cold+0x1a/0x1f [ 8.195260] ? page_fault_oops+0x15c/0x290 [ 8.209183] ? __irq_resolve_mapping+0x47/0x80 [ 8.209187] ? exc_page_fault+0x64/0x140 [ 8.209190] ? asm_exc_page_fault+0x22/0x30 [ 8.209196] ? mutex_lock+0x19/0x30 [ 8.223116] uart_add_one_port+0x60/0x440 [ 8.223122] ? proc_tty_register_driver+0x43/0x50 [ 8.223126] ? tty_register_driver+0x1ca/0x1e0 [ 8.246250] ulite_probe+0x357/0x4b0 [uartlite] To prevent it, move uart driver registration in to init function. This will ensure that uart_driver is always registered when probe function is called. Signed-off-by: Jakub Lewalski Signed-off-by: Elodie Decerle Link: https://lore.kernel.org/r/20250331160732.2042-1-elodie.decerle@nokia.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/uartlite.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c index a41e7fc373b7..39c1fd1ff9ce 100644 --- a/drivers/tty/serial/uartlite.c +++ b/drivers/tty/serial/uartlite.c @@ -880,16 +880,6 @@ of_err: pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); - if (!ulite_uart_driver.state) { - dev_dbg(&pdev->dev, "uartlite: calling uart_register_driver()\n"); - ret = uart_register_driver(&ulite_uart_driver); - if (ret < 0) { - dev_err(&pdev->dev, "Failed to register driver\n"); - clk_disable_unprepare(pdata->clk); - return ret; - } - } - ret = ulite_assign(&pdev->dev, id, res->start, irq, pdata); pm_runtime_mark_last_busy(&pdev->dev); @@ -929,16 +919,25 @@ static struct platform_driver ulite_platform_driver = { static int __init ulite_init(void) { + 
int ret; + + pr_debug("uartlite: calling uart_register_driver()\n"); + ret = uart_register_driver(&ulite_uart_driver); + if (ret) + return ret; pr_debug("uartlite: calling platform_driver_register()\n"); - return platform_driver_register(&ulite_platform_driver); + ret = platform_driver_register(&ulite_platform_driver); + if (ret) + uart_unregister_driver(&ulite_uart_driver); + + return ret; } static void __exit ulite_exit(void) { platform_driver_unregister(&ulite_platform_driver); - if (ulite_uart_driver.state) - uart_unregister_driver(&ulite_uart_driver); + uart_unregister_driver(&ulite_uart_driver); } module_init(ulite_init); From a53be6945f5123c19d6fcc30783876705a2e0f00 Mon Sep 17 00:00:00 2001 From: Viken Dadhaniya Date: Thu, 27 Mar 2025 12:37:11 +0530 Subject: [PATCH 010/105] serial: qcom-geni: Remove alias dependency from qcom serial driver The absence of an alias in the device tree results in an invalid line number, causing the driver probe to fail for GENI serial. To prevent probe failures, dynamically assign line numbers if an alias is not present in the device tree for non-console ports. Signed-off-by: Viken Dadhaniya Link: https://lore.kernel.org/r/20250327070711.2585887-1-quic_vdadhani@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/qcom_geni_serial.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index a80ce7aaf309..0293b6210aa6 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -98,6 +98,8 @@ #define DMA_RX_BUF_SIZE 2048 +static DEFINE_IDA(port_ida); + struct qcom_geni_device_data { bool console; enum geni_se_xfer_mode mode; @@ -253,10 +255,24 @@ static struct qcom_geni_serial_port *get_port_from_line(int line, bool console) struct qcom_geni_serial_port *port; int nr_ports = console ? 
GENI_UART_CONS_PORTS : GENI_UART_PORTS; - if (line < 0 || line >= nr_ports) - return ERR_PTR(-ENXIO); + if (console) { + if (line < 0 || line >= nr_ports) + return ERR_PTR(-ENXIO); - port = console ? &qcom_geni_console_port : &qcom_geni_uart_ports[line]; + port = &qcom_geni_console_port; + } else { + int max_alias_num = of_alias_get_highest_id("serial"); + + if (line < 0 || line >= nr_ports) + line = ida_alloc_range(&port_ida, max_alias_num + 1, nr_ports, GFP_KERNEL); + else + line = ida_alloc_range(&port_ida, line, nr_ports, GFP_KERNEL); + + if (line < 0) + return ERR_PTR(-ENXIO); + + port = &qcom_geni_uart_ports[line]; + } return port; } @@ -1761,6 +1777,7 @@ static int qcom_geni_serial_probe(struct platform_device *pdev) port->wakeup_irq); if (ret) { device_init_wakeup(&pdev->dev, false); + ida_free(&port_ida, uport->line); uart_remove_one_port(drv, uport); return ret; } @@ -1772,10 +1789,12 @@ static int qcom_geni_serial_probe(struct platform_device *pdev) static void qcom_geni_serial_remove(struct platform_device *pdev) { struct qcom_geni_serial_port *port = platform_get_drvdata(pdev); + struct uart_port *uport = &port->uport; struct uart_driver *drv = port->private_data.drv; dev_pm_clear_wake_irq(&pdev->dev); device_init_wakeup(&pdev->dev, false); + ida_free(&port_ida, uport->line); uart_remove_one_port(drv, &port->uport); } From 9d64c6ae2d6f9284a8475d02291580457be7bc28 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Mon, 7 Apr 2025 12:07:12 +0800 Subject: [PATCH 011/105] serial: tegra-utc: Remove unneeded semicolon Remove unnecessary semicolons reported by Coccinelle/coccicheck and the semantic patch at scripts/coccinelle/misc/semicolon.cocci. 
Signed-off-by: Chen Ni Link: https://lore.kernel.org/r/20250407040712.2577607-1-nichen@iscas.ac.cn Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/tegra-utc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/tegra-utc.c b/drivers/tty/serial/tegra-utc.c index 39b14fe813c9..0c70d3e7b9b9 100644 --- a/drivers/tty/serial/tegra-utc.c +++ b/drivers/tty/serial/tegra-utc.c @@ -434,7 +434,7 @@ static void tegra_utc_console_write_atomic(struct console *cons, struct nbcon_wr outbuf += burst_size; len -= burst_size; - }; + } nbcon_exit_unsafe(wctxt); } From 2318a488683ab904ccb5604222d457a3c32b8fdc Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 8 Apr 2025 09:42:00 +0200 Subject: [PATCH 012/105] serial: max310x: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20250408-gpiochip-set-rv-tty-v1-1-fb49444827d4@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 35369a2f77b2..541c790c0109 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1189,13 +1189,16 @@ static int max310x_gpio_get(struct gpio_chip *chip, unsigned int offset) return !!((val >> 4) & (1 << (offset % 4))); } -static void max310x_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) +static int max310x_gpio_set(struct gpio_chip *chip, unsigned int offset, + int value) { struct max310x_port *s = gpiochip_get_data(chip); struct uart_port *port = &s->p[offset / 4].port; max310x_port_update(port, MAX310X_GPIODATA_REG, 1 << (offset % 4), value ? 
1 << (offset % 4) : 0); + + return 0; } static int max310x_gpio_direction_input(struct gpio_chip *chip, unsigned int offset) @@ -1411,7 +1414,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty s->gpio.direction_input = max310x_gpio_direction_input; s->gpio.get = max310x_gpio_get; s->gpio.direction_output= max310x_gpio_direction_output; - s->gpio.set = max310x_gpio_set; + s->gpio.set_rv = max310x_gpio_set; s->gpio.set_config = max310x_gpio_set_config; s->gpio.base = -1; s->gpio.ngpio = devtype->nr * 4; From a5482409a435a2d0e30bd0e179f662860b0e6c6b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 8 Apr 2025 09:42:01 +0200 Subject: [PATCH 013/105] serial: sc16is7xx: use new GPIO line value setter callbacks struct gpio_chip now has callbacks for setting line values that return an integer, allowing to indicate failures. Convert the driver to using them. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20250408-gpiochip-set-rv-tty-v1-2-fb49444827d4@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 560f45ed19ae..5ea8aadb6e69 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1333,13 +1333,16 @@ static int sc16is7xx_gpio_get(struct gpio_chip *chip, unsigned offset) return !!(val & BIT(offset)); } -static void sc16is7xx_gpio_set(struct gpio_chip *chip, unsigned offset, int val) +static int sc16is7xx_gpio_set(struct gpio_chip *chip, unsigned int offset, + int val) { struct sc16is7xx_port *s = gpiochip_get_data(chip); struct uart_port *port = &s->p[0].port; sc16is7xx_port_update(port, SC16IS7XX_IOSTATE_REG, BIT(offset), val ? 
BIT(offset) : 0); + + return 0; } static int sc16is7xx_gpio_direction_input(struct gpio_chip *chip, @@ -1422,7 +1425,7 @@ static int sc16is7xx_setup_gpio_chip(struct sc16is7xx_port *s) s->gpio.direction_input = sc16is7xx_gpio_direction_input; s->gpio.get = sc16is7xx_gpio_get; s->gpio.direction_output = sc16is7xx_gpio_direction_output; - s->gpio.set = sc16is7xx_gpio_set; + s->gpio.set_rv = sc16is7xx_gpio_set; s->gpio.base = -1; s->gpio.ngpio = s->devtype->nr_gpio; s->gpio.can_sleep = 1; From 0ed22827548583ad1b905c71d1d1cf96309d63bc Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 10 Apr 2025 10:00:56 +0200 Subject: [PATCH 014/105] dt-bindings: serial: snps-dw-apb-uart: Simplify DMA-less RZ/N1 rule There is no need to repeat all SoC-specific compatible values in the rule for DMA-less RZ/N1 variants. Use wildcard "{}" instead, to ease maintenance. Signed-off-by: Geert Uytterhoeven Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/90c7aa143beb6a28255b24e8ef8c96180d869cbb.1744271974.git.geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/serial/snps-dw-apb-uart.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.yaml b/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.yaml index 1aa3480d8d81..1ffe3834b0a8 100644 --- a/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.yaml +++ b/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.yaml @@ -17,9 +17,7 @@ allOf: properties: compatible: items: - - enum: - - renesas,r9a06g032-uart - - renesas,r9a06g033-uart + - {} - const: renesas,rzn1-uart - const: snps,dw-apb-uart then: From 2c0594f9f0629a8b4d46e7e1bd069a0bafc2e350 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 9 Apr 2025 14:22:11 -0500 Subject: [PATCH 015/105] dt-bindings: serial: 8250: support an optional second clock The SpacemiT UART driver requires a bus clock to be enabled in addition to the primary 
function clock. Add the option to specify two clocks for an 8250-compatible UART, named "core" and "bus". If both are needed, require them to be named. Signed-off-by: Alex Elder Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250409192213.1130181-2-elder@riscstar.com Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/serial/8250.yaml | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/serial/8250.yaml b/Documentation/devicetree/bindings/serial/8250.yaml index dc0d52920575..33d2016b6509 100644 --- a/Documentation/devicetree/bindings/serial/8250.yaml +++ b/Documentation/devicetree/bindings/serial/8250.yaml @@ -135,7 +135,16 @@ properties: clock-frequency: true clocks: - maxItems: 1 + minItems: 1 + items: + - description: The core function clock + - description: An optional bus clock + + clock-names: + minItems: 1 + items: + - const: core + - const: bus resets: maxItems: 1 @@ -224,6 +233,25 @@ required: - reg - interrupts +if: + properties: + compatible: + contains: + const: spacemit,k1-uart +then: + required: [clock-names] + properties: + clocks: + minItems: 2 + clock-names: + minItems: 2 +else: + properties: + clocks: + maxItems: 1 + clock-names: + maxItems: 1 + unevaluatedProperties: false examples: From 81e4de4ba298d73fce72c70eddeb86b151640c27 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 9 Apr 2025 14:22:12 -0500 Subject: [PATCH 016/105] serial: 8250_of: add support for an optional bus clock The SpacemiT UART requires a bus clock to be enabled, in addition to its "normal" core clock. Look up the optional bus clock by name, and if that's found, look up the core clock using the name "core". Supplying a bus clock is optional. If no bus clock is needed, the first/only clock is used for the core clock. 
Signed-off-by: Alex Elder Link: https://lore.kernel.org/r/20250409192213.1130181-3-elder@riscstar.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_of.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index 11c860ea80f6..a90a5462aa72 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -123,7 +123,16 @@ static int of_platform_serial_setup(struct platform_device *ofdev, /* Get clk rate through clk driver if present */ if (!port->uartclk) { - info->clk = devm_clk_get_enabled(dev, NULL); + struct clk *bus_clk; + + bus_clk = devm_clk_get_optional_enabled(dev, "bus"); + if (IS_ERR(bus_clk)) { + ret = dev_err_probe(dev, PTR_ERR(bus_clk), "failed to get bus clock\n"); + goto err_pmruntime; + } + + /* If the bus clock is required, core clock must be named */ + info->clk = devm_clk_get_enabled(dev, bus_clk ? "core" : NULL); if (IS_ERR(info->clk)) { ret = dev_err_probe(dev, PTR_ERR(info->clk), "failed to get clock\n"); goto err_pmruntime; From 86bcae88c9209e334b2f8c252f4cc66beb261886 Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Thu, 3 Apr 2025 15:03:39 +0800 Subject: [PATCH 017/105] serial: Fix potential null-ptr-deref in mlb_usio_probe() devm_ioremap() can return NULL on error. Currently, mlb_usio_probe() does not check for this case, which could result in a NULL pointer dereference. Add NULL check after devm_ioremap() to prevent this issue. 
Fixes: ba44dc043004 ("serial: Add Milbeaut serial control") Signed-off-by: Henry Martin Link: https://lore.kernel.org/r/20250403070339.64990-1-bsdhenrymartin@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/milbeaut_usio.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/milbeaut_usio.c b/drivers/tty/serial/milbeaut_usio.c index 059bea18dbab..4e47dca2c4ed 100644 --- a/drivers/tty/serial/milbeaut_usio.c +++ b/drivers/tty/serial/milbeaut_usio.c @@ -523,7 +523,10 @@ static int mlb_usio_probe(struct platform_device *pdev) } port->membase = devm_ioremap(&pdev->dev, res->start, resource_size(res)); - + if (!port->membase) { + ret = -ENOMEM; + goto failed; + } ret = platform_get_irq_byname(pdev, "rx"); mlb_usio_irq[index][RX] = ret; From 74045f6658f11241a09d93404d79828cc99e94dc Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:13:53 -0400 Subject: [PATCH 018/105] vt: minor cleanup to vc_translate_unicode() Make it clearer when a sequence is bad. Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-2-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index f5642b3038e4..b5f3c8a818ed 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2817,7 +2817,7 @@ static int vc_translate_unicode(struct vc_data *vc, int c, bool *rescan) if ((c & 0xc0) == 0x80) { /* Unexpected continuation byte? 
*/ if (!vc->vc_utf_count) - return 0xfffd; + goto bad_sequence; vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f); vc->vc_npar++; @@ -2829,17 +2829,17 @@ static int vc_translate_unicode(struct vc_data *vc, int c, bool *rescan) /* Reject overlong sequences */ if (c <= utf8_length_changes[vc->vc_npar - 1] || c > utf8_length_changes[vc->vc_npar]) - return 0xfffd; + goto bad_sequence; return vc_sanitize_unicode(c); } /* Single ASCII byte or first byte of a sequence received */ if (vc->vc_utf_count) { - /* Continuation byte expected */ + /* A continuation byte was expected */ *rescan = true; vc->vc_utf_count = 0; - return 0xfffd; + goto bad_sequence; } /* Nothing to do if an ASCII byte was received */ @@ -2858,11 +2858,14 @@ static int vc_translate_unicode(struct vc_data *vc, int c, bool *rescan) vc->vc_utf_count = 3; vc->vc_utf_char = (c & 0x07); } else { - return 0xfffd; + goto bad_sequence; } need_more_bytes: return -1; + +bad_sequence: + return 0xfffd; } static int vc_translate(struct vc_data *vc, int *c, bool *rescan) From 2acaf27cd7f4f32bfe8bf7335690618e2417e744 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:13:54 -0400 Subject: [PATCH 019/105] vt: move unicode processing to a separate file This will make it easier to maintain. Also make it depend on CONFIG_CONSOLE_TRANSLATIONS. 
Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-3-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/Makefile | 3 ++- drivers/tty/vt/ucs_width.c | 45 ++++++++++++++++++++++++++++++++++++++ drivers/tty/vt/vt.c | 40 +-------------------------------- include/linux/consolemap.h | 6 +++++ 4 files changed, 54 insertions(+), 40 deletions(-) create mode 100644 drivers/tty/vt/ucs_width.c diff --git a/drivers/tty/vt/Makefile b/drivers/tty/vt/Makefile index 2c8ce8b592ed..bee69277bbc3 100644 --- a/drivers/tty/vt/Makefile +++ b/drivers/tty/vt/Makefile @@ -7,7 +7,8 @@ FONTMAPFILE = cp437.uni obj-$(CONFIG_VT) += vt_ioctl.o vc_screen.o \ selection.o keyboard.o \ vt.o defkeymap.o -obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o +obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o \ + ucs_width.o # Files generated that shall be removed upon make clean clean-files := consolemap_deftbl.c defkeymap.c diff --git a/drivers/tty/vt/ucs_width.c b/drivers/tty/vt/ucs_width.c new file mode 100644 index 000000000000..5f0bde30a1fb --- /dev/null +++ b/drivers/tty/vt/ucs_width.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include + +/* ucs_is_double_width() is based on the wcwidth() implementation by + * Markus Kuhn -- 2007-05-26 (Unicode 5.0) + * Latest version: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c + */ + +struct interval { + uint32_t first; + uint32_t last; +}; + +static int ucs_cmp(const void *key, const void *elt) +{ + uint32_t cp = *(uint32_t *)key; + struct interval e = *(struct interval *) elt; + + if (cp > e.last) + return 1; + else if (cp < e.first) + return -1; + return 0; +} + +static const struct interval double_width[] = { + { 0x1100, 0x115F }, { 0x2329, 0x232A }, { 0x2E80, 0x303E }, + { 0x3040, 0xA4CF }, { 0xAC00, 0xD7A3 }, { 0xF900, 0xFAFF }, + { 0xFE10, 0xFE19 }, { 0xFE30, 0xFE6F }, { 0xFF00, 0xFF60 }, + { 0xFFE0, 0xFFE6 }, { 0x20000, 
0x2FFFD }, { 0x30000, 0x3FFFD } +}; + +bool ucs_is_double_width(uint32_t cp) +{ + if (cp < double_width[0].first || + cp > double_width[ARRAY_SIZE(double_width) - 1].last) + return false; + + return bsearch(&cp, double_width, ARRAY_SIZE(double_width), + sizeof(struct interval), ucs_cmp) != NULL; +} diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index b5f3c8a818ed..bcb508bc15ab 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -104,7 +104,6 @@ #include #include #include -#include #include #define MAX_NR_CON_DRIVER 16 @@ -2712,43 +2711,6 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, u8 c) } } -/* is_double_width() is based on the wcwidth() implementation by - * Markus Kuhn -- 2007-05-26 (Unicode 5.0) - * Latest version: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c - */ -struct interval { - uint32_t first; - uint32_t last; -}; - -static int ucs_cmp(const void *key, const void *elt) -{ - uint32_t ucs = *(uint32_t *)key; - struct interval e = *(struct interval *) elt; - - if (ucs > e.last) - return 1; - else if (ucs < e.first) - return -1; - return 0; -} - -static int is_double_width(uint32_t ucs) -{ - static const struct interval double_width[] = { - { 0x1100, 0x115F }, { 0x2329, 0x232A }, { 0x2E80, 0x303E }, - { 0x3040, 0xA4CF }, { 0xAC00, 0xD7A3 }, { 0xF900, 0xFAFF }, - { 0xFE10, 0xFE19 }, { 0xFE30, 0xFE6F }, { 0xFF00, 0xFF60 }, - { 0xFFE0, 0xFFE6 }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD } - }; - if (ucs < double_width[0].first || - ucs > double_width[ARRAY_SIZE(double_width) - 1].last) - return 0; - - return bsearch(&ucs, double_width, ARRAY_SIZE(double_width), - sizeof(struct interval), ucs_cmp) != NULL; -} - struct vc_draw_region { unsigned long from, to; int x; @@ -2953,7 +2915,7 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, bool inverse = false; if (vc->vc_utf && !vc->vc_disp_ctrl) { - if (is_double_width(c)) + if (ucs_is_double_width(c)) width = 2; } diff --git a/include/linux/consolemap.h 
b/include/linux/consolemap.h index c35db4896c37..caf079bcb8c9 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -28,6 +28,7 @@ int conv_uni_to_pc(struct vc_data *conp, long ucs); u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); +bool ucs_is_double_width(uint32_t cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -57,6 +58,11 @@ static inline int conv_uni_to_8bit(u32 uni) } static inline void console_map_init(void) { } + +static inline bool ucs_is_double_width(uint32_t cp) +{ + return false; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ From e88391f730e46d208b7fb37b02611d24137af1ef Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:13:55 -0400 Subject: [PATCH 020/105] vt: properly support zero-width Unicode code points Zero-width Unicode code points are causing misalignment in vertically aligned content, disrupting the visual layout. Let's handle zero-width code points more intelligently. Double-width code points are stored in the screen grid followed by a white space code point to create the expected screen layout. When a double-width code point is followed by a zero-width code point in the console incoming bytestream (e.g., an emoji with a presentation selector) then we may replace the white space padding by that zero-width code point instead of dropping it. This maximizes screen content information while preserving proper layout. If a zero-width code point is preceded by a single-width code point then the above trick is not possible and such zero-width code point must be dropped. VS16 (Variation Selector 16, U+FE0F) is special as it doubles the width of the preceding single-width code point. We handle that case by giving VS16 a width of 1 when that happens. 
Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-4-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 46 ++++++++++++++++++++++++++++++++++++-- include/linux/consolemap.h | 10 +++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index bcb508bc15ab..5d53feeb5d2b 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -443,6 +443,15 @@ static void vc_uniscr_scroll(struct vc_data *vc, unsigned int top, } } +static u32 vc_uniscr_getc(struct vc_data *vc, int relative_pos) +{ + int pos = vc->state.x + vc->vc_need_wrap + relative_pos; + + if (vc->vc_uni_lines && pos >= 0 && pos < vc->vc_cols) + return vc->vc_uni_lines[vc->state.y][pos]; + return 0; +} + static void vc_uniscr_copy_area(u32 **dst_lines, unsigned int dst_cols, unsigned int dst_rows, @@ -2905,18 +2914,49 @@ static bool vc_is_control(struct vc_data *vc, int tc, int c) return false; } +static void vc_con_rewind(struct vc_data *vc) +{ + if (vc->state.x && !vc->vc_need_wrap) { + vc->vc_pos -= 2; + vc->state.x--; + } + vc->vc_need_wrap = 0; +} + static int vc_con_write_normal(struct vc_data *vc, int tc, int c, struct vc_draw_region *draw) { - int next_c; + int next_c, prev_c; unsigned char vc_attr = vc->vc_attr; u16 himask = vc->vc_hi_font_mask, charmask = himask ? 0x1ff : 0xff; u8 width = 1; bool inverse = false; if (vc->vc_utf && !vc->vc_disp_ctrl) { - if (ucs_is_double_width(c)) + if (ucs_is_double_width(c)) { width = 2; + } else if (ucs_is_zero_width(c)) { + prev_c = vc_uniscr_getc(vc, -1); + if (prev_c == ' ' && + ucs_is_double_width(vc_uniscr_getc(vc, -2))) { + /* + * Let's merge this zero-width code point with + * the preceding double-width code point by + * replacing the existing whitespace padding. + */ + vc_con_rewind(vc); + } else if (c == 0xfe0f && prev_c != 0) { + /* + * VS16 (U+FE0F) is special. 
Let it have a + * width of 1 when preceded by a single-width + * code point effectively making the latter + * double-width. + */ + } else { + /* Otherwise zero-width code points are ignored */ + goto out; + } + } } /* Now try to find out how to display it */ @@ -2995,6 +3035,8 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, tc = ' '; next_c = ' '; } + +out: notify_write(vc, c); if (inverse) diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index caf079bcb8c9..7d778752dcef 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,6 +29,11 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); +static inline bool ucs_is_zero_width(uint32_t cp) +{ + /* coming soon */ + return false; +} #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -63,6 +68,11 @@ static inline bool ucs_is_double_width(uint32_t cp) { return false; } + +static inline bool ucs_is_zero_width(uint32_t cp) +{ + return false; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ From 26c94eb4842ada96f9709b43ef225417a6b4df63 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:13:56 -0400 Subject: [PATCH 021/105] vt: introduce gen_ucs_width.py to create ucs_width.c The table in the current ucs_width.c is terribly out of date and incomplete. We also need a second table to store zero-width code points. Properly maintaining those tables manually is impossible. So here's a script to automatically generate them. 
Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-5-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/gen_ucs_width.py | 264 ++++++++++++++++++++++++++++++++ 1 file changed, 264 insertions(+) create mode 100755 drivers/tty/vt/gen_ucs_width.py diff --git a/drivers/tty/vt/gen_ucs_width.py b/drivers/tty/vt/gen_ucs_width.py new file mode 100755 index 000000000000..41997fe00129 --- /dev/null +++ b/drivers/tty/vt/gen_ucs_width.py @@ -0,0 +1,264 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# This script uses Python's unicodedata module to generate ucs_width.c + +import unicodedata +import sys + +def generate_ucs_width(): + # Output file name + c_file = "ucs_width.c" + + # Width data mapping + width_map = {} # Maps code points to width (0, 1, 2) + + # Define emoji modifiers and components that should have zero width + emoji_zero_width = [ + # Skin tone modifiers + (0x1F3FB, 0x1F3FF), # Emoji modifiers (skin tones) + + # Variation selectors (note: VS16 is treated specially in vt.c) + (0xFE00, 0xFE0F), # Variation Selectors 1-16 + + # Gender and hair style modifiers + (0x2640, 0x2640), # Female sign + (0x2642, 0x2642), # Male sign + (0x26A7, 0x26A7), # Transgender symbol + (0x1F9B0, 0x1F9B3), # Hair components (red, curly, white, bald) + + # Tag characters + (0xE0020, 0xE007E), # Tags + ] + + # Mark these emoji modifiers as zero-width + for start, end in emoji_zero_width: + for cp in range(start, end + 1): + try: + width_map[cp] = 0 + except (ValueError, OverflowError): + continue + + # Mark all regional indicators as single-width as they are usually paired + # providing a combined width of 2. 
+ regional_indicators = (0x1F1E6, 0x1F1FF) # Regional indicator symbols A-Z + start, end = regional_indicators + for cp in range(start, end + 1): + try: + width_map[cp] = 1 + except (ValueError, OverflowError): + continue + + # Process all assigned Unicode code points (Basic Multilingual Plane + Supplementary Planes) + # Range 0x0 to 0x10FFFF (the full Unicode range) + for block_start in range(0, 0x110000, 0x1000): + block_end = block_start + 0x1000 + for cp in range(block_start, block_end): + try: + char = chr(cp) + + # Skip if already processed + if cp in width_map: + continue + + # Check if the character is a combining mark + category = unicodedata.category(char) + + # Combining marks, format characters, zero-width characters + if (category.startswith('M') or # Mark (combining) + (category == 'Cf' and cp not in (0x061C, 0x06DD, 0x070F, 0x180E, 0x200F, 0x202E, 0x2066, 0x2067, 0x2068, 0x2069)) or + cp in (0x200B, 0x200C, 0x200D, 0x2060, 0xFEFF)): # Known zero-width characters + width_map[cp] = 0 + continue + + # Use East Asian Width property + eaw = unicodedata.east_asian_width(char) + + if eaw in ('F', 'W'): # Fullwidth or Wide + width_map[cp] = 2 + elif eaw in ('Na', 'H', 'N', 'A'): # Narrow, Halfwidth, Neutral, Ambiguous + width_map[cp] = 1 + else: + # Default to single-width for unknown + width_map[cp] = 1 + + except (ValueError, OverflowError): + # Skip invalid code points + continue + + # Process Emoji - generally double-width + # Ranges according to Unicode Emoji standard + emoji_ranges = [ + (0x1F000, 0x1F02F), # Mahjong Tiles + (0x1F0A0, 0x1F0FF), # Playing Cards + (0x1F300, 0x1F5FF), # Miscellaneous Symbols and Pictographs + (0x1F600, 0x1F64F), # Emoticons + (0x1F680, 0x1F6FF), # Transport and Map Symbols + (0x1F700, 0x1F77F), # Alchemical Symbols + (0x1F780, 0x1F7FF), # Geometric Shapes Extended + (0x1F800, 0x1F8FF), # Supplemental Arrows-C + (0x1F900, 0x1F9FF), # Supplemental Symbols and Pictographs + (0x1FA00, 0x1FA6F), # Chess Symbols + (0x1FA70, 
0x1FAFF), # Symbols and Pictographs Extended-A + ] + + for start, end in emoji_ranges: + for cp in range(start, end + 1): + if cp not in width_map or width_map[cp] != 0: # Don't override zero-width + try: + char = chr(cp) + width_map[cp] = 2 + except (ValueError, OverflowError): + continue + + # Optimize to create range tables + def ranges_optimize(width_data, target_width): + points = sorted([cp for cp, width in width_data.items() if width == target_width]) + if not points: + return [] + + # Group consecutive code points into ranges + ranges = [] + start = points[0] + prev = start + + for cp in points[1:]: + if cp > prev + 1: + ranges.append((start, prev)) + start = cp + prev = cp + + # Add the last range + ranges.append((start, prev)) + return ranges + + # Extract ranges for each width + zero_width_ranges = ranges_optimize(width_map, 0) + double_width_ranges = ranges_optimize(width_map, 2) + + # Get Unicode version information + unicode_version = unicodedata.unidata_version + + # Generate C implementation file + with open(c_file, 'w') as f: + f.write(f"""\ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucs_width.c - Unicode character width lookup + * + * Auto-generated by gen_ucs_width.py + * + * Unicode Version: {unicode_version} + */ + +#include +#include +#include +#include + +struct interval {{ + uint32_t first; + uint32_t last; +}}; + +/* Zero-width character ranges */ +static const struct interval zero_width_ranges[] = {{ +""") + + for start, end in zero_width_ranges: + try: + start_char_desc = unicodedata.name(chr(start)) if start < 0x10000 else f"U+{start:05X}" + if start == end: + comment = f"/* {start_char_desc} */" + else: + end_char_desc = unicodedata.name(chr(end)) if end < 0x10000 else f"U+{end:05X}" + comment = f"/* {start_char_desc} - {end_char_desc} */" + except: + if start == end: + comment = f"/* U+{start:05X} */" + else: + comment = f"/* U+{start:05X} - U+{end:05X} */" + + f.write(f"\t{{ 0x{start:05X}, 0x{end:05X} }}, {comment}\n") + + 
f.write("""\ +}; + +/* Double-width character ranges */ +static const struct interval double_width_ranges[] = { +""") + + for start, end in double_width_ranges: + try: + start_char_desc = unicodedata.name(chr(start)) if start < 0x10000 else f"U+{start:05X}" + if start == end: + comment = f"/* {start_char_desc} */" + else: + end_char_desc = unicodedata.name(chr(end)) if end < 0x10000 else f"U+{end:05X}" + comment = f"/* {start_char_desc} - {end_char_desc} */" + except: + if start == end: + comment = f"/* U+{start:05X} */" + else: + comment = f"/* U+{start:05X} - U+{end:05X} */" + + f.write(f"\t{{ 0x{start:05X}, 0x{end:05X} }}, {comment}\n") + + f.write("""\ +}; + + +static int ucs_cmp(const void *key, const void *element) +{ + uint32_t cp = *(uint32_t *)key; + const struct interval *e = element; + + if (cp > e->last) + return 1; + if (cp < e->first) + return -1; + return 0; +} + +static bool is_in_interval(uint32_t cp, const struct interval *intervals, size_t count) +{ + if (cp < intervals[0].first || cp > intervals[count - 1].last) + return false; + + return __inline_bsearch(&cp, intervals, count, + sizeof(*intervals), ucs_cmp) != NULL; +} + +/** + * Determine if a Unicode code point is zero-width. + * + * @param ucs: Unicode code point (UCS-4) + * Return: true if the character is zero-width, false otherwise + */ +bool ucs_is_zero_width(uint32_t cp) +{ + return is_in_interval(cp, zero_width_ranges, ARRAY_SIZE(zero_width_ranges)); +} + +/** + * Determine if a Unicode code point is double-width. 
+ * + * @param ucs: Unicode code point (UCS-4) + * Return: true if the character is double-width, false otherwise + */ +bool ucs_is_double_width(uint32_t cp) +{ + return is_in_interval(cp, double_width_ranges, ARRAY_SIZE(double_width_ranges)); +} +""") + + # Print summary + zero_width_count = sum(end - start + 1 for start, end in zero_width_ranges) + double_width_count = sum(end - start + 1 for start, end in double_width_ranges) + + print(f"Generated {c_file} with:") + print(f"- {len(zero_width_ranges)} zero-width ranges covering ~{zero_width_count} code points") + print(f"- {len(double_width_ranges)} double-width ranges covering ~{double_width_count} code points") + +if __name__ == "__main__": + generate_ucs_width() From 3a1ab63aa05b4736a7d30ae0a769385662f13def Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:13:57 -0400 Subject: [PATCH 022/105] vt: update ucs_width.c using gen_ucs_width.py This replaces ucs_width.c with the code generated by gen_ucs_width.py providing comprehensive tables for double-width and zero-width Unicode code points. Also make ucs_is_zero_width() effective. Note: scripts/checkpatch.pl complains about "... exceeds 100 columns". Please ignore. 
Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-6-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/ucs_width.c | 495 +++++++++++++++++++++++++++++++++++-- include/linux/consolemap.h | 6 +- 2 files changed, 475 insertions(+), 26 deletions(-) diff --git a/drivers/tty/vt/ucs_width.c b/drivers/tty/vt/ucs_width.c index 5f0bde30a1fb..47b22583bd34 100644 --- a/drivers/tty/vt/ucs_width.c +++ b/drivers/tty/vt/ucs_width.c @@ -1,45 +1,498 @@ // SPDX-License-Identifier: GPL-2.0 +/* + * ucs_width.c - Unicode character width lookup + * + * Auto-generated by gen_ucs_width.py + * + * Unicode Version: 16.0.0 + */ #include #include #include #include -/* ucs_is_double_width() is based on the wcwidth() implementation by - * Markus Kuhn -- 2007-05-26 (Unicode 5.0) - * Latest version: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c - */ - struct interval { uint32_t first; uint32_t last; }; -static int ucs_cmp(const void *key, const void *elt) +/* Zero-width character ranges */ +static const struct interval zero_width_ranges[] = { + { 0x000AD, 0x000AD }, /* SOFT HYPHEN */ + { 0x00300, 0x0036F }, /* COMBINING GRAVE ACCENT - COMBINING LATIN SMALL LETTER X */ + { 0x00483, 0x00489 }, /* COMBINING CYRILLIC TITLO - COMBINING CYRILLIC MILLIONS SIGN */ + { 0x00591, 0x005BD }, /* HEBREW ACCENT ETNAHTA - HEBREW POINT METEG */ + { 0x005BF, 0x005BF }, /* HEBREW POINT RAFE */ + { 0x005C1, 0x005C2 }, /* HEBREW POINT SHIN DOT - HEBREW POINT SIN DOT */ + { 0x005C4, 0x005C5 }, /* HEBREW MARK UPPER DOT - HEBREW MARK LOWER DOT */ + { 0x005C7, 0x005C7 }, /* HEBREW POINT QAMATS QATAN */ + { 0x00600, 0x00605 }, /* ARABIC NUMBER SIGN - ARABIC NUMBER MARK ABOVE */ + { 0x00610, 0x0061A }, /* ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM - ARABIC SMALL KASRA */ + { 0x0064B, 0x0065F }, /* ARABIC FATHATAN - ARABIC WAVY HAMZA BELOW */ + { 0x00670, 0x00670 }, /* ARABIC LETTER SUPERSCRIPT ALEF */ + { 0x006D6, 0x006DC }, /* ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH 
ALEF MAKSURA - ARABIC SMALL HIGH SEEN */ + { 0x006DF, 0x006E4 }, /* ARABIC SMALL HIGH ROUNDED ZERO - ARABIC SMALL HIGH MADDA */ + { 0x006E7, 0x006E8 }, /* ARABIC SMALL HIGH YEH - ARABIC SMALL HIGH NOON */ + { 0x006EA, 0x006ED }, /* ARABIC EMPTY CENTRE LOW STOP - ARABIC SMALL LOW MEEM */ + { 0x00711, 0x00711 }, /* SYRIAC LETTER SUPERSCRIPT ALAPH */ + { 0x00730, 0x0074A }, /* SYRIAC PTHAHA ABOVE - SYRIAC BARREKH */ + { 0x007A6, 0x007B0 }, /* THAANA ABAFILI - THAANA SUKUN */ + { 0x007EB, 0x007F3 }, /* NKO COMBINING SHORT HIGH TONE - NKO COMBINING DOUBLE DOT ABOVE */ + { 0x007FD, 0x007FD }, /* NKO DANTAYALAN */ + { 0x00816, 0x00819 }, /* SAMARITAN MARK IN - SAMARITAN MARK DAGESH */ + { 0x0081B, 0x00823 }, /* SAMARITAN MARK EPENTHETIC YUT - SAMARITAN VOWEL SIGN A */ + { 0x00825, 0x00827 }, /* SAMARITAN VOWEL SIGN SHORT A - SAMARITAN VOWEL SIGN U */ + { 0x00829, 0x0082D }, /* SAMARITAN VOWEL SIGN LONG I - SAMARITAN MARK NEQUDAA */ + { 0x00859, 0x0085B }, /* MANDAIC AFFRICATION MARK - MANDAIC GEMINATION MARK */ + { 0x00890, 0x00891 }, /* ARABIC POUND MARK ABOVE - ARABIC PIASTRE MARK ABOVE */ + { 0x00897, 0x0089F }, /* ARABIC PEPET - ARABIC HALF MADDA OVER MADDA */ + { 0x008CA, 0x00903 }, /* ARABIC SMALL HIGH FARSI YEH - DEVANAGARI SIGN VISARGA */ + { 0x0093A, 0x0093C }, /* DEVANAGARI VOWEL SIGN OE - DEVANAGARI SIGN NUKTA */ + { 0x0093E, 0x0094F }, /* DEVANAGARI VOWEL SIGN AA - DEVANAGARI VOWEL SIGN AW */ + { 0x00951, 0x00957 }, /* DEVANAGARI STRESS SIGN UDATTA - DEVANAGARI VOWEL SIGN UUE */ + { 0x00962, 0x00963 }, /* DEVANAGARI VOWEL SIGN VOCALIC L - DEVANAGARI VOWEL SIGN VOCALIC LL */ + { 0x00981, 0x00983 }, /* BENGALI SIGN CANDRABINDU - BENGALI SIGN VISARGA */ + { 0x009BC, 0x009BC }, /* BENGALI SIGN NUKTA */ + { 0x009BE, 0x009C4 }, /* BENGALI VOWEL SIGN AA - BENGALI VOWEL SIGN VOCALIC RR */ + { 0x009C7, 0x009C8 }, /* BENGALI VOWEL SIGN E - BENGALI VOWEL SIGN AI */ + { 0x009CB, 0x009CD }, /* BENGALI VOWEL SIGN O - BENGALI SIGN VIRAMA */ + { 0x009D7, 0x009D7 }, /* BENGALI 
AU LENGTH MARK */ + { 0x009E2, 0x009E3 }, /* BENGALI VOWEL SIGN VOCALIC L - BENGALI VOWEL SIGN VOCALIC LL */ + { 0x009FE, 0x009FE }, /* BENGALI SANDHI MARK */ + { 0x00A01, 0x00A03 }, /* GURMUKHI SIGN ADAK BINDI - GURMUKHI SIGN VISARGA */ + { 0x00A3C, 0x00A3C }, /* GURMUKHI SIGN NUKTA */ + { 0x00A3E, 0x00A42 }, /* GURMUKHI VOWEL SIGN AA - GURMUKHI VOWEL SIGN UU */ + { 0x00A47, 0x00A48 }, /* GURMUKHI VOWEL SIGN EE - GURMUKHI VOWEL SIGN AI */ + { 0x00A4B, 0x00A4D }, /* GURMUKHI VOWEL SIGN OO - GURMUKHI SIGN VIRAMA */ + { 0x00A51, 0x00A51 }, /* GURMUKHI SIGN UDAAT */ + { 0x00A70, 0x00A71 }, /* GURMUKHI TIPPI - GURMUKHI ADDAK */ + { 0x00A75, 0x00A75 }, /* GURMUKHI SIGN YAKASH */ + { 0x00A81, 0x00A83 }, /* GUJARATI SIGN CANDRABINDU - GUJARATI SIGN VISARGA */ + { 0x00ABC, 0x00ABC }, /* GUJARATI SIGN NUKTA */ + { 0x00ABE, 0x00AC5 }, /* GUJARATI VOWEL SIGN AA - GUJARATI VOWEL SIGN CANDRA E */ + { 0x00AC7, 0x00AC9 }, /* GUJARATI VOWEL SIGN E - GUJARATI VOWEL SIGN CANDRA O */ + { 0x00ACB, 0x00ACD }, /* GUJARATI VOWEL SIGN O - GUJARATI SIGN VIRAMA */ + { 0x00AE2, 0x00AE3 }, /* GUJARATI VOWEL SIGN VOCALIC L - GUJARATI VOWEL SIGN VOCALIC LL */ + { 0x00AFA, 0x00AFF }, /* GUJARATI SIGN SUKUN - GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE */ + { 0x00B01, 0x00B03 }, /* ORIYA SIGN CANDRABINDU - ORIYA SIGN VISARGA */ + { 0x00B3C, 0x00B3C }, /* ORIYA SIGN NUKTA */ + { 0x00B3E, 0x00B44 }, /* ORIYA VOWEL SIGN AA - ORIYA VOWEL SIGN VOCALIC RR */ + { 0x00B47, 0x00B48 }, /* ORIYA VOWEL SIGN E - ORIYA VOWEL SIGN AI */ + { 0x00B4B, 0x00B4D }, /* ORIYA VOWEL SIGN O - ORIYA SIGN VIRAMA */ + { 0x00B55, 0x00B57 }, /* ORIYA SIGN OVERLINE - ORIYA AU LENGTH MARK */ + { 0x00B62, 0x00B63 }, /* ORIYA VOWEL SIGN VOCALIC L - ORIYA VOWEL SIGN VOCALIC LL */ + { 0x00B82, 0x00B82 }, /* TAMIL SIGN ANUSVARA */ + { 0x00BBE, 0x00BC2 }, /* TAMIL VOWEL SIGN AA - TAMIL VOWEL SIGN UU */ + { 0x00BC6, 0x00BC8 }, /* TAMIL VOWEL SIGN E - TAMIL VOWEL SIGN AI */ + { 0x00BCA, 0x00BCD }, /* TAMIL VOWEL SIGN O - TAMIL SIGN VIRAMA */ 
+ { 0x00BD7, 0x00BD7 }, /* TAMIL AU LENGTH MARK */ + { 0x00C00, 0x00C04 }, /* TELUGU SIGN COMBINING CANDRABINDU ABOVE - TELUGU SIGN COMBINING ANUSVARA ABOVE */ + { 0x00C3C, 0x00C3C }, /* TELUGU SIGN NUKTA */ + { 0x00C3E, 0x00C44 }, /* TELUGU VOWEL SIGN AA - TELUGU VOWEL SIGN VOCALIC RR */ + { 0x00C46, 0x00C48 }, /* TELUGU VOWEL SIGN E - TELUGU VOWEL SIGN AI */ + { 0x00C4A, 0x00C4D }, /* TELUGU VOWEL SIGN O - TELUGU SIGN VIRAMA */ + { 0x00C55, 0x00C56 }, /* TELUGU LENGTH MARK - TELUGU AI LENGTH MARK */ + { 0x00C62, 0x00C63 }, /* TELUGU VOWEL SIGN VOCALIC L - TELUGU VOWEL SIGN VOCALIC LL */ + { 0x00C81, 0x00C83 }, /* KANNADA SIGN CANDRABINDU - KANNADA SIGN VISARGA */ + { 0x00CBC, 0x00CBC }, /* KANNADA SIGN NUKTA */ + { 0x00CBE, 0x00CC4 }, /* KANNADA VOWEL SIGN AA - KANNADA VOWEL SIGN VOCALIC RR */ + { 0x00CC6, 0x00CC8 }, /* KANNADA VOWEL SIGN E - KANNADA VOWEL SIGN AI */ + { 0x00CCA, 0x00CCD }, /* KANNADA VOWEL SIGN O - KANNADA SIGN VIRAMA */ + { 0x00CD5, 0x00CD6 }, /* KANNADA LENGTH MARK - KANNADA AI LENGTH MARK */ + { 0x00CE2, 0x00CE3 }, /* KANNADA VOWEL SIGN VOCALIC L - KANNADA VOWEL SIGN VOCALIC LL */ + { 0x00CF3, 0x00CF3 }, /* KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT */ + { 0x00D00, 0x00D03 }, /* MALAYALAM SIGN COMBINING ANUSVARA ABOVE - MALAYALAM SIGN VISARGA */ + { 0x00D3B, 0x00D3C }, /* MALAYALAM SIGN VERTICAL BAR VIRAMA - MALAYALAM SIGN CIRCULAR VIRAMA */ + { 0x00D3E, 0x00D44 }, /* MALAYALAM VOWEL SIGN AA - MALAYALAM VOWEL SIGN VOCALIC RR */ + { 0x00D46, 0x00D48 }, /* MALAYALAM VOWEL SIGN E - MALAYALAM VOWEL SIGN AI */ + { 0x00D4A, 0x00D4D }, /* MALAYALAM VOWEL SIGN O - MALAYALAM SIGN VIRAMA */ + { 0x00D57, 0x00D57 }, /* MALAYALAM AU LENGTH MARK */ + { 0x00D62, 0x00D63 }, /* MALAYALAM VOWEL SIGN VOCALIC L - MALAYALAM VOWEL SIGN VOCALIC LL */ + { 0x00D81, 0x00D83 }, /* SINHALA SIGN CANDRABINDU - SINHALA SIGN VISARGAYA */ + { 0x00DCA, 0x00DCA }, /* SINHALA SIGN AL-LAKUNA */ + { 0x00DCF, 0x00DD4 }, /* SINHALA VOWEL SIGN AELA-PILLA - SINHALA VOWEL SIGN KETTI 
PAA-PILLA */ + { 0x00DD6, 0x00DD6 }, /* SINHALA VOWEL SIGN DIGA PAA-PILLA */ + { 0x00DD8, 0x00DDF }, /* SINHALA VOWEL SIGN GAETTA-PILLA - SINHALA VOWEL SIGN GAYANUKITTA */ + { 0x00DF2, 0x00DF3 }, /* SINHALA VOWEL SIGN DIGA GAETTA-PILLA - SINHALA VOWEL SIGN DIGA GAYANUKITTA */ + { 0x00E31, 0x00E31 }, /* THAI CHARACTER MAI HAN-AKAT */ + { 0x00E34, 0x00E3A }, /* THAI CHARACTER SARA I - THAI CHARACTER PHINTHU */ + { 0x00E47, 0x00E4E }, /* THAI CHARACTER MAITAIKHU - THAI CHARACTER YAMAKKAN */ + { 0x00EB1, 0x00EB1 }, /* LAO VOWEL SIGN MAI KAN */ + { 0x00EB4, 0x00EBC }, /* LAO VOWEL SIGN I - LAO SEMIVOWEL SIGN LO */ + { 0x00EC8, 0x00ECE }, /* LAO TONE MAI EK - LAO YAMAKKAN */ + { 0x00F18, 0x00F19 }, /* TIBETAN ASTROLOGICAL SIGN -KHYUD PA - TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS */ + { 0x00F35, 0x00F35 }, /* TIBETAN MARK NGAS BZUNG NYI ZLA */ + { 0x00F37, 0x00F37 }, /* TIBETAN MARK NGAS BZUNG SGOR RTAGS */ + { 0x00F39, 0x00F39 }, /* TIBETAN MARK TSA -PHRU */ + { 0x00F3E, 0x00F3F }, /* TIBETAN SIGN YAR TSHES - TIBETAN SIGN MAR TSHES */ + { 0x00F71, 0x00F84 }, /* TIBETAN VOWEL SIGN AA - TIBETAN MARK HALANTA */ + { 0x00F86, 0x00F87 }, /* TIBETAN SIGN LCI RTAGS - TIBETAN SIGN YANG RTAGS */ + { 0x00F8D, 0x00F97 }, /* TIBETAN SUBJOINED SIGN LCE TSA CAN - TIBETAN SUBJOINED LETTER JA */ + { 0x00F99, 0x00FBC }, /* TIBETAN SUBJOINED LETTER NYA - TIBETAN SUBJOINED LETTER FIXED-FORM RA */ + { 0x00FC6, 0x00FC6 }, /* TIBETAN SYMBOL PADMA GDAN */ + { 0x0102B, 0x0103E }, /* MYANMAR VOWEL SIGN TALL AA - MYANMAR CONSONANT SIGN MEDIAL HA */ + { 0x01056, 0x01059 }, /* MYANMAR VOWEL SIGN VOCALIC R - MYANMAR VOWEL SIGN VOCALIC LL */ + { 0x0105E, 0x01060 }, /* MYANMAR CONSONANT SIGN MON MEDIAL NA - MYANMAR CONSONANT SIGN MON MEDIAL LA */ + { 0x01062, 0x01064 }, /* MYANMAR VOWEL SIGN SGAW KAREN EU - MYANMAR TONE MARK SGAW KAREN KE PHO */ + { 0x01067, 0x0106D }, /* MYANMAR VOWEL SIGN WESTERN PWO KAREN EU - MYANMAR SIGN WESTERN PWO KAREN TONE-5 */ + { 0x01071, 0x01074 }, /* MYANMAR VOWEL SIGN GEBA 
KAREN I - MYANMAR VOWEL SIGN KAYAH EE */ + { 0x01082, 0x0108D }, /* MYANMAR CONSONANT SIGN SHAN MEDIAL WA - MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE */ + { 0x0108F, 0x0108F }, /* MYANMAR SIGN RUMAI PALAUNG TONE-5 */ + { 0x0109A, 0x0109D }, /* MYANMAR SIGN KHAMTI TONE-1 - MYANMAR VOWEL SIGN AITON AI */ + { 0x0135D, 0x0135F }, /* ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK - ETHIOPIC COMBINING GEMINATION MARK */ + { 0x01712, 0x01715 }, /* TAGALOG VOWEL SIGN I - TAGALOG SIGN PAMUDPOD */ + { 0x01732, 0x01734 }, /* HANUNOO VOWEL SIGN I - HANUNOO SIGN PAMUDPOD */ + { 0x01752, 0x01753 }, /* BUHID VOWEL SIGN I - BUHID VOWEL SIGN U */ + { 0x01772, 0x01773 }, /* TAGBANWA VOWEL SIGN I - TAGBANWA VOWEL SIGN U */ + { 0x017B4, 0x017D3 }, /* KHMER VOWEL INHERENT AQ - KHMER SIGN BATHAMASAT */ + { 0x017DD, 0x017DD }, /* KHMER SIGN ATTHACAN */ + { 0x0180B, 0x0180D }, /* MONGOLIAN FREE VARIATION SELECTOR ONE - MONGOLIAN FREE VARIATION SELECTOR THREE */ + { 0x0180F, 0x0180F }, /* MONGOLIAN FREE VARIATION SELECTOR FOUR */ + { 0x01885, 0x01886 }, /* MONGOLIAN LETTER ALI GALI BALUDA - MONGOLIAN LETTER ALI GALI THREE BALUDA */ + { 0x018A9, 0x018A9 }, /* MONGOLIAN LETTER ALI GALI DAGALGA */ + { 0x01920, 0x0192B }, /* LIMBU VOWEL SIGN A - LIMBU SUBJOINED LETTER WA */ + { 0x01930, 0x0193B }, /* LIMBU SMALL LETTER KA - LIMBU SIGN SA-I */ + { 0x01A17, 0x01A1B }, /* BUGINESE VOWEL SIGN I - BUGINESE VOWEL SIGN AE */ + { 0x01A55, 0x01A5E }, /* TAI THAM CONSONANT SIGN MEDIAL RA - TAI THAM CONSONANT SIGN SA */ + { 0x01A60, 0x01A7C }, /* TAI THAM SIGN SAKOT - TAI THAM SIGN KHUEN-LUE KARAN */ + { 0x01A7F, 0x01A7F }, /* TAI THAM COMBINING CRYPTOGRAMMIC DOT */ + { 0x01AB0, 0x01ACE }, /* COMBINING DOUBLED CIRCUMFLEX ACCENT - COMBINING LATIN SMALL LETTER INSULAR T */ + { 0x01B00, 0x01B04 }, /* BALINESE SIGN ULU RICEM - BALINESE SIGN BISAH */ + { 0x01B34, 0x01B44 }, /* BALINESE SIGN REREKAN - BALINESE ADEG ADEG */ + { 0x01B6B, 0x01B73 }, /* BALINESE MUSICAL SYMBOL COMBINING TEGEH - BALINESE MUSICAL 
SYMBOL COMBINING GONG */ + { 0x01B80, 0x01B82 }, /* SUNDANESE SIGN PANYECEK - SUNDANESE SIGN PANGWISAD */ + { 0x01BA1, 0x01BAD }, /* SUNDANESE CONSONANT SIGN PAMINGKAL - SUNDANESE CONSONANT SIGN PASANGAN WA */ + { 0x01BE6, 0x01BF3 }, /* BATAK SIGN TOMPI - BATAK PANONGONAN */ + { 0x01C24, 0x01C37 }, /* LEPCHA SUBJOINED LETTER YA - LEPCHA SIGN NUKTA */ + { 0x01CD0, 0x01CD2 }, /* VEDIC TONE KARSHANA - VEDIC TONE PRENKHA */ + { 0x01CD4, 0x01CE8 }, /* VEDIC SIGN YAJURVEDIC MIDLINE SVARITA - VEDIC SIGN VISARGA ANUDATTA WITH TAIL */ + { 0x01CED, 0x01CED }, /* VEDIC SIGN TIRYAK */ + { 0x01CF4, 0x01CF4 }, /* VEDIC TONE CANDRA ABOVE */ + { 0x01CF7, 0x01CF9 }, /* VEDIC SIGN ATIKRAMA - VEDIC TONE DOUBLE RING ABOVE */ + { 0x01DC0, 0x01DFF }, /* COMBINING DOTTED GRAVE ACCENT - COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW */ + { 0x0200B, 0x0200E }, /* ZERO WIDTH SPACE - LEFT-TO-RIGHT MARK */ + { 0x0202A, 0x0202D }, /* LEFT-TO-RIGHT EMBEDDING - LEFT-TO-RIGHT OVERRIDE */ + { 0x02060, 0x02064 }, /* WORD JOINER - INVISIBLE PLUS */ + { 0x0206A, 0x0206F }, /* INHIBIT SYMMETRIC SWAPPING - NOMINAL DIGIT SHAPES */ + { 0x020D0, 0x020F0 }, /* COMBINING LEFT HARPOON ABOVE - COMBINING ASTERISK ABOVE */ + { 0x02640, 0x02640 }, /* FEMALE SIGN */ + { 0x02642, 0x02642 }, /* MALE SIGN */ + { 0x026A7, 0x026A7 }, /* MALE WITH STROKE AND MALE AND FEMALE SIGN */ + { 0x02CEF, 0x02CF1 }, /* COPTIC COMBINING NI ABOVE - COPTIC COMBINING SPIRITUS LENIS */ + { 0x02D7F, 0x02D7F }, /* TIFINAGH CONSONANT JOINER */ + { 0x02DE0, 0x02DFF }, /* COMBINING CYRILLIC LETTER BE - COMBINING CYRILLIC LETTER IOTIFIED BIG YUS */ + { 0x0302A, 0x0302F }, /* IDEOGRAPHIC LEVEL TONE MARK - HANGUL DOUBLE DOT TONE MARK */ + { 0x03099, 0x0309A }, /* COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK - COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */ + { 0x0A66F, 0x0A672 }, /* COMBINING CYRILLIC VZMET - COMBINING CYRILLIC THOUSAND MILLIONS SIGN */ + { 0x0A674, 0x0A67D }, /* COMBINING CYRILLIC LETTER UKRAINIAN IE - COMBINING 
CYRILLIC PAYEROK */ + { 0x0A69E, 0x0A69F }, /* COMBINING CYRILLIC LETTER EF - COMBINING CYRILLIC LETTER IOTIFIED E */ + { 0x0A6F0, 0x0A6F1 }, /* BAMUM COMBINING MARK KOQNDON - BAMUM COMBINING MARK TUKWENTIS */ + { 0x0A802, 0x0A802 }, /* SYLOTI NAGRI SIGN DVISVARA */ + { 0x0A806, 0x0A806 }, /* SYLOTI NAGRI SIGN HASANTA */ + { 0x0A80B, 0x0A80B }, /* SYLOTI NAGRI SIGN ANUSVARA */ + { 0x0A823, 0x0A827 }, /* SYLOTI NAGRI VOWEL SIGN A - SYLOTI NAGRI VOWEL SIGN OO */ + { 0x0A82C, 0x0A82C }, /* SYLOTI NAGRI SIGN ALTERNATE HASANTA */ + { 0x0A880, 0x0A881 }, /* SAURASHTRA SIGN ANUSVARA - SAURASHTRA SIGN VISARGA */ + { 0x0A8B4, 0x0A8C5 }, /* SAURASHTRA CONSONANT SIGN HAARU - SAURASHTRA SIGN CANDRABINDU */ + { 0x0A8E0, 0x0A8F1 }, /* COMBINING DEVANAGARI DIGIT ZERO - COMBINING DEVANAGARI SIGN AVAGRAHA */ + { 0x0A8FF, 0x0A8FF }, /* DEVANAGARI VOWEL SIGN AY */ + { 0x0A926, 0x0A92D }, /* KAYAH LI VOWEL UE - KAYAH LI TONE CALYA PLOPHU */ + { 0x0A947, 0x0A953 }, /* REJANG VOWEL SIGN I - REJANG VIRAMA */ + { 0x0A980, 0x0A983 }, /* JAVANESE SIGN PANYANGGA - JAVANESE SIGN WIGNYAN */ + { 0x0A9B3, 0x0A9C0 }, /* JAVANESE SIGN CECAK TELU - JAVANESE PANGKON */ + { 0x0A9E5, 0x0A9E5 }, /* MYANMAR SIGN SHAN SAW */ + { 0x0AA29, 0x0AA36 }, /* CHAM VOWEL SIGN AA - CHAM CONSONANT SIGN WA */ + { 0x0AA43, 0x0AA43 }, /* CHAM CONSONANT SIGN FINAL NG */ + { 0x0AA4C, 0x0AA4D }, /* CHAM CONSONANT SIGN FINAL M - CHAM CONSONANT SIGN FINAL H */ + { 0x0AA7B, 0x0AA7D }, /* MYANMAR SIGN PAO KAREN TONE - MYANMAR SIGN TAI LAING TONE-5 */ + { 0x0AAB0, 0x0AAB0 }, /* TAI VIET MAI KANG */ + { 0x0AAB2, 0x0AAB4 }, /* TAI VIET VOWEL I - TAI VIET VOWEL U */ + { 0x0AAB7, 0x0AAB8 }, /* TAI VIET MAI KHIT - TAI VIET VOWEL IA */ + { 0x0AABE, 0x0AABF }, /* TAI VIET VOWEL AM - TAI VIET TONE MAI EK */ + { 0x0AAC1, 0x0AAC1 }, /* TAI VIET TONE MAI THO */ + { 0x0AAEB, 0x0AAEF }, /* MEETEI MAYEK VOWEL SIGN II - MEETEI MAYEK VOWEL SIGN AAU */ + { 0x0AAF5, 0x0AAF6 }, /* MEETEI MAYEK VOWEL SIGN VISARGA - MEETEI MAYEK VIRAMA */ + { 
0x0ABE3, 0x0ABEA }, /* MEETEI MAYEK VOWEL SIGN ONAP - MEETEI MAYEK VOWEL SIGN NUNG */ + { 0x0ABEC, 0x0ABED }, /* MEETEI MAYEK LUM IYEK - MEETEI MAYEK APUN IYEK */ + { 0x0FB1E, 0x0FB1E }, /* HEBREW POINT JUDEO-SPANISH VARIKA */ + { 0x0FE00, 0x0FE0F }, /* VARIATION SELECTOR-1 - VARIATION SELECTOR-16 */ + { 0x0FE20, 0x0FE2F }, /* COMBINING LIGATURE LEFT HALF - COMBINING CYRILLIC TITLO RIGHT HALF */ + { 0x0FEFF, 0x0FEFF }, /* ZERO WIDTH NO-BREAK SPACE */ + { 0x0FFF9, 0x0FFFB }, /* INTERLINEAR ANNOTATION ANCHOR - INTERLINEAR ANNOTATION TERMINATOR */ + { 0x101FD, 0x101FD }, /* U+101FD */ + { 0x102E0, 0x102E0 }, /* U+102E0 */ + { 0x10376, 0x1037A }, /* U+10376 - U+1037A */ + { 0x10A01, 0x10A03 }, /* U+10A01 - U+10A03 */ + { 0x10A05, 0x10A06 }, /* U+10A05 - U+10A06 */ + { 0x10A0C, 0x10A0F }, /* U+10A0C - U+10A0F */ + { 0x10A38, 0x10A3A }, /* U+10A38 - U+10A3A */ + { 0x10A3F, 0x10A3F }, /* U+10A3F */ + { 0x10AE5, 0x10AE6 }, /* U+10AE5 - U+10AE6 */ + { 0x10D24, 0x10D27 }, /* U+10D24 - U+10D27 */ + { 0x10D69, 0x10D6D }, /* U+10D69 - U+10D6D */ + { 0x10EAB, 0x10EAC }, /* U+10EAB - U+10EAC */ + { 0x10EFC, 0x10EFF }, /* U+10EFC - U+10EFF */ + { 0x10F46, 0x10F50 }, /* U+10F46 - U+10F50 */ + { 0x10F82, 0x10F85 }, /* U+10F82 - U+10F85 */ + { 0x11000, 0x11002 }, /* U+11000 - U+11002 */ + { 0x11038, 0x11046 }, /* U+11038 - U+11046 */ + { 0x11070, 0x11070 }, /* U+11070 */ + { 0x11073, 0x11074 }, /* U+11073 - U+11074 */ + { 0x1107F, 0x11082 }, /* U+1107F - U+11082 */ + { 0x110B0, 0x110BA }, /* U+110B0 - U+110BA */ + { 0x110BD, 0x110BD }, /* U+110BD */ + { 0x110C2, 0x110C2 }, /* U+110C2 */ + { 0x110CD, 0x110CD }, /* U+110CD */ + { 0x11100, 0x11102 }, /* U+11100 - U+11102 */ + { 0x11127, 0x11134 }, /* U+11127 - U+11134 */ + { 0x11145, 0x11146 }, /* U+11145 - U+11146 */ + { 0x11173, 0x11173 }, /* U+11173 */ + { 0x11180, 0x11182 }, /* U+11180 - U+11182 */ + { 0x111B3, 0x111C0 }, /* U+111B3 - U+111C0 */ + { 0x111C9, 0x111CC }, /* U+111C9 - U+111CC */ + { 0x111CE, 0x111CF }, /* U+111CE - 
U+111CF */ + { 0x1122C, 0x11237 }, /* U+1122C - U+11237 */ + { 0x1123E, 0x1123E }, /* U+1123E */ + { 0x11241, 0x11241 }, /* U+11241 */ + { 0x112DF, 0x112EA }, /* U+112DF - U+112EA */ + { 0x11300, 0x11303 }, /* U+11300 - U+11303 */ + { 0x1133B, 0x1133C }, /* U+1133B - U+1133C */ + { 0x1133E, 0x11344 }, /* U+1133E - U+11344 */ + { 0x11347, 0x11348 }, /* U+11347 - U+11348 */ + { 0x1134B, 0x1134D }, /* U+1134B - U+1134D */ + { 0x11357, 0x11357 }, /* U+11357 */ + { 0x11362, 0x11363 }, /* U+11362 - U+11363 */ + { 0x11366, 0x1136C }, /* U+11366 - U+1136C */ + { 0x11370, 0x11374 }, /* U+11370 - U+11374 */ + { 0x113B8, 0x113C0 }, /* U+113B8 - U+113C0 */ + { 0x113C2, 0x113C2 }, /* U+113C2 */ + { 0x113C5, 0x113C5 }, /* U+113C5 */ + { 0x113C7, 0x113CA }, /* U+113C7 - U+113CA */ + { 0x113CC, 0x113D0 }, /* U+113CC - U+113D0 */ + { 0x113D2, 0x113D2 }, /* U+113D2 */ + { 0x113E1, 0x113E2 }, /* U+113E1 - U+113E2 */ + { 0x11435, 0x11446 }, /* U+11435 - U+11446 */ + { 0x1145E, 0x1145E }, /* U+1145E */ + { 0x114B0, 0x114C3 }, /* U+114B0 - U+114C3 */ + { 0x115AF, 0x115B5 }, /* U+115AF - U+115B5 */ + { 0x115B8, 0x115C0 }, /* U+115B8 - U+115C0 */ + { 0x115DC, 0x115DD }, /* U+115DC - U+115DD */ + { 0x11630, 0x11640 }, /* U+11630 - U+11640 */ + { 0x116AB, 0x116B7 }, /* U+116AB - U+116B7 */ + { 0x1171D, 0x1172B }, /* U+1171D - U+1172B */ + { 0x1182C, 0x1183A }, /* U+1182C - U+1183A */ + { 0x11930, 0x11935 }, /* U+11930 - U+11935 */ + { 0x11937, 0x11938 }, /* U+11937 - U+11938 */ + { 0x1193B, 0x1193E }, /* U+1193B - U+1193E */ + { 0x11940, 0x11940 }, /* U+11940 */ + { 0x11942, 0x11943 }, /* U+11942 - U+11943 */ + { 0x119D1, 0x119D7 }, /* U+119D1 - U+119D7 */ + { 0x119DA, 0x119E0 }, /* U+119DA - U+119E0 */ + { 0x119E4, 0x119E4 }, /* U+119E4 */ + { 0x11A01, 0x11A0A }, /* U+11A01 - U+11A0A */ + { 0x11A33, 0x11A39 }, /* U+11A33 - U+11A39 */ + { 0x11A3B, 0x11A3E }, /* U+11A3B - U+11A3E */ + { 0x11A47, 0x11A47 }, /* U+11A47 */ + { 0x11A51, 0x11A5B }, /* U+11A51 - U+11A5B */ + { 0x11A8A, 0x11A99 }, 
/* U+11A8A - U+11A99 */ + { 0x11C2F, 0x11C36 }, /* U+11C2F - U+11C36 */ + { 0x11C38, 0x11C3F }, /* U+11C38 - U+11C3F */ + { 0x11C92, 0x11CA7 }, /* U+11C92 - U+11CA7 */ + { 0x11CA9, 0x11CB6 }, /* U+11CA9 - U+11CB6 */ + { 0x11D31, 0x11D36 }, /* U+11D31 - U+11D36 */ + { 0x11D3A, 0x11D3A }, /* U+11D3A */ + { 0x11D3C, 0x11D3D }, /* U+11D3C - U+11D3D */ + { 0x11D3F, 0x11D45 }, /* U+11D3F - U+11D45 */ + { 0x11D47, 0x11D47 }, /* U+11D47 */ + { 0x11D8A, 0x11D8E }, /* U+11D8A - U+11D8E */ + { 0x11D90, 0x11D91 }, /* U+11D90 - U+11D91 */ + { 0x11D93, 0x11D97 }, /* U+11D93 - U+11D97 */ + { 0x11EF3, 0x11EF6 }, /* U+11EF3 - U+11EF6 */ + { 0x11F00, 0x11F01 }, /* U+11F00 - U+11F01 */ + { 0x11F03, 0x11F03 }, /* U+11F03 */ + { 0x11F34, 0x11F3A }, /* U+11F34 - U+11F3A */ + { 0x11F3E, 0x11F42 }, /* U+11F3E - U+11F42 */ + { 0x11F5A, 0x11F5A }, /* U+11F5A */ + { 0x13430, 0x13440 }, /* U+13430 - U+13440 */ + { 0x13447, 0x13455 }, /* U+13447 - U+13455 */ + { 0x1611E, 0x1612F }, /* U+1611E - U+1612F */ + { 0x16AF0, 0x16AF4 }, /* U+16AF0 - U+16AF4 */ + { 0x16B30, 0x16B36 }, /* U+16B30 - U+16B36 */ + { 0x16F4F, 0x16F4F }, /* U+16F4F */ + { 0x16F51, 0x16F87 }, /* U+16F51 - U+16F87 */ + { 0x16F8F, 0x16F92 }, /* U+16F8F - U+16F92 */ + { 0x16FE4, 0x16FE4 }, /* U+16FE4 */ + { 0x16FF0, 0x16FF1 }, /* U+16FF0 - U+16FF1 */ + { 0x1BC9D, 0x1BC9E }, /* U+1BC9D - U+1BC9E */ + { 0x1BCA0, 0x1BCA3 }, /* U+1BCA0 - U+1BCA3 */ + { 0x1CF00, 0x1CF2D }, /* U+1CF00 - U+1CF2D */ + { 0x1CF30, 0x1CF46 }, /* U+1CF30 - U+1CF46 */ + { 0x1D165, 0x1D169 }, /* U+1D165 - U+1D169 */ + { 0x1D16D, 0x1D182 }, /* U+1D16D - U+1D182 */ + { 0x1D185, 0x1D18B }, /* U+1D185 - U+1D18B */ + { 0x1D1AA, 0x1D1AD }, /* U+1D1AA - U+1D1AD */ + { 0x1D242, 0x1D244 }, /* U+1D242 - U+1D244 */ + { 0x1DA00, 0x1DA36 }, /* U+1DA00 - U+1DA36 */ + { 0x1DA3B, 0x1DA6C }, /* U+1DA3B - U+1DA6C */ + { 0x1DA75, 0x1DA75 }, /* U+1DA75 */ + { 0x1DA84, 0x1DA84 }, /* U+1DA84 */ + { 0x1DA9B, 0x1DA9F }, /* U+1DA9B - U+1DA9F */ + { 0x1DAA1, 0x1DAAF }, /* U+1DAA1 - 
U+1DAAF */ + { 0x1E000, 0x1E006 }, /* U+1E000 - U+1E006 */ + { 0x1E008, 0x1E018 }, /* U+1E008 - U+1E018 */ + { 0x1E01B, 0x1E021 }, /* U+1E01B - U+1E021 */ + { 0x1E023, 0x1E024 }, /* U+1E023 - U+1E024 */ + { 0x1E026, 0x1E02A }, /* U+1E026 - U+1E02A */ + { 0x1E08F, 0x1E08F }, /* U+1E08F */ + { 0x1E130, 0x1E136 }, /* U+1E130 - U+1E136 */ + { 0x1E2AE, 0x1E2AE }, /* U+1E2AE */ + { 0x1E2EC, 0x1E2EF }, /* U+1E2EC - U+1E2EF */ + { 0x1E4EC, 0x1E4EF }, /* U+1E4EC - U+1E4EF */ + { 0x1E5EE, 0x1E5EF }, /* U+1E5EE - U+1E5EF */ + { 0x1E8D0, 0x1E8D6 }, /* U+1E8D0 - U+1E8D6 */ + { 0x1E944, 0x1E94A }, /* U+1E944 - U+1E94A */ + { 0x1F3FB, 0x1F3FF }, /* U+1F3FB - U+1F3FF */ + { 0x1F9B0, 0x1F9B3 }, /* U+1F9B0 - U+1F9B3 */ + { 0xE0001, 0xE0001 }, /* U+E0001 */ + { 0xE0020, 0xE007F }, /* U+E0020 - U+E007F */ + { 0xE0100, 0xE01EF }, /* U+E0100 - U+E01EF */ +}; + +/* Double-width character ranges */ +static const struct interval double_width_ranges[] = { + { 0x01100, 0x0115F }, /* HANGUL CHOSEONG KIYEOK - HANGUL CHOSEONG FILLER */ + { 0x0231A, 0x0231B }, /* WATCH - HOURGLASS */ + { 0x02329, 0x0232A }, /* LEFT-POINTING ANGLE BRACKET - RIGHT-POINTING ANGLE BRACKET */ + { 0x023E9, 0x023EC }, /* BLACK RIGHT-POINTING DOUBLE TRIANGLE - BLACK DOWN-POINTING DOUBLE TRIANGLE */ + { 0x023F0, 0x023F0 }, /* ALARM CLOCK */ + { 0x023F3, 0x023F3 }, /* HOURGLASS WITH FLOWING SAND */ + { 0x025FD, 0x025FE }, /* WHITE MEDIUM SMALL SQUARE - BLACK MEDIUM SMALL SQUARE */ + { 0x02614, 0x02615 }, /* UMBRELLA WITH RAIN DROPS - HOT BEVERAGE */ + { 0x02630, 0x02637 }, /* TRIGRAM FOR HEAVEN - TRIGRAM FOR EARTH */ + { 0x02648, 0x02653 }, /* ARIES - PISCES */ + { 0x0267F, 0x0267F }, /* WHEELCHAIR SYMBOL */ + { 0x0268A, 0x0268F }, /* MONOGRAM FOR YANG - DIGRAM FOR GREATER YIN */ + { 0x02693, 0x02693 }, /* ANCHOR */ + { 0x026A1, 0x026A1 }, /* HIGH VOLTAGE SIGN */ + { 0x026AA, 0x026AB }, /* MEDIUM WHITE CIRCLE - MEDIUM BLACK CIRCLE */ + { 0x026BD, 0x026BE }, /* SOCCER BALL - BASEBALL */ + { 0x026C4, 0x026C5 }, /* SNOWMAN 
WITHOUT SNOW - SUN BEHIND CLOUD */ + { 0x026CE, 0x026CE }, /* OPHIUCHUS */ + { 0x026D4, 0x026D4 }, /* NO ENTRY */ + { 0x026EA, 0x026EA }, /* CHURCH */ + { 0x026F2, 0x026F3 }, /* FOUNTAIN - FLAG IN HOLE */ + { 0x026F5, 0x026F5 }, /* SAILBOAT */ + { 0x026FA, 0x026FA }, /* TENT */ + { 0x026FD, 0x026FD }, /* FUEL PUMP */ + { 0x02705, 0x02705 }, /* WHITE HEAVY CHECK MARK */ + { 0x0270A, 0x0270B }, /* RAISED FIST - RAISED HAND */ + { 0x02728, 0x02728 }, /* SPARKLES */ + { 0x0274C, 0x0274C }, /* CROSS MARK */ + { 0x0274E, 0x0274E }, /* NEGATIVE SQUARED CROSS MARK */ + { 0x02753, 0x02755 }, /* BLACK QUESTION MARK ORNAMENT - WHITE EXCLAMATION MARK ORNAMENT */ + { 0x02757, 0x02757 }, /* HEAVY EXCLAMATION MARK SYMBOL */ + { 0x02795, 0x02797 }, /* HEAVY PLUS SIGN - HEAVY DIVISION SIGN */ + { 0x027B0, 0x027B0 }, /* CURLY LOOP */ + { 0x027BF, 0x027BF }, /* DOUBLE CURLY LOOP */ + { 0x02B1B, 0x02B1C }, /* BLACK LARGE SQUARE - WHITE LARGE SQUARE */ + { 0x02B50, 0x02B50 }, /* WHITE MEDIUM STAR */ + { 0x02B55, 0x02B55 }, /* HEAVY LARGE CIRCLE */ + { 0x02E80, 0x02E99 }, /* CJK RADICAL REPEAT - CJK RADICAL RAP */ + { 0x02E9B, 0x02EF3 }, /* CJK RADICAL CHOKE - CJK RADICAL C-SIMPLIFIED TURTLE */ + { 0x02F00, 0x02FD5 }, /* KANGXI RADICAL ONE - KANGXI RADICAL FLUTE */ + { 0x02FF0, 0x03029 }, /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT - HANGZHOU NUMERAL NINE */ + { 0x03030, 0x0303E }, /* WAVY DASH - IDEOGRAPHIC VARIATION INDICATOR */ + { 0x03041, 0x03096 }, /* HIRAGANA LETTER SMALL A - HIRAGANA LETTER SMALL KE */ + { 0x0309B, 0x030FF }, /* KATAKANA-HIRAGANA VOICED SOUND MARK - KATAKANA DIGRAPH KOTO */ + { 0x03105, 0x0312F }, /* BOPOMOFO LETTER B - BOPOMOFO LETTER NN */ + { 0x03131, 0x0318E }, /* HANGUL LETTER KIYEOK - HANGUL LETTER ARAEAE */ + { 0x03190, 0x031E5 }, /* IDEOGRAPHIC ANNOTATION LINKING MARK - CJK STROKE SZP */ + { 0x031EF, 0x0321E }, /* IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION - PARENTHESIZED KOREAN CHARACTER O HU */ + { 0x03220, 0x03247 }, /* PARENTHESIZED 
IDEOGRAPH ONE - CIRCLED IDEOGRAPH KOTO */ + { 0x03250, 0x0A48C }, /* PARTNERSHIP SIGN - YI SYLLABLE YYR */ + { 0x0A490, 0x0A4C6 }, /* YI RADICAL QOT - YI RADICAL KE */ + { 0x0A960, 0x0A97C }, /* HANGUL CHOSEONG TIKEUT-MIEUM - HANGUL CHOSEONG SSANGYEORINHIEUH */ + { 0x0AC00, 0x0D7A3 }, /* HANGUL SYLLABLE GA - HANGUL SYLLABLE HIH */ + { 0x0F900, 0x0FAFF }, /* U+0F900 - U+0FAFF */ + { 0x0FE10, 0x0FE19 }, /* PRESENTATION FORM FOR VERTICAL COMMA - PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS */ + { 0x0FE30, 0x0FE52 }, /* PRESENTATION FORM FOR VERTICAL TWO DOT LEADER - SMALL FULL STOP */ + { 0x0FE54, 0x0FE66 }, /* SMALL SEMICOLON - SMALL EQUALS SIGN */ + { 0x0FE68, 0x0FE6B }, /* SMALL REVERSE SOLIDUS - SMALL COMMERCIAL AT */ + { 0x0FF01, 0x0FF60 }, /* FULLWIDTH EXCLAMATION MARK - FULLWIDTH RIGHT WHITE PARENTHESIS */ + { 0x0FFE0, 0x0FFE6 }, /* FULLWIDTH CENT SIGN - FULLWIDTH WON SIGN */ + { 0x16FE0, 0x16FE3 }, /* U+16FE0 - U+16FE3 */ + { 0x17000, 0x187F7 }, /* U+17000 - U+187F7 */ + { 0x18800, 0x18CD5 }, /* U+18800 - U+18CD5 */ + { 0x18CFF, 0x18D08 }, /* U+18CFF - U+18D08 */ + { 0x1AFF0, 0x1AFF3 }, /* U+1AFF0 - U+1AFF3 */ + { 0x1AFF5, 0x1AFFB }, /* U+1AFF5 - U+1AFFB */ + { 0x1AFFD, 0x1AFFE }, /* U+1AFFD - U+1AFFE */ + { 0x1B000, 0x1B122 }, /* U+1B000 - U+1B122 */ + { 0x1B132, 0x1B132 }, /* U+1B132 */ + { 0x1B150, 0x1B152 }, /* U+1B150 - U+1B152 */ + { 0x1B155, 0x1B155 }, /* U+1B155 */ + { 0x1B164, 0x1B167 }, /* U+1B164 - U+1B167 */ + { 0x1B170, 0x1B2FB }, /* U+1B170 - U+1B2FB */ + { 0x1D300, 0x1D356 }, /* U+1D300 - U+1D356 */ + { 0x1D360, 0x1D376 }, /* U+1D360 - U+1D376 */ + { 0x1F000, 0x1F02F }, /* U+1F000 - U+1F02F */ + { 0x1F0A0, 0x1F0FF }, /* U+1F0A0 - U+1F0FF */ + { 0x1F18E, 0x1F18E }, /* U+1F18E */ + { 0x1F191, 0x1F19A }, /* U+1F191 - U+1F19A */ + { 0x1F200, 0x1F202 }, /* U+1F200 - U+1F202 */ + { 0x1F210, 0x1F23B }, /* U+1F210 - U+1F23B */ + { 0x1F240, 0x1F248 }, /* U+1F240 - U+1F248 */ + { 0x1F250, 0x1F251 }, /* U+1F250 - U+1F251 */ + { 0x1F260, 0x1F265 }, /* 
U+1F260 - U+1F265 */ + { 0x1F300, 0x1F3FA }, /* U+1F300 - U+1F3FA */ + { 0x1F400, 0x1F64F }, /* U+1F400 - U+1F64F */ + { 0x1F680, 0x1F9AF }, /* U+1F680 - U+1F9AF */ + { 0x1F9B4, 0x1FAFF }, /* U+1F9B4 - U+1FAFF */ + { 0x20000, 0x2FFFD }, /* U+20000 - U+2FFFD */ + { 0x30000, 0x3FFFD }, /* U+30000 - U+3FFFD */ +}; + + +static int ucs_cmp(const void *key, const void *element) { uint32_t cp = *(uint32_t *)key; - struct interval e = *(struct interval *) elt; + const struct interval *e = element; - if (cp > e.last) + if (cp > e->last) return 1; - else if (cp < e.first) + if (cp < e->first) return -1; return 0; } -static const struct interval double_width[] = { - { 0x1100, 0x115F }, { 0x2329, 0x232A }, { 0x2E80, 0x303E }, - { 0x3040, 0xA4CF }, { 0xAC00, 0xD7A3 }, { 0xF900, 0xFAFF }, - { 0xFE10, 0xFE19 }, { 0xFE30, 0xFE6F }, { 0xFF00, 0xFF60 }, - { 0xFFE0, 0xFFE6 }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD } -}; - -bool ucs_is_double_width(uint32_t cp) +static bool is_in_interval(uint32_t cp, const struct interval *intervals, size_t count) { - if (cp < double_width[0].first || - cp > double_width[ARRAY_SIZE(double_width) - 1].last) + if (cp < intervals[0].first || cp > intervals[count - 1].last) return false; - return bsearch(&cp, double_width, ARRAY_SIZE(double_width), - sizeof(struct interval), ucs_cmp) != NULL; + return __inline_bsearch(&cp, intervals, count, + sizeof(*intervals), ucs_cmp) != NULL; +} + +/** + * ucs_is_zero_width() - Determine if a Unicode code point is zero-width. + * @cp: Unicode code point (UCS-4) + * + * Return: true if the character is zero-width, false otherwise + */ +bool ucs_is_zero_width(uint32_t cp) +{ + return is_in_interval(cp, zero_width_ranges, ARRAY_SIZE(zero_width_ranges)); +} + +/** + * ucs_is_double_width() - Determine if a Unicode code point is double-width. 
+ * @cp: Unicode code point (UCS-4) + * + * Return: true if the character is double-width, false otherwise + */ +bool ucs_is_double_width(uint32_t cp) +{ + return is_in_interval(cp, double_width_ranges, ARRAY_SIZE(double_width_ranges)); } diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 7d778752dcef..b3a911866662 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,11 +29,7 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); -static inline bool ucs_is_zero_width(uint32_t cp) -{ - /* coming soon */ - return false; -} +bool ucs_is_zero_width(uint32_t cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) From f2347b0cdf65e614732c2307863c95304f72d9d9 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:13:58 -0400 Subject: [PATCH 023/105] vt: introduce gen_ucs_recompose.py to create ucs_recompose.c The generated code includes a table that maps base character + combining mark pairs to their precomposed equivalents using Python's unicodedata module. It also provides the ucs_recompose() function to query that table. The default script behavior is to create a table with most commonly used Latin, Greek, and Cyrillic recomposition pairs only. It is much smaller than the table with all possible recomposition pairs (71 entries vs 1000 entries). But if one needs/wants the full table then simply running the script with the --full argument will generate it. 
Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-7-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/gen_ucs_recompose.py | 321 ++++++++++++++++++++++++++++ 1 file changed, 321 insertions(+) create mode 100755 drivers/tty/vt/gen_ucs_recompose.py diff --git a/drivers/tty/vt/gen_ucs_recompose.py b/drivers/tty/vt/gen_ucs_recompose.py new file mode 100755 index 000000000000..64418803e49e --- /dev/null +++ b/drivers/tty/vt/gen_ucs_recompose.py @@ -0,0 +1,321 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# This script uses Python's unicodedata module to generate ucs_recompose.c. +# The generated code maps base character + combining mark pairs to their +# precomposed equivalents. +# +# Usage: +# python gen_ucs_recompose.py # Generate with common recomposition pairs +# python gen_ucs_recompose.py --full # Generate with all recomposition pairs + +import unicodedata +import sys +import argparse +import textwrap + +common_recompose_description = "most commonly used Latin, Greek, and Cyrillic recomposition pairs only" +COMMON_RECOMPOSITION_PAIRS = [ + # Latin letters with accents - uppercase + (0x0041, 0x0300, 0x00C0), # A + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER A WITH GRAVE + (0x0041, 0x0301, 0x00C1), # A + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER A WITH ACUTE + (0x0041, 0x0302, 0x00C2), # A + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER A WITH CIRCUMFLEX + (0x0041, 0x0303, 0x00C3), # A + COMBINING TILDE = LATIN CAPITAL LETTER A WITH TILDE + (0x0041, 0x0308, 0x00C4), # A + COMBINING DIAERESIS = LATIN CAPITAL LETTER A WITH DIAERESIS + (0x0041, 0x030A, 0x00C5), # A + COMBINING RING ABOVE = LATIN CAPITAL LETTER A WITH RING ABOVE + (0x0043, 0x0327, 0x00C7), # C + COMBINING CEDILLA = LATIN CAPITAL LETTER C WITH CEDILLA + (0x0045, 0x0300, 0x00C8), # E + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER E WITH GRAVE + (0x0045, 0x0301, 0x00C9), # E + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER 
E WITH ACUTE + (0x0045, 0x0302, 0x00CA), # E + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER E WITH CIRCUMFLEX + (0x0045, 0x0308, 0x00CB), # E + COMBINING DIAERESIS = LATIN CAPITAL LETTER E WITH DIAERESIS + (0x0049, 0x0300, 0x00CC), # I + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER I WITH GRAVE + (0x0049, 0x0301, 0x00CD), # I + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER I WITH ACUTE + (0x0049, 0x0302, 0x00CE), # I + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER I WITH CIRCUMFLEX + (0x0049, 0x0308, 0x00CF), # I + COMBINING DIAERESIS = LATIN CAPITAL LETTER I WITH DIAERESIS + (0x004E, 0x0303, 0x00D1), # N + COMBINING TILDE = LATIN CAPITAL LETTER N WITH TILDE + (0x004F, 0x0300, 0x00D2), # O + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER O WITH GRAVE + (0x004F, 0x0301, 0x00D3), # O + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER O WITH ACUTE + (0x004F, 0x0302, 0x00D4), # O + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER O WITH CIRCUMFLEX + (0x004F, 0x0303, 0x00D5), # O + COMBINING TILDE = LATIN CAPITAL LETTER O WITH TILDE + (0x004F, 0x0308, 0x00D6), # O + COMBINING DIAERESIS = LATIN CAPITAL LETTER O WITH DIAERESIS + (0x0055, 0x0300, 0x00D9), # U + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER U WITH GRAVE + (0x0055, 0x0301, 0x00DA), # U + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER U WITH ACUTE + (0x0055, 0x0302, 0x00DB), # U + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER U WITH CIRCUMFLEX + (0x0055, 0x0308, 0x00DC), # U + COMBINING DIAERESIS = LATIN CAPITAL LETTER U WITH DIAERESIS + (0x0059, 0x0301, 0x00DD), # Y + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER Y WITH ACUTE + + # Latin letters with accents - lowercase + (0x0061, 0x0300, 0x00E0), # a + COMBINING GRAVE ACCENT = LATIN SMALL LETTER A WITH GRAVE + (0x0061, 0x0301, 0x00E1), # a + COMBINING ACUTE ACCENT = LATIN SMALL LETTER A WITH ACUTE + (0x0061, 0x0302, 0x00E2), # a + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER A WITH CIRCUMFLEX + (0x0061, 0x0303, 0x00E3), # a + COMBINING 
TILDE = LATIN SMALL LETTER A WITH TILDE + (0x0061, 0x0308, 0x00E4), # a + COMBINING DIAERESIS = LATIN SMALL LETTER A WITH DIAERESIS + (0x0061, 0x030A, 0x00E5), # a + COMBINING RING ABOVE = LATIN SMALL LETTER A WITH RING ABOVE + (0x0063, 0x0327, 0x00E7), # c + COMBINING CEDILLA = LATIN SMALL LETTER C WITH CEDILLA + (0x0065, 0x0300, 0x00E8), # e + COMBINING GRAVE ACCENT = LATIN SMALL LETTER E WITH GRAVE + (0x0065, 0x0301, 0x00E9), # e + COMBINING ACUTE ACCENT = LATIN SMALL LETTER E WITH ACUTE + (0x0065, 0x0302, 0x00EA), # e + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER E WITH CIRCUMFLEX + (0x0065, 0x0308, 0x00EB), # e + COMBINING DIAERESIS = LATIN SMALL LETTER E WITH DIAERESIS + (0x0069, 0x0300, 0x00EC), # i + COMBINING GRAVE ACCENT = LATIN SMALL LETTER I WITH GRAVE + (0x0069, 0x0301, 0x00ED), # i + COMBINING ACUTE ACCENT = LATIN SMALL LETTER I WITH ACUTE + (0x0069, 0x0302, 0x00EE), # i + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER I WITH CIRCUMFLEX + (0x0069, 0x0308, 0x00EF), # i + COMBINING DIAERESIS = LATIN SMALL LETTER I WITH DIAERESIS + (0x006E, 0x0303, 0x00F1), # n + COMBINING TILDE = LATIN SMALL LETTER N WITH TILDE + (0x006F, 0x0300, 0x00F2), # o + COMBINING GRAVE ACCENT = LATIN SMALL LETTER O WITH GRAVE + (0x006F, 0x0301, 0x00F3), # o + COMBINING ACUTE ACCENT = LATIN SMALL LETTER O WITH ACUTE + (0x006F, 0x0302, 0x00F4), # o + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER O WITH CIRCUMFLEX + (0x006F, 0x0303, 0x00F5), # o + COMBINING TILDE = LATIN SMALL LETTER O WITH TILDE + (0x006F, 0x0308, 0x00F6), # o + COMBINING DIAERESIS = LATIN SMALL LETTER O WITH DIAERESIS + (0x0075, 0x0300, 0x00F9), # u + COMBINING GRAVE ACCENT = LATIN SMALL LETTER U WITH GRAVE + (0x0075, 0x0301, 0x00FA), # u + COMBINING ACUTE ACCENT = LATIN SMALL LETTER U WITH ACUTE + (0x0075, 0x0302, 0x00FB), # u + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER U WITH CIRCUMFLEX + (0x0075, 0x0308, 0x00FC), # u + COMBINING DIAERESIS = LATIN SMALL LETTER U WITH DIAERESIS + (0x0079, 
0x0301, 0x00FD), # y + COMBINING ACUTE ACCENT = LATIN SMALL LETTER Y WITH ACUTE + (0x0079, 0x0308, 0x00FF), # y + COMBINING DIAERESIS = LATIN SMALL LETTER Y WITH DIAERESIS + + # Common Greek characters + (0x0391, 0x0301, 0x0386), # Α + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER ALPHA WITH TONOS + (0x0395, 0x0301, 0x0388), # Ε + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER EPSILON WITH TONOS + (0x0397, 0x0301, 0x0389), # Η + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER ETA WITH TONOS + (0x0399, 0x0301, 0x038A), # Ι + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER IOTA WITH TONOS + (0x039F, 0x0301, 0x038C), # Ο + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER OMICRON WITH TONOS + (0x03A5, 0x0301, 0x038E), # Υ + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER UPSILON WITH TONOS + (0x03A9, 0x0301, 0x038F), # Ω + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER OMEGA WITH TONOS + (0x03B1, 0x0301, 0x03AC), # α + COMBINING ACUTE ACCENT = GREEK SMALL LETTER ALPHA WITH TONOS + (0x03B5, 0x0301, 0x03AD), # ε + COMBINING ACUTE ACCENT = GREEK SMALL LETTER EPSILON WITH TONOS + (0x03B7, 0x0301, 0x03AE), # η + COMBINING ACUTE ACCENT = GREEK SMALL LETTER ETA WITH TONOS + (0x03B9, 0x0301, 0x03AF), # ι + COMBINING ACUTE ACCENT = GREEK SMALL LETTER IOTA WITH TONOS + (0x03BF, 0x0301, 0x03CC), # ο + COMBINING ACUTE ACCENT = GREEK SMALL LETTER OMICRON WITH TONOS + (0x03C5, 0x0301, 0x03CD), # υ + COMBINING ACUTE ACCENT = GREEK SMALL LETTER UPSILON WITH TONOS + (0x03C9, 0x0301, 0x03CE), # ω + COMBINING ACUTE ACCENT = GREEK SMALL LETTER OMEGA WITH TONOS + + # Common Cyrillic characters + (0x0418, 0x0306, 0x0419), # И + COMBINING BREVE = CYRILLIC CAPITAL LETTER SHORT I + (0x0438, 0x0306, 0x0439), # и + COMBINING BREVE = CYRILLIC SMALL LETTER SHORT I + (0x0423, 0x0306, 0x040E), # У + COMBINING BREVE = CYRILLIC CAPITAL LETTER SHORT U + (0x0443, 0x0306, 0x045E), # у + COMBINING BREVE = CYRILLIC SMALL LETTER SHORT U +] + +full_recompose_description = "all possible recomposition pairs from the 
Unicode BMP" +def collect_all_recomposition_pairs(): + """Collect all possible recomposition pairs from the Unicode data.""" + # Map to store recomposition pairs: (base, combining) -> recomposed + recompose_map = {} + + # Process all assigned Unicode code points in BMP (Basic Multilingual Plane) + # We limit to BMP (0x0000-0xFFFF) to keep our table smaller with uint16_t + for cp in range(0, 0x10000): + try: + char = chr(cp) + + # Skip unassigned or control characters + if not unicodedata.name(char, ''): + continue + + # Find decomposition + decomp = unicodedata.decomposition(char) + if not decomp or '<' in decomp: # Skip compatibility decompositions + continue + + # Parse the decomposition + parts = decomp.split() + if len(parts) == 2: # Simple base + combining mark + base = int(parts[0], 16) + combining = int(parts[1], 16) + + # Only store if both are in BMP + if base < 0x10000 and combining < 0x10000: + recompose_map[(base, combining)] = cp + + except (ValueError, TypeError): + continue + + # Convert to a list of tuples and sort for binary search + recompose_list = [(base, combining, recomposed) + for (base, combining), recomposed in recompose_map.items()] + recompose_list.sort() + + return recompose_list + +def validate_common_pairs(full_list): + """Validate that all common pairs are in the full list. + + Raises: + ValueError: If any common pair is missing or has a different recomposition + value than what's in the full table. 
+ """ + full_pairs = {(base, combining): recomposed for base, combining, recomposed in full_list} + for base, combining, recomposed in COMMON_RECOMPOSITION_PAIRS: + full_recomposed = full_pairs.get((base, combining)) + if full_recomposed is None: + error_msg = f"Error: Common pair (0x{base:04X}, 0x{combining:04X}) not found in full data" + print(error_msg) + raise ValueError(error_msg) + elif full_recomposed != recomposed: + error_msg = (f"Error: Common pair (0x{base:04X}, 0x{combining:04X}) has different recomposition: " + f"0x{recomposed:04X} vs 0x{full_recomposed:04X}") + print(error_msg) + raise ValueError(error_msg) + +def generate_recomposition_table(use_full_list=False): + """Generate the recomposition table C code.""" + # Output file name + c_file = "ucs_recompose.c" + + # Get Unicode version information + unicode_version = unicodedata.unidata_version + + # Collect all recomposition pairs for validation + full_recompose_list = collect_all_recomposition_pairs() + + # Decide which list to use + if use_full_list: + print("Using full recomposition list...") + recompose_list = full_recompose_list + table_description = full_recompose_description + alt_list = COMMON_RECOMPOSITION_PAIRS + alt_description = common_recompose_description + else: + print("Using common recomposition list...") + # Validate that all common pairs are in the full list + validate_common_pairs(full_recompose_list) + recompose_list = sorted(COMMON_RECOMPOSITION_PAIRS) + table_description = common_recompose_description + alt_list = full_recompose_list + alt_description = full_recompose_description + generation_mode = " --full" if use_full_list else "" + alternative_mode = " --full" if not use_full_list else "" + table_description_detail = f"{table_description} ({len(recompose_list)} entries)" + alt_description_detail = f"{alt_description} ({len(alt_list)} entries)" + + # Calculate min/max values for boundary checks + min_base = min(base for base, _, _ in recompose_list) + max_base = max(base 
for base, _, _ in recompose_list) + min_combining = min(combining for _, combining, _ in recompose_list) + max_combining = max(combining for _, combining, _ in recompose_list) + + # Generate implementation file + with open(c_file, 'w') as f: + f.write(f"""\ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucs_recompose.c - Unicode character recomposition + * + * Auto-generated by gen_ucs_recompose.py{generation_mode} + * + * Unicode Version: {unicode_version} + * +{textwrap.fill( + f"This file contains a table with {table_description_detail}. " + + f"To generate a table with {alt_description_detail} instead, run:", + width=75, initial_indent=" * ", subsequent_indent=" * ")} + * + * python gen_ucs_recompose.py{alternative_mode} + */ + +#include +#include +#include +#include + +/* + * Structure for recomposition pairs. + * First element is the base character, second is the combining mark, + * third is the recomposed character. + * Using uint16_t to save space since all values are within BMP range. 
+ */ +struct recomposition {{ + uint16_t base; + uint16_t combining; + uint16_t recomposed; +}}; + +/* + * Table of {table_description} + * Sorted by base character and then combining character for binary search + */ +static const struct recomposition recomposition_table[] = {{ +""") + + # Write the recomposition table with comments + for base, combining, recomposed in recompose_list: + try: + base_name = unicodedata.name(chr(base)) + combining_name = unicodedata.name(chr(combining)) + recomposed_name = unicodedata.name(chr(recomposed)) + comment = f"/* {base_name} + {combining_name} = {recomposed_name} */" + except ValueError: + comment = f"/* U+{base:04X} + U+{combining:04X} = U+{recomposed:04X} */" + f.write(f"\t{{ 0x{base:04X}, 0x{combining:04X}, 0x{recomposed:04X} }}, {comment}\n") + + f.write(f"""\ +}}; + +/* + * Boundary values for quick rejection + * These are calculated by analyzing the table during generation + */ +#define MIN_BASE_CHAR 0x{min_base:04X} +#define MAX_BASE_CHAR 0x{max_base:04X} +#define MIN_COMBINING_CHAR 0x{min_combining:04X} +#define MAX_COMBINING_CHAR 0x{max_combining:04X} + +struct compare_key {{ + uint16_t base; + uint16_t combining; +}}; + +static int recomposition_compare(const void *key, const void *element) +{{ + const struct compare_key *search_key = key; + const struct recomposition *table_entry = element; + + /* Compare base character first */ + if (search_key->base < table_entry->base) + return -1; + if (search_key->base > table_entry->base) + return 1; + + /* Base characters match, now compare combining character */ + if (search_key->combining < table_entry->combining) + return -1; + if (search_key->combining > table_entry->combining) + return 1; + + /* Both match */ + return 0; +}} + +/** + * Attempt to recompose two Unicode characters into a single character. 
+ * + * @param previous: Previous Unicode code point (UCS-4) + * @param current: Current Unicode code point (UCS-4) + * Return: Recomposed Unicode code point, or 0 if no recomposition is possible + */ +uint32_t ucs_recompose(uint32_t base, uint32_t combining) +{{ + /* Check if characters are within the range of our table */ + if (base < MIN_BASE_CHAR || base > MAX_BASE_CHAR || + combining < MIN_COMBINING_CHAR || combining > MAX_COMBINING_CHAR) + return 0; + + struct compare_key key = {{ base, combining }}; + + struct recomposition *result = + __inline_bsearch(&key, recomposition_table, + ARRAY_SIZE(recomposition_table), + sizeof(*recomposition_table), + recomposition_compare); + + return result ? result->recomposed : 0; +}} +""") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Generate Unicode recomposition table") + parser.add_argument("--full", action="store_true", + help="Generate a full recomposition table (default: common pairs only)") + args = parser.parse_args() + + generate_recomposition_table(use_full_list=args.full) From 54af55b990eda5a6a0140a3cded8094b42c0c3b7 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:13:59 -0400 Subject: [PATCH 024/105] vt: create ucs_recompose.c using gen_ucs_recompose.py This provides ucs_recompose() to recompose two Unicode characters into a single character if possible. This is needed for the VT to properly display decomposed UTF8 sequences. Note: scripts/checkpatch.pl complains about "... exceeds 100 columns". Please ignore. 
Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-8-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/Makefile | 2 +- drivers/tty/vt/ucs_recompose.c | 170 +++++++++++++++++++++++++++++++++ include/linux/consolemap.h | 6 ++ 3 files changed, 177 insertions(+), 1 deletion(-) create mode 100644 drivers/tty/vt/ucs_recompose.c diff --git a/drivers/tty/vt/Makefile b/drivers/tty/vt/Makefile index bee69277bbc3..a63f6c9438da 100644 --- a/drivers/tty/vt/Makefile +++ b/drivers/tty/vt/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_VT) += vt_ioctl.o vc_screen.o \ selection.o keyboard.o \ vt.o defkeymap.o obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o \ - ucs_width.o + ucs_width.o ucs_recompose.o # Files generated that shall be removed upon make clean clean-files := consolemap_deftbl.c defkeymap.c diff --git a/drivers/tty/vt/ucs_recompose.c b/drivers/tty/vt/ucs_recompose.c new file mode 100644 index 000000000000..5c30c989def3 --- /dev/null +++ b/drivers/tty/vt/ucs_recompose.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucs_recompose.c - Unicode character recomposition + * + * Auto-generated by gen_ucs_recompose.py + * + * Unicode Version: 16.0.0 + * + * This file contains a table with most commonly used Latin, Greek, and + * Cyrillic recomposition pairs only (71 entries). To generate a table with + * all possible recomposition pairs from the Unicode BMP (1000 entries) + * instead, run: + * + * python gen_ucs_recompose.py --full + */ + +#include +#include +#include +#include + +/* + * Structure for recomposition pairs. + * First element is the base character, second is the combining mark, + * third is the recomposed character. + * Using uint16_t to save space since all values are within BMP range. 
+ */ +struct recomposition { + uint16_t base; + uint16_t combining; + uint16_t recomposed; +}; + +/* + * Table of most commonly used Latin, Greek, and Cyrillic recomposition pairs only + * Sorted by base character and then combining character for binary search + */ +static const struct recomposition recomposition_table[] = { + { 0x0041, 0x0300, 0x00C0 }, /* LATIN CAPITAL LETTER A + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER A WITH GRAVE */ + { 0x0041, 0x0301, 0x00C1 }, /* LATIN CAPITAL LETTER A + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER A WITH ACUTE */ + { 0x0041, 0x0302, 0x00C2 }, /* LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER A WITH CIRCUMFLEX */ + { 0x0041, 0x0303, 0x00C3 }, /* LATIN CAPITAL LETTER A + COMBINING TILDE = LATIN CAPITAL LETTER A WITH TILDE */ + { 0x0041, 0x0308, 0x00C4 }, /* LATIN CAPITAL LETTER A + COMBINING DIAERESIS = LATIN CAPITAL LETTER A WITH DIAERESIS */ + { 0x0041, 0x030A, 0x00C5 }, /* LATIN CAPITAL LETTER A + COMBINING RING ABOVE = LATIN CAPITAL LETTER A WITH RING ABOVE */ + { 0x0043, 0x0327, 0x00C7 }, /* LATIN CAPITAL LETTER C + COMBINING CEDILLA = LATIN CAPITAL LETTER C WITH CEDILLA */ + { 0x0045, 0x0300, 0x00C8 }, /* LATIN CAPITAL LETTER E + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER E WITH GRAVE */ + { 0x0045, 0x0301, 0x00C9 }, /* LATIN CAPITAL LETTER E + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER E WITH ACUTE */ + { 0x0045, 0x0302, 0x00CA }, /* LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER E WITH CIRCUMFLEX */ + { 0x0045, 0x0308, 0x00CB }, /* LATIN CAPITAL LETTER E + COMBINING DIAERESIS = LATIN CAPITAL LETTER E WITH DIAERESIS */ + { 0x0049, 0x0300, 0x00CC }, /* LATIN CAPITAL LETTER I + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER I WITH GRAVE */ + { 0x0049, 0x0301, 0x00CD }, /* LATIN CAPITAL LETTER I + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER I WITH ACUTE */ + { 0x0049, 0x0302, 0x00CE }, /* LATIN CAPITAL LETTER I + COMBINING CIRCUMFLEX ACCENT = LATIN 
CAPITAL LETTER I WITH CIRCUMFLEX */ + { 0x0049, 0x0308, 0x00CF }, /* LATIN CAPITAL LETTER I + COMBINING DIAERESIS = LATIN CAPITAL LETTER I WITH DIAERESIS */ + { 0x004E, 0x0303, 0x00D1 }, /* LATIN CAPITAL LETTER N + COMBINING TILDE = LATIN CAPITAL LETTER N WITH TILDE */ + { 0x004F, 0x0300, 0x00D2 }, /* LATIN CAPITAL LETTER O + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER O WITH GRAVE */ + { 0x004F, 0x0301, 0x00D3 }, /* LATIN CAPITAL LETTER O + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER O WITH ACUTE */ + { 0x004F, 0x0302, 0x00D4 }, /* LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER O WITH CIRCUMFLEX */ + { 0x004F, 0x0303, 0x00D5 }, /* LATIN CAPITAL LETTER O + COMBINING TILDE = LATIN CAPITAL LETTER O WITH TILDE */ + { 0x004F, 0x0308, 0x00D6 }, /* LATIN CAPITAL LETTER O + COMBINING DIAERESIS = LATIN CAPITAL LETTER O WITH DIAERESIS */ + { 0x0055, 0x0300, 0x00D9 }, /* LATIN CAPITAL LETTER U + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER U WITH GRAVE */ + { 0x0055, 0x0301, 0x00DA }, /* LATIN CAPITAL LETTER U + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER U WITH ACUTE */ + { 0x0055, 0x0302, 0x00DB }, /* LATIN CAPITAL LETTER U + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER U WITH CIRCUMFLEX */ + { 0x0055, 0x0308, 0x00DC }, /* LATIN CAPITAL LETTER U + COMBINING DIAERESIS = LATIN CAPITAL LETTER U WITH DIAERESIS */ + { 0x0059, 0x0301, 0x00DD }, /* LATIN CAPITAL LETTER Y + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER Y WITH ACUTE */ + { 0x0061, 0x0300, 0x00E0 }, /* LATIN SMALL LETTER A + COMBINING GRAVE ACCENT = LATIN SMALL LETTER A WITH GRAVE */ + { 0x0061, 0x0301, 0x00E1 }, /* LATIN SMALL LETTER A + COMBINING ACUTE ACCENT = LATIN SMALL LETTER A WITH ACUTE */ + { 0x0061, 0x0302, 0x00E2 }, /* LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER A WITH CIRCUMFLEX */ + { 0x0061, 0x0303, 0x00E3 }, /* LATIN SMALL LETTER A + COMBINING TILDE = LATIN SMALL LETTER A WITH TILDE */ + { 0x0061, 0x0308, 0x00E4 }, /* LATIN SMALL 
LETTER A + COMBINING DIAERESIS = LATIN SMALL LETTER A WITH DIAERESIS */ + { 0x0061, 0x030A, 0x00E5 }, /* LATIN SMALL LETTER A + COMBINING RING ABOVE = LATIN SMALL LETTER A WITH RING ABOVE */ + { 0x0063, 0x0327, 0x00E7 }, /* LATIN SMALL LETTER C + COMBINING CEDILLA = LATIN SMALL LETTER C WITH CEDILLA */ + { 0x0065, 0x0300, 0x00E8 }, /* LATIN SMALL LETTER E + COMBINING GRAVE ACCENT = LATIN SMALL LETTER E WITH GRAVE */ + { 0x0065, 0x0301, 0x00E9 }, /* LATIN SMALL LETTER E + COMBINING ACUTE ACCENT = LATIN SMALL LETTER E WITH ACUTE */ + { 0x0065, 0x0302, 0x00EA }, /* LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER E WITH CIRCUMFLEX */ + { 0x0065, 0x0308, 0x00EB }, /* LATIN SMALL LETTER E + COMBINING DIAERESIS = LATIN SMALL LETTER E WITH DIAERESIS */ + { 0x0069, 0x0300, 0x00EC }, /* LATIN SMALL LETTER I + COMBINING GRAVE ACCENT = LATIN SMALL LETTER I WITH GRAVE */ + { 0x0069, 0x0301, 0x00ED }, /* LATIN SMALL LETTER I + COMBINING ACUTE ACCENT = LATIN SMALL LETTER I WITH ACUTE */ + { 0x0069, 0x0302, 0x00EE }, /* LATIN SMALL LETTER I + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER I WITH CIRCUMFLEX */ + { 0x0069, 0x0308, 0x00EF }, /* LATIN SMALL LETTER I + COMBINING DIAERESIS = LATIN SMALL LETTER I WITH DIAERESIS */ + { 0x006E, 0x0303, 0x00F1 }, /* LATIN SMALL LETTER N + COMBINING TILDE = LATIN SMALL LETTER N WITH TILDE */ + { 0x006F, 0x0300, 0x00F2 }, /* LATIN SMALL LETTER O + COMBINING GRAVE ACCENT = LATIN SMALL LETTER O WITH GRAVE */ + { 0x006F, 0x0301, 0x00F3 }, /* LATIN SMALL LETTER O + COMBINING ACUTE ACCENT = LATIN SMALL LETTER O WITH ACUTE */ + { 0x006F, 0x0302, 0x00F4 }, /* LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER O WITH CIRCUMFLEX */ + { 0x006F, 0x0303, 0x00F5 }, /* LATIN SMALL LETTER O + COMBINING TILDE = LATIN SMALL LETTER O WITH TILDE */ + { 0x006F, 0x0308, 0x00F6 }, /* LATIN SMALL LETTER O + COMBINING DIAERESIS = LATIN SMALL LETTER O WITH DIAERESIS */ + { 0x0075, 0x0300, 0x00F9 }, /* LATIN SMALL LETTER 
U + COMBINING GRAVE ACCENT = LATIN SMALL LETTER U WITH GRAVE */ + { 0x0075, 0x0301, 0x00FA }, /* LATIN SMALL LETTER U + COMBINING ACUTE ACCENT = LATIN SMALL LETTER U WITH ACUTE */ + { 0x0075, 0x0302, 0x00FB }, /* LATIN SMALL LETTER U + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER U WITH CIRCUMFLEX */ + { 0x0075, 0x0308, 0x00FC }, /* LATIN SMALL LETTER U + COMBINING DIAERESIS = LATIN SMALL LETTER U WITH DIAERESIS */ + { 0x0079, 0x0301, 0x00FD }, /* LATIN SMALL LETTER Y + COMBINING ACUTE ACCENT = LATIN SMALL LETTER Y WITH ACUTE */ + { 0x0079, 0x0308, 0x00FF }, /* LATIN SMALL LETTER Y + COMBINING DIAERESIS = LATIN SMALL LETTER Y WITH DIAERESIS */ + { 0x0391, 0x0301, 0x0386 }, /* GREEK CAPITAL LETTER ALPHA + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER ALPHA WITH TONOS */ + { 0x0395, 0x0301, 0x0388 }, /* GREEK CAPITAL LETTER EPSILON + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER EPSILON WITH TONOS */ + { 0x0397, 0x0301, 0x0389 }, /* GREEK CAPITAL LETTER ETA + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER ETA WITH TONOS */ + { 0x0399, 0x0301, 0x038A }, /* GREEK CAPITAL LETTER IOTA + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER IOTA WITH TONOS */ + { 0x039F, 0x0301, 0x038C }, /* GREEK CAPITAL LETTER OMICRON + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER OMICRON WITH TONOS */ + { 0x03A5, 0x0301, 0x038E }, /* GREEK CAPITAL LETTER UPSILON + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER UPSILON WITH TONOS */ + { 0x03A9, 0x0301, 0x038F }, /* GREEK CAPITAL LETTER OMEGA + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER OMEGA WITH TONOS */ + { 0x03B1, 0x0301, 0x03AC }, /* GREEK SMALL LETTER ALPHA + COMBINING ACUTE ACCENT = GREEK SMALL LETTER ALPHA WITH TONOS */ + { 0x03B5, 0x0301, 0x03AD }, /* GREEK SMALL LETTER EPSILON + COMBINING ACUTE ACCENT = GREEK SMALL LETTER EPSILON WITH TONOS */ + { 0x03B7, 0x0301, 0x03AE }, /* GREEK SMALL LETTER ETA + COMBINING ACUTE ACCENT = GREEK SMALL LETTER ETA WITH TONOS */ + { 0x03B9, 0x0301, 0x03AF }, /* GREEK SMALL LETTER IOTA + COMBINING 
ACUTE ACCENT = GREEK SMALL LETTER IOTA WITH TONOS */ + { 0x03BF, 0x0301, 0x03CC }, /* GREEK SMALL LETTER OMICRON + COMBINING ACUTE ACCENT = GREEK SMALL LETTER OMICRON WITH TONOS */ + { 0x03C5, 0x0301, 0x03CD }, /* GREEK SMALL LETTER UPSILON + COMBINING ACUTE ACCENT = GREEK SMALL LETTER UPSILON WITH TONOS */ + { 0x03C9, 0x0301, 0x03CE }, /* GREEK SMALL LETTER OMEGA + COMBINING ACUTE ACCENT = GREEK SMALL LETTER OMEGA WITH TONOS */ + { 0x0418, 0x0306, 0x0419 }, /* CYRILLIC CAPITAL LETTER I + COMBINING BREVE = CYRILLIC CAPITAL LETTER SHORT I */ + { 0x0423, 0x0306, 0x040E }, /* CYRILLIC CAPITAL LETTER U + COMBINING BREVE = CYRILLIC CAPITAL LETTER SHORT U */ + { 0x0438, 0x0306, 0x0439 }, /* CYRILLIC SMALL LETTER I + COMBINING BREVE = CYRILLIC SMALL LETTER SHORT I */ + { 0x0443, 0x0306, 0x045E }, /* CYRILLIC SMALL LETTER U + COMBINING BREVE = CYRILLIC SMALL LETTER SHORT U */ +}; + +/* + * Boundary values for quick rejection + * These are calculated by analyzing the table during generation + */ +#define MIN_BASE_CHAR 0x0041 +#define MAX_BASE_CHAR 0x0443 +#define MIN_COMBINING_CHAR 0x0300 +#define MAX_COMBINING_CHAR 0x0327 + +struct compare_key { + uint16_t base; + uint16_t combining; +}; + +static int recomposition_compare(const void *key, const void *element) +{ + const struct compare_key *search_key = key; + const struct recomposition *table_entry = element; + + /* Compare base character first */ + if (search_key->base < table_entry->base) + return -1; + if (search_key->base > table_entry->base) + return 1; + + /* Base characters match, now compare combining character */ + if (search_key->combining < table_entry->combining) + return -1; + if (search_key->combining > table_entry->combining) + return 1; + + /* Both match */ + return 0; +} + +/** + * Attempt to recompose two Unicode characters into a single character. 
+ * + * @param base: Base Unicode code point (UCS-4) + * @param combining: Combining mark Unicode code point (UCS-4) + * Return: Recomposed Unicode code point, or 0 if no recomposition is possible + */ +uint32_t ucs_recompose(uint32_t base, uint32_t combining) +{ + /* Check if characters are within the range of our table */ + if (base < MIN_BASE_CHAR || base > MAX_BASE_CHAR || + combining < MIN_COMBINING_CHAR || combining > MAX_COMBINING_CHAR) + return 0; + + struct compare_key key = { base, combining }; + + struct recomposition *result = + __inline_bsearch(&key, recomposition_table, + ARRAY_SIZE(recomposition_table), + sizeof(*recomposition_table), + recomposition_compare); + + return result ? result->recomposed : 0; +} diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index b3a911866662..4d3a34c288e5 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -30,6 +30,7 @@ int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); bool ucs_is_zero_width(uint32_t cp); +uint32_t ucs_recompose(uint32_t base, uint32_t combining); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -69,6 +70,11 @@ static inline bool ucs_is_zero_width(uint32_t cp) { return false; } + +static inline uint32_t ucs_recompose(uint32_t base, uint32_t combining) +{ + return 0; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ From cd6937d42bca46f2143544918e535d6fd22b71b7 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:14:00 -0400 Subject: [PATCH 025/105] vt: support Unicode recomposition Try replacing any decomposed Unicode sequence by the corresponding recomposed code point. Code point to glyph correspondence works best after recomposition, and this applies mostly to single-width code points; therefore we can't preserve them in their decomposed form anyway. With all the infrastructure in place this is now trivial to do. 
Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-9-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 5d53feeb5d2b..e3d35c4f9204 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2953,8 +2953,15 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, * double-width. */ } else { - /* Otherwise zero-width code points are ignored */ - goto out; + /* try recomposition */ + prev_c = ucs_recompose(prev_c, c); + if (prev_c != 0) { + vc_con_rewind(vc); + c = prev_c; + } else { + /* Otherwise zero-width code points are ignored */ + goto out; + } } } } From 119ff0b0f4541972d829da606599441dace2444d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:14:01 -0400 Subject: [PATCH 026/105] vt: update gen_ucs_width.py to produce more space efficient tables Split table ranges into BMP (16-bit) and non-BMP (above 16-bit). This reduces the corresponding text size by 20-25%. 
Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-10-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/gen_ucs_width.py | 156 +++++++++++++++++++++++--------- 1 file changed, 114 insertions(+), 42 deletions(-) diff --git a/drivers/tty/vt/gen_ucs_width.py b/drivers/tty/vt/gen_ucs_width.py index 41997fe00129..c6cbc93e83f2 100755 --- a/drivers/tty/vt/gen_ucs_width.py +++ b/drivers/tty/vt/gen_ucs_width.py @@ -132,13 +132,49 @@ def generate_ucs_width(): ranges.append((start, prev)) return ranges + # Function to split ranges into BMP (16-bit) and non-BMP (above 16-bit) + def split_ranges_by_size(ranges): + bmp_ranges = [] + non_bmp_ranges = [] + + for start, end in ranges: + if end <= 0xFFFF: + bmp_ranges.append((start, end)) + elif start > 0xFFFF: + non_bmp_ranges.append((start, end)) + else: + # Split the range at 0xFFFF + bmp_ranges.append((start, 0xFFFF)) + non_bmp_ranges.append((0x10000, end)) + + return bmp_ranges, non_bmp_ranges + # Extract ranges for each width zero_width_ranges = ranges_optimize(width_map, 0) double_width_ranges = ranges_optimize(width_map, 2) + # Split ranges into BMP and non-BMP + zero_width_bmp, zero_width_non_bmp = split_ranges_by_size(zero_width_ranges) + double_width_bmp, double_width_non_bmp = split_ranges_by_size(double_width_ranges) + # Get Unicode version information unicode_version = unicodedata.unidata_version + # Function to generate code point description comments + def get_code_point_comment(start, end): + try: + start_char_desc = unicodedata.name(chr(start)) + if start == end: + return f"/* {start_char_desc} */" + else: + end_char_desc = unicodedata.name(chr(end)) + return f"/* {start_char_desc} - {end_char_desc} */" + except: + if start == end: + return f"/* U+{start:04X} */" + else: + return f"/* U+{start:04X} - U+{end:04X} */" + # Generate C implementation file with open(c_file, 'w') as f: f.write(f"""\ @@ -156,62 +192,65 @@ def generate_ucs_width(): #include #include -struct 
interval {{ +struct interval16 {{ + uint16_t first; + uint16_t last; +}}; + +struct interval32 {{ uint32_t first; uint32_t last; }}; -/* Zero-width character ranges */ -static const struct interval zero_width_ranges[] = {{ +/* Zero-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */ +static const struct interval16 zero_width_bmp[] = {{ """) - for start, end in zero_width_ranges: - try: - start_char_desc = unicodedata.name(chr(start)) if start < 0x10000 else f"U+{start:05X}" - if start == end: - comment = f"/* {start_char_desc} */" - else: - end_char_desc = unicodedata.name(chr(end)) if end < 0x10000 else f"U+{end:05X}" - comment = f"/* {start_char_desc} - {end_char_desc} */" - except: - if start == end: - comment = f"/* U+{start:05X} */" - else: - comment = f"/* U+{start:05X} - U+{end:05X} */" + for start, end in zero_width_bmp: + comment = get_code_point_comment(start, end) + f.write(f"\t{{ 0x{start:04X}, 0x{end:04X} }}, {comment}\n") + f.write("""\ +}; + +/* Zero-width character ranges (non-BMP, U+10000 and above) */ +static const struct interval32 zero_width_non_bmp[] = { +""") + + for start, end in zero_width_non_bmp: + comment = get_code_point_comment(start, end) f.write(f"\t{{ 0x{start:05X}, 0x{end:05X} }}, {comment}\n") f.write("""\ }; -/* Double-width character ranges */ -static const struct interval double_width_ranges[] = { +/* Double-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */ +static const struct interval16 double_width_bmp[] = { """) - for start, end in double_width_ranges: - try: - start_char_desc = unicodedata.name(chr(start)) if start < 0x10000 else f"U+{start:05X}" - if start == end: - comment = f"/* {start_char_desc} */" - else: - end_char_desc = unicodedata.name(chr(end)) if end < 0x10000 else f"U+{end:05X}" - comment = f"/* {start_char_desc} - {end_char_desc} */" - except: - if start == end: - comment = f"/* U+{start:05X} */" - else: - comment = f"/* U+{start:05X} - U+{end:05X} */" + for 
start, end in double_width_bmp: + comment = get_code_point_comment(start, end) + f.write(f"\t{{ 0x{start:04X}, 0x{end:04X} }}, {comment}\n") + f.write("""\ +}; + +/* Double-width character ranges (non-BMP, U+10000 and above) */ +static const struct interval32 double_width_non_bmp[] = { +""") + + for start, end in double_width_non_bmp: + comment = get_code_point_comment(start, end) f.write(f"\t{{ 0x{start:05X}, 0x{end:05X} }}, {comment}\n") f.write("""\ }; -static int ucs_cmp(const void *key, const void *element) +static int ucs_cmp16(const void *key, const void *element) { - uint32_t cp = *(uint32_t *)key; - const struct interval *e = element; + uint16_t cp = *(uint16_t *)key; + const struct interval16 *e = element; if (cp > e->last) return 1; @@ -220,13 +259,34 @@ static int ucs_cmp(const void *key, const void *element) return 0; } -static bool is_in_interval(uint32_t cp, const struct interval *intervals, size_t count) +static int ucs_cmp32(const void *key, const void *element) +{ + uint32_t cp = *(uint32_t *)key; + const struct interval32 *e = element; + + if (cp > e->last) + return 1; + if (cp < e->first) + return -1; + return 0; +} + +static bool is_in_interval16(uint16_t cp, const struct interval16 *intervals, size_t count) { if (cp < intervals[0].first || cp > intervals[count - 1].last) return false; return __inline_bsearch(&cp, intervals, count, - sizeof(*intervals), ucs_cmp) != NULL; + sizeof(*intervals), ucs_cmp16) != NULL; +} + +static bool is_in_interval32(uint32_t cp, const struct interval32 *intervals, size_t count) +{ + if (cp < intervals[0].first || cp > intervals[count - 1].last) + return false; + + return __inline_bsearch(&cp, intervals, count, + sizeof(*intervals), ucs_cmp32) != NULL; } /** @@ -237,7 +297,9 @@ static bool is_in_interval(uint32_t cp, const struct interval *intervals, size_t */ bool ucs_is_zero_width(uint32_t cp) { - return is_in_interval(cp, zero_width_ranges, ARRAY_SIZE(zero_width_ranges)); + return (cp <= 0xFFFF) + ? 
is_in_interval16(cp, zero_width_bmp, ARRAY_SIZE(zero_width_bmp)) + : is_in_interval32(cp, zero_width_non_bmp, ARRAY_SIZE(zero_width_non_bmp)); } /** @@ -248,17 +310,27 @@ bool ucs_is_zero_width(uint32_t cp) */ bool ucs_is_double_width(uint32_t cp) { - return is_in_interval(cp, double_width_ranges, ARRAY_SIZE(double_width_ranges)); + return (cp <= 0xFFFF) + ? is_in_interval16(cp, double_width_bmp, ARRAY_SIZE(double_width_bmp)) + : is_in_interval32(cp, double_width_non_bmp, ARRAY_SIZE(double_width_non_bmp)); } """) # Print summary - zero_width_count = sum(end - start + 1 for start, end in zero_width_ranges) - double_width_count = sum(end - start + 1 for start, end in double_width_ranges) + zero_width_bmp_count = sum(end - start + 1 for start, end in zero_width_bmp) + zero_width_non_bmp_count = sum(end - start + 1 for start, end in zero_width_non_bmp) + double_width_bmp_count = sum(end - start + 1 for start, end in double_width_bmp) + double_width_non_bmp_count = sum(end - start + 1 for start, end in double_width_non_bmp) + + total_zero_width = zero_width_bmp_count + zero_width_non_bmp_count + total_double_width = double_width_bmp_count + double_width_non_bmp_count print(f"Generated {c_file} with:") - print(f"- {len(zero_width_ranges)} zero-width ranges covering ~{zero_width_count} code points") - print(f"- {len(double_width_ranges)} double-width ranges covering ~{double_width_count} code points") + print(f"- {len(zero_width_bmp)} zero-width BMP ranges (16-bit) covering ~{zero_width_bmp_count} code points") + print(f"- {len(zero_width_non_bmp)} zero-width non-BMP ranges (32-bit) covering ~{zero_width_non_bmp_count} code points") + print(f"- {len(double_width_bmp)} double-width BMP ranges (16-bit) covering ~{double_width_bmp_count} code points") + print(f"- {len(double_width_non_bmp)} double-width non-BMP ranges (32-bit) covering ~{double_width_non_bmp_count} code points") + print(f"Total: {len(zero_width_bmp) + len(zero_width_non_bmp) + len(double_width_bmp) + 
len(double_width_non_bmp)} ranges covering ~{total_zero_width + total_double_width} code points") if __name__ == "__main__": generate_ucs_width() From c7cb5b0779d782c1bda10414af7a9fcadcc87e93 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:14:02 -0400 Subject: [PATCH 027/105] vt: update ucs_width.c following latest gen_ucs_width.py Split table ranges into BMP (16-bit) and non-BMP (above 16-bit). This reduces the corresponding text size by 20-25%. Note: scripts/checkpatch.pl complains about "... exceeds 100 columns". Please ignore. Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-11-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/ucs_width.c | 904 +++++++++++++++++++------------------ 1 file changed, 471 insertions(+), 433 deletions(-) diff --git a/drivers/tty/vt/ucs_width.c b/drivers/tty/vt/ucs_width.c index 47b22583bd34..060aa8ae7f16 100644 --- a/drivers/tty/vt/ucs_width.c +++ b/drivers/tty/vt/ucs_width.c @@ -12,452 +12,465 @@ #include #include -struct interval { +struct interval16 { + uint16_t first; + uint16_t last; +}; + +struct interval32 { uint32_t first; uint32_t last; }; -/* Zero-width character ranges */ -static const struct interval zero_width_ranges[] = { - { 0x000AD, 0x000AD }, /* SOFT HYPHEN */ - { 0x00300, 0x0036F }, /* COMBINING GRAVE ACCENT - COMBINING LATIN SMALL LETTER X */ - { 0x00483, 0x00489 }, /* COMBINING CYRILLIC TITLO - COMBINING CYRILLIC MILLIONS SIGN */ - { 0x00591, 0x005BD }, /* HEBREW ACCENT ETNAHTA - HEBREW POINT METEG */ - { 0x005BF, 0x005BF }, /* HEBREW POINT RAFE */ - { 0x005C1, 0x005C2 }, /* HEBREW POINT SHIN DOT - HEBREW POINT SIN DOT */ - { 0x005C4, 0x005C5 }, /* HEBREW MARK UPPER DOT - HEBREW MARK LOWER DOT */ - { 0x005C7, 0x005C7 }, /* HEBREW POINT QAMATS QATAN */ - { 0x00600, 0x00605 }, /* ARABIC NUMBER SIGN - ARABIC NUMBER MARK ABOVE */ - { 0x00610, 0x0061A }, /* ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM - ARABIC SMALL KASRA */ - { 0x0064B, 0x0065F }, 
/* ARABIC FATHATAN - ARABIC WAVY HAMZA BELOW */ - { 0x00670, 0x00670 }, /* ARABIC LETTER SUPERSCRIPT ALEF */ - { 0x006D6, 0x006DC }, /* ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA - ARABIC SMALL HIGH SEEN */ - { 0x006DF, 0x006E4 }, /* ARABIC SMALL HIGH ROUNDED ZERO - ARABIC SMALL HIGH MADDA */ - { 0x006E7, 0x006E8 }, /* ARABIC SMALL HIGH YEH - ARABIC SMALL HIGH NOON */ - { 0x006EA, 0x006ED }, /* ARABIC EMPTY CENTRE LOW STOP - ARABIC SMALL LOW MEEM */ - { 0x00711, 0x00711 }, /* SYRIAC LETTER SUPERSCRIPT ALAPH */ - { 0x00730, 0x0074A }, /* SYRIAC PTHAHA ABOVE - SYRIAC BARREKH */ - { 0x007A6, 0x007B0 }, /* THAANA ABAFILI - THAANA SUKUN */ - { 0x007EB, 0x007F3 }, /* NKO COMBINING SHORT HIGH TONE - NKO COMBINING DOUBLE DOT ABOVE */ - { 0x007FD, 0x007FD }, /* NKO DANTAYALAN */ - { 0x00816, 0x00819 }, /* SAMARITAN MARK IN - SAMARITAN MARK DAGESH */ - { 0x0081B, 0x00823 }, /* SAMARITAN MARK EPENTHETIC YUT - SAMARITAN VOWEL SIGN A */ - { 0x00825, 0x00827 }, /* SAMARITAN VOWEL SIGN SHORT A - SAMARITAN VOWEL SIGN U */ - { 0x00829, 0x0082D }, /* SAMARITAN VOWEL SIGN LONG I - SAMARITAN MARK NEQUDAA */ - { 0x00859, 0x0085B }, /* MANDAIC AFFRICATION MARK - MANDAIC GEMINATION MARK */ - { 0x00890, 0x00891 }, /* ARABIC POUND MARK ABOVE - ARABIC PIASTRE MARK ABOVE */ - { 0x00897, 0x0089F }, /* ARABIC PEPET - ARABIC HALF MADDA OVER MADDA */ - { 0x008CA, 0x00903 }, /* ARABIC SMALL HIGH FARSI YEH - DEVANAGARI SIGN VISARGA */ - { 0x0093A, 0x0093C }, /* DEVANAGARI VOWEL SIGN OE - DEVANAGARI SIGN NUKTA */ - { 0x0093E, 0x0094F }, /* DEVANAGARI VOWEL SIGN AA - DEVANAGARI VOWEL SIGN AW */ - { 0x00951, 0x00957 }, /* DEVANAGARI STRESS SIGN UDATTA - DEVANAGARI VOWEL SIGN UUE */ - { 0x00962, 0x00963 }, /* DEVANAGARI VOWEL SIGN VOCALIC L - DEVANAGARI VOWEL SIGN VOCALIC LL */ - { 0x00981, 0x00983 }, /* BENGALI SIGN CANDRABINDU - BENGALI SIGN VISARGA */ - { 0x009BC, 0x009BC }, /* BENGALI SIGN NUKTA */ - { 0x009BE, 0x009C4 }, /* BENGALI VOWEL SIGN AA - BENGALI VOWEL SIGN VOCALIC RR */ - 
{ 0x009C7, 0x009C8 }, /* BENGALI VOWEL SIGN E - BENGALI VOWEL SIGN AI */ - { 0x009CB, 0x009CD }, /* BENGALI VOWEL SIGN O - BENGALI SIGN VIRAMA */ - { 0x009D7, 0x009D7 }, /* BENGALI AU LENGTH MARK */ - { 0x009E2, 0x009E3 }, /* BENGALI VOWEL SIGN VOCALIC L - BENGALI VOWEL SIGN VOCALIC LL */ - { 0x009FE, 0x009FE }, /* BENGALI SANDHI MARK */ - { 0x00A01, 0x00A03 }, /* GURMUKHI SIGN ADAK BINDI - GURMUKHI SIGN VISARGA */ - { 0x00A3C, 0x00A3C }, /* GURMUKHI SIGN NUKTA */ - { 0x00A3E, 0x00A42 }, /* GURMUKHI VOWEL SIGN AA - GURMUKHI VOWEL SIGN UU */ - { 0x00A47, 0x00A48 }, /* GURMUKHI VOWEL SIGN EE - GURMUKHI VOWEL SIGN AI */ - { 0x00A4B, 0x00A4D }, /* GURMUKHI VOWEL SIGN OO - GURMUKHI SIGN VIRAMA */ - { 0x00A51, 0x00A51 }, /* GURMUKHI SIGN UDAAT */ - { 0x00A70, 0x00A71 }, /* GURMUKHI TIPPI - GURMUKHI ADDAK */ - { 0x00A75, 0x00A75 }, /* GURMUKHI SIGN YAKASH */ - { 0x00A81, 0x00A83 }, /* GUJARATI SIGN CANDRABINDU - GUJARATI SIGN VISARGA */ - { 0x00ABC, 0x00ABC }, /* GUJARATI SIGN NUKTA */ - { 0x00ABE, 0x00AC5 }, /* GUJARATI VOWEL SIGN AA - GUJARATI VOWEL SIGN CANDRA E */ - { 0x00AC7, 0x00AC9 }, /* GUJARATI VOWEL SIGN E - GUJARATI VOWEL SIGN CANDRA O */ - { 0x00ACB, 0x00ACD }, /* GUJARATI VOWEL SIGN O - GUJARATI SIGN VIRAMA */ - { 0x00AE2, 0x00AE3 }, /* GUJARATI VOWEL SIGN VOCALIC L - GUJARATI VOWEL SIGN VOCALIC LL */ - { 0x00AFA, 0x00AFF }, /* GUJARATI SIGN SUKUN - GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE */ - { 0x00B01, 0x00B03 }, /* ORIYA SIGN CANDRABINDU - ORIYA SIGN VISARGA */ - { 0x00B3C, 0x00B3C }, /* ORIYA SIGN NUKTA */ - { 0x00B3E, 0x00B44 }, /* ORIYA VOWEL SIGN AA - ORIYA VOWEL SIGN VOCALIC RR */ - { 0x00B47, 0x00B48 }, /* ORIYA VOWEL SIGN E - ORIYA VOWEL SIGN AI */ - { 0x00B4B, 0x00B4D }, /* ORIYA VOWEL SIGN O - ORIYA SIGN VIRAMA */ - { 0x00B55, 0x00B57 }, /* ORIYA SIGN OVERLINE - ORIYA AU LENGTH MARK */ - { 0x00B62, 0x00B63 }, /* ORIYA VOWEL SIGN VOCALIC L - ORIYA VOWEL SIGN VOCALIC LL */ - { 0x00B82, 0x00B82 }, /* TAMIL SIGN ANUSVARA */ - { 0x00BBE, 0x00BC2 }, /* 
TAMIL VOWEL SIGN AA - TAMIL VOWEL SIGN UU */ - { 0x00BC6, 0x00BC8 }, /* TAMIL VOWEL SIGN E - TAMIL VOWEL SIGN AI */ - { 0x00BCA, 0x00BCD }, /* TAMIL VOWEL SIGN O - TAMIL SIGN VIRAMA */ - { 0x00BD7, 0x00BD7 }, /* TAMIL AU LENGTH MARK */ - { 0x00C00, 0x00C04 }, /* TELUGU SIGN COMBINING CANDRABINDU ABOVE - TELUGU SIGN COMBINING ANUSVARA ABOVE */ - { 0x00C3C, 0x00C3C }, /* TELUGU SIGN NUKTA */ - { 0x00C3E, 0x00C44 }, /* TELUGU VOWEL SIGN AA - TELUGU VOWEL SIGN VOCALIC RR */ - { 0x00C46, 0x00C48 }, /* TELUGU VOWEL SIGN E - TELUGU VOWEL SIGN AI */ - { 0x00C4A, 0x00C4D }, /* TELUGU VOWEL SIGN O - TELUGU SIGN VIRAMA */ - { 0x00C55, 0x00C56 }, /* TELUGU LENGTH MARK - TELUGU AI LENGTH MARK */ - { 0x00C62, 0x00C63 }, /* TELUGU VOWEL SIGN VOCALIC L - TELUGU VOWEL SIGN VOCALIC LL */ - { 0x00C81, 0x00C83 }, /* KANNADA SIGN CANDRABINDU - KANNADA SIGN VISARGA */ - { 0x00CBC, 0x00CBC }, /* KANNADA SIGN NUKTA */ - { 0x00CBE, 0x00CC4 }, /* KANNADA VOWEL SIGN AA - KANNADA VOWEL SIGN VOCALIC RR */ - { 0x00CC6, 0x00CC8 }, /* KANNADA VOWEL SIGN E - KANNADA VOWEL SIGN AI */ - { 0x00CCA, 0x00CCD }, /* KANNADA VOWEL SIGN O - KANNADA SIGN VIRAMA */ - { 0x00CD5, 0x00CD6 }, /* KANNADA LENGTH MARK - KANNADA AI LENGTH MARK */ - { 0x00CE2, 0x00CE3 }, /* KANNADA VOWEL SIGN VOCALIC L - KANNADA VOWEL SIGN VOCALIC LL */ - { 0x00CF3, 0x00CF3 }, /* KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT */ - { 0x00D00, 0x00D03 }, /* MALAYALAM SIGN COMBINING ANUSVARA ABOVE - MALAYALAM SIGN VISARGA */ - { 0x00D3B, 0x00D3C }, /* MALAYALAM SIGN VERTICAL BAR VIRAMA - MALAYALAM SIGN CIRCULAR VIRAMA */ - { 0x00D3E, 0x00D44 }, /* MALAYALAM VOWEL SIGN AA - MALAYALAM VOWEL SIGN VOCALIC RR */ - { 0x00D46, 0x00D48 }, /* MALAYALAM VOWEL SIGN E - MALAYALAM VOWEL SIGN AI */ - { 0x00D4A, 0x00D4D }, /* MALAYALAM VOWEL SIGN O - MALAYALAM SIGN VIRAMA */ - { 0x00D57, 0x00D57 }, /* MALAYALAM AU LENGTH MARK */ - { 0x00D62, 0x00D63 }, /* MALAYALAM VOWEL SIGN VOCALIC L - MALAYALAM VOWEL SIGN VOCALIC LL */ - { 0x00D81, 0x00D83 }, /* 
SINHALA SIGN CANDRABINDU - SINHALA SIGN VISARGAYA */ - { 0x00DCA, 0x00DCA }, /* SINHALA SIGN AL-LAKUNA */ - { 0x00DCF, 0x00DD4 }, /* SINHALA VOWEL SIGN AELA-PILLA - SINHALA VOWEL SIGN KETTI PAA-PILLA */ - { 0x00DD6, 0x00DD6 }, /* SINHALA VOWEL SIGN DIGA PAA-PILLA */ - { 0x00DD8, 0x00DDF }, /* SINHALA VOWEL SIGN GAETTA-PILLA - SINHALA VOWEL SIGN GAYANUKITTA */ - { 0x00DF2, 0x00DF3 }, /* SINHALA VOWEL SIGN DIGA GAETTA-PILLA - SINHALA VOWEL SIGN DIGA GAYANUKITTA */ - { 0x00E31, 0x00E31 }, /* THAI CHARACTER MAI HAN-AKAT */ - { 0x00E34, 0x00E3A }, /* THAI CHARACTER SARA I - THAI CHARACTER PHINTHU */ - { 0x00E47, 0x00E4E }, /* THAI CHARACTER MAITAIKHU - THAI CHARACTER YAMAKKAN */ - { 0x00EB1, 0x00EB1 }, /* LAO VOWEL SIGN MAI KAN */ - { 0x00EB4, 0x00EBC }, /* LAO VOWEL SIGN I - LAO SEMIVOWEL SIGN LO */ - { 0x00EC8, 0x00ECE }, /* LAO TONE MAI EK - LAO YAMAKKAN */ - { 0x00F18, 0x00F19 }, /* TIBETAN ASTROLOGICAL SIGN -KHYUD PA - TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS */ - { 0x00F35, 0x00F35 }, /* TIBETAN MARK NGAS BZUNG NYI ZLA */ - { 0x00F37, 0x00F37 }, /* TIBETAN MARK NGAS BZUNG SGOR RTAGS */ - { 0x00F39, 0x00F39 }, /* TIBETAN MARK TSA -PHRU */ - { 0x00F3E, 0x00F3F }, /* TIBETAN SIGN YAR TSHES - TIBETAN SIGN MAR TSHES */ - { 0x00F71, 0x00F84 }, /* TIBETAN VOWEL SIGN AA - TIBETAN MARK HALANTA */ - { 0x00F86, 0x00F87 }, /* TIBETAN SIGN LCI RTAGS - TIBETAN SIGN YANG RTAGS */ - { 0x00F8D, 0x00F97 }, /* TIBETAN SUBJOINED SIGN LCE TSA CAN - TIBETAN SUBJOINED LETTER JA */ - { 0x00F99, 0x00FBC }, /* TIBETAN SUBJOINED LETTER NYA - TIBETAN SUBJOINED LETTER FIXED-FORM RA */ - { 0x00FC6, 0x00FC6 }, /* TIBETAN SYMBOL PADMA GDAN */ - { 0x0102B, 0x0103E }, /* MYANMAR VOWEL SIGN TALL AA - MYANMAR CONSONANT SIGN MEDIAL HA */ - { 0x01056, 0x01059 }, /* MYANMAR VOWEL SIGN VOCALIC R - MYANMAR VOWEL SIGN VOCALIC LL */ - { 0x0105E, 0x01060 }, /* MYANMAR CONSONANT SIGN MON MEDIAL NA - MYANMAR CONSONANT SIGN MON MEDIAL LA */ - { 0x01062, 0x01064 }, /* MYANMAR VOWEL SIGN SGAW KAREN EU - MYANMAR 
TONE MARK SGAW KAREN KE PHO */ - { 0x01067, 0x0106D }, /* MYANMAR VOWEL SIGN WESTERN PWO KAREN EU - MYANMAR SIGN WESTERN PWO KAREN TONE-5 */ - { 0x01071, 0x01074 }, /* MYANMAR VOWEL SIGN GEBA KAREN I - MYANMAR VOWEL SIGN KAYAH EE */ - { 0x01082, 0x0108D }, /* MYANMAR CONSONANT SIGN SHAN MEDIAL WA - MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE */ - { 0x0108F, 0x0108F }, /* MYANMAR SIGN RUMAI PALAUNG TONE-5 */ - { 0x0109A, 0x0109D }, /* MYANMAR SIGN KHAMTI TONE-1 - MYANMAR VOWEL SIGN AITON AI */ - { 0x0135D, 0x0135F }, /* ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK - ETHIOPIC COMBINING GEMINATION MARK */ - { 0x01712, 0x01715 }, /* TAGALOG VOWEL SIGN I - TAGALOG SIGN PAMUDPOD */ - { 0x01732, 0x01734 }, /* HANUNOO VOWEL SIGN I - HANUNOO SIGN PAMUDPOD */ - { 0x01752, 0x01753 }, /* BUHID VOWEL SIGN I - BUHID VOWEL SIGN U */ - { 0x01772, 0x01773 }, /* TAGBANWA VOWEL SIGN I - TAGBANWA VOWEL SIGN U */ - { 0x017B4, 0x017D3 }, /* KHMER VOWEL INHERENT AQ - KHMER SIGN BATHAMASAT */ - { 0x017DD, 0x017DD }, /* KHMER SIGN ATTHACAN */ - { 0x0180B, 0x0180D }, /* MONGOLIAN FREE VARIATION SELECTOR ONE - MONGOLIAN FREE VARIATION SELECTOR THREE */ - { 0x0180F, 0x0180F }, /* MONGOLIAN FREE VARIATION SELECTOR FOUR */ - { 0x01885, 0x01886 }, /* MONGOLIAN LETTER ALI GALI BALUDA - MONGOLIAN LETTER ALI GALI THREE BALUDA */ - { 0x018A9, 0x018A9 }, /* MONGOLIAN LETTER ALI GALI DAGALGA */ - { 0x01920, 0x0192B }, /* LIMBU VOWEL SIGN A - LIMBU SUBJOINED LETTER WA */ - { 0x01930, 0x0193B }, /* LIMBU SMALL LETTER KA - LIMBU SIGN SA-I */ - { 0x01A17, 0x01A1B }, /* BUGINESE VOWEL SIGN I - BUGINESE VOWEL SIGN AE */ - { 0x01A55, 0x01A5E }, /* TAI THAM CONSONANT SIGN MEDIAL RA - TAI THAM CONSONANT SIGN SA */ - { 0x01A60, 0x01A7C }, /* TAI THAM SIGN SAKOT - TAI THAM SIGN KHUEN-LUE KARAN */ - { 0x01A7F, 0x01A7F }, /* TAI THAM COMBINING CRYPTOGRAMMIC DOT */ - { 0x01AB0, 0x01ACE }, /* COMBINING DOUBLED CIRCUMFLEX ACCENT - COMBINING LATIN SMALL LETTER INSULAR T */ - { 0x01B00, 0x01B04 }, /* BALINESE SIGN 
ULU RICEM - BALINESE SIGN BISAH */ - { 0x01B34, 0x01B44 }, /* BALINESE SIGN REREKAN - BALINESE ADEG ADEG */ - { 0x01B6B, 0x01B73 }, /* BALINESE MUSICAL SYMBOL COMBINING TEGEH - BALINESE MUSICAL SYMBOL COMBINING GONG */ - { 0x01B80, 0x01B82 }, /* SUNDANESE SIGN PANYECEK - SUNDANESE SIGN PANGWISAD */ - { 0x01BA1, 0x01BAD }, /* SUNDANESE CONSONANT SIGN PAMINGKAL - SUNDANESE CONSONANT SIGN PASANGAN WA */ - { 0x01BE6, 0x01BF3 }, /* BATAK SIGN TOMPI - BATAK PANONGONAN */ - { 0x01C24, 0x01C37 }, /* LEPCHA SUBJOINED LETTER YA - LEPCHA SIGN NUKTA */ - { 0x01CD0, 0x01CD2 }, /* VEDIC TONE KARSHANA - VEDIC TONE PRENKHA */ - { 0x01CD4, 0x01CE8 }, /* VEDIC SIGN YAJURVEDIC MIDLINE SVARITA - VEDIC SIGN VISARGA ANUDATTA WITH TAIL */ - { 0x01CED, 0x01CED }, /* VEDIC SIGN TIRYAK */ - { 0x01CF4, 0x01CF4 }, /* VEDIC TONE CANDRA ABOVE */ - { 0x01CF7, 0x01CF9 }, /* VEDIC SIGN ATIKRAMA - VEDIC TONE DOUBLE RING ABOVE */ - { 0x01DC0, 0x01DFF }, /* COMBINING DOTTED GRAVE ACCENT - COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW */ - { 0x0200B, 0x0200E }, /* ZERO WIDTH SPACE - LEFT-TO-RIGHT MARK */ - { 0x0202A, 0x0202D }, /* LEFT-TO-RIGHT EMBEDDING - LEFT-TO-RIGHT OVERRIDE */ - { 0x02060, 0x02064 }, /* WORD JOINER - INVISIBLE PLUS */ - { 0x0206A, 0x0206F }, /* INHIBIT SYMMETRIC SWAPPING - NOMINAL DIGIT SHAPES */ - { 0x020D0, 0x020F0 }, /* COMBINING LEFT HARPOON ABOVE - COMBINING ASTERISK ABOVE */ - { 0x02640, 0x02640 }, /* FEMALE SIGN */ - { 0x02642, 0x02642 }, /* MALE SIGN */ - { 0x026A7, 0x026A7 }, /* MALE WITH STROKE AND MALE AND FEMALE SIGN */ - { 0x02CEF, 0x02CF1 }, /* COPTIC COMBINING NI ABOVE - COPTIC COMBINING SPIRITUS LENIS */ - { 0x02D7F, 0x02D7F }, /* TIFINAGH CONSONANT JOINER */ - { 0x02DE0, 0x02DFF }, /* COMBINING CYRILLIC LETTER BE - COMBINING CYRILLIC LETTER IOTIFIED BIG YUS */ - { 0x0302A, 0x0302F }, /* IDEOGRAPHIC LEVEL TONE MARK - HANGUL DOUBLE DOT TONE MARK */ - { 0x03099, 0x0309A }, /* COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK - COMBINING KATAKANA-HIRAGANA 
SEMI-VOICED SOUND MARK */ - { 0x0A66F, 0x0A672 }, /* COMBINING CYRILLIC VZMET - COMBINING CYRILLIC THOUSAND MILLIONS SIGN */ - { 0x0A674, 0x0A67D }, /* COMBINING CYRILLIC LETTER UKRAINIAN IE - COMBINING CYRILLIC PAYEROK */ - { 0x0A69E, 0x0A69F }, /* COMBINING CYRILLIC LETTER EF - COMBINING CYRILLIC LETTER IOTIFIED E */ - { 0x0A6F0, 0x0A6F1 }, /* BAMUM COMBINING MARK KOQNDON - BAMUM COMBINING MARK TUKWENTIS */ - { 0x0A802, 0x0A802 }, /* SYLOTI NAGRI SIGN DVISVARA */ - { 0x0A806, 0x0A806 }, /* SYLOTI NAGRI SIGN HASANTA */ - { 0x0A80B, 0x0A80B }, /* SYLOTI NAGRI SIGN ANUSVARA */ - { 0x0A823, 0x0A827 }, /* SYLOTI NAGRI VOWEL SIGN A - SYLOTI NAGRI VOWEL SIGN OO */ - { 0x0A82C, 0x0A82C }, /* SYLOTI NAGRI SIGN ALTERNATE HASANTA */ - { 0x0A880, 0x0A881 }, /* SAURASHTRA SIGN ANUSVARA - SAURASHTRA SIGN VISARGA */ - { 0x0A8B4, 0x0A8C5 }, /* SAURASHTRA CONSONANT SIGN HAARU - SAURASHTRA SIGN CANDRABINDU */ - { 0x0A8E0, 0x0A8F1 }, /* COMBINING DEVANAGARI DIGIT ZERO - COMBINING DEVANAGARI SIGN AVAGRAHA */ - { 0x0A8FF, 0x0A8FF }, /* DEVANAGARI VOWEL SIGN AY */ - { 0x0A926, 0x0A92D }, /* KAYAH LI VOWEL UE - KAYAH LI TONE CALYA PLOPHU */ - { 0x0A947, 0x0A953 }, /* REJANG VOWEL SIGN I - REJANG VIRAMA */ - { 0x0A980, 0x0A983 }, /* JAVANESE SIGN PANYANGGA - JAVANESE SIGN WIGNYAN */ - { 0x0A9B3, 0x0A9C0 }, /* JAVANESE SIGN CECAK TELU - JAVANESE PANGKON */ - { 0x0A9E5, 0x0A9E5 }, /* MYANMAR SIGN SHAN SAW */ - { 0x0AA29, 0x0AA36 }, /* CHAM VOWEL SIGN AA - CHAM CONSONANT SIGN WA */ - { 0x0AA43, 0x0AA43 }, /* CHAM CONSONANT SIGN FINAL NG */ - { 0x0AA4C, 0x0AA4D }, /* CHAM CONSONANT SIGN FINAL M - CHAM CONSONANT SIGN FINAL H */ - { 0x0AA7B, 0x0AA7D }, /* MYANMAR SIGN PAO KAREN TONE - MYANMAR SIGN TAI LAING TONE-5 */ - { 0x0AAB0, 0x0AAB0 }, /* TAI VIET MAI KANG */ - { 0x0AAB2, 0x0AAB4 }, /* TAI VIET VOWEL I - TAI VIET VOWEL U */ - { 0x0AAB7, 0x0AAB8 }, /* TAI VIET MAI KHIT - TAI VIET VOWEL IA */ - { 0x0AABE, 0x0AABF }, /* TAI VIET VOWEL AM - TAI VIET TONE MAI EK */ - { 0x0AAC1, 0x0AAC1 }, /* 
TAI VIET TONE MAI THO */ - { 0x0AAEB, 0x0AAEF }, /* MEETEI MAYEK VOWEL SIGN II - MEETEI MAYEK VOWEL SIGN AAU */ - { 0x0AAF5, 0x0AAF6 }, /* MEETEI MAYEK VOWEL SIGN VISARGA - MEETEI MAYEK VIRAMA */ - { 0x0ABE3, 0x0ABEA }, /* MEETEI MAYEK VOWEL SIGN ONAP - MEETEI MAYEK VOWEL SIGN NUNG */ - { 0x0ABEC, 0x0ABED }, /* MEETEI MAYEK LUM IYEK - MEETEI MAYEK APUN IYEK */ - { 0x0FB1E, 0x0FB1E }, /* HEBREW POINT JUDEO-SPANISH VARIKA */ - { 0x0FE00, 0x0FE0F }, /* VARIATION SELECTOR-1 - VARIATION SELECTOR-16 */ - { 0x0FE20, 0x0FE2F }, /* COMBINING LIGATURE LEFT HALF - COMBINING CYRILLIC TITLO RIGHT HALF */ - { 0x0FEFF, 0x0FEFF }, /* ZERO WIDTH NO-BREAK SPACE */ - { 0x0FFF9, 0x0FFFB }, /* INTERLINEAR ANNOTATION ANCHOR - INTERLINEAR ANNOTATION TERMINATOR */ - { 0x101FD, 0x101FD }, /* U+101FD */ - { 0x102E0, 0x102E0 }, /* U+102E0 */ - { 0x10376, 0x1037A }, /* U+10376 - U+1037A */ - { 0x10A01, 0x10A03 }, /* U+10A01 - U+10A03 */ - { 0x10A05, 0x10A06 }, /* U+10A05 - U+10A06 */ - { 0x10A0C, 0x10A0F }, /* U+10A0C - U+10A0F */ - { 0x10A38, 0x10A3A }, /* U+10A38 - U+10A3A */ - { 0x10A3F, 0x10A3F }, /* U+10A3F */ - { 0x10AE5, 0x10AE6 }, /* U+10AE5 - U+10AE6 */ - { 0x10D24, 0x10D27 }, /* U+10D24 - U+10D27 */ - { 0x10D69, 0x10D6D }, /* U+10D69 - U+10D6D */ - { 0x10EAB, 0x10EAC }, /* U+10EAB - U+10EAC */ - { 0x10EFC, 0x10EFF }, /* U+10EFC - U+10EFF */ - { 0x10F46, 0x10F50 }, /* U+10F46 - U+10F50 */ - { 0x10F82, 0x10F85 }, /* U+10F82 - U+10F85 */ - { 0x11000, 0x11002 }, /* U+11000 - U+11002 */ - { 0x11038, 0x11046 }, /* U+11038 - U+11046 */ - { 0x11070, 0x11070 }, /* U+11070 */ - { 0x11073, 0x11074 }, /* U+11073 - U+11074 */ - { 0x1107F, 0x11082 }, /* U+1107F - U+11082 */ - { 0x110B0, 0x110BA }, /* U+110B0 - U+110BA */ - { 0x110BD, 0x110BD }, /* U+110BD */ - { 0x110C2, 0x110C2 }, /* U+110C2 */ - { 0x110CD, 0x110CD }, /* U+110CD */ - { 0x11100, 0x11102 }, /* U+11100 - U+11102 */ - { 0x11127, 0x11134 }, /* U+11127 - U+11134 */ - { 0x11145, 0x11146 }, /* U+11145 - U+11146 */ - { 0x11173, 0x11173 
}, /* U+11173 */ - { 0x11180, 0x11182 }, /* U+11180 - U+11182 */ - { 0x111B3, 0x111C0 }, /* U+111B3 - U+111C0 */ - { 0x111C9, 0x111CC }, /* U+111C9 - U+111CC */ - { 0x111CE, 0x111CF }, /* U+111CE - U+111CF */ - { 0x1122C, 0x11237 }, /* U+1122C - U+11237 */ - { 0x1123E, 0x1123E }, /* U+1123E */ - { 0x11241, 0x11241 }, /* U+11241 */ - { 0x112DF, 0x112EA }, /* U+112DF - U+112EA */ - { 0x11300, 0x11303 }, /* U+11300 - U+11303 */ - { 0x1133B, 0x1133C }, /* U+1133B - U+1133C */ - { 0x1133E, 0x11344 }, /* U+1133E - U+11344 */ - { 0x11347, 0x11348 }, /* U+11347 - U+11348 */ - { 0x1134B, 0x1134D }, /* U+1134B - U+1134D */ - { 0x11357, 0x11357 }, /* U+11357 */ - { 0x11362, 0x11363 }, /* U+11362 - U+11363 */ - { 0x11366, 0x1136C }, /* U+11366 - U+1136C */ - { 0x11370, 0x11374 }, /* U+11370 - U+11374 */ - { 0x113B8, 0x113C0 }, /* U+113B8 - U+113C0 */ - { 0x113C2, 0x113C2 }, /* U+113C2 */ - { 0x113C5, 0x113C5 }, /* U+113C5 */ - { 0x113C7, 0x113CA }, /* U+113C7 - U+113CA */ - { 0x113CC, 0x113D0 }, /* U+113CC - U+113D0 */ - { 0x113D2, 0x113D2 }, /* U+113D2 */ - { 0x113E1, 0x113E2 }, /* U+113E1 - U+113E2 */ - { 0x11435, 0x11446 }, /* U+11435 - U+11446 */ - { 0x1145E, 0x1145E }, /* U+1145E */ - { 0x114B0, 0x114C3 }, /* U+114B0 - U+114C3 */ - { 0x115AF, 0x115B5 }, /* U+115AF - U+115B5 */ - { 0x115B8, 0x115C0 }, /* U+115B8 - U+115C0 */ - { 0x115DC, 0x115DD }, /* U+115DC - U+115DD */ - { 0x11630, 0x11640 }, /* U+11630 - U+11640 */ - { 0x116AB, 0x116B7 }, /* U+116AB - U+116B7 */ - { 0x1171D, 0x1172B }, /* U+1171D - U+1172B */ - { 0x1182C, 0x1183A }, /* U+1182C - U+1183A */ - { 0x11930, 0x11935 }, /* U+11930 - U+11935 */ - { 0x11937, 0x11938 }, /* U+11937 - U+11938 */ - { 0x1193B, 0x1193E }, /* U+1193B - U+1193E */ - { 0x11940, 0x11940 }, /* U+11940 */ - { 0x11942, 0x11943 }, /* U+11942 - U+11943 */ - { 0x119D1, 0x119D7 }, /* U+119D1 - U+119D7 */ - { 0x119DA, 0x119E0 }, /* U+119DA - U+119E0 */ - { 0x119E4, 0x119E4 }, /* U+119E4 */ - { 0x11A01, 0x11A0A }, /* U+11A01 - U+11A0A */ - { 
0x11A33, 0x11A39 }, /* U+11A33 - U+11A39 */ - { 0x11A3B, 0x11A3E }, /* U+11A3B - U+11A3E */ - { 0x11A47, 0x11A47 }, /* U+11A47 */ - { 0x11A51, 0x11A5B }, /* U+11A51 - U+11A5B */ - { 0x11A8A, 0x11A99 }, /* U+11A8A - U+11A99 */ - { 0x11C2F, 0x11C36 }, /* U+11C2F - U+11C36 */ - { 0x11C38, 0x11C3F }, /* U+11C38 - U+11C3F */ - { 0x11C92, 0x11CA7 }, /* U+11C92 - U+11CA7 */ - { 0x11CA9, 0x11CB6 }, /* U+11CA9 - U+11CB6 */ - { 0x11D31, 0x11D36 }, /* U+11D31 - U+11D36 */ - { 0x11D3A, 0x11D3A }, /* U+11D3A */ - { 0x11D3C, 0x11D3D }, /* U+11D3C - U+11D3D */ - { 0x11D3F, 0x11D45 }, /* U+11D3F - U+11D45 */ - { 0x11D47, 0x11D47 }, /* U+11D47 */ - { 0x11D8A, 0x11D8E }, /* U+11D8A - U+11D8E */ - { 0x11D90, 0x11D91 }, /* U+11D90 - U+11D91 */ - { 0x11D93, 0x11D97 }, /* U+11D93 - U+11D97 */ - { 0x11EF3, 0x11EF6 }, /* U+11EF3 - U+11EF6 */ - { 0x11F00, 0x11F01 }, /* U+11F00 - U+11F01 */ - { 0x11F03, 0x11F03 }, /* U+11F03 */ - { 0x11F34, 0x11F3A }, /* U+11F34 - U+11F3A */ - { 0x11F3E, 0x11F42 }, /* U+11F3E - U+11F42 */ - { 0x11F5A, 0x11F5A }, /* U+11F5A */ - { 0x13430, 0x13440 }, /* U+13430 - U+13440 */ - { 0x13447, 0x13455 }, /* U+13447 - U+13455 */ - { 0x1611E, 0x1612F }, /* U+1611E - U+1612F */ - { 0x16AF0, 0x16AF4 }, /* U+16AF0 - U+16AF4 */ - { 0x16B30, 0x16B36 }, /* U+16B30 - U+16B36 */ - { 0x16F4F, 0x16F4F }, /* U+16F4F */ - { 0x16F51, 0x16F87 }, /* U+16F51 - U+16F87 */ - { 0x16F8F, 0x16F92 }, /* U+16F8F - U+16F92 */ - { 0x16FE4, 0x16FE4 }, /* U+16FE4 */ - { 0x16FF0, 0x16FF1 }, /* U+16FF0 - U+16FF1 */ - { 0x1BC9D, 0x1BC9E }, /* U+1BC9D - U+1BC9E */ - { 0x1BCA0, 0x1BCA3 }, /* U+1BCA0 - U+1BCA3 */ - { 0x1CF00, 0x1CF2D }, /* U+1CF00 - U+1CF2D */ - { 0x1CF30, 0x1CF46 }, /* U+1CF30 - U+1CF46 */ - { 0x1D165, 0x1D169 }, /* U+1D165 - U+1D169 */ - { 0x1D16D, 0x1D182 }, /* U+1D16D - U+1D182 */ - { 0x1D185, 0x1D18B }, /* U+1D185 - U+1D18B */ - { 0x1D1AA, 0x1D1AD }, /* U+1D1AA - U+1D1AD */ - { 0x1D242, 0x1D244 }, /* U+1D242 - U+1D244 */ - { 0x1DA00, 0x1DA36 }, /* U+1DA00 - U+1DA36 */ - { 
0x1DA3B, 0x1DA6C }, /* U+1DA3B - U+1DA6C */ - { 0x1DA75, 0x1DA75 }, /* U+1DA75 */ - { 0x1DA84, 0x1DA84 }, /* U+1DA84 */ - { 0x1DA9B, 0x1DA9F }, /* U+1DA9B - U+1DA9F */ - { 0x1DAA1, 0x1DAAF }, /* U+1DAA1 - U+1DAAF */ - { 0x1E000, 0x1E006 }, /* U+1E000 - U+1E006 */ - { 0x1E008, 0x1E018 }, /* U+1E008 - U+1E018 */ - { 0x1E01B, 0x1E021 }, /* U+1E01B - U+1E021 */ - { 0x1E023, 0x1E024 }, /* U+1E023 - U+1E024 */ - { 0x1E026, 0x1E02A }, /* U+1E026 - U+1E02A */ - { 0x1E08F, 0x1E08F }, /* U+1E08F */ - { 0x1E130, 0x1E136 }, /* U+1E130 - U+1E136 */ - { 0x1E2AE, 0x1E2AE }, /* U+1E2AE */ - { 0x1E2EC, 0x1E2EF }, /* U+1E2EC - U+1E2EF */ - { 0x1E4EC, 0x1E4EF }, /* U+1E4EC - U+1E4EF */ - { 0x1E5EE, 0x1E5EF }, /* U+1E5EE - U+1E5EF */ - { 0x1E8D0, 0x1E8D6 }, /* U+1E8D0 - U+1E8D6 */ - { 0x1E944, 0x1E94A }, /* U+1E944 - U+1E94A */ - { 0x1F3FB, 0x1F3FF }, /* U+1F3FB - U+1F3FF */ - { 0x1F9B0, 0x1F9B3 }, /* U+1F9B0 - U+1F9B3 */ - { 0xE0001, 0xE0001 }, /* U+E0001 */ - { 0xE0020, 0xE007F }, /* U+E0020 - U+E007F */ - { 0xE0100, 0xE01EF }, /* U+E0100 - U+E01EF */ +/* Zero-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */ +static const struct interval16 zero_width_bmp[] = { + { 0x00AD, 0x00AD }, /* SOFT HYPHEN */ + { 0x0300, 0x036F }, /* COMBINING GRAVE ACCENT - COMBINING LATIN SMALL LETTER X */ + { 0x0483, 0x0489 }, /* COMBINING CYRILLIC TITLO - COMBINING CYRILLIC MILLIONS SIGN */ + { 0x0591, 0x05BD }, /* HEBREW ACCENT ETNAHTA - HEBREW POINT METEG */ + { 0x05BF, 0x05BF }, /* HEBREW POINT RAFE */ + { 0x05C1, 0x05C2 }, /* HEBREW POINT SHIN DOT - HEBREW POINT SIN DOT */ + { 0x05C4, 0x05C5 }, /* HEBREW MARK UPPER DOT - HEBREW MARK LOWER DOT */ + { 0x05C7, 0x05C7 }, /* HEBREW POINT QAMATS QATAN */ + { 0x0600, 0x0605 }, /* ARABIC NUMBER SIGN - ARABIC NUMBER MARK ABOVE */ + { 0x0610, 0x061A }, /* ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM - ARABIC SMALL KASRA */ + { 0x064B, 0x065F }, /* ARABIC FATHATAN - ARABIC WAVY HAMZA BELOW */ + { 0x0670, 0x0670 }, /* ARABIC LETTER 
SUPERSCRIPT ALEF */ + { 0x06D6, 0x06DC }, /* ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA - ARABIC SMALL HIGH SEEN */ + { 0x06DF, 0x06E4 }, /* ARABIC SMALL HIGH ROUNDED ZERO - ARABIC SMALL HIGH MADDA */ + { 0x06E7, 0x06E8 }, /* ARABIC SMALL HIGH YEH - ARABIC SMALL HIGH NOON */ + { 0x06EA, 0x06ED }, /* ARABIC EMPTY CENTRE LOW STOP - ARABIC SMALL LOW MEEM */ + { 0x0711, 0x0711 }, /* SYRIAC LETTER SUPERSCRIPT ALAPH */ + { 0x0730, 0x074A }, /* SYRIAC PTHAHA ABOVE - SYRIAC BARREKH */ + { 0x07A6, 0x07B0 }, /* THAANA ABAFILI - THAANA SUKUN */ + { 0x07EB, 0x07F3 }, /* NKO COMBINING SHORT HIGH TONE - NKO COMBINING DOUBLE DOT ABOVE */ + { 0x07FD, 0x07FD }, /* NKO DANTAYALAN */ + { 0x0816, 0x0819 }, /* SAMARITAN MARK IN - SAMARITAN MARK DAGESH */ + { 0x081B, 0x0823 }, /* SAMARITAN MARK EPENTHETIC YUT - SAMARITAN VOWEL SIGN A */ + { 0x0825, 0x0827 }, /* SAMARITAN VOWEL SIGN SHORT A - SAMARITAN VOWEL SIGN U */ + { 0x0829, 0x082D }, /* SAMARITAN VOWEL SIGN LONG I - SAMARITAN MARK NEQUDAA */ + { 0x0859, 0x085B }, /* MANDAIC AFFRICATION MARK - MANDAIC GEMINATION MARK */ + { 0x0890, 0x0891 }, /* ARABIC POUND MARK ABOVE - ARABIC PIASTRE MARK ABOVE */ + { 0x0897, 0x089F }, /* ARABIC PEPET - ARABIC HALF MADDA OVER MADDA */ + { 0x08CA, 0x0903 }, /* ARABIC SMALL HIGH FARSI YEH - DEVANAGARI SIGN VISARGA */ + { 0x093A, 0x093C }, /* DEVANAGARI VOWEL SIGN OE - DEVANAGARI SIGN NUKTA */ + { 0x093E, 0x094F }, /* DEVANAGARI VOWEL SIGN AA - DEVANAGARI VOWEL SIGN AW */ + { 0x0951, 0x0957 }, /* DEVANAGARI STRESS SIGN UDATTA - DEVANAGARI VOWEL SIGN UUE */ + { 0x0962, 0x0963 }, /* DEVANAGARI VOWEL SIGN VOCALIC L - DEVANAGARI VOWEL SIGN VOCALIC LL */ + { 0x0981, 0x0983 }, /* BENGALI SIGN CANDRABINDU - BENGALI SIGN VISARGA */ + { 0x09BC, 0x09BC }, /* BENGALI SIGN NUKTA */ + { 0x09BE, 0x09C4 }, /* BENGALI VOWEL SIGN AA - BENGALI VOWEL SIGN VOCALIC RR */ + { 0x09C7, 0x09C8 }, /* BENGALI VOWEL SIGN E - BENGALI VOWEL SIGN AI */ + { 0x09CB, 0x09CD }, /* BENGALI VOWEL SIGN O - BENGALI SIGN 
VIRAMA */ + { 0x09D7, 0x09D7 }, /* BENGALI AU LENGTH MARK */ + { 0x09E2, 0x09E3 }, /* BENGALI VOWEL SIGN VOCALIC L - BENGALI VOWEL SIGN VOCALIC LL */ + { 0x09FE, 0x09FE }, /* BENGALI SANDHI MARK */ + { 0x0A01, 0x0A03 }, /* GURMUKHI SIGN ADAK BINDI - GURMUKHI SIGN VISARGA */ + { 0x0A3C, 0x0A3C }, /* GURMUKHI SIGN NUKTA */ + { 0x0A3E, 0x0A42 }, /* GURMUKHI VOWEL SIGN AA - GURMUKHI VOWEL SIGN UU */ + { 0x0A47, 0x0A48 }, /* GURMUKHI VOWEL SIGN EE - GURMUKHI VOWEL SIGN AI */ + { 0x0A4B, 0x0A4D }, /* GURMUKHI VOWEL SIGN OO - GURMUKHI SIGN VIRAMA */ + { 0x0A51, 0x0A51 }, /* GURMUKHI SIGN UDAAT */ + { 0x0A70, 0x0A71 }, /* GURMUKHI TIPPI - GURMUKHI ADDAK */ + { 0x0A75, 0x0A75 }, /* GURMUKHI SIGN YAKASH */ + { 0x0A81, 0x0A83 }, /* GUJARATI SIGN CANDRABINDU - GUJARATI SIGN VISARGA */ + { 0x0ABC, 0x0ABC }, /* GUJARATI SIGN NUKTA */ + { 0x0ABE, 0x0AC5 }, /* GUJARATI VOWEL SIGN AA - GUJARATI VOWEL SIGN CANDRA E */ + { 0x0AC7, 0x0AC9 }, /* GUJARATI VOWEL SIGN E - GUJARATI VOWEL SIGN CANDRA O */ + { 0x0ACB, 0x0ACD }, /* GUJARATI VOWEL SIGN O - GUJARATI SIGN VIRAMA */ + { 0x0AE2, 0x0AE3 }, /* GUJARATI VOWEL SIGN VOCALIC L - GUJARATI VOWEL SIGN VOCALIC LL */ + { 0x0AFA, 0x0AFF }, /* GUJARATI SIGN SUKUN - GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE */ + { 0x0B01, 0x0B03 }, /* ORIYA SIGN CANDRABINDU - ORIYA SIGN VISARGA */ + { 0x0B3C, 0x0B3C }, /* ORIYA SIGN NUKTA */ + { 0x0B3E, 0x0B44 }, /* ORIYA VOWEL SIGN AA - ORIYA VOWEL SIGN VOCALIC RR */ + { 0x0B47, 0x0B48 }, /* ORIYA VOWEL SIGN E - ORIYA VOWEL SIGN AI */ + { 0x0B4B, 0x0B4D }, /* ORIYA VOWEL SIGN O - ORIYA SIGN VIRAMA */ + { 0x0B55, 0x0B57 }, /* ORIYA SIGN OVERLINE - ORIYA AU LENGTH MARK */ + { 0x0B62, 0x0B63 }, /* ORIYA VOWEL SIGN VOCALIC L - ORIYA VOWEL SIGN VOCALIC LL */ + { 0x0B82, 0x0B82 }, /* TAMIL SIGN ANUSVARA */ + { 0x0BBE, 0x0BC2 }, /* TAMIL VOWEL SIGN AA - TAMIL VOWEL SIGN UU */ + { 0x0BC6, 0x0BC8 }, /* TAMIL VOWEL SIGN E - TAMIL VOWEL SIGN AI */ + { 0x0BCA, 0x0BCD }, /* TAMIL VOWEL SIGN O - TAMIL SIGN VIRAMA */ + { 0x0BD7, 
0x0BD7 }, /* TAMIL AU LENGTH MARK */ + { 0x0C00, 0x0C04 }, /* TELUGU SIGN COMBINING CANDRABINDU ABOVE - TELUGU SIGN COMBINING ANUSVARA ABOVE */ + { 0x0C3C, 0x0C3C }, /* TELUGU SIGN NUKTA */ + { 0x0C3E, 0x0C44 }, /* TELUGU VOWEL SIGN AA - TELUGU VOWEL SIGN VOCALIC RR */ + { 0x0C46, 0x0C48 }, /* TELUGU VOWEL SIGN E - TELUGU VOWEL SIGN AI */ + { 0x0C4A, 0x0C4D }, /* TELUGU VOWEL SIGN O - TELUGU SIGN VIRAMA */ + { 0x0C55, 0x0C56 }, /* TELUGU LENGTH MARK - TELUGU AI LENGTH MARK */ + { 0x0C62, 0x0C63 }, /* TELUGU VOWEL SIGN VOCALIC L - TELUGU VOWEL SIGN VOCALIC LL */ + { 0x0C81, 0x0C83 }, /* KANNADA SIGN CANDRABINDU - KANNADA SIGN VISARGA */ + { 0x0CBC, 0x0CBC }, /* KANNADA SIGN NUKTA */ + { 0x0CBE, 0x0CC4 }, /* KANNADA VOWEL SIGN AA - KANNADA VOWEL SIGN VOCALIC RR */ + { 0x0CC6, 0x0CC8 }, /* KANNADA VOWEL SIGN E - KANNADA VOWEL SIGN AI */ + { 0x0CCA, 0x0CCD }, /* KANNADA VOWEL SIGN O - KANNADA SIGN VIRAMA */ + { 0x0CD5, 0x0CD6 }, /* KANNADA LENGTH MARK - KANNADA AI LENGTH MARK */ + { 0x0CE2, 0x0CE3 }, /* KANNADA VOWEL SIGN VOCALIC L - KANNADA VOWEL SIGN VOCALIC LL */ + { 0x0CF3, 0x0CF3 }, /* KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT */ + { 0x0D00, 0x0D03 }, /* MALAYALAM SIGN COMBINING ANUSVARA ABOVE - MALAYALAM SIGN VISARGA */ + { 0x0D3B, 0x0D3C }, /* MALAYALAM SIGN VERTICAL BAR VIRAMA - MALAYALAM SIGN CIRCULAR VIRAMA */ + { 0x0D3E, 0x0D44 }, /* MALAYALAM VOWEL SIGN AA - MALAYALAM VOWEL SIGN VOCALIC RR */ + { 0x0D46, 0x0D48 }, /* MALAYALAM VOWEL SIGN E - MALAYALAM VOWEL SIGN AI */ + { 0x0D4A, 0x0D4D }, /* MALAYALAM VOWEL SIGN O - MALAYALAM SIGN VIRAMA */ + { 0x0D57, 0x0D57 }, /* MALAYALAM AU LENGTH MARK */ + { 0x0D62, 0x0D63 }, /* MALAYALAM VOWEL SIGN VOCALIC L - MALAYALAM VOWEL SIGN VOCALIC LL */ + { 0x0D81, 0x0D83 }, /* SINHALA SIGN CANDRABINDU - SINHALA SIGN VISARGAYA */ + { 0x0DCA, 0x0DCA }, /* SINHALA SIGN AL-LAKUNA */ + { 0x0DCF, 0x0DD4 }, /* SINHALA VOWEL SIGN AELA-PILLA - SINHALA VOWEL SIGN KETTI PAA-PILLA */ + { 0x0DD6, 0x0DD6 }, /* SINHALA VOWEL SIGN DIGA 
PAA-PILLA */ + { 0x0DD8, 0x0DDF }, /* SINHALA VOWEL SIGN GAETTA-PILLA - SINHALA VOWEL SIGN GAYANUKITTA */ + { 0x0DF2, 0x0DF3 }, /* SINHALA VOWEL SIGN DIGA GAETTA-PILLA - SINHALA VOWEL SIGN DIGA GAYANUKITTA */ + { 0x0E31, 0x0E31 }, /* THAI CHARACTER MAI HAN-AKAT */ + { 0x0E34, 0x0E3A }, /* THAI CHARACTER SARA I - THAI CHARACTER PHINTHU */ + { 0x0E47, 0x0E4E }, /* THAI CHARACTER MAITAIKHU - THAI CHARACTER YAMAKKAN */ + { 0x0EB1, 0x0EB1 }, /* LAO VOWEL SIGN MAI KAN */ + { 0x0EB4, 0x0EBC }, /* LAO VOWEL SIGN I - LAO SEMIVOWEL SIGN LO */ + { 0x0EC8, 0x0ECE }, /* LAO TONE MAI EK - LAO YAMAKKAN */ + { 0x0F18, 0x0F19 }, /* TIBETAN ASTROLOGICAL SIGN -KHYUD PA - TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS */ + { 0x0F35, 0x0F35 }, /* TIBETAN MARK NGAS BZUNG NYI ZLA */ + { 0x0F37, 0x0F37 }, /* TIBETAN MARK NGAS BZUNG SGOR RTAGS */ + { 0x0F39, 0x0F39 }, /* TIBETAN MARK TSA -PHRU */ + { 0x0F3E, 0x0F3F }, /* TIBETAN SIGN YAR TSHES - TIBETAN SIGN MAR TSHES */ + { 0x0F71, 0x0F84 }, /* TIBETAN VOWEL SIGN AA - TIBETAN MARK HALANTA */ + { 0x0F86, 0x0F87 }, /* TIBETAN SIGN LCI RTAGS - TIBETAN SIGN YANG RTAGS */ + { 0x0F8D, 0x0F97 }, /* TIBETAN SUBJOINED SIGN LCE TSA CAN - TIBETAN SUBJOINED LETTER JA */ + { 0x0F99, 0x0FBC }, /* TIBETAN SUBJOINED LETTER NYA - TIBETAN SUBJOINED LETTER FIXED-FORM RA */ + { 0x0FC6, 0x0FC6 }, /* TIBETAN SYMBOL PADMA GDAN */ + { 0x102B, 0x103E }, /* MYANMAR VOWEL SIGN TALL AA - MYANMAR CONSONANT SIGN MEDIAL HA */ + { 0x1056, 0x1059 }, /* MYANMAR VOWEL SIGN VOCALIC R - MYANMAR VOWEL SIGN VOCALIC LL */ + { 0x105E, 0x1060 }, /* MYANMAR CONSONANT SIGN MON MEDIAL NA - MYANMAR CONSONANT SIGN MON MEDIAL LA */ + { 0x1062, 0x1064 }, /* MYANMAR VOWEL SIGN SGAW KAREN EU - MYANMAR TONE MARK SGAW KAREN KE PHO */ + { 0x1067, 0x106D }, /* MYANMAR VOWEL SIGN WESTERN PWO KAREN EU - MYANMAR SIGN WESTERN PWO KAREN TONE-5 */ + { 0x1071, 0x1074 }, /* MYANMAR VOWEL SIGN GEBA KAREN I - MYANMAR VOWEL SIGN KAYAH EE */ + { 0x1082, 0x108D }, /* MYANMAR CONSONANT SIGN SHAN MEDIAL WA - 
MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE */ + { 0x108F, 0x108F }, /* MYANMAR SIGN RUMAI PALAUNG TONE-5 */ + { 0x109A, 0x109D }, /* MYANMAR SIGN KHAMTI TONE-1 - MYANMAR VOWEL SIGN AITON AI */ + { 0x135D, 0x135F }, /* ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK - ETHIOPIC COMBINING GEMINATION MARK */ + { 0x1712, 0x1715 }, /* TAGALOG VOWEL SIGN I - TAGALOG SIGN PAMUDPOD */ + { 0x1732, 0x1734 }, /* HANUNOO VOWEL SIGN I - HANUNOO SIGN PAMUDPOD */ + { 0x1752, 0x1753 }, /* BUHID VOWEL SIGN I - BUHID VOWEL SIGN U */ + { 0x1772, 0x1773 }, /* TAGBANWA VOWEL SIGN I - TAGBANWA VOWEL SIGN U */ + { 0x17B4, 0x17D3 }, /* KHMER VOWEL INHERENT AQ - KHMER SIGN BATHAMASAT */ + { 0x17DD, 0x17DD }, /* KHMER SIGN ATTHACAN */ + { 0x180B, 0x180D }, /* MONGOLIAN FREE VARIATION SELECTOR ONE - MONGOLIAN FREE VARIATION SELECTOR THREE */ + { 0x180F, 0x180F }, /* MONGOLIAN FREE VARIATION SELECTOR FOUR */ + { 0x1885, 0x1886 }, /* MONGOLIAN LETTER ALI GALI BALUDA - MONGOLIAN LETTER ALI GALI THREE BALUDA */ + { 0x18A9, 0x18A9 }, /* MONGOLIAN LETTER ALI GALI DAGALGA */ + { 0x1920, 0x192B }, /* LIMBU VOWEL SIGN A - LIMBU SUBJOINED LETTER WA */ + { 0x1930, 0x193B }, /* LIMBU SMALL LETTER KA - LIMBU SIGN SA-I */ + { 0x1A17, 0x1A1B }, /* BUGINESE VOWEL SIGN I - BUGINESE VOWEL SIGN AE */ + { 0x1A55, 0x1A5E }, /* TAI THAM CONSONANT SIGN MEDIAL RA - TAI THAM CONSONANT SIGN SA */ + { 0x1A60, 0x1A7C }, /* TAI THAM SIGN SAKOT - TAI THAM SIGN KHUEN-LUE KARAN */ + { 0x1A7F, 0x1A7F }, /* TAI THAM COMBINING CRYPTOGRAMMIC DOT */ + { 0x1AB0, 0x1ACE }, /* COMBINING DOUBLED CIRCUMFLEX ACCENT - COMBINING LATIN SMALL LETTER INSULAR T */ + { 0x1B00, 0x1B04 }, /* BALINESE SIGN ULU RICEM - BALINESE SIGN BISAH */ + { 0x1B34, 0x1B44 }, /* BALINESE SIGN REREKAN - BALINESE ADEG ADEG */ + { 0x1B6B, 0x1B73 }, /* BALINESE MUSICAL SYMBOL COMBINING TEGEH - BALINESE MUSICAL SYMBOL COMBINING GONG */ + { 0x1B80, 0x1B82 }, /* SUNDANESE SIGN PANYECEK - SUNDANESE SIGN PANGWISAD */ + { 0x1BA1, 0x1BAD }, /* SUNDANESE CONSONANT SIGN 
PAMINGKAL - SUNDANESE CONSONANT SIGN PASANGAN WA */ + { 0x1BE6, 0x1BF3 }, /* BATAK SIGN TOMPI - BATAK PANONGONAN */ + { 0x1C24, 0x1C37 }, /* LEPCHA SUBJOINED LETTER YA - LEPCHA SIGN NUKTA */ + { 0x1CD0, 0x1CD2 }, /* VEDIC TONE KARSHANA - VEDIC TONE PRENKHA */ + { 0x1CD4, 0x1CE8 }, /* VEDIC SIGN YAJURVEDIC MIDLINE SVARITA - VEDIC SIGN VISARGA ANUDATTA WITH TAIL */ + { 0x1CED, 0x1CED }, /* VEDIC SIGN TIRYAK */ + { 0x1CF4, 0x1CF4 }, /* VEDIC TONE CANDRA ABOVE */ + { 0x1CF7, 0x1CF9 }, /* VEDIC SIGN ATIKRAMA - VEDIC TONE DOUBLE RING ABOVE */ + { 0x1DC0, 0x1DFF }, /* COMBINING DOTTED GRAVE ACCENT - COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW */ + { 0x200B, 0x200E }, /* ZERO WIDTH SPACE - LEFT-TO-RIGHT MARK */ + { 0x202A, 0x202D }, /* LEFT-TO-RIGHT EMBEDDING - LEFT-TO-RIGHT OVERRIDE */ + { 0x2060, 0x2064 }, /* WORD JOINER - INVISIBLE PLUS */ + { 0x206A, 0x206F }, /* INHIBIT SYMMETRIC SWAPPING - NOMINAL DIGIT SHAPES */ + { 0x20D0, 0x20F0 }, /* COMBINING LEFT HARPOON ABOVE - COMBINING ASTERISK ABOVE */ + { 0x2640, 0x2640 }, /* FEMALE SIGN */ + { 0x2642, 0x2642 }, /* MALE SIGN */ + { 0x26A7, 0x26A7 }, /* MALE WITH STROKE AND MALE AND FEMALE SIGN */ + { 0x2CEF, 0x2CF1 }, /* COPTIC COMBINING NI ABOVE - COPTIC COMBINING SPIRITUS LENIS */ + { 0x2D7F, 0x2D7F }, /* TIFINAGH CONSONANT JOINER */ + { 0x2DE0, 0x2DFF }, /* COMBINING CYRILLIC LETTER BE - COMBINING CYRILLIC LETTER IOTIFIED BIG YUS */ + { 0x302A, 0x302F }, /* IDEOGRAPHIC LEVEL TONE MARK - HANGUL DOUBLE DOT TONE MARK */ + { 0x3099, 0x309A }, /* COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK - COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */ + { 0xA66F, 0xA672 }, /* COMBINING CYRILLIC VZMET - COMBINING CYRILLIC THOUSAND MILLIONS SIGN */ + { 0xA674, 0xA67D }, /* COMBINING CYRILLIC LETTER UKRAINIAN IE - COMBINING CYRILLIC PAYEROK */ + { 0xA69E, 0xA69F }, /* COMBINING CYRILLIC LETTER EF - COMBINING CYRILLIC LETTER IOTIFIED E */ + { 0xA6F0, 0xA6F1 }, /* BAMUM COMBINING MARK KOQNDON - BAMUM COMBINING MARK TUKWENTIS */ 
+ { 0xA802, 0xA802 }, /* SYLOTI NAGRI SIGN DVISVARA */ + { 0xA806, 0xA806 }, /* SYLOTI NAGRI SIGN HASANTA */ + { 0xA80B, 0xA80B }, /* SYLOTI NAGRI SIGN ANUSVARA */ + { 0xA823, 0xA827 }, /* SYLOTI NAGRI VOWEL SIGN A - SYLOTI NAGRI VOWEL SIGN OO */ + { 0xA82C, 0xA82C }, /* SYLOTI NAGRI SIGN ALTERNATE HASANTA */ + { 0xA880, 0xA881 }, /* SAURASHTRA SIGN ANUSVARA - SAURASHTRA SIGN VISARGA */ + { 0xA8B4, 0xA8C5 }, /* SAURASHTRA CONSONANT SIGN HAARU - SAURASHTRA SIGN CANDRABINDU */ + { 0xA8E0, 0xA8F1 }, /* COMBINING DEVANAGARI DIGIT ZERO - COMBINING DEVANAGARI SIGN AVAGRAHA */ + { 0xA8FF, 0xA8FF }, /* DEVANAGARI VOWEL SIGN AY */ + { 0xA926, 0xA92D }, /* KAYAH LI VOWEL UE - KAYAH LI TONE CALYA PLOPHU */ + { 0xA947, 0xA953 }, /* REJANG VOWEL SIGN I - REJANG VIRAMA */ + { 0xA980, 0xA983 }, /* JAVANESE SIGN PANYANGGA - JAVANESE SIGN WIGNYAN */ + { 0xA9B3, 0xA9C0 }, /* JAVANESE SIGN CECAK TELU - JAVANESE PANGKON */ + { 0xA9E5, 0xA9E5 }, /* MYANMAR SIGN SHAN SAW */ + { 0xAA29, 0xAA36 }, /* CHAM VOWEL SIGN AA - CHAM CONSONANT SIGN WA */ + { 0xAA43, 0xAA43 }, /* CHAM CONSONANT SIGN FINAL NG */ + { 0xAA4C, 0xAA4D }, /* CHAM CONSONANT SIGN FINAL M - CHAM CONSONANT SIGN FINAL H */ + { 0xAA7B, 0xAA7D }, /* MYANMAR SIGN PAO KAREN TONE - MYANMAR SIGN TAI LAING TONE-5 */ + { 0xAAB0, 0xAAB0 }, /* TAI VIET MAI KANG */ + { 0xAAB2, 0xAAB4 }, /* TAI VIET VOWEL I - TAI VIET VOWEL U */ + { 0xAAB7, 0xAAB8 }, /* TAI VIET MAI KHIT - TAI VIET VOWEL IA */ + { 0xAABE, 0xAABF }, /* TAI VIET VOWEL AM - TAI VIET TONE MAI EK */ + { 0xAAC1, 0xAAC1 }, /* TAI VIET TONE MAI THO */ + { 0xAAEB, 0xAAEF }, /* MEETEI MAYEK VOWEL SIGN II - MEETEI MAYEK VOWEL SIGN AAU */ + { 0xAAF5, 0xAAF6 }, /* MEETEI MAYEK VOWEL SIGN VISARGA - MEETEI MAYEK VIRAMA */ + { 0xABE3, 0xABEA }, /* MEETEI MAYEK VOWEL SIGN ONAP - MEETEI MAYEK VOWEL SIGN NUNG */ + { 0xABEC, 0xABED }, /* MEETEI MAYEK LUM IYEK - MEETEI MAYEK APUN IYEK */ + { 0xFB1E, 0xFB1E }, /* HEBREW POINT JUDEO-SPANISH VARIKA */ + { 0xFE00, 0xFE0F }, /* VARIATION 
SELECTOR-1 - VARIATION SELECTOR-16 */ + { 0xFE20, 0xFE2F }, /* COMBINING LIGATURE LEFT HALF - COMBINING CYRILLIC TITLO RIGHT HALF */ + { 0xFEFF, 0xFEFF }, /* ZERO WIDTH NO-BREAK SPACE */ + { 0xFFF9, 0xFFFB }, /* INTERLINEAR ANNOTATION ANCHOR - INTERLINEAR ANNOTATION TERMINATOR */ }; -/* Double-width character ranges */ -static const struct interval double_width_ranges[] = { - { 0x01100, 0x0115F }, /* HANGUL CHOSEONG KIYEOK - HANGUL CHOSEONG FILLER */ - { 0x0231A, 0x0231B }, /* WATCH - HOURGLASS */ - { 0x02329, 0x0232A }, /* LEFT-POINTING ANGLE BRACKET - RIGHT-POINTING ANGLE BRACKET */ - { 0x023E9, 0x023EC }, /* BLACK RIGHT-POINTING DOUBLE TRIANGLE - BLACK DOWN-POINTING DOUBLE TRIANGLE */ - { 0x023F0, 0x023F0 }, /* ALARM CLOCK */ - { 0x023F3, 0x023F3 }, /* HOURGLASS WITH FLOWING SAND */ - { 0x025FD, 0x025FE }, /* WHITE MEDIUM SMALL SQUARE - BLACK MEDIUM SMALL SQUARE */ - { 0x02614, 0x02615 }, /* UMBRELLA WITH RAIN DROPS - HOT BEVERAGE */ - { 0x02630, 0x02637 }, /* TRIGRAM FOR HEAVEN - TRIGRAM FOR EARTH */ - { 0x02648, 0x02653 }, /* ARIES - PISCES */ - { 0x0267F, 0x0267F }, /* WHEELCHAIR SYMBOL */ - { 0x0268A, 0x0268F }, /* MONOGRAM FOR YANG - DIGRAM FOR GREATER YIN */ - { 0x02693, 0x02693 }, /* ANCHOR */ - { 0x026A1, 0x026A1 }, /* HIGH VOLTAGE SIGN */ - { 0x026AA, 0x026AB }, /* MEDIUM WHITE CIRCLE - MEDIUM BLACK CIRCLE */ - { 0x026BD, 0x026BE }, /* SOCCER BALL - BASEBALL */ - { 0x026C4, 0x026C5 }, /* SNOWMAN WITHOUT SNOW - SUN BEHIND CLOUD */ - { 0x026CE, 0x026CE }, /* OPHIUCHUS */ - { 0x026D4, 0x026D4 }, /* NO ENTRY */ - { 0x026EA, 0x026EA }, /* CHURCH */ - { 0x026F2, 0x026F3 }, /* FOUNTAIN - FLAG IN HOLE */ - { 0x026F5, 0x026F5 }, /* SAILBOAT */ - { 0x026FA, 0x026FA }, /* TENT */ - { 0x026FD, 0x026FD }, /* FUEL PUMP */ - { 0x02705, 0x02705 }, /* WHITE HEAVY CHECK MARK */ - { 0x0270A, 0x0270B }, /* RAISED FIST - RAISED HAND */ - { 0x02728, 0x02728 }, /* SPARKLES */ - { 0x0274C, 0x0274C }, /* CROSS MARK */ - { 0x0274E, 0x0274E }, /* NEGATIVE SQUARED CROSS MARK */ - 
{ 0x02753, 0x02755 }, /* BLACK QUESTION MARK ORNAMENT - WHITE EXCLAMATION MARK ORNAMENT */ - { 0x02757, 0x02757 }, /* HEAVY EXCLAMATION MARK SYMBOL */ - { 0x02795, 0x02797 }, /* HEAVY PLUS SIGN - HEAVY DIVISION SIGN */ - { 0x027B0, 0x027B0 }, /* CURLY LOOP */ - { 0x027BF, 0x027BF }, /* DOUBLE CURLY LOOP */ - { 0x02B1B, 0x02B1C }, /* BLACK LARGE SQUARE - WHITE LARGE SQUARE */ - { 0x02B50, 0x02B50 }, /* WHITE MEDIUM STAR */ - { 0x02B55, 0x02B55 }, /* HEAVY LARGE CIRCLE */ - { 0x02E80, 0x02E99 }, /* CJK RADICAL REPEAT - CJK RADICAL RAP */ - { 0x02E9B, 0x02EF3 }, /* CJK RADICAL CHOKE - CJK RADICAL C-SIMPLIFIED TURTLE */ - { 0x02F00, 0x02FD5 }, /* KANGXI RADICAL ONE - KANGXI RADICAL FLUTE */ - { 0x02FF0, 0x03029 }, /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT - HANGZHOU NUMERAL NINE */ - { 0x03030, 0x0303E }, /* WAVY DASH - IDEOGRAPHIC VARIATION INDICATOR */ - { 0x03041, 0x03096 }, /* HIRAGANA LETTER SMALL A - HIRAGANA LETTER SMALL KE */ - { 0x0309B, 0x030FF }, /* KATAKANA-HIRAGANA VOICED SOUND MARK - KATAKANA DIGRAPH KOTO */ - { 0x03105, 0x0312F }, /* BOPOMOFO LETTER B - BOPOMOFO LETTER NN */ - { 0x03131, 0x0318E }, /* HANGUL LETTER KIYEOK - HANGUL LETTER ARAEAE */ - { 0x03190, 0x031E5 }, /* IDEOGRAPHIC ANNOTATION LINKING MARK - CJK STROKE SZP */ - { 0x031EF, 0x0321E }, /* IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION - PARENTHESIZED KOREAN CHARACTER O HU */ - { 0x03220, 0x03247 }, /* PARENTHESIZED IDEOGRAPH ONE - CIRCLED IDEOGRAPH KOTO */ - { 0x03250, 0x0A48C }, /* PARTNERSHIP SIGN - YI SYLLABLE YYR */ - { 0x0A490, 0x0A4C6 }, /* YI RADICAL QOT - YI RADICAL KE */ - { 0x0A960, 0x0A97C }, /* HANGUL CHOSEONG TIKEUT-MIEUM - HANGUL CHOSEONG SSANGYEORINHIEUH */ - { 0x0AC00, 0x0D7A3 }, /* HANGUL SYLLABLE GA - HANGUL SYLLABLE HIH */ - { 0x0F900, 0x0FAFF }, /* U+0F900 - U+0FAFF */ - { 0x0FE10, 0x0FE19 }, /* PRESENTATION FORM FOR VERTICAL COMMA - PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS */ - { 0x0FE30, 0x0FE52 }, /* PRESENTATION FORM FOR VERTICAL TWO DOT LEADER - 
SMALL FULL STOP */ - { 0x0FE54, 0x0FE66 }, /* SMALL SEMICOLON - SMALL EQUALS SIGN */ - { 0x0FE68, 0x0FE6B }, /* SMALL REVERSE SOLIDUS - SMALL COMMERCIAL AT */ - { 0x0FF01, 0x0FF60 }, /* FULLWIDTH EXCLAMATION MARK - FULLWIDTH RIGHT WHITE PARENTHESIS */ - { 0x0FFE0, 0x0FFE6 }, /* FULLWIDTH CENT SIGN - FULLWIDTH WON SIGN */ - { 0x16FE0, 0x16FE3 }, /* U+16FE0 - U+16FE3 */ +/* Zero-width character ranges (non-BMP, U+10000 and above) */ +static const struct interval32 zero_width_non_bmp[] = { + { 0x101FD, 0x101FD }, /* PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE */ + { 0x102E0, 0x102E0 }, /* COPTIC EPACT THOUSANDS MARK */ + { 0x10376, 0x1037A }, /* COMBINING OLD PERMIC LETTER AN - COMBINING OLD PERMIC LETTER SII */ + { 0x10A01, 0x10A03 }, /* KHAROSHTHI VOWEL SIGN I - KHAROSHTHI VOWEL SIGN VOCALIC R */ + { 0x10A05, 0x10A06 }, /* KHAROSHTHI VOWEL SIGN E - KHAROSHTHI VOWEL SIGN O */ + { 0x10A0C, 0x10A0F }, /* KHAROSHTHI VOWEL LENGTH MARK - KHAROSHTHI SIGN VISARGA */ + { 0x10A38, 0x10A3A }, /* KHAROSHTHI SIGN BAR ABOVE - KHAROSHTHI SIGN DOT BELOW */ + { 0x10A3F, 0x10A3F }, /* KHAROSHTHI VIRAMA */ + { 0x10AE5, 0x10AE6 }, /* MANICHAEAN ABBREVIATION MARK ABOVE - MANICHAEAN ABBREVIATION MARK BELOW */ + { 0x10D24, 0x10D27 }, /* HANIFI ROHINGYA SIGN HARBAHAY - HANIFI ROHINGYA SIGN TASSI */ + { 0x10D69, 0x10D6D }, /* GARAY VOWEL SIGN E - GARAY CONSONANT NASALIZATION MARK */ + { 0x10EAB, 0x10EAC }, /* YEZIDI COMBINING HAMZA MARK - YEZIDI COMBINING MADDA MARK */ + { 0x10EFC, 0x10EFF }, /* ARABIC COMBINING ALEF OVERLAY - ARABIC SMALL LOW WORD MADDA */ + { 0x10F46, 0x10F50 }, /* SOGDIAN COMBINING DOT BELOW - SOGDIAN COMBINING STROKE BELOW */ + { 0x10F82, 0x10F85 }, /* OLD UYGHUR COMBINING DOT ABOVE - OLD UYGHUR COMBINING TWO DOTS BELOW */ + { 0x11000, 0x11002 }, /* BRAHMI SIGN CANDRABINDU - BRAHMI SIGN VISARGA */ + { 0x11038, 0x11046 }, /* BRAHMI VOWEL SIGN AA - BRAHMI VIRAMA */ + { 0x11070, 0x11070 }, /* BRAHMI SIGN OLD TAMIL VIRAMA */ + { 0x11073, 0x11074 }, /* BRAHMI VOWEL SIGN OLD 
TAMIL SHORT E - BRAHMI VOWEL SIGN OLD TAMIL SHORT O */ + { 0x1107F, 0x11082 }, /* BRAHMI NUMBER JOINER - KAITHI SIGN VISARGA */ + { 0x110B0, 0x110BA }, /* KAITHI VOWEL SIGN AA - KAITHI SIGN NUKTA */ + { 0x110BD, 0x110BD }, /* KAITHI NUMBER SIGN */ + { 0x110C2, 0x110C2 }, /* KAITHI VOWEL SIGN VOCALIC R */ + { 0x110CD, 0x110CD }, /* KAITHI NUMBER SIGN ABOVE */ + { 0x11100, 0x11102 }, /* CHAKMA SIGN CANDRABINDU - CHAKMA SIGN VISARGA */ + { 0x11127, 0x11134 }, /* CHAKMA VOWEL SIGN A - CHAKMA MAAYYAA */ + { 0x11145, 0x11146 }, /* CHAKMA VOWEL SIGN AA - CHAKMA VOWEL SIGN EI */ + { 0x11173, 0x11173 }, /* MAHAJANI SIGN NUKTA */ + { 0x11180, 0x11182 }, /* SHARADA SIGN CANDRABINDU - SHARADA SIGN VISARGA */ + { 0x111B3, 0x111C0 }, /* SHARADA VOWEL SIGN AA - SHARADA SIGN VIRAMA */ + { 0x111C9, 0x111CC }, /* SHARADA SANDHI MARK - SHARADA EXTRA SHORT VOWEL MARK */ + { 0x111CE, 0x111CF }, /* SHARADA VOWEL SIGN PRISHTHAMATRA E - SHARADA SIGN INVERTED CANDRABINDU */ + { 0x1122C, 0x11237 }, /* KHOJKI VOWEL SIGN AA - KHOJKI SIGN SHADDA */ + { 0x1123E, 0x1123E }, /* KHOJKI SIGN SUKUN */ + { 0x11241, 0x11241 }, /* KHOJKI VOWEL SIGN VOCALIC R */ + { 0x112DF, 0x112EA }, /* KHUDAWADI SIGN ANUSVARA - KHUDAWADI SIGN VIRAMA */ + { 0x11300, 0x11303 }, /* GRANTHA SIGN COMBINING ANUSVARA ABOVE - GRANTHA SIGN VISARGA */ + { 0x1133B, 0x1133C }, /* COMBINING BINDU BELOW - GRANTHA SIGN NUKTA */ + { 0x1133E, 0x11344 }, /* GRANTHA VOWEL SIGN AA - GRANTHA VOWEL SIGN VOCALIC RR */ + { 0x11347, 0x11348 }, /* GRANTHA VOWEL SIGN EE - GRANTHA VOWEL SIGN AI */ + { 0x1134B, 0x1134D }, /* GRANTHA VOWEL SIGN OO - GRANTHA SIGN VIRAMA */ + { 0x11357, 0x11357 }, /* GRANTHA AU LENGTH MARK */ + { 0x11362, 0x11363 }, /* GRANTHA VOWEL SIGN VOCALIC L - GRANTHA VOWEL SIGN VOCALIC LL */ + { 0x11366, 0x1136C }, /* COMBINING GRANTHA DIGIT ZERO - COMBINING GRANTHA DIGIT SIX */ + { 0x11370, 0x11374 }, /* COMBINING GRANTHA LETTER A - COMBINING GRANTHA LETTER PA */ + { 0x113B8, 0x113C0 }, /* TULU-TIGALARI VOWEL SIGN AA - 
TULU-TIGALARI VOWEL SIGN VOCALIC LL */ + { 0x113C2, 0x113C2 }, /* TULU-TIGALARI VOWEL SIGN EE */ + { 0x113C5, 0x113C5 }, /* TULU-TIGALARI VOWEL SIGN AI */ + { 0x113C7, 0x113CA }, /* TULU-TIGALARI VOWEL SIGN OO - TULU-TIGALARI SIGN CANDRA ANUNASIKA */ + { 0x113CC, 0x113D0 }, /* TULU-TIGALARI SIGN ANUSVARA - TULU-TIGALARI CONJOINER */ + { 0x113D2, 0x113D2 }, /* TULU-TIGALARI GEMINATION MARK */ + { 0x113E1, 0x113E2 }, /* TULU-TIGALARI VEDIC TONE SVARITA - TULU-TIGALARI VEDIC TONE ANUDATTA */ + { 0x11435, 0x11446 }, /* NEWA VOWEL SIGN AA - NEWA SIGN NUKTA */ + { 0x1145E, 0x1145E }, /* NEWA SANDHI MARK */ + { 0x114B0, 0x114C3 }, /* TIRHUTA VOWEL SIGN AA - TIRHUTA SIGN NUKTA */ + { 0x115AF, 0x115B5 }, /* SIDDHAM VOWEL SIGN AA - SIDDHAM VOWEL SIGN VOCALIC RR */ + { 0x115B8, 0x115C0 }, /* SIDDHAM VOWEL SIGN E - SIDDHAM SIGN NUKTA */ + { 0x115DC, 0x115DD }, /* SIDDHAM VOWEL SIGN ALTERNATE U - SIDDHAM VOWEL SIGN ALTERNATE UU */ + { 0x11630, 0x11640 }, /* MODI VOWEL SIGN AA - MODI SIGN ARDHACANDRA */ + { 0x116AB, 0x116B7 }, /* TAKRI SIGN ANUSVARA - TAKRI SIGN NUKTA */ + { 0x1171D, 0x1172B }, /* AHOM CONSONANT SIGN MEDIAL LA - AHOM SIGN KILLER */ + { 0x1182C, 0x1183A }, /* DOGRA VOWEL SIGN AA - DOGRA SIGN NUKTA */ + { 0x11930, 0x11935 }, /* DIVES AKURU VOWEL SIGN AA - DIVES AKURU VOWEL SIGN E */ + { 0x11937, 0x11938 }, /* DIVES AKURU VOWEL SIGN AI - DIVES AKURU VOWEL SIGN O */ + { 0x1193B, 0x1193E }, /* DIVES AKURU SIGN ANUSVARA - DIVES AKURU VIRAMA */ + { 0x11940, 0x11940 }, /* DIVES AKURU MEDIAL YA */ + { 0x11942, 0x11943 }, /* DIVES AKURU MEDIAL RA - DIVES AKURU SIGN NUKTA */ + { 0x119D1, 0x119D7 }, /* NANDINAGARI VOWEL SIGN AA - NANDINAGARI VOWEL SIGN VOCALIC RR */ + { 0x119DA, 0x119E0 }, /* NANDINAGARI VOWEL SIGN E - NANDINAGARI SIGN VIRAMA */ + { 0x119E4, 0x119E4 }, /* NANDINAGARI VOWEL SIGN PRISHTHAMATRA E */ + { 0x11A01, 0x11A0A }, /* ZANABAZAR SQUARE VOWEL SIGN I - ZANABAZAR SQUARE VOWEL LENGTH MARK */ + { 0x11A33, 0x11A39 }, /* ZANABAZAR SQUARE FINAL CONSONANT MARK - 
ZANABAZAR SQUARE SIGN VISARGA */ + { 0x11A3B, 0x11A3E }, /* ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA - ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA */ + { 0x11A47, 0x11A47 }, /* ZANABAZAR SQUARE SUBJOINER */ + { 0x11A51, 0x11A5B }, /* SOYOMBO VOWEL SIGN I - SOYOMBO VOWEL LENGTH MARK */ + { 0x11A8A, 0x11A99 }, /* SOYOMBO FINAL CONSONANT SIGN G - SOYOMBO SUBJOINER */ + { 0x11C2F, 0x11C36 }, /* BHAIKSUKI VOWEL SIGN AA - BHAIKSUKI VOWEL SIGN VOCALIC L */ + { 0x11C38, 0x11C3F }, /* BHAIKSUKI VOWEL SIGN E - BHAIKSUKI SIGN VIRAMA */ + { 0x11C92, 0x11CA7 }, /* MARCHEN SUBJOINED LETTER KA - MARCHEN SUBJOINED LETTER ZA */ + { 0x11CA9, 0x11CB6 }, /* MARCHEN SUBJOINED LETTER YA - MARCHEN SIGN CANDRABINDU */ + { 0x11D31, 0x11D36 }, /* MASARAM GONDI VOWEL SIGN AA - MASARAM GONDI VOWEL SIGN VOCALIC R */ + { 0x11D3A, 0x11D3A }, /* MASARAM GONDI VOWEL SIGN E */ + { 0x11D3C, 0x11D3D }, /* MASARAM GONDI VOWEL SIGN AI - MASARAM GONDI VOWEL SIGN O */ + { 0x11D3F, 0x11D45 }, /* MASARAM GONDI VOWEL SIGN AU - MASARAM GONDI VIRAMA */ + { 0x11D47, 0x11D47 }, /* MASARAM GONDI RA-KARA */ + { 0x11D8A, 0x11D8E }, /* GUNJALA GONDI VOWEL SIGN AA - GUNJALA GONDI VOWEL SIGN UU */ + { 0x11D90, 0x11D91 }, /* GUNJALA GONDI VOWEL SIGN EE - GUNJALA GONDI VOWEL SIGN AI */ + { 0x11D93, 0x11D97 }, /* GUNJALA GONDI VOWEL SIGN OO - GUNJALA GONDI VIRAMA */ + { 0x11EF3, 0x11EF6 }, /* MAKASAR VOWEL SIGN I - MAKASAR VOWEL SIGN O */ + { 0x11F00, 0x11F01 }, /* KAWI SIGN CANDRABINDU - KAWI SIGN ANUSVARA */ + { 0x11F03, 0x11F03 }, /* KAWI SIGN VISARGA */ + { 0x11F34, 0x11F3A }, /* KAWI VOWEL SIGN AA - KAWI VOWEL SIGN VOCALIC R */ + { 0x11F3E, 0x11F42 }, /* KAWI VOWEL SIGN E - KAWI CONJOINER */ + { 0x11F5A, 0x11F5A }, /* KAWI SIGN NUKTA */ + { 0x13430, 0x13440 }, /* EGYPTIAN HIEROGLYPH VERTICAL JOINER - EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY */ + { 0x13447, 0x13455 }, /* EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START - EGYPTIAN HIEROGLYPH MODIFIER DAMAGED */ + { 0x1611E, 0x1612F }, /* GURUNG KHEMA VOWEL SIGN AA - 
GURUNG KHEMA SIGN THOLHOMA */ + { 0x16AF0, 0x16AF4 }, /* BASSA VAH COMBINING HIGH TONE - BASSA VAH COMBINING HIGH-LOW TONE */ + { 0x16B30, 0x16B36 }, /* PAHAWH HMONG MARK CIM TUB - PAHAWH HMONG MARK CIM TAUM */ + { 0x16F4F, 0x16F4F }, /* MIAO SIGN CONSONANT MODIFIER BAR */ + { 0x16F51, 0x16F87 }, /* MIAO SIGN ASPIRATION - MIAO VOWEL SIGN UI */ + { 0x16F8F, 0x16F92 }, /* MIAO TONE RIGHT - MIAO TONE BELOW */ + { 0x16FE4, 0x16FE4 }, /* KHITAN SMALL SCRIPT FILLER */ + { 0x16FF0, 0x16FF1 }, /* VIETNAMESE ALTERNATE READING MARK CA - VIETNAMESE ALTERNATE READING MARK NHAY */ + { 0x1BC9D, 0x1BC9E }, /* DUPLOYAN THICK LETTER SELECTOR - DUPLOYAN DOUBLE MARK */ + { 0x1BCA0, 0x1BCA3 }, /* SHORTHAND FORMAT LETTER OVERLAP - SHORTHAND FORMAT UP STEP */ + { 0x1CF00, 0x1CF2D }, /* ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT - ZNAMENNY COMBINING MARK KRYZH ON LEFT */ + { 0x1CF30, 0x1CF46 }, /* ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO - ZNAMENNY PRIZNAK MODIFIER ROG */ + { 0x1D165, 0x1D169 }, /* MUSICAL SYMBOL COMBINING STEM - MUSICAL SYMBOL COMBINING TREMOLO-3 */ + { 0x1D16D, 0x1D182 }, /* MUSICAL SYMBOL COMBINING AUGMENTATION DOT - MUSICAL SYMBOL COMBINING LOURE */ + { 0x1D185, 0x1D18B }, /* MUSICAL SYMBOL COMBINING DOIT - MUSICAL SYMBOL COMBINING TRIPLE TONGUE */ + { 0x1D1AA, 0x1D1AD }, /* MUSICAL SYMBOL COMBINING DOWN BOW - MUSICAL SYMBOL COMBINING SNAP PIZZICATO */ + { 0x1D242, 0x1D244 }, /* COMBINING GREEK MUSICAL TRISEME - COMBINING GREEK MUSICAL PENTASEME */ + { 0x1DA00, 0x1DA36 }, /* SIGNWRITING HEAD RIM - SIGNWRITING AIR SUCKING IN */ + { 0x1DA3B, 0x1DA6C }, /* SIGNWRITING MOUTH CLOSED NEUTRAL - SIGNWRITING EXCITEMENT */ + { 0x1DA75, 0x1DA75 }, /* SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS */ + { 0x1DA84, 0x1DA84 }, /* SIGNWRITING LOCATION HEAD NECK */ + { 0x1DA9B, 0x1DA9F }, /* SIGNWRITING FILL MODIFIER-2 - SIGNWRITING FILL MODIFIER-6 */ + { 0x1DAA1, 0x1DAAF }, /* SIGNWRITING ROTATION MODIFIER-2 - SIGNWRITING ROTATION MODIFIER-16 */ + { 0x1E000, 0x1E006 
}, /* COMBINING GLAGOLITIC LETTER AZU - COMBINING GLAGOLITIC LETTER ZHIVETE */ + { 0x1E008, 0x1E018 }, /* COMBINING GLAGOLITIC LETTER ZEMLJA - COMBINING GLAGOLITIC LETTER HERU */ + { 0x1E01B, 0x1E021 }, /* COMBINING GLAGOLITIC LETTER SHTA - COMBINING GLAGOLITIC LETTER YATI */ + { 0x1E023, 0x1E024 }, /* COMBINING GLAGOLITIC LETTER YU - COMBINING GLAGOLITIC LETTER SMALL YUS */ + { 0x1E026, 0x1E02A }, /* COMBINING GLAGOLITIC LETTER YO - COMBINING GLAGOLITIC LETTER FITA */ + { 0x1E08F, 0x1E08F }, /* COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I */ + { 0x1E130, 0x1E136 }, /* NYIAKENG PUACHUE HMONG TONE-B - NYIAKENG PUACHUE HMONG TONE-D */ + { 0x1E2AE, 0x1E2AE }, /* TOTO SIGN RISING TONE */ + { 0x1E2EC, 0x1E2EF }, /* WANCHO TONE TUP - WANCHO TONE KOINI */ + { 0x1E4EC, 0x1E4EF }, /* NAG MUNDARI SIGN MUHOR - NAG MUNDARI SIGN SUTUH */ + { 0x1E5EE, 0x1E5EF }, /* OL ONAL SIGN MU - OL ONAL SIGN IKIR */ + { 0x1E8D0, 0x1E8D6 }, /* MENDE KIKAKUI COMBINING NUMBER TEENS - MENDE KIKAKUI COMBINING NUMBER MILLIONS */ + { 0x1E944, 0x1E94A }, /* ADLAM ALIF LENGTHENER - ADLAM NUKTA */ + { 0x1F3FB, 0x1F3FF }, /* EMOJI MODIFIER FITZPATRICK TYPE-1-2 - EMOJI MODIFIER FITZPATRICK TYPE-6 */ + { 0x1F9B0, 0x1F9B3 }, /* EMOJI COMPONENT RED HAIR - EMOJI COMPONENT WHITE HAIR */ + { 0xE0001, 0xE0001 }, /* LANGUAGE TAG */ + { 0xE0020, 0xE007F }, /* TAG SPACE - CANCEL TAG */ + { 0xE0100, 0xE01EF }, /* VARIATION SELECTOR-17 - VARIATION SELECTOR-256 */ +}; + +/* Double-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */ +static const struct interval16 double_width_bmp[] = { + { 0x1100, 0x115F }, /* HANGUL CHOSEONG KIYEOK - HANGUL CHOSEONG FILLER */ + { 0x231A, 0x231B }, /* WATCH - HOURGLASS */ + { 0x2329, 0x232A }, /* LEFT-POINTING ANGLE BRACKET - RIGHT-POINTING ANGLE BRACKET */ + { 0x23E9, 0x23EC }, /* BLACK RIGHT-POINTING DOUBLE TRIANGLE - BLACK DOWN-POINTING DOUBLE TRIANGLE */ + { 0x23F0, 0x23F0 }, /* ALARM CLOCK */ + { 0x23F3, 0x23F3 }, /* HOURGLASS WITH FLOWING 
SAND */ + { 0x25FD, 0x25FE }, /* WHITE MEDIUM SMALL SQUARE - BLACK MEDIUM SMALL SQUARE */ + { 0x2614, 0x2615 }, /* UMBRELLA WITH RAIN DROPS - HOT BEVERAGE */ + { 0x2630, 0x2637 }, /* TRIGRAM FOR HEAVEN - TRIGRAM FOR EARTH */ + { 0x2648, 0x2653 }, /* ARIES - PISCES */ + { 0x267F, 0x267F }, /* WHEELCHAIR SYMBOL */ + { 0x268A, 0x268F }, /* MONOGRAM FOR YANG - DIGRAM FOR GREATER YIN */ + { 0x2693, 0x2693 }, /* ANCHOR */ + { 0x26A1, 0x26A1 }, /* HIGH VOLTAGE SIGN */ + { 0x26AA, 0x26AB }, /* MEDIUM WHITE CIRCLE - MEDIUM BLACK CIRCLE */ + { 0x26BD, 0x26BE }, /* SOCCER BALL - BASEBALL */ + { 0x26C4, 0x26C5 }, /* SNOWMAN WITHOUT SNOW - SUN BEHIND CLOUD */ + { 0x26CE, 0x26CE }, /* OPHIUCHUS */ + { 0x26D4, 0x26D4 }, /* NO ENTRY */ + { 0x26EA, 0x26EA }, /* CHURCH */ + { 0x26F2, 0x26F3 }, /* FOUNTAIN - FLAG IN HOLE */ + { 0x26F5, 0x26F5 }, /* SAILBOAT */ + { 0x26FA, 0x26FA }, /* TENT */ + { 0x26FD, 0x26FD }, /* FUEL PUMP */ + { 0x2705, 0x2705 }, /* WHITE HEAVY CHECK MARK */ + { 0x270A, 0x270B }, /* RAISED FIST - RAISED HAND */ + { 0x2728, 0x2728 }, /* SPARKLES */ + { 0x274C, 0x274C }, /* CROSS MARK */ + { 0x274E, 0x274E }, /* NEGATIVE SQUARED CROSS MARK */ + { 0x2753, 0x2755 }, /* BLACK QUESTION MARK ORNAMENT - WHITE EXCLAMATION MARK ORNAMENT */ + { 0x2757, 0x2757 }, /* HEAVY EXCLAMATION MARK SYMBOL */ + { 0x2795, 0x2797 }, /* HEAVY PLUS SIGN - HEAVY DIVISION SIGN */ + { 0x27B0, 0x27B0 }, /* CURLY LOOP */ + { 0x27BF, 0x27BF }, /* DOUBLE CURLY LOOP */ + { 0x2B1B, 0x2B1C }, /* BLACK LARGE SQUARE - WHITE LARGE SQUARE */ + { 0x2B50, 0x2B50 }, /* WHITE MEDIUM STAR */ + { 0x2B55, 0x2B55 }, /* HEAVY LARGE CIRCLE */ + { 0x2E80, 0x2E99 }, /* CJK RADICAL REPEAT - CJK RADICAL RAP */ + { 0x2E9B, 0x2EF3 }, /* CJK RADICAL CHOKE - CJK RADICAL C-SIMPLIFIED TURTLE */ + { 0x2F00, 0x2FD5 }, /* KANGXI RADICAL ONE - KANGXI RADICAL FLUTE */ + { 0x2FF0, 0x3029 }, /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT - HANGZHOU NUMERAL NINE */ + { 0x3030, 0x303E }, /* WAVY DASH - IDEOGRAPHIC VARIATION 
INDICATOR */ + { 0x3041, 0x3096 }, /* HIRAGANA LETTER SMALL A - HIRAGANA LETTER SMALL KE */ + { 0x309B, 0x30FF }, /* KATAKANA-HIRAGANA VOICED SOUND MARK - KATAKANA DIGRAPH KOTO */ + { 0x3105, 0x312F }, /* BOPOMOFO LETTER B - BOPOMOFO LETTER NN */ + { 0x3131, 0x318E }, /* HANGUL LETTER KIYEOK - HANGUL LETTER ARAEAE */ + { 0x3190, 0x31E5 }, /* IDEOGRAPHIC ANNOTATION LINKING MARK - CJK STROKE SZP */ + { 0x31EF, 0x321E }, /* IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION - PARENTHESIZED KOREAN CHARACTER O HU */ + { 0x3220, 0x3247 }, /* PARENTHESIZED IDEOGRAPH ONE - CIRCLED IDEOGRAPH KOTO */ + { 0x3250, 0xA48C }, /* PARTNERSHIP SIGN - YI SYLLABLE YYR */ + { 0xA490, 0xA4C6 }, /* YI RADICAL QOT - YI RADICAL KE */ + { 0xA960, 0xA97C }, /* HANGUL CHOSEONG TIKEUT-MIEUM - HANGUL CHOSEONG SSANGYEORINHIEUH */ + { 0xAC00, 0xD7A3 }, /* HANGUL SYLLABLE GA - HANGUL SYLLABLE HIH */ + { 0xF900, 0xFAFF }, /* U+F900 - U+FAFF */ + { 0xFE10, 0xFE19 }, /* PRESENTATION FORM FOR VERTICAL COMMA - PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS */ + { 0xFE30, 0xFE52 }, /* PRESENTATION FORM FOR VERTICAL TWO DOT LEADER - SMALL FULL STOP */ + { 0xFE54, 0xFE66 }, /* SMALL SEMICOLON - SMALL EQUALS SIGN */ + { 0xFE68, 0xFE6B }, /* SMALL REVERSE SOLIDUS - SMALL COMMERCIAL AT */ + { 0xFF01, 0xFF60 }, /* FULLWIDTH EXCLAMATION MARK - FULLWIDTH RIGHT WHITE PARENTHESIS */ + { 0xFFE0, 0xFFE6 }, /* FULLWIDTH CENT SIGN - FULLWIDTH WON SIGN */ +}; + +/* Double-width character ranges (non-BMP, U+10000 and above) */ +static const struct interval32 double_width_non_bmp[] = { + { 0x16FE0, 0x16FE3 }, /* TANGUT ITERATION MARK - OLD CHINESE ITERATION MARK */ { 0x17000, 0x187F7 }, /* U+17000 - U+187F7 */ - { 0x18800, 0x18CD5 }, /* U+18800 - U+18CD5 */ + { 0x18800, 0x18CD5 }, /* TANGUT COMPONENT-001 - KHITAN SMALL SCRIPT CHARACTER-18CD5 */ { 0x18CFF, 0x18D08 }, /* U+18CFF - U+18D08 */ - { 0x1AFF0, 0x1AFF3 }, /* U+1AFF0 - U+1AFF3 */ - { 0x1AFF5, 0x1AFFB }, /* U+1AFF5 - U+1AFFB */ - { 0x1AFFD, 0x1AFFE }, /* U+1AFFD - 
U+1AFFE */ - { 0x1B000, 0x1B122 }, /* U+1B000 - U+1B122 */ - { 0x1B132, 0x1B132 }, /* U+1B132 */ - { 0x1B150, 0x1B152 }, /* U+1B150 - U+1B152 */ - { 0x1B155, 0x1B155 }, /* U+1B155 */ - { 0x1B164, 0x1B167 }, /* U+1B164 - U+1B167 */ - { 0x1B170, 0x1B2FB }, /* U+1B170 - U+1B2FB */ - { 0x1D300, 0x1D356 }, /* U+1D300 - U+1D356 */ - { 0x1D360, 0x1D376 }, /* U+1D360 - U+1D376 */ + { 0x1AFF0, 0x1AFF3 }, /* KATAKANA LETTER MINNAN TONE-2 - KATAKANA LETTER MINNAN TONE-5 */ + { 0x1AFF5, 0x1AFFB }, /* KATAKANA LETTER MINNAN TONE-7 - KATAKANA LETTER MINNAN NASALIZED TONE-5 */ + { 0x1AFFD, 0x1AFFE }, /* KATAKANA LETTER MINNAN NASALIZED TONE-7 - KATAKANA LETTER MINNAN NASALIZED TONE-8 */ + { 0x1B000, 0x1B122 }, /* KATAKANA LETTER ARCHAIC E - KATAKANA LETTER ARCHAIC WU */ + { 0x1B132, 0x1B132 }, /* HIRAGANA LETTER SMALL KO */ + { 0x1B150, 0x1B152 }, /* HIRAGANA LETTER SMALL WI - HIRAGANA LETTER SMALL WO */ + { 0x1B155, 0x1B155 }, /* KATAKANA LETTER SMALL KO */ + { 0x1B164, 0x1B167 }, /* KATAKANA LETTER SMALL WI - KATAKANA LETTER SMALL N */ + { 0x1B170, 0x1B2FB }, /* NUSHU CHARACTER-1B170 - NUSHU CHARACTER-1B2FB */ + { 0x1D300, 0x1D356 }, /* MONOGRAM FOR EARTH - TETRAGRAM FOR FOSTERING */ + { 0x1D360, 0x1D376 }, /* COUNTING ROD UNIT DIGIT ONE - IDEOGRAPHIC TALLY MARK FIVE */ { 0x1F000, 0x1F02F }, /* U+1F000 - U+1F02F */ { 0x1F0A0, 0x1F0FF }, /* U+1F0A0 - U+1F0FF */ - { 0x1F18E, 0x1F18E }, /* U+1F18E */ - { 0x1F191, 0x1F19A }, /* U+1F191 - U+1F19A */ - { 0x1F200, 0x1F202 }, /* U+1F200 - U+1F202 */ - { 0x1F210, 0x1F23B }, /* U+1F210 - U+1F23B */ - { 0x1F240, 0x1F248 }, /* U+1F240 - U+1F248 */ - { 0x1F250, 0x1F251 }, /* U+1F250 - U+1F251 */ - { 0x1F260, 0x1F265 }, /* U+1F260 - U+1F265 */ - { 0x1F300, 0x1F3FA }, /* U+1F300 - U+1F3FA */ - { 0x1F400, 0x1F64F }, /* U+1F400 - U+1F64F */ - { 0x1F680, 0x1F9AF }, /* U+1F680 - U+1F9AF */ + { 0x1F18E, 0x1F18E }, /* NEGATIVE SQUARED AB */ + { 0x1F191, 0x1F19A }, /* SQUARED CL - SQUARED VS */ + { 0x1F200, 0x1F202 }, /* SQUARE HIRAGANA HOKA - 
SQUARED KATAKANA SA */ + { 0x1F210, 0x1F23B }, /* SQUARED CJK UNIFIED IDEOGRAPH-624B - SQUARED CJK UNIFIED IDEOGRAPH-914D */ + { 0x1F240, 0x1F248 }, /* TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C - TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 */ + { 0x1F250, 0x1F251 }, /* CIRCLED IDEOGRAPH ADVANTAGE - CIRCLED IDEOGRAPH ACCEPT */ + { 0x1F260, 0x1F265 }, /* ROUNDED SYMBOL FOR FU - ROUNDED SYMBOL FOR CAI */ + { 0x1F300, 0x1F3FA }, /* CYCLONE - AMPHORA */ + { 0x1F400, 0x1F64F }, /* RAT - PERSON WITH FOLDED HANDS */ + { 0x1F680, 0x1F9AF }, /* ROCKET - PROBING CANE */ { 0x1F9B4, 0x1FAFF }, /* U+1F9B4 - U+1FAFF */ { 0x20000, 0x2FFFD }, /* U+20000 - U+2FFFD */ { 0x30000, 0x3FFFD }, /* U+30000 - U+3FFFD */ }; -static int ucs_cmp(const void *key, const void *element) +static int ucs_cmp16(const void *key, const void *element) { - uint32_t cp = *(uint32_t *)key; - const struct interval *e = element; + uint16_t cp = *(uint16_t *)key; + const struct interval16 *e = element; if (cp > e->last) return 1; @@ -466,13 +479,34 @@ static int ucs_cmp(const void *key, const void *element) return 0; } -static bool is_in_interval(uint32_t cp, const struct interval *intervals, size_t count) +static int ucs_cmp32(const void *key, const void *element) +{ + uint32_t cp = *(uint32_t *)key; + const struct interval32 *e = element; + + if (cp > e->last) + return 1; + if (cp < e->first) + return -1; + return 0; +} + +static bool is_in_interval16(uint16_t cp, const struct interval16 *intervals, size_t count) { if (cp < intervals[0].first || cp > intervals[count - 1].last) return false; return __inline_bsearch(&cp, intervals, count, - sizeof(*intervals), ucs_cmp) != NULL; + sizeof(*intervals), ucs_cmp16) != NULL; +} + +static bool is_in_interval32(uint32_t cp, const struct interval32 *intervals, size_t count) +{ + if (cp < intervals[0].first || cp > intervals[count - 1].last) + return false; + + return __inline_bsearch(&cp, intervals, count, + sizeof(*intervals), ucs_cmp32) != NULL; } 
/** @@ -483,7 +517,9 @@ static bool is_in_interval(uint32_t cp, const struct interval *intervals, size_t */ bool ucs_is_zero_width(uint32_t cp) { - return is_in_interval(cp, zero_width_ranges, ARRAY_SIZE(zero_width_ranges)); + return (cp <= 0xFFFF) + ? is_in_interval16(cp, zero_width_bmp, ARRAY_SIZE(zero_width_bmp)) + : is_in_interval32(cp, zero_width_non_bmp, ARRAY_SIZE(zero_width_non_bmp)); } /** @@ -494,5 +530,7 @@ bool ucs_is_zero_width(uint32_t cp) */ bool ucs_is_double_width(uint32_t cp) { - return is_in_interval(cp, double_width_ranges, ARRAY_SIZE(double_width_ranges)); + return (cp <= 0xFFFF) + ? is_in_interval16(cp, double_width_bmp, ARRAY_SIZE(double_width_bmp)) + : is_in_interval32(cp, double_width_non_bmp, ARRAY_SIZE(double_width_non_bmp)); } From 547f57b88d5f2ad4e9ab5e0d63a668467c10c736 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 9 Apr 2025 21:14:03 -0400 Subject: [PATCH 028/105] vt: pad double-width code points with a zero-white-space In the Unicode screen buffer, we follow double-width code points with a space to maintain proper column alignment. This, however, creates semantic problems when e.g. using cut and paste or selection. Let's use a better code point for the column padding's purpose i.e. a zero-white-space rather than a full space. 
Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250410011839.64418-12-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index e3d35c4f9204..dc84f9c6b7c6 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2937,12 +2937,13 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, width = 2; } else if (ucs_is_zero_width(c)) { prev_c = vc_uniscr_getc(vc, -1); - if (prev_c == ' ' && + if (prev_c == 0x200B && ucs_is_double_width(vc_uniscr_getc(vc, -2))) { /* * Let's merge this zero-width code point with * the preceding double-width code point by - * replacing the existing whitespace padding. + * replacing the existing zero-white-space + * padding. */ vc_con_rewind(vc); } else if (c == 0xfe0f && prev_c != 0) { @@ -3040,7 +3041,11 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, tc = conv_uni_to_pc(vc, ' '); if (tc < 0) tc = ' '; - next_c = ' '; + /* + * Store a zero-white-space in the Unicode screen given that + * the previous code point is semantically double-width. + */ + next_c = 0x200B; } out: From b35f7a773cbcbfea3bc87a33c7d0f39e34ed83ec Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 10 Apr 2025 15:38:13 -0400 Subject: [PATCH 029/105] vt: remove zero-white-space handling from conv_uni_to_pc() This is now taken care of by ucs_is_zero_width(). And in the case where we do want a padding from some zero-width code point then we should also give the legacy displays a space character to work with. 
Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/6o2ss437-6nps-s943-1n38-54np5587r08s@syhkavp.arg Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/consolemap.c | 2 -- drivers/tty/vt/vt.c | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/tty/vt/consolemap.c b/drivers/tty/vt/consolemap.c index 82d70083fead..bb4bb272ebec 100644 --- a/drivers/tty/vt/consolemap.c +++ b/drivers/tty/vt/consolemap.c @@ -870,8 +870,6 @@ int conv_uni_to_pc(struct vc_data *conp, long ucs) return -4; /* Not found */ else if (ucs < 0x20) return -1; /* Not a printable character */ - else if (ucs == 0xfeff || (ucs >= 0x200b && ucs <= 0x200f)) - return -2; /* Zero-width space */ /* * UNI_DIRECT_BASE indicates the start of the region in the User Zone * which always has a 1:1 mapping to the currently loaded font. The diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index dc84f9c6b7c6..0d1d663c7809 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2964,13 +2964,15 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, goto out; } } + /* padding for the legacy display like done below */ + tc = ' '; } } /* Now try to find out how to display it */ tc = conv_uni_to_pc(vc, tc); if (tc & ~charmask) { - if (tc == -1 || tc == -2) + if (tc == -1) return -1; /* nothing to display */ /* Glyph not found */ From 25422e8f46c1fd147886f0dc8851eb66c9ba2d48 Mon Sep 17 00:00:00 2001 From: Thierry Bultel Date: Thu, 3 Apr 2025 23:29:05 +0200 Subject: [PATCH 030/105] dt-bindings: serial: Add compatible for Renesas RZ/T2H SoC in sci RSCI of RZ/T2H SoC (a.k.a r9a09g077) has a lot of similarities with SCI in other Renesas SoCs like G2L, G3S, V2L; however, it has a different set of registers, and in addition to serial, this IP also supports SCIe (encoder), SmartCard, i2c and spi. This is why the 'renesas,sci' fallback for generic SCI does not apply to it.
Reviewed-by: Rob Herring (Arm) Signed-off-by: Thierry Bultel Link: https://lore.kernel.org/r/20250403212919.1137670-4-thierry.bultel.yh@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- .../bindings/serial/renesas,rsci.yaml | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 Documentation/devicetree/bindings/serial/renesas,rsci.yaml diff --git a/Documentation/devicetree/bindings/serial/renesas,rsci.yaml b/Documentation/devicetree/bindings/serial/renesas,rsci.yaml new file mode 100644 index 000000000000..ea879db5f485 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/renesas,rsci.yaml @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/renesas,rsci.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas RSCI Serial Communication Interface + +maintainers: + - Geert Uytterhoeven + - Thierry Bultel + +allOf: + - $ref: serial.yaml# + +properties: + compatible: + const: renesas,r9a09g077-rsci # RZ/T2H + + reg: + maxItems: 1 + + interrupts: + items: + - description: Error interrupt + - description: Receive buffer full interrupt + - description: Transmit buffer empty interrupt + - description: Transmit end interrupt + + interrupt-names: + items: + - const: eri + - const: rxi + - const: txi + - const: tei + + clocks: + maxItems: 1 + + clock-names: + const: fck # UART functional clock + + power-domains: + maxItems: 1 + + uart-has-rtscts: false + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - power-domains + +unevaluatedProperties: false + +examples: + - | + #include + #include + + aliases { + serial0 = &sci0; + }; + + sci0: serial@80005000 { + compatible = "renesas,r9a09g077-rsci"; + reg = <0x80005000 0x400>; + interrupts = , + , + , + ; + interrupt-names = "eri", "rxi", "txi", "tei"; + clocks = <&cpg CPG_MOD 108>; + clock-names = "fck"; + power-domains = <&cpg>; + }; From 
d004e3595718b8b55009c08ff59cf13be5490f59 Mon Sep 17 00:00:00 2001 From: Thierry Bultel Date: Thu, 3 Apr 2025 23:29:09 +0200 Subject: [PATCH 031/105] serial: sh-sci: Fix a comment about SCIFA The comment was correct when it was added, at that time RZ/T1 was the only SoC in the RZ/T line. Since then, further SoCs have been added with RZ/T names which do not use the same SCIFA register layout and so the comment is now misleading. So we update the comment to explicitly reference only RZ/T1 SoCs. Reviewed-by: Paul Barker Reviewed-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Signed-off-by: Thierry Bultel Link: https://lore.kernel.org/r/20250403212919.1137670-8-thierry.bultel.yh@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 7e7813ccda41..0f044ea3855a 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -310,7 +310,7 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { }, /* - * The "SCIFA" that is in RZ/A2, RZ/G2L and RZ/T. + * The "SCIFA" that is in RZ/A2, RZ/G2L and RZ/T1. * It looks like a normal SCIF with FIFO data, but with a * compressed address space. Also, the break out of interrupts * are different: ERI/BRI, RXI, TXI, TEI, DRI. From 21fc3d6b45ba42c471fb718ce922d1f9abe26238 Mon Sep 17 00:00:00 2001 From: Thierry Bultel Date: Thu, 3 Apr 2025 23:29:10 +0200 Subject: [PATCH 032/105] serial: sh-sci: Introduced function pointers The aim here is to prepare support for new sci controllers like the T2H/RSCI whose registers are too different to be handled in common code. This serial controller also has 32-bit registers, so some return types had to be changed.
The needed generic functions are no longer static, with prototypes defined in sh-sci-common.h so that they can be used from specific implementation in a separate file, to keep this driver as little changed as possible. For doing so, a set of 'ops' is added to struct sci_port. Tested-by: Geert Uytterhoeven Reviewed-by: Geert Uytterhoeven Reviewed-by: Wolfram Sang Signed-off-by: Thierry Bultel Link: https://lore.kernel.org/r/20250403212919.1137670-9-thierry.bultel.yh@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci-common.h | 159 ++++++++++ drivers/tty/serial/sh-sci.c | 484 +++++++++++++++-------------- drivers/tty/serial/sh-sci.h | 2 - 3 files changed, 407 insertions(+), 238 deletions(-) create mode 100644 drivers/tty/serial/sh-sci-common.h diff --git a/drivers/tty/serial/sh-sci-common.h b/drivers/tty/serial/sh-sci-common.h new file mode 100644 index 000000000000..2ed742bca83f --- /dev/null +++ b/drivers/tty/serial/sh-sci-common.h @@ -0,0 +1,159 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __SH_SCI_COMMON_H__ +#define __SH_SCI_COMMON_H__ + +#include + +enum SCI_CLKS { + SCI_FCK, /* Functional Clock */ + SCI_SCK, /* Optional External Clock */ + SCI_BRG_INT, /* Optional BRG Internal Clock Source */ + SCI_SCIF_CLK, /* Optional BRG External Clock Source */ + SCI_NUM_CLKS +}; + +/* Offsets into the sci_port->irqs array */ +enum { + SCIx_ERI_IRQ, + SCIx_RXI_IRQ, + SCIx_TXI_IRQ, + SCIx_BRI_IRQ, + SCIx_DRI_IRQ, + SCIx_TEI_IRQ, + SCIx_NR_IRQS, + + SCIx_MUX_IRQ = SCIx_NR_IRQS, /* special case */ +}; + +/* Bit x set means sampling rate x + 1 is supported */ +#define SCI_SR(x) BIT((x) - 1) +#define SCI_SR_RANGE(x, y) GENMASK((y) - 1, (x) - 1) + +void sci_release_port(struct uart_port *port); +int sci_request_port(struct uart_port *port); +void sci_config_port(struct uart_port *port, int flags); +int sci_verify_port(struct uart_port *port, struct serial_struct *ser); +void sci_pm(struct uart_port *port, unsigned int state, + unsigned int 
oldstate); + +struct plat_sci_reg { + u8 offset; + u8 size; +}; + +struct sci_port_params_bits { + unsigned int rxtx_enable; + unsigned int te_clear; + unsigned int poll_sent_bits; +}; + +struct sci_common_regs { + unsigned int status; + unsigned int control; +}; + +/* The actual number of needed registers. This is used by sci only */ +#define SCI_NR_REGS 20 + +struct sci_port_params { + const struct plat_sci_reg regs[SCI_NR_REGS]; + const struct sci_common_regs *common_regs; + const struct sci_port_params_bits *param_bits; + unsigned int fifosize; + unsigned int overrun_reg; + unsigned int overrun_mask; + unsigned int sampling_rate_mask; + unsigned int error_mask; + unsigned int error_clear; +}; + +struct sci_port_ops { + u32 (*read_reg)(struct uart_port *port, int reg); + void (*write_reg)(struct uart_port *port, int reg, int value); + void (*clear_SCxSR)(struct uart_port *port, unsigned int mask); + + void (*transmit_chars)(struct uart_port *port); + void (*receive_chars)(struct uart_port *port); + + void (*poll_put_char)(struct uart_port *port, unsigned char c); + + int (*set_rtrg)(struct uart_port *port, int rx_trig); + int (*rtrg_enabled)(struct uart_port *port); + + void (*shutdown_complete)(struct uart_port *port); + + void (*prepare_console_write)(struct uart_port *port, u32 ctrl); + void (*console_save)(struct uart_port *port); + void (*console_restore)(struct uart_port *port); + size_t (*suspend_regs_size)(void); +}; + +struct sci_port { + struct uart_port port; + + /* Platform configuration */ + const struct sci_port_params *params; + const struct plat_sci_port *cfg; + + unsigned int sampling_rate_mask; + resource_size_t reg_size; + struct mctrl_gpios *gpios; + + /* Clocks */ + struct clk *clks[SCI_NUM_CLKS]; + unsigned long clk_rates[SCI_NUM_CLKS]; + + int irqs[SCIx_NR_IRQS]; + char *irqstr[SCIx_NR_IRQS]; + + struct dma_chan *chan_tx; + struct dma_chan *chan_rx; + + struct reset_control *rstc; + struct sci_suspend_regs *suspend_regs; + +#ifdef 
CONFIG_SERIAL_SH_SCI_DMA + struct dma_chan *chan_tx_saved; + struct dma_chan *chan_rx_saved; + dma_cookie_t cookie_tx; + dma_cookie_t cookie_rx[2]; + dma_cookie_t active_rx; + dma_addr_t tx_dma_addr; + unsigned int tx_dma_len; + struct scatterlist sg_rx[2]; + void *rx_buf[2]; + size_t buf_len_rx; + struct work_struct work_tx; + struct hrtimer rx_timer; + unsigned int rx_timeout; /* microseconds */ +#endif + unsigned int rx_frame; + int rx_trigger; + struct timer_list rx_fifo_timer; + int rx_fifo_timeout; + u16 hscif_tot; + + const struct sci_port_ops *ops; + + bool has_rtscts; + bool autorts; + bool tx_occurred; +}; + +#define to_sci_port(uart) container_of((uart), struct sci_port, port) + +void sci_port_disable(struct sci_port *sci_port); +void sci_port_enable(struct sci_port *sci_port); + +int sci_startup(struct uart_port *port); +void sci_shutdown(struct uart_port *port); + +#define min_sr(_port) ffs((_port)->sampling_rate_mask) +#define max_sr(_port) fls((_port)->sampling_rate_mask) + +#ifdef CONFIG_SERIAL_SH_SCI_EARLYCON +int __init scix_early_console_setup(struct earlycon_device *device, int); +#endif + +#endif /* __SH_SCI_COMMON_H__ */ diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 0f044ea3855a..3eb27bd1cd1e 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -56,19 +56,7 @@ #include "serial_mctrl_gpio.h" #include "sh-sci.h" - -/* Offsets into the sci_port->irqs array */ -enum { - SCIx_ERI_IRQ, - SCIx_RXI_IRQ, - SCIx_TXI_IRQ, - SCIx_BRI_IRQ, - SCIx_DRI_IRQ, - SCIx_TEI_IRQ, - SCIx_NR_IRQS, - - SCIx_MUX_IRQ = SCIx_NR_IRQS, /* special case */ -}; +#include "sh-sci-common.h" #define SCIx_IRQ_IS_MUXED(port) \ ((port)->irqs[SCIx_ERI_IRQ] == \ @@ -76,32 +64,38 @@ enum { ((port)->irqs[SCIx_ERI_IRQ] && \ ((port)->irqs[SCIx_RXI_IRQ] < 0)) -enum SCI_CLKS { - SCI_FCK, /* Functional Clock */ - SCI_SCK, /* Optional External Clock */ - SCI_BRG_INT, /* Optional BRG Internal Clock Source */ - SCI_SCIF_CLK, /* Optional 
BRG External Clock Source */ - SCI_NUM_CLKS -}; - -/* Bit x set means sampling rate x + 1 is supported */ -#define SCI_SR(x) BIT((x) - 1) -#define SCI_SR_RANGE(x, y) GENMASK((y) - 1, (x) - 1) - #define SCI_SR_SCIFAB SCI_SR(5) | SCI_SR(7) | SCI_SR(11) | \ SCI_SR(13) | SCI_SR(16) | SCI_SR(17) | \ SCI_SR(19) | SCI_SR(27) -#define min_sr(_port) ffs((_port)->sampling_rate_mask) -#define max_sr(_port) fls((_port)->sampling_rate_mask) - /* Iterate over all supported sampling rates, from high to low */ #define for_each_sr(_sr, _port) \ for ((_sr) = max_sr(_port); (_sr) >= min_sr(_port); (_sr)--) \ if ((_port)->sampling_rate_mask & SCI_SR((_sr))) -struct plat_sci_reg { - u8 offset, size; +#define SCI_NPORTS CONFIG_SERIAL_SH_SCI_NR_UARTS + +static struct sci_port sci_ports[SCI_NPORTS]; +static unsigned long sci_ports_in_use; +static struct uart_driver sci_uart_driver; +static bool sci_uart_earlycon; +static bool sci_uart_earlycon_dev_probing; + +static const struct sci_port_params_bits sci_sci_port_params_bits = { + .rxtx_enable = SCSCR_RE | SCSCR_TE, + .te_clear = SCSCR_TE | SCSCR_TEIE, + .poll_sent_bits = SCI_TDRE | SCI_TEND +}; + +static const struct sci_port_params_bits sci_scif_port_params_bits = { + .rxtx_enable = SCSCR_RE | SCSCR_TE, + .te_clear = SCSCR_TE | SCSCR_TEIE, + .poll_sent_bits = SCIF_TDFE | SCIF_TEND +}; + +static const struct sci_common_regs sci_common_regs = { + .status = SCxSR, + .control = SCSCR, }; struct sci_suspend_regs { @@ -118,77 +112,9 @@ struct sci_suspend_regs { u8 semr; }; -struct sci_port_params { - const struct plat_sci_reg regs[SCIx_NR_REGS]; - unsigned int fifosize; - unsigned int overrun_reg; - unsigned int overrun_mask; - unsigned int sampling_rate_mask; - unsigned int error_mask; - unsigned int error_clear; -}; - -struct sci_port { - struct uart_port port; - - /* Platform configuration */ - const struct sci_port_params *params; - const struct plat_sci_port *cfg; - unsigned int sampling_rate_mask; - resource_size_t reg_size; - struct 
mctrl_gpios *gpios; - - /* Clocks */ - struct clk *clks[SCI_NUM_CLKS]; - unsigned long clk_rates[SCI_NUM_CLKS]; - - int irqs[SCIx_NR_IRQS]; - char *irqstr[SCIx_NR_IRQS]; - - struct dma_chan *chan_tx; - struct dma_chan *chan_rx; - - struct reset_control *rstc; - -#ifdef CONFIG_SERIAL_SH_SCI_DMA - struct dma_chan *chan_tx_saved; - struct dma_chan *chan_rx_saved; - dma_cookie_t cookie_tx; - dma_cookie_t cookie_rx[2]; - dma_cookie_t active_rx; - dma_addr_t tx_dma_addr; - unsigned int tx_dma_len; - struct scatterlist sg_rx[2]; - void *rx_buf[2]; - size_t buf_len_rx; - struct work_struct work_tx; - struct hrtimer rx_timer; - unsigned int rx_timeout; /* microseconds */ -#endif - unsigned int rx_frame; - int rx_trigger; - struct timer_list rx_fifo_timer; - int rx_fifo_timeout; - struct sci_suspend_regs suspend_regs; - u16 hscif_tot; - - bool has_rtscts; - bool autorts; - bool tx_occurred; -}; - -#define SCI_NPORTS CONFIG_SERIAL_SH_SCI_NR_UARTS - -static struct sci_port sci_ports[SCI_NPORTS]; -static unsigned long sci_ports_in_use; -static struct uart_driver sci_uart_driver; -static bool sci_uart_earlycon; -static bool sci_uart_earlycon_dev_probing; - -static inline struct sci_port * -to_sci_port(struct uart_port *uart) +static size_t sci_suspend_regs_size(void) { - return container_of(uart, struct sci_port, port); + return sizeof(struct sci_suspend_regs); } static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { @@ -211,6 +137,8 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { .sampling_rate_mask = SCI_SR(32), .error_mask = SCI_DEFAULT_ERROR_MASK | SCI_ORER, .error_clear = SCI_ERROR_CLEAR & ~SCI_ORER, + .param_bits = &sci_sci_port_params_bits, + .common_regs = &sci_common_regs, }, /* @@ -233,6 +161,8 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { .sampling_rate_mask = SCI_SR(32), .error_mask = SCI_DEFAULT_ERROR_MASK | SCI_ORER, .error_clear = SCI_ERROR_CLEAR & ~SCI_ORER, + .param_bits = 
&sci_scif_port_params_bits, + .common_regs = &sci_common_regs, }, /* @@ -257,6 +187,8 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { .sampling_rate_mask = SCI_SR_SCIFAB, .error_mask = SCIF_DEFAULT_ERROR_MASK | SCIFA_ORER, .error_clear = SCIF_ERROR_CLEAR & ~SCIFA_ORER, + .param_bits = &sci_scif_port_params_bits, + .common_regs = &sci_common_regs, }, /* @@ -282,6 +214,8 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { .sampling_rate_mask = SCI_SR_SCIFAB, .error_mask = SCIF_DEFAULT_ERROR_MASK | SCIFA_ORER, .error_clear = SCIF_ERROR_CLEAR & ~SCIFA_ORER, + .param_bits = &sci_scif_port_params_bits, + .common_regs = &sci_common_regs, }, /* @@ -307,6 +241,8 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { .sampling_rate_mask = SCI_SR(32), .error_mask = SCIF_DEFAULT_ERROR_MASK, .error_clear = SCIF_ERROR_CLEAR, + .param_bits = &sci_scif_port_params_bits, + .common_regs = &sci_common_regs, }, /* @@ -335,6 +271,8 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { .sampling_rate_mask = SCI_SR(32), .error_mask = SCIF_DEFAULT_ERROR_MASK, .error_clear = SCIF_ERROR_CLEAR, + .param_bits = &sci_scif_port_params_bits, + .common_regs = &sci_common_regs, }, /* @@ -366,6 +304,8 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { .sampling_rate_mask = SCI_SR(32), .error_mask = SCIF_DEFAULT_ERROR_MASK, .error_clear = SCIF_ERROR_CLEAR, + .param_bits = &sci_scif_port_params_bits, + .common_regs = &sci_common_regs, }, /* @@ -388,6 +328,8 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { .sampling_rate_mask = SCI_SR(32), .error_mask = SCIF_DEFAULT_ERROR_MASK, .error_clear = SCIF_ERROR_CLEAR, + .param_bits = &sci_scif_port_params_bits, + .common_regs = &sci_common_regs, }, /* @@ -412,6 +354,8 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { .sampling_rate_mask = SCI_SR(32), .error_mask = 
SCIF_DEFAULT_ERROR_MASK, .error_clear = SCIF_ERROR_CLEAR, + .param_bits = &sci_scif_port_params_bits, + .common_regs = &sci_common_regs, }, /* @@ -439,6 +383,8 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { .sampling_rate_mask = SCI_SR(32), .error_mask = SCIF_DEFAULT_ERROR_MASK, .error_clear = SCIF_ERROR_CLEAR, + .param_bits = &sci_scif_port_params_bits, + .common_regs = &sci_common_regs, }, /* @@ -468,6 +414,8 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { .sampling_rate_mask = SCI_SR_RANGE(8, 32), .error_mask = SCIF_DEFAULT_ERROR_MASK, .error_clear = SCIF_ERROR_CLEAR, + .param_bits = &sci_scif_port_params_bits, + .common_regs = &sci_common_regs, }, /* @@ -492,6 +440,8 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { .sampling_rate_mask = SCI_SR(32), .error_mask = SCIF_DEFAULT_ERROR_MASK, .error_clear = SCIF_ERROR_CLEAR, + .param_bits = &sci_scif_port_params_bits, + .common_regs = &sci_common_regs, }, /* @@ -519,6 +469,8 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { .sampling_rate_mask = SCI_SR(32), .error_mask = SCIF_DEFAULT_ERROR_MASK, .error_clear = SCIF_ERROR_CLEAR, + .param_bits = &sci_scif_port_params_bits, + .common_regs = &sci_common_regs, }, /* @@ -542,6 +494,8 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { .sampling_rate_mask = SCI_SR(16), .error_mask = SCIF_DEFAULT_ERROR_MASK | SCIFA_ORER, .error_clear = SCIF_ERROR_CLEAR & ~SCIFA_ORER, + .param_bits = &sci_scif_port_params_bits, + .common_regs = &sci_common_regs, }, }; @@ -579,7 +533,7 @@ static void sci_serial_out(struct uart_port *p, int offset, int value) WARN(1, "Invalid register access\n"); } -static void sci_port_enable(struct sci_port *sci_port) +void sci_port_enable(struct sci_port *sci_port) { unsigned int i; @@ -595,7 +549,7 @@ static void sci_port_enable(struct sci_port *sci_port) sci_port->port.uartclk = sci_port->clk_rates[SCI_FCK]; } -static 
void sci_port_disable(struct sci_port *sci_port) +void sci_port_disable(struct sci_port *sci_port) { unsigned int i; @@ -735,12 +689,13 @@ static void sci_clear_SCxSR(struct uart_port *port, unsigned int mask) static int sci_poll_get_char(struct uart_port *port) { unsigned short status; + struct sci_port *s = to_sci_port(port); int c; do { status = sci_serial_in(port, SCxSR); if (status & SCxSR_ERRORS(port)) { - sci_clear_SCxSR(port, SCxSR_ERROR_CLEAR(port)); + s->ops->clear_SCxSR(port, SCxSR_ERROR_CLEAR(port)); continue; } break; @@ -753,7 +708,7 @@ static int sci_poll_get_char(struct uart_port *port) /* Dummy read */ sci_serial_in(port, SCxSR); - sci_clear_SCxSR(port, SCxSR_RDxF_CLEAR(port)); + s->ops->clear_SCxSR(port, SCxSR_RDxF_CLEAR(port)); return c; } @@ -761,14 +716,16 @@ static int sci_poll_get_char(struct uart_port *port) static void sci_poll_put_char(struct uart_port *port, unsigned char c) { - unsigned short status; + struct sci_port *s = to_sci_port(port); + const struct sci_common_regs *regs = s->params->common_regs; + unsigned int status; do { - status = sci_serial_in(port, SCxSR); + status = s->ops->read_reg(port, regs->status); } while (!(status & SCxSR_TDxE(port))); sci_serial_out(port, SCxTDR, c); - sci_clear_SCxSR(port, SCxSR_TDxE_CLEAR(port) & ~SCxSR_TEND(port)); + s->ops->clear_SCxSR(port, SCxSR_TDxE_CLEAR(port) & ~SCxSR_TEND(port)); } #endif /* CONFIG_CONSOLE_POLL || CONFIG_SERIAL_SH_SCI_CONSOLE || CONFIG_SERIAL_SH_SCI_EARLYCON */ @@ -911,7 +868,7 @@ static void sci_transmit_chars(struct uart_port *port) port->icount.tx++; } while (--count > 0); - sci_clear_SCxSR(port, SCxSR_TDxE_CLEAR(port)); + s->ops->clear_SCxSR(port, SCxSR_TDxE_CLEAR(port)); if (kfifo_len(&tport->xmit_fifo) < WAKEUP_CHARS) uart_write_wakeup(port); @@ -930,6 +887,7 @@ static void sci_transmit_chars(struct uart_port *port) static void sci_receive_chars(struct uart_port *port) { struct tty_port *tport = &port->state->port; + struct sci_port *s = to_sci_port(port); int i, 
count, copied = 0; unsigned short status; unsigned char flag; @@ -984,7 +942,7 @@ static void sci_receive_chars(struct uart_port *port) } sci_serial_in(port, SCxSR); /* dummy read */ - sci_clear_SCxSR(port, SCxSR_RDxF_CLEAR(port)); + s->ops->clear_SCxSR(port, SCxSR_RDxF_CLEAR(port)); copied += count; port->icount.rx += count; @@ -997,16 +955,17 @@ static void sci_receive_chars(struct uart_port *port) /* TTY buffers full; read from RX reg to prevent lockup */ sci_serial_in(port, SCxRDR); sci_serial_in(port, SCxSR); /* dummy read */ - sci_clear_SCxSR(port, SCxSR_RDxF_CLEAR(port)); + s->ops->clear_SCxSR(port, SCxSR_RDxF_CLEAR(port)); } } static int sci_handle_errors(struct uart_port *port) { int copied = 0; - unsigned short status = sci_serial_in(port, SCxSR); - struct tty_port *tport = &port->state->port; struct sci_port *s = to_sci_port(port); + const struct sci_common_regs *regs = s->params->common_regs; + unsigned int status = s->ops->read_reg(port, regs->status); + struct tty_port *tport = &port->state->port; /* Handle overruns */ if (status & s->params->overrun_mask) { @@ -1165,7 +1124,7 @@ static void rx_fifo_timer_fn(struct timer_list *t) struct uart_port *port = &s->port; dev_dbg(port->dev, "Rx timed out\n"); - scif_set_rtrg(port, 1); + s->ops->set_rtrg(port, 1); } static ssize_t rx_fifo_trigger_show(struct device *dev, @@ -1190,9 +1149,9 @@ static ssize_t rx_fifo_trigger_store(struct device *dev, if (ret) return ret; - sci->rx_trigger = scif_set_rtrg(port, r); + sci->rx_trigger = sci->ops->set_rtrg(port, r); if (port->type == PORT_SCIFA || port->type == PORT_SCIFB) - scif_set_rtrg(port, 1); + sci->ops->set_rtrg(port, 1); return count; } @@ -1235,7 +1194,7 @@ static ssize_t rx_fifo_timeout_store(struct device *dev, sci->hscif_tot = r << HSSCR_TOT_SHIFT; } else { sci->rx_fifo_timeout = r; - scif_set_rtrg(port, 1); + sci->ops->set_rtrg(port, 1); if (r > 0) timer_setup(&sci->rx_fifo_timer, rx_fifo_timer_fn, 0); } @@ -1360,7 +1319,7 @@ static void 
sci_dma_rx_reenable_irq(struct sci_port *s) s->cfg->regtype == SCIx_RZ_SCIFA_REGTYPE) { enable_irq(s->irqs[SCIx_RXI_IRQ]); if (s->cfg->regtype == SCIx_RZ_SCIFA_REGTYPE) - scif_set_rtrg(port, s->rx_trigger); + s->ops->set_rtrg(port, s->rx_trigger); else scr &= ~SCSCR_RDRQE; } @@ -1798,7 +1757,7 @@ static irqreturn_t sci_rx_interrupt(int irq, void *ptr) s->cfg->regtype == SCIx_RZ_SCIFA_REGTYPE) { disable_irq_nosync(s->irqs[SCIx_RXI_IRQ]); if (s->cfg->regtype == SCIx_RZ_SCIFA_REGTYPE) { - scif_set_rtrg(port, 1); + s->ops->set_rtrg(port, 1); scr |= SCSCR_RIE; } else { scr |= SCSCR_RDRQE; @@ -1824,8 +1783,8 @@ handle_pio: #endif if (s->rx_trigger > 1 && s->rx_fifo_timeout > 0) { - if (!scif_rtrg_enabled(port)) - scif_set_rtrg(port, s->rx_trigger); + if (!s->ops->rtrg_enabled(port)) + s->ops->set_rtrg(port, s->rx_trigger); mod_timer(&s->rx_fifo_timer, jiffies + DIV_ROUND_UP( s->rx_frame * HZ * s->rx_fifo_timeout, 1000000)); @@ -1835,7 +1794,7 @@ handle_pio: * of whether the I_IXOFF is set, otherwise, how is the interrupt * to be disabled? 
*/ - sci_receive_chars(port); + s->ops->receive_chars(port); return IRQ_HANDLED; } @@ -1844,9 +1803,10 @@ static irqreturn_t sci_tx_interrupt(int irq, void *ptr) { struct uart_port *port = ptr; unsigned long flags; + struct sci_port *s = to_sci_port(port); uart_port_lock_irqsave(port, &flags); - sci_transmit_chars(port); + s->ops->transmit_chars(port); uart_port_unlock_irqrestore(port, flags); return IRQ_HANDLED; @@ -1855,16 +1815,18 @@ static irqreturn_t sci_tx_interrupt(int irq, void *ptr) static irqreturn_t sci_tx_end_interrupt(int irq, void *ptr) { struct uart_port *port = ptr; + struct sci_port *s = to_sci_port(port); + const struct sci_common_regs *regs = s->params->common_regs; unsigned long flags; - unsigned short ctrl; + u32 ctrl; if (port->type != PORT_SCI) return sci_tx_interrupt(irq, ptr); uart_port_lock_irqsave(port, &flags); - ctrl = sci_serial_in(port, SCSCR); - ctrl &= ~(SCSCR_TE | SCSCR_TEIE); - sci_serial_out(port, SCSCR, ctrl); + ctrl = s->ops->read_reg(port, regs->control) & + ~(s->params->param_bits->te_clear); + s->ops->write_reg(port, regs->control, ctrl); uart_port_unlock_irqrestore(port, flags); return IRQ_HANDLED; @@ -1873,6 +1835,7 @@ static irqreturn_t sci_tx_end_interrupt(int irq, void *ptr) static irqreturn_t sci_br_interrupt(int irq, void *ptr) { struct uart_port *port = ptr; + struct sci_port *s = to_sci_port(port); /* Handle BREAKs */ sci_handle_breaks(port); @@ -1880,7 +1843,7 @@ static irqreturn_t sci_br_interrupt(int irq, void *ptr) /* drop invalid character received before break was detected */ sci_serial_in(port, SCxRDR); - sci_clear_SCxSR(port, SCxSR_BREAK_CLEAR(port)); + s->ops->clear_SCxSR(port, SCxSR_BREAK_CLEAR(port)); return IRQ_HANDLED; } @@ -1908,15 +1871,15 @@ static irqreturn_t sci_er_interrupt(int irq, void *ptr) if (sci_handle_errors(port)) { /* discard character in rx buffer */ sci_serial_in(port, SCxSR); - sci_clear_SCxSR(port, SCxSR_RDxF_CLEAR(port)); + s->ops->clear_SCxSR(port, SCxSR_RDxF_CLEAR(port)); } } else 
{ sci_handle_fifo_overrun(port); if (!s->chan_rx) - sci_receive_chars(port); + s->ops->receive_chars(port); } - sci_clear_SCxSR(port, SCxSR_ERROR_CLEAR(port)); + s->ops->clear_SCxSR(port, SCxSR_ERROR_CLEAR(port)); /* Kick the transmission */ if (!s->chan_tx) @@ -2286,7 +2249,17 @@ static void sci_break_ctl(struct uart_port *port, int break_state) uart_port_unlock_irqrestore(port, flags); } -static int sci_startup(struct uart_port *port) +static void sci_shutdown_complete(struct uart_port *port) +{ + struct sci_port *s = to_sci_port(port); + u16 scr; + + scr = sci_serial_in(port, SCSCR); + sci_serial_out(port, SCSCR, + scr & (SCSCR_CKE1 | SCSCR_CKE0 | s->hscif_tot)); +} + +int sci_startup(struct uart_port *port) { struct sci_port *s = to_sci_port(port); int ret; @@ -2305,11 +2278,10 @@ static int sci_startup(struct uart_port *port) return 0; } -static void sci_shutdown(struct uart_port *port) +void sci_shutdown(struct uart_port *port) { struct sci_port *s = to_sci_port(port); unsigned long flags; - u16 scr; dev_dbg(port->dev, "%s(%d)\n", __func__, port->line); @@ -2319,13 +2291,7 @@ static void sci_shutdown(struct uart_port *port) uart_port_lock_irqsave(port, &flags); sci_stop_rx(port); sci_stop_tx(port); - /* - * Stop RX and TX, disable related interrupts, keep clock source - * and HSCIF TOT bits - */ - scr = sci_serial_in(port, SCSCR); - sci_serial_out(port, SCSCR, - scr & (SCSCR_CKE1 | SCSCR_CKE0 | s->hscif_tot)); + s->ops->shutdown_complete(port); uart_port_unlock_irqrestore(port, flags); #ifdef CONFIG_SERIAL_SH_SCI_DMA @@ -2402,8 +2368,8 @@ static int sci_brg_calc(struct sci_port *s, unsigned int bps, /* calculate sample rate, BRR, and clock select */ static int sci_scbrr_calc(struct sci_port *s, unsigned int bps, - unsigned int *brr, unsigned int *srr, - unsigned int *cks) + unsigned int *brr, unsigned int *srr, + unsigned int *cks) { unsigned long freq = s->clk_rates[SCI_FCK]; unsigned int sr, br, prediv, scrate, c; @@ -2480,9 +2446,9 @@ static void 
sci_reset(struct uart_port *port) if (reg->size) sci_serial_out(port, SCFCR, SCFCR_RFRST | SCFCR_TFRST); - sci_clear_SCxSR(port, - SCxSR_RDxF_CLEAR(port) & SCxSR_ERROR_CLEAR(port) & - SCxSR_BREAK_CLEAR(port)); + s->ops->clear_SCxSR(port, + SCxSR_RDxF_CLEAR(port) & SCxSR_ERROR_CLEAR(port) & + SCxSR_BREAK_CLEAR(port)); if (sci_getreg(port, SCLSR)->size) { status = sci_serial_in(port, SCLSR); status &= ~(SCLSR_TO | SCLSR_ORER); @@ -2491,14 +2457,14 @@ static void sci_reset(struct uart_port *port) if (s->rx_trigger > 1) { if (s->rx_fifo_timeout) { - scif_set_rtrg(port, 1); + s->ops->set_rtrg(port, 1); timer_setup(&s->rx_fifo_timer, rx_fifo_timer_fn, 0); } else { if (port->type == PORT_SCIFA || port->type == PORT_SCIFB) - scif_set_rtrg(port, 1); + s->ops->set_rtrg(port, 1); else - scif_set_rtrg(port, s->rx_trigger); + s->ops->set_rtrg(port, s->rx_trigger); } } } @@ -2758,7 +2724,7 @@ done: sci_enable_ms(port); } -static void sci_pm(struct uart_port *port, unsigned int state, +void sci_pm(struct uart_port *port, unsigned int state, unsigned int oldstate) { struct sci_port *sci_port = to_sci_port(port); @@ -2821,7 +2787,7 @@ static int sci_remap_port(struct uart_port *port) return 0; } -static void sci_release_port(struct uart_port *port) +void sci_release_port(struct uart_port *port) { struct sci_port *sport = to_sci_port(port); @@ -2833,7 +2799,7 @@ static void sci_release_port(struct uart_port *port) release_mem_region(port->mapbase, sport->reg_size); } -static int sci_request_port(struct uart_port *port) +int sci_request_port(struct uart_port *port) { struct resource *res; struct sci_port *sport = to_sci_port(port); @@ -2855,7 +2821,7 @@ static int sci_request_port(struct uart_port *port) return 0; } -static void sci_config_port(struct uart_port *port, int flags) +void sci_config_port(struct uart_port *port, int flags) { if (flags & UART_CONFIG_TYPE) { struct sci_port *sport = to_sci_port(port); @@ -2865,7 +2831,7 @@ static void sci_config_port(struct uart_port *port, 
int flags) } } -static int sci_verify_port(struct uart_port *port, struct serial_struct *ser) +int sci_verify_port(struct uart_port *port, struct serial_struct *ser) { if (ser->baud_base < 2400) /* No paper tape reader for Mitch.. */ @@ -2874,6 +2840,75 @@ static int sci_verify_port(struct uart_port *port, struct serial_struct *ser) return 0; } +static void sci_prepare_console_write(struct uart_port *port, u32 ctrl) +{ + struct sci_port *s = to_sci_port(port); + u32 ctrl_temp = + s->params->param_bits->rxtx_enable | + (s->cfg->scscr & ~(SCSCR_CKE1 | SCSCR_CKE0)) | + (ctrl & (SCSCR_CKE1 | SCSCR_CKE0)) | + s->hscif_tot; + sci_serial_out(port, SCSCR, ctrl_temp); +} + +static void sci_console_save(struct uart_port *port) +{ + struct sci_port *s = to_sci_port(port); + struct sci_suspend_regs *regs = s->suspend_regs; + + if (sci_getreg(port, SCDL)->size) + regs->scdl = sci_serial_in(port, SCDL); + if (sci_getreg(port, SCCKS)->size) + regs->sccks = sci_serial_in(port, SCCKS); + if (sci_getreg(port, SCSMR)->size) + regs->scsmr = sci_serial_in(port, SCSMR); + if (sci_getreg(port, SCSCR)->size) + regs->scscr = sci_serial_in(port, SCSCR); + if (sci_getreg(port, SCFCR)->size) + regs->scfcr = sci_serial_in(port, SCFCR); + if (sci_getreg(port, SCSPTR)->size) + regs->scsptr = sci_serial_in(port, SCSPTR); + if (sci_getreg(port, SCBRR)->size) + regs->scbrr = sci_serial_in(port, SCBRR); + if (sci_getreg(port, HSSRR)->size) + regs->hssrr = sci_serial_in(port, HSSRR); + if (sci_getreg(port, SCPCR)->size) + regs->scpcr = sci_serial_in(port, SCPCR); + if (sci_getreg(port, SCPDR)->size) + regs->scpdr = sci_serial_in(port, SCPDR); + if (sci_getreg(port, SEMR)->size) + regs->semr = sci_serial_in(port, SEMR); +} + +static void sci_console_restore(struct uart_port *port) +{ + struct sci_port *s = to_sci_port(port); + struct sci_suspend_regs *regs = s->suspend_regs; + + if (sci_getreg(port, SCDL)->size) + sci_serial_out(port, SCDL, regs->scdl); + if (sci_getreg(port, SCCKS)->size) + 
sci_serial_out(port, SCCKS, regs->sccks); + if (sci_getreg(port, SCSMR)->size) + sci_serial_out(port, SCSMR, regs->scsmr); + if (sci_getreg(port, SCSCR)->size) + sci_serial_out(port, SCSCR, regs->scscr); + if (sci_getreg(port, SCFCR)->size) + sci_serial_out(port, SCFCR, regs->scfcr); + if (sci_getreg(port, SCSPTR)->size) + sci_serial_out(port, SCSPTR, regs->scsptr); + if (sci_getreg(port, SCBRR)->size) + sci_serial_out(port, SCBRR, regs->scbrr); + if (sci_getreg(port, HSSRR)->size) + sci_serial_out(port, HSSRR, regs->hssrr); + if (sci_getreg(port, SCPCR)->size) + sci_serial_out(port, SCPCR, regs->scpcr); + if (sci_getreg(port, SCPDR)->size) + sci_serial_out(port, SCPDR, regs->scpdr); + if (sci_getreg(port, SEMR)->size) + sci_serial_out(port, SEMR, regs->semr); +} + static const struct uart_ops sci_uart_ops = { .tx_empty = sci_tx_empty, .set_mctrl = sci_set_mctrl, @@ -2899,6 +2934,25 @@ static const struct uart_ops sci_uart_ops = { #endif }; +static const struct sci_port_ops sci_port_ops = { + .read_reg = sci_serial_in, + .write_reg = sci_serial_out, + .clear_SCxSR = sci_clear_SCxSR, + .transmit_chars = sci_transmit_chars, + .receive_chars = sci_receive_chars, +#if defined(CONFIG_SERIAL_SH_SCI_CONSOLE) || \ + defined(CONFIG_SERIAL_SH_SCI_EARLYCON) + .poll_put_char = sci_poll_put_char, +#endif + .set_rtrg = scif_set_rtrg, + .rtrg_enabled = scif_rtrg_enabled, + .shutdown_complete = sci_shutdown_complete, + .prepare_console_write = sci_prepare_console_write, + .console_save = sci_console_save, + .console_restore = sci_console_restore, + .suspend_regs_size = sci_suspend_regs_size, +}; + static int sci_init_clocks(struct sci_port *sci_port, struct device *dev) { const char *clk_names[] = { @@ -2992,6 +3046,7 @@ static int sci_init_single(struct platform_device *dev, int ret; sci_port->cfg = p; + sci_port->ops = &sci_port_ops; port->ops = &sci_uart_ops; port->iotype = UPIO_MEM; @@ -3104,7 +3159,7 @@ static int sci_init_single(struct platform_device *dev, 
defined(CONFIG_SERIAL_SH_SCI_EARLYCON) static void serial_console_putchar(struct uart_port *port, unsigned char ch) { - sci_poll_put_char(port, ch); + to_sci_port(port)->ops->poll_put_char(port, ch); } /* @@ -3116,7 +3171,9 @@ static void serial_console_write(struct console *co, const char *s, { struct sci_port *sci_port = &sci_ports[co->index]; struct uart_port *port = &sci_port->port; - unsigned short bits, ctrl, ctrl_temp; + const struct sci_common_regs *regs = sci_port->params->common_regs; + unsigned int bits; + u32 ctrl; unsigned long flags; int locked = 1; @@ -3128,21 +3185,21 @@ static void serial_console_write(struct console *co, const char *s, uart_port_lock_irqsave(port, &flags); /* first save SCSCR then disable interrupts, keep clock source */ - ctrl = sci_serial_in(port, SCSCR); - ctrl_temp = SCSCR_RE | SCSCR_TE | - (sci_port->cfg->scscr & ~(SCSCR_CKE1 | SCSCR_CKE0)) | - (ctrl & (SCSCR_CKE1 | SCSCR_CKE0)); - sci_serial_out(port, SCSCR, ctrl_temp | sci_port->hscif_tot); + + ctrl = sci_port->ops->read_reg(port, regs->control); + sci_port->ops->prepare_console_write(port, ctrl); uart_console_write(port, s, count, serial_console_putchar); /* wait until fifo is empty and last bit has been transmitted */ - bits = SCxSR_TDxE(port) | SCxSR_TEND(port); - while ((sci_serial_in(port, SCxSR) & bits) != bits) + + bits = sci_port->params->param_bits->poll_sent_bits; + + while ((sci_port->ops->read_reg(port, regs->status) & bits) != bits) cpu_relax(); /* restore the SCSCR */ - sci_serial_out(port, SCSCR, ctrl); + sci_port->ops->write_reg(port, regs->control, ctrl); if (locked) uart_port_unlock_irqrestore(port, flags); @@ -3275,7 +3332,6 @@ static void sci_remove(struct platform_device *dev) device_remove_file(&dev->dev, &dev_attr_rx_fifo_timeout); } - #define SCI_OF_DATA(type, regtype) (void *)((type) << 16 | (regtype)) #define SCI_OF_TYPE(data) ((unsigned long)(data) >> 16) #define SCI_OF_REGTYPE(data) ((unsigned long)(data) & 0xffff) @@ -3512,6 +3568,11 @@ static 
int sci_probe(struct platform_device *dev) } sp = &sci_ports[dev_id]; + sp->suspend_regs = devm_kzalloc(&dev->dev, + sp->ops->suspend_regs_size(), + GFP_KERNEL); + if (!sp->suspend_regs) + return -ENOMEM; /* * In case: @@ -3563,64 +3624,6 @@ static int sci_probe(struct platform_device *dev) return 0; } -static void sci_console_save(struct sci_port *s) -{ - struct sci_suspend_regs *regs = &s->suspend_regs; - struct uart_port *port = &s->port; - - if (sci_getreg(port, SCDL)->size) - regs->scdl = sci_serial_in(port, SCDL); - if (sci_getreg(port, SCCKS)->size) - regs->sccks = sci_serial_in(port, SCCKS); - if (sci_getreg(port, SCSMR)->size) - regs->scsmr = sci_serial_in(port, SCSMR); - if (sci_getreg(port, SCSCR)->size) - regs->scscr = sci_serial_in(port, SCSCR); - if (sci_getreg(port, SCFCR)->size) - regs->scfcr = sci_serial_in(port, SCFCR); - if (sci_getreg(port, SCSPTR)->size) - regs->scsptr = sci_serial_in(port, SCSPTR); - if (sci_getreg(port, SCBRR)->size) - regs->scbrr = sci_serial_in(port, SCBRR); - if (sci_getreg(port, HSSRR)->size) - regs->hssrr = sci_serial_in(port, HSSRR); - if (sci_getreg(port, SCPCR)->size) - regs->scpcr = sci_serial_in(port, SCPCR); - if (sci_getreg(port, SCPDR)->size) - regs->scpdr = sci_serial_in(port, SCPDR); - if (sci_getreg(port, SEMR)->size) - regs->semr = sci_serial_in(port, SEMR); -} - -static void sci_console_restore(struct sci_port *s) -{ - struct sci_suspend_regs *regs = &s->suspend_regs; - struct uart_port *port = &s->port; - - if (sci_getreg(port, SCDL)->size) - sci_serial_out(port, SCDL, regs->scdl); - if (sci_getreg(port, SCCKS)->size) - sci_serial_out(port, SCCKS, regs->sccks); - if (sci_getreg(port, SCSMR)->size) - sci_serial_out(port, SCSMR, regs->scsmr); - if (sci_getreg(port, SCSCR)->size) - sci_serial_out(port, SCSCR, regs->scscr); - if (sci_getreg(port, SCFCR)->size) - sci_serial_out(port, SCFCR, regs->scfcr); - if (sci_getreg(port, SCSPTR)->size) - sci_serial_out(port, SCSPTR, regs->scsptr); - if (sci_getreg(port, 
SCBRR)->size) - sci_serial_out(port, SCBRR, regs->scbrr); - if (sci_getreg(port, HSSRR)->size) - sci_serial_out(port, HSSRR, regs->hssrr); - if (sci_getreg(port, SCPCR)->size) - sci_serial_out(port, SCPCR, regs->scpcr); - if (sci_getreg(port, SCPDR)->size) - sci_serial_out(port, SCPDR, regs->scpdr); - if (sci_getreg(port, SEMR)->size) - sci_serial_out(port, SEMR, regs->semr); -} - static __maybe_unused int sci_suspend(struct device *dev) { struct sci_port *sport = dev_get_drvdata(dev); @@ -3628,8 +3631,10 @@ static __maybe_unused int sci_suspend(struct device *dev) if (sport) { uart_suspend_port(&sci_uart_driver, &sport->port); - if (!console_suspend_enabled && uart_console(&sport->port)) - sci_console_save(sport); + if (!console_suspend_enabled && uart_console(&sport->port)) { + if (sport->ops->console_save) + sport->ops->console_save(&sport->port); + } else return reset_control_assert(sport->rstc); } @@ -3643,7 +3648,8 @@ static __maybe_unused int sci_resume(struct device *dev) if (sport) { if (!console_suspend_enabled && uart_console(&sport->port)) { - sci_console_restore(sport); + if (sport->ops->console_restore) + sport->ops->console_restore(&sport->port); } else { int ret = reset_control_deassert(sport->rstc); @@ -3707,9 +3713,11 @@ static int early_console_exit(struct console *co) return 0; } -static int __init early_console_setup(struct earlycon_device *device, +int __init scix_early_console_setup(struct earlycon_device *device, int type) { + const struct sci_common_regs *regs; + if (!device->port.membase) return -ENODEV; @@ -3717,11 +3725,15 @@ static int __init early_console_setup(struct earlycon_device *device, sci_ports[0].port = device->port; port_cfg.type = type; sci_ports[0].cfg = &port_cfg; + sci_ports[0].ops = &sci_port_ops; sci_ports[0].params = sci_probe_regmap(&port_cfg); sci_uart_earlycon = true; - port_cfg.scscr = sci_serial_in(&sci_ports[0].port, SCSCR); - sci_serial_out(&sci_ports[0].port, SCSCR, - SCSCR_RE | SCSCR_TE | port_cfg.scscr); + 
regs = sci_ports[0].params->common_regs; + + port_cfg.scscr = sci_ports[0].ops->read_reg(&sci_ports[0].port, regs->control); + sci_ports[0].ops->write_reg(&sci_ports[0].port, + regs->control, + sci_ports[0].params->param_bits->rxtx_enable | port_cfg.scscr); device->con->write = serial_console_write; device->con->exit = early_console_exit; @@ -3731,41 +3743,41 @@ static int __init early_console_setup(struct earlycon_device *device, static int __init sci_early_console_setup(struct earlycon_device *device, const char *opt) { - return early_console_setup(device, PORT_SCI); + return scix_early_console_setup(device, PORT_SCI); } static int __init scif_early_console_setup(struct earlycon_device *device, const char *opt) { - return early_console_setup(device, PORT_SCIF); + return scix_early_console_setup(device, PORT_SCIF); } static int __init rzscifa_early_console_setup(struct earlycon_device *device, const char *opt) { port_cfg.regtype = SCIx_RZ_SCIFA_REGTYPE; - return early_console_setup(device, PORT_SCIF); + return scix_early_console_setup(device, PORT_SCIF); } static int __init rzv2hscif_early_console_setup(struct earlycon_device *device, const char *opt) { port_cfg.regtype = SCIx_RZV2H_SCIF_REGTYPE; - return early_console_setup(device, PORT_SCIF); + return scix_early_console_setup(device, PORT_SCIF); } static int __init scifa_early_console_setup(struct earlycon_device *device, const char *opt) { - return early_console_setup(device, PORT_SCIFA); + return scix_early_console_setup(device, PORT_SCIFA); } static int __init scifb_early_console_setup(struct earlycon_device *device, const char *opt) { - return early_console_setup(device, PORT_SCIFB); + return scix_early_console_setup(device, PORT_SCIFB); } static int __init hscif_early_console_setup(struct earlycon_device *device, const char *opt) { - return early_console_setup(device, PORT_HSCIF); + return scix_early_console_setup(device, PORT_HSCIF); } OF_EARLYCON_DECLARE(sci, "renesas,sci", sci_early_console_setup); diff 
--git a/drivers/tty/serial/sh-sci.h b/drivers/tty/serial/sh-sci.h index 0b65563c4e9e..951681aba586 100644 --- a/drivers/tty/serial/sh-sci.h +++ b/drivers/tty/serial/sh-sci.h @@ -32,8 +32,6 @@ enum { HSRTRGR, /* Rx FIFO Data Count Trigger Register */ HSTTRGR, /* Tx FIFO Data Count Trigger Register */ SEMR, /* Serial extended mode register */ - - SCIx_NR_REGS, }; From 043806bc9dbc6597dd15e6ca9220ae2746425f2f Mon Sep 17 00:00:00 2001 From: Thierry Bultel Date: Thu, 3 Apr 2025 23:29:11 +0200 Subject: [PATCH 033/105] serial: sh-sci: Introduced sci_of_data The aim here is to provide an easier support to more different SCI controllers, like the RZ/T2H one. The existing .data field of_sci_match is changed to a structure containing all what that can be statically initialized, and avoid a call to 'sci_probe_regmap', in both 'sci_init_single', and 'early_console_setup'. 'sci_probe_regmap' is now assumed to be called in the only case where the device description is from a board file instead of a dts. In this way, there is no need to patch 'sci_probe_regmap' for adding new SCI type, and also, the specific sci_port_params for a new SCI type can be provided by an external file. 
Reviewed-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Signed-off-by: Thierry Bultel Link: https://lore.kernel.org/r/20250403212919.1137670-10-thierry.bultel.yh@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci-common.h | 10 +- drivers/tty/serial/sh-sci.c | 164 +++++++++++++++++++++-------- 2 files changed, 131 insertions(+), 43 deletions(-) diff --git a/drivers/tty/serial/sh-sci-common.h b/drivers/tty/serial/sh-sci-common.h index 2ed742bca83f..bd9d9cfac1c8 100644 --- a/drivers/tty/serial/sh-sci-common.h +++ b/drivers/tty/serial/sh-sci-common.h @@ -89,6 +89,14 @@ struct sci_port_ops { size_t (*suspend_regs_size)(void); }; +struct sci_of_data { + const struct sci_port_params *params; + const struct uart_ops *uart_ops; + const struct sci_port_ops *ops; + unsigned short regtype; + unsigned short type; +}; + struct sci_port { struct uart_port port; @@ -153,7 +161,7 @@ void sci_shutdown(struct uart_port *port); #define max_sr(_port) fls((_port)->sampling_rate_mask) #ifdef CONFIG_SERIAL_SH_SCI_EARLYCON -int __init scix_early_console_setup(struct earlycon_device *device, int); +int __init scix_early_console_setup(struct earlycon_device *device, const struct sci_of_data *data); #endif #endif /* __SH_SCI_COMMON_H__ */ diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 3eb27bd1cd1e..ff1986dc6af3 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2996,10 +2996,13 @@ static int sci_init_clocks(struct sci_port *sci_port, struct device *dev) } static const struct sci_port_params * -sci_probe_regmap(const struct plat_sci_port *cfg) +sci_probe_regmap(const struct plat_sci_port *cfg, struct sci_port *sci_port) { unsigned int regtype; + sci_port->ops = &sci_port_ops; + sci_port->port.ops = &sci_uart_ops; + if (cfg->regtype != SCIx_PROBE_REGTYPE) return &sci_port_params[cfg->regtype]; @@ -3046,9 +3049,7 @@ static int sci_init_single(struct platform_device *dev, int ret; 
sci_port->cfg = p; - sci_port->ops = &sci_port_ops; - port->ops = &sci_uart_ops; port->iotype = UPIO_MEM; port->line = index; port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_SH_SCI_CONSOLE); @@ -3088,10 +3089,6 @@ static int sci_init_single(struct platform_device *dev, for (i = 1; i < ARRAY_SIZE(sci_port->irqs); i++) sci_port->irqs[i] = sci_port->irqs[0]; - sci_port->params = sci_probe_regmap(p); - if (unlikely(sci_port->params == NULL)) - return -EINVAL; - switch (p->type) { case PORT_SCIFB: sci_port->rx_trigger = 48; @@ -3277,13 +3274,18 @@ static struct console early_serial_console = { static int sci_probe_earlyprintk(struct platform_device *pdev) { const struct plat_sci_port *cfg = dev_get_platdata(&pdev->dev); + struct sci_port *sp = &sci_ports[pdev->id]; if (early_serial_console.data) return -EEXIST; early_serial_console.index = pdev->id; - sci_init_single(pdev, &sci_ports[pdev->id], pdev->id, cfg, true); + sp->params = sci_probe_regmap(cfg, sp); + if (!sp->params) + return -ENODEV; + + sci_init_single(pdev, sp, pdev->id, cfg, true); if (!strstr(early_serial_buf, "keep")) early_serial_console.flags |= CON_BOOT; @@ -3332,58 +3334,126 @@ static void sci_remove(struct platform_device *dev) device_remove_file(&dev->dev, &dev_attr_rx_fifo_timeout); } -#define SCI_OF_DATA(type, regtype) (void *)((type) << 16 | (regtype)) -#define SCI_OF_TYPE(data) ((unsigned long)(data) >> 16) -#define SCI_OF_REGTYPE(data) ((unsigned long)(data) & 0xffff) +static const struct sci_of_data of_sci_scif_sh2 = { + .type = PORT_SCIF, + .regtype = SCIx_SH2_SCIF_FIFODATA_REGTYPE, + .ops = &sci_port_ops, + .uart_ops = &sci_uart_ops, + .params = &sci_port_params[SCIx_SH2_SCIF_FIFODATA_REGTYPE], +}; + +static const struct sci_of_data of_sci_scif_rz_scifa = { + .type = PORT_SCIF, + .regtype = SCIx_RZ_SCIFA_REGTYPE, + .ops = &sci_port_ops, + .uart_ops = &sci_uart_ops, + .params = &sci_port_params[SCIx_RZ_SCIFA_REGTYPE], +}; + +static const struct sci_of_data of_sci_scif_rzv2h = { + .type = 
PORT_SCIF, + .regtype = SCIx_RZV2H_SCIF_REGTYPE, + .ops = &sci_port_ops, + .uart_ops = &sci_uart_ops, + .params = &sci_port_params[SCIx_RZV2H_SCIF_REGTYPE], +}; + +static const struct sci_of_data of_sci_rcar_scif = { + .type = PORT_SCIF, + .regtype = SCIx_SH4_SCIF_BRG_REGTYPE, + .ops = &sci_port_ops, + .uart_ops = &sci_uart_ops, + .params = &sci_port_params[SCIx_SH4_SCIF_BRG_REGTYPE], +}; + +static const struct sci_of_data of_sci_scif_sh4 = { + .type = PORT_SCIF, + .regtype = SCIx_SH4_SCIF_REGTYPE, + .ops = &sci_port_ops, + .uart_ops = &sci_uart_ops, + .params = &sci_port_params[SCIx_SH4_SCIF_REGTYPE], +}; + +static const struct sci_of_data of_sci_scifa = { + .type = PORT_SCIFA, + .regtype = SCIx_SCIFA_REGTYPE, + .ops = &sci_port_ops, + .uart_ops = &sci_uart_ops, + .params = &sci_port_params[SCIx_SCIFA_REGTYPE], +}; + +static const struct sci_of_data of_sci_scifb = { + .type = PORT_SCIFB, + .regtype = SCIx_SCIFB_REGTYPE, + .ops = &sci_port_ops, + .uart_ops = &sci_uart_ops, + .params = &sci_port_params[SCIx_SCIFB_REGTYPE], +}; + +static const struct sci_of_data of_sci_hscif = { + .type = PORT_HSCIF, + .regtype = SCIx_HSCIF_REGTYPE, + .ops = &sci_port_ops, + .uart_ops = &sci_uart_ops, + .params = &sci_port_params[SCIx_HSCIF_REGTYPE], +}; + +static const struct sci_of_data of_sci_sci = { + .type = PORT_SCI, + .regtype = SCIx_SCI_REGTYPE, + .ops = &sci_port_ops, + .uart_ops = &sci_uart_ops, + .params = &sci_port_params[SCIx_SCI_REGTYPE], +}; static const struct of_device_id of_sci_match[] __maybe_unused = { /* SoC-specific types */ { .compatible = "renesas,scif-r7s72100", - .data = SCI_OF_DATA(PORT_SCIF, SCIx_SH2_SCIF_FIFODATA_REGTYPE), + .data = &of_sci_scif_sh2, }, { .compatible = "renesas,scif-r7s9210", - .data = SCI_OF_DATA(PORT_SCIF, SCIx_RZ_SCIFA_REGTYPE), + .data = &of_sci_scif_rz_scifa, }, { .compatible = "renesas,scif-r9a07g044", - .data = SCI_OF_DATA(PORT_SCIF, SCIx_RZ_SCIFA_REGTYPE), + .data = &of_sci_scif_rz_scifa, }, { .compatible = 
"renesas,scif-r9a09g057", - .data = SCI_OF_DATA(PORT_SCIF, SCIx_RZV2H_SCIF_REGTYPE), + .data = &of_sci_scif_rzv2h, }, /* Family-specific types */ { .compatible = "renesas,rcar-gen1-scif", - .data = SCI_OF_DATA(PORT_SCIF, SCIx_SH4_SCIF_BRG_REGTYPE), + .data = &of_sci_rcar_scif, }, { .compatible = "renesas,rcar-gen2-scif", - .data = SCI_OF_DATA(PORT_SCIF, SCIx_SH4_SCIF_BRG_REGTYPE), + .data = &of_sci_rcar_scif, }, { .compatible = "renesas,rcar-gen3-scif", - .data = SCI_OF_DATA(PORT_SCIF, SCIx_SH4_SCIF_BRG_REGTYPE), + .data = &of_sci_rcar_scif }, { .compatible = "renesas,rcar-gen4-scif", - .data = SCI_OF_DATA(PORT_SCIF, SCIx_SH4_SCIF_BRG_REGTYPE), + .data = &of_sci_rcar_scif }, /* Generic types */ { .compatible = "renesas,scif", - .data = SCI_OF_DATA(PORT_SCIF, SCIx_SH4_SCIF_REGTYPE), + .data = &of_sci_scif_sh4, }, { .compatible = "renesas,scifa", - .data = SCI_OF_DATA(PORT_SCIFA, SCIx_SCIFA_REGTYPE), + .data = &of_sci_scifa, }, { .compatible = "renesas,scifb", - .data = SCI_OF_DATA(PORT_SCIFB, SCIx_SCIFB_REGTYPE), + .data = &of_sci_scifb, }, { .compatible = "renesas,hscif", - .data = SCI_OF_DATA(PORT_HSCIF, SCIx_HSCIF_REGTYPE), + .data = &of_sci_hscif, }, { .compatible = "renesas,sci", - .data = SCI_OF_DATA(PORT_SCI, SCIx_SCI_REGTYPE), + .data = &of_sci_sci, }, { /* Terminator */ }, @@ -3402,7 +3472,7 @@ static struct plat_sci_port *sci_parse_dt(struct platform_device *pdev, struct reset_control *rstc; struct plat_sci_port *p; struct sci_port *sp; - const void *data; + const struct sci_of_data *data; int id, ret; if (!IS_ENABLED(CONFIG_OF) || !np) @@ -3449,8 +3519,12 @@ static struct plat_sci_port *sci_parse_dt(struct platform_device *pdev, sp->rstc = rstc; *dev_id = id; - p->type = SCI_OF_TYPE(data); - p->regtype = SCI_OF_REGTYPE(data); + p->type = data->type; + p->regtype = data->regtype; + + sp->ops = data->ops; + sp->port.ops = data->uart_ops; + sp->params = data->params; sp->has_rtscts = of_property_read_bool(np, "uart-has-rtscts"); @@ -3557,6 +3631,7 @@ static 
int sci_probe(struct platform_device *dev) p = sci_parse_dt(dev, &dev_id); if (IS_ERR(p)) return PTR_ERR(p); + sp = &sci_ports[dev_id]; } else { p = dev->dev.platform_data; if (p == NULL) { @@ -3565,9 +3640,12 @@ static int sci_probe(struct platform_device *dev) } dev_id = dev->id; + sp = &sci_ports[dev_id]; + sp->params = sci_probe_regmap(p, sp); + if (!sp->params) + return -ENODEV; } - sp = &sci_ports[dev_id]; sp->suspend_regs = devm_kzalloc(&dev->dev, sp->ops->suspend_regs_size(), GFP_KERNEL); @@ -3714,19 +3792,23 @@ static int early_console_exit(struct console *co) } int __init scix_early_console_setup(struct earlycon_device *device, - int type) + const struct sci_of_data *data) { const struct sci_common_regs *regs; if (!device->port.membase) return -ENODEV; - device->port.type = type; + device->port.type = data->type; sci_ports[0].port = device->port; - port_cfg.type = type; + + port_cfg.type = data->type; + port_cfg.regtype = data->regtype; + sci_ports[0].cfg = &port_cfg; - sci_ports[0].ops = &sci_port_ops; - sci_ports[0].params = sci_probe_regmap(&port_cfg); + sci_ports[0].params = data->params; + sci_ports[0].ops = data->ops; + sci_ports[0].port.ops = data->uart_ops; sci_uart_earlycon = true; regs = sci_ports[0].params->common_regs; @@ -3743,41 +3825,39 @@ int __init scix_early_console_setup(struct earlycon_device *device, static int __init sci_early_console_setup(struct earlycon_device *device, const char *opt) { - return scix_early_console_setup(device, PORT_SCI); + return scix_early_console_setup(device, &of_sci_sci); } static int __init scif_early_console_setup(struct earlycon_device *device, const char *opt) { - return scix_early_console_setup(device, PORT_SCIF); + return scix_early_console_setup(device, &of_sci_scif_sh4); } static int __init rzscifa_early_console_setup(struct earlycon_device *device, const char *opt) { - port_cfg.regtype = SCIx_RZ_SCIFA_REGTYPE; - return scix_early_console_setup(device, PORT_SCIF); + return 
scix_early_console_setup(device, &of_sci_scif_rz_scifa); } static int __init rzv2hscif_early_console_setup(struct earlycon_device *device, const char *opt) { - port_cfg.regtype = SCIx_RZV2H_SCIF_REGTYPE; - return scix_early_console_setup(device, PORT_SCIF); + return scix_early_console_setup(device, &of_sci_scif_rzv2h); } static int __init scifa_early_console_setup(struct earlycon_device *device, const char *opt) { - return scix_early_console_setup(device, PORT_SCIFA); + return scix_early_console_setup(device, &of_sci_scifa); } static int __init scifb_early_console_setup(struct earlycon_device *device, const char *opt) { - return scix_early_console_setup(device, PORT_SCIFB); + return scix_early_console_setup(device, &of_sci_scifb); } static int __init hscif_early_console_setup(struct earlycon_device *device, const char *opt) { - return scix_early_console_setup(device, PORT_HSCIF); + return scix_early_console_setup(device, &of_sci_hscif); } OF_EARLYCON_DECLARE(sci, "renesas,sci", sci_early_console_setup); From 8bfabff0bfff8fbbe90673d1a557d15c42b4494a Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 11 Apr 2025 13:43:58 -0400 Subject: [PATCH 034/105] vt: fix comment vs definition mismatch Fixes for: ucs_is_zero_width() ucs_is_double_width() ucs_recompose() Signed-off-by: Nicolas Pitre Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202504111036.YH1iEqBR-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202504111359.urXWyzvQ-lkp@intel.com/ Link: https://lore.kernel.org/r/o4974349-pp4p-4374-80q9-2oppqqr94r60@syhkavp.arg Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/gen_ucs_recompose.py | 5 ++--- drivers/tty/vt/gen_ucs_width.py | 4 ++-- drivers/tty/vt/ucs_recompose.c | 5 ++--- drivers/tty/vt/ucs_width.c | 4 ++-- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/tty/vt/gen_ucs_recompose.py b/drivers/tty/vt/gen_ucs_recompose.py index 64418803e49e..dc176d32e225 100755 --- 
a/drivers/tty/vt/gen_ucs_recompose.py +++ b/drivers/tty/vt/gen_ucs_recompose.py @@ -289,8 +289,8 @@ static int recomposition_compare(const void *key, const void *element) /** * Attempt to recompose two Unicode characters into a single character. * - * @param previous: Previous Unicode code point (UCS-4) - * @param current: Current Unicode code point (UCS-4) + * @param base: Base Unicode code point (UCS-4) + * @param combining: Combining mark Unicode code point (UCS-4) * Return: Recomposed Unicode code point, or 0 if no recomposition is possible */ uint32_t ucs_recompose(uint32_t base, uint32_t combining) @@ -301,7 +301,6 @@ uint32_t ucs_recompose(uint32_t base, uint32_t combining) return 0; struct compare_key key = {{ base, combining }}; - struct recomposition *result = __inline_bsearch(&key, recomposition_table, ARRAY_SIZE(recomposition_table), diff --git a/drivers/tty/vt/gen_ucs_width.py b/drivers/tty/vt/gen_ucs_width.py index c6cbc93e83f2..e65f43e2080a 100755 --- a/drivers/tty/vt/gen_ucs_width.py +++ b/drivers/tty/vt/gen_ucs_width.py @@ -292,7 +292,7 @@ static bool is_in_interval32(uint32_t cp, const struct interval32 *intervals, si /** * Determine if a Unicode code point is zero-width. * - * @param ucs: Unicode code point (UCS-4) + * @param cp: Unicode code point (UCS-4) * Return: true if the character is zero-width, false otherwise */ bool ucs_is_zero_width(uint32_t cp) @@ -305,7 +305,7 @@ bool ucs_is_zero_width(uint32_t cp) /** * Determine if a Unicode code point is double-width. 
* - * @param ucs: Unicode code point (UCS-4) + * @param cp: Unicode code point (UCS-4) * Return: true if the character is double-width, false otherwise */ bool ucs_is_double_width(uint32_t cp) diff --git a/drivers/tty/vt/ucs_recompose.c b/drivers/tty/vt/ucs_recompose.c index 5c30c989def3..52cde1517f89 100644 --- a/drivers/tty/vt/ucs_recompose.c +++ b/drivers/tty/vt/ucs_recompose.c @@ -147,8 +147,8 @@ static int recomposition_compare(const void *key, const void *element) /** * Attempt to recompose two Unicode characters into a single character. * - * @param previous: Previous Unicode code point (UCS-4) - * @param current: Current Unicode code point (UCS-4) + * @param base: Base Unicode code point (UCS-4) + * @param combining: Combining mark Unicode code point (UCS-4) * Return: Recomposed Unicode code point, or 0 if no recomposition is possible */ uint32_t ucs_recompose(uint32_t base, uint32_t combining) @@ -159,7 +159,6 @@ uint32_t ucs_recompose(uint32_t base, uint32_t combining) return 0; struct compare_key key = { base, combining }; - struct recomposition *result = __inline_bsearch(&key, recomposition_table, ARRAY_SIZE(recomposition_table), diff --git a/drivers/tty/vt/ucs_width.c b/drivers/tty/vt/ucs_width.c index 060aa8ae7f16..4d5a0021e33b 100644 --- a/drivers/tty/vt/ucs_width.c +++ b/drivers/tty/vt/ucs_width.c @@ -512,7 +512,7 @@ static bool is_in_interval32(uint32_t cp, const struct interval32 *intervals, si /** * Determine if a Unicode code point is zero-width. * - * @param ucs: Unicode code point (UCS-4) + * @param cp: Unicode code point (UCS-4) * Return: true if the character is zero-width, false otherwise */ bool ucs_is_zero_width(uint32_t cp) @@ -525,7 +525,7 @@ bool ucs_is_zero_width(uint32_t cp) /** * Determine if a Unicode code point is double-width. 
* - * @param ucs: Unicode code point (UCS-4) + * @param cp: Unicode code point (UCS-4) * Return: true if the character is double-width, false otherwise */ bool ucs_is_double_width(uint32_t cp) From 66f5f70ce07a5c4ad88709dc34c072673aaafa25 Mon Sep 17 00:00:00 2001 From: Ryo Takakura Date: Sat, 12 Apr 2025 09:25:44 +0900 Subject: [PATCH 035/105] serial: sifive: Switch to nbcon console Add the necessary callbacks (write_atomic, write_thread, device_lock and device_unlock) and CON_NBCON flag to switch the sifive console driver to perform as nbcon console. Both ->write_atomic() and ->write_thread() will check for console ownership whenever they are accessing registers. The ->device_lock()/unlock() will provide the additional serialization necessary for ->write_thread() which is called from dedicated printing thread. Signed-off-by: Ryo Takakura Reviewed-by: John Ogness Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20250412002544.185038-1-ryotkkr98@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sifive.c | 88 +++++++++++++++++++++++++++++++------ 1 file changed, 75 insertions(+), 13 deletions(-) diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c index 5904a2d4cefa..cde2a1ca0040 100644 --- a/drivers/tty/serial/sifive.c +++ b/drivers/tty/serial/sifive.c @@ -141,6 +141,7 @@ * @baud_rate: UART serial line rate (e.g., 115200 baud) * @clk: reference to this device's clock * @clk_notifier: clock rate change notifier for upstream clock changes + * @console_line_ended: indicate that the console line is fully written * * Configuration data specific to this SiFive UART. 
*/ @@ -151,6 +152,7 @@ struct sifive_serial_port { unsigned long baud_rate; struct clk *clk; struct notifier_block clk_notifier; + bool console_line_ended; }; /* @@ -779,33 +781,88 @@ static void sifive_serial_console_putchar(struct uart_port *port, unsigned char __ssp_wait_for_xmitr(ssp); __ssp_transmit_char(ssp, ch); + + ssp->console_line_ended = (ch == '\n'); } -static void sifive_serial_console_write(struct console *co, const char *s, - unsigned int count) +static void sifive_serial_device_lock(struct console *co, unsigned long *flags) +{ + struct uart_port *up = &sifive_serial_console_ports[co->index]->port; + + __uart_port_lock_irqsave(up, flags); +} + +static void sifive_serial_device_unlock(struct console *co, unsigned long flags) +{ + struct uart_port *up = &sifive_serial_console_ports[co->index]->port; + + __uart_port_unlock_irqrestore(up, flags); +} + +static void sifive_serial_console_write_atomic(struct console *co, + struct nbcon_write_context *wctxt) { struct sifive_serial_port *ssp = sifive_serial_console_ports[co->index]; - unsigned long flags; + struct uart_port *port = &ssp->port; unsigned int ier; - int locked = 1; if (!ssp) return; - if (oops_in_progress) - locked = uart_port_trylock_irqsave(&ssp->port, &flags); - else - uart_port_lock_irqsave(&ssp->port, &flags); + if (!nbcon_enter_unsafe(wctxt)) + return; ier = __ssp_readl(ssp, SIFIVE_SERIAL_IE_OFFS); __ssp_writel(0, SIFIVE_SERIAL_IE_OFFS, ssp); - uart_console_write(&ssp->port, s, count, sifive_serial_console_putchar); + if (!ssp->console_line_ended) + uart_console_write(port, "\n", 1, sifive_serial_console_putchar); + uart_console_write(port, wctxt->outbuf, wctxt->len, + sifive_serial_console_putchar); __ssp_writel(ier, SIFIVE_SERIAL_IE_OFFS, ssp); - if (locked) - uart_port_unlock_irqrestore(&ssp->port, flags); + nbcon_exit_unsafe(wctxt); +} + +static void sifive_serial_console_write_thread(struct console *co, + struct nbcon_write_context *wctxt) +{ + struct sifive_serial_port *ssp = 
sifive_serial_console_ports[co->index]; + struct uart_port *port = &ssp->port; + unsigned int ier; + + if (!ssp) + return; + + if (!nbcon_enter_unsafe(wctxt)) + return; + + ier = __ssp_readl(ssp, SIFIVE_SERIAL_IE_OFFS); + __ssp_writel(0, SIFIVE_SERIAL_IE_OFFS, ssp); + + if (nbcon_exit_unsafe(wctxt)) { + int len = READ_ONCE(wctxt->len); + int i; + + for (i = 0; i < len; i++) { + if (!nbcon_enter_unsafe(wctxt)) + break; + + uart_console_write(port, wctxt->outbuf + i, 1, + sifive_serial_console_putchar); + + if (!nbcon_exit_unsafe(wctxt)) + break; + } + } + + while (!nbcon_enter_unsafe(wctxt)) + nbcon_reacquire_nobuf(wctxt); + + __ssp_writel(ier, SIFIVE_SERIAL_IE_OFFS, ssp); + + nbcon_exit_unsafe(wctxt); } static int sifive_serial_console_setup(struct console *co, char *options) @@ -823,6 +880,8 @@ static int sifive_serial_console_setup(struct console *co, char *options) if (!ssp) return -ENODEV; + ssp->console_line_ended = true; + if (options) uart_parse_options(options, &baud, &parity, &bits, &flow); @@ -833,10 +892,13 @@ static struct uart_driver sifive_serial_uart_driver; static struct console sifive_serial_console = { .name = SIFIVE_TTY_PREFIX, - .write = sifive_serial_console_write, + .write_atomic = sifive_serial_console_write_atomic, + .write_thread = sifive_serial_console_write_thread, + .device_lock = sifive_serial_device_lock, + .device_unlock = sifive_serial_device_unlock, .device = uart_console_device, .setup = sifive_serial_console_setup, - .flags = CON_PRINTBUFFER, + .flags = CON_PRINTBUFFER | CON_NBCON, .index = -1, .data = &sifive_serial_uart_driver, }; From 926040da60642335969ff99fa2ba67e4e0bb2618 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 11 Apr 2025 15:38:27 -0500 Subject: [PATCH 036/105] serial: 8250_of: manage bus clock in suspend/resume Save the bus clock pointer in the of_serial_info structure, and use that to disable the bus clock on suspend and re-enable it on resume. 
Signed-off-by: Alex Elder Reviewed-by: Andy Shevchenko Reviewed-by: Yixun Lan Link: https://lore.kernel.org/r/20250411203828.1491595-4-elder@riscstar.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_of.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index a90a5462aa72..d178b6c54ea1 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -24,6 +24,7 @@ struct of_serial_info { struct clk *clk; + struct clk *bus_clk; struct reset_control *rst; int type; int line; @@ -138,6 +139,7 @@ static int of_platform_serial_setup(struct platform_device *ofdev, goto err_pmruntime; } + info->bus_clk = bus_clk; port->uartclk = clk_get_rate(info->clk); } /* If current-speed was set, then try not to change it. */ @@ -299,6 +301,7 @@ static int of_serial_suspend(struct device *dev) if (!uart_console(port) || console_suspend_enabled) { pm_runtime_put_sync(dev); clk_disable_unprepare(info->clk); + clk_disable_unprepare(info->bus_clk); } return 0; } @@ -311,6 +314,7 @@ static int of_serial_resume(struct device *dev) if (!uart_console(port) || console_suspend_enabled) { pm_runtime_get_sync(dev); + clk_prepare_enable(info->bus_clk); clk_prepare_enable(info->clk); } From 92557dea58f7e451185b4ef0a582cf46221fe4ed Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 17 Apr 2025 10:13:34 +0200 Subject: [PATCH 037/105] mxser: Use non-hybrid PCI devres API mxser enables its PCI device with pcim_enable_device(). This, implicitly, switches the function pci_request_region() into managed mode, where it becomes a devres function. The PCI subsystem wants to remove this hybrid nature from its interfaces. To do so, users of the aforementioned combination of functions must be ported to non-hybrid functions. Replace the call to sometimes-managed pci_request_region() with one to the always-managed pcim_request_region(). 
Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20250417081333.20917-2-phasta@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/mxser.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c index 4d45eca4929a..2fc13cc02cc5 100644 --- a/drivers/tty/mxser.c +++ b/drivers/tty/mxser.c @@ -1812,7 +1812,7 @@ static int mxser_probe(struct pci_dev *pdev, /* io address */ ioaddress = pci_resource_start(pdev, 2); - retval = pci_request_region(pdev, 2, "mxser(IO)"); + retval = pcim_request_region(pdev, 2, "mxser(IO)"); if (retval) goto err_zero; @@ -1822,7 +1822,7 @@ static int mxser_probe(struct pci_dev *pdev, /* vector */ ioaddress = pci_resource_start(pdev, 3); - retval = pci_request_region(pdev, 3, "mxser(vector)"); + retval = pcim_request_region(pdev, 3, "mxser(vector)"); if (retval) goto err_zero; brd->vector = ioaddress; From 05f31711af6417da19a4fb4b46b41039d569dabc Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 16 Apr 2025 14:02:41 +0200 Subject: [PATCH 038/105] dt-bindings: serial: mediatek,uart: Add compatible for MT6893 Add a compatible string for the MediaTek Dimensity 1200 (MT6893) SoC, whose UART IPs are fully compatible with MT6577. 
Signed-off-by: AngeloGioacchino Del Regno Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250416120241.147925-1-angelogioacchino.delregno@collabora.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/mediatek,uart.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/serial/mediatek,uart.yaml b/Documentation/devicetree/bindings/serial/mediatek,uart.yaml index 1b02f0b197ff..c55d9a0efa19 100644 --- a/Documentation/devicetree/bindings/serial/mediatek,uart.yaml +++ b/Documentation/devicetree/bindings/serial/mediatek,uart.yaml @@ -33,6 +33,7 @@ properties: - mediatek,mt6779-uart - mediatek,mt6795-uart - mediatek,mt6797-uart + - mediatek,mt6893-uart - mediatek,mt7622-uart - mediatek,mt7623-uart - mediatek,mt7629-uart From be4e3097c1f800b0f39e7e60b2b28eb6603f5d06 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Wed, 23 Apr 2025 22:40:56 +0800 Subject: [PATCH 039/105] tty: Remove unused API tty_port_register_device_serdev() Remove API tty_port_register_device_serdev() which has no caller. 
Signed-off-by: Zijun Hu Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250423-remove_api-v1-1-fac673d09feb@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_port.c | 20 -------------------- include/linux/tty_port.h | 3 --- 2 files changed, 23 deletions(-) diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 14cca33d2269..4af1fbf73f51 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -199,26 +199,6 @@ struct device *tty_port_register_device_attr_serdev(struct tty_port *port, } EXPORT_SYMBOL_GPL(tty_port_register_device_attr_serdev); -/** - * tty_port_register_device_serdev - register tty or serdev device - * @port: tty_port of the device - * @driver: tty_driver for this device - * @index: index of the tty - * @host: serial port hardware controller device - * @parent: parent if exists, otherwise NULL - * - * Register a serdev or tty device depending on if the parent device has any - * defined serdev clients or not. - */ -struct device *tty_port_register_device_serdev(struct tty_port *port, - struct tty_driver *driver, unsigned index, - struct device *host, struct device *parent) -{ - return tty_port_register_device_attr_serdev(port, driver, index, - host, parent, NULL, NULL); -} -EXPORT_SYMBOL_GPL(tty_port_register_device_serdev); - /** * tty_port_unregister_device - deregister a tty or serdev device * @port: tty_port of the device diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index 1b861f2100b6..08f89a598366 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -147,9 +147,6 @@ struct device *tty_port_register_device_attr(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); -struct device *tty_port_register_device_serdev(struct tty_port *port, - struct tty_driver *driver, unsigned index, - struct device *host, struct device *parent); struct device 
*tty_port_register_device_attr_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *host, struct device *parent, void *drvdata, From e6afad4587c9b40a98cf26e73c55a2fb953ee6dd Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Thu, 24 Apr 2025 16:43:17 +0800 Subject: [PATCH 040/105] dt-bindings: serial: amlogic,meson-uart: Add compatible string for S6/S7/S7D Amlogic S6/S7/S7D SoCs use the same UART controller as S4 SoCs and G12A. There is no need for an extra compatible line in the driver, but add S6/S7/S7D compatible line for documentation. Acked-by: Rob Herring (Arm) Reviewed-by: Neil Armstrong Signed-off-by: Xianwei Zhao Link: https://lore.kernel.org/r/20250424-uart-binding-v1-1-eb0f6d97a654@amlogic.com Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/serial/amlogic,meson-uart.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/serial/amlogic,meson-uart.yaml b/Documentation/devicetree/bindings/serial/amlogic,meson-uart.yaml index 0565fb7649c5..d8ad1bb6172d 100644 --- a/Documentation/devicetree/bindings/serial/amlogic,meson-uart.yaml +++ b/Documentation/devicetree/bindings/serial/amlogic,meson-uart.yaml @@ -56,6 +56,9 @@ properties: items: - enum: - amlogic,a4-uart + - amlogic,s6-uart + - amlogic,s7-uart + - amlogic,s7d-uart - amlogic,t7-uart - const: amlogic,meson-s4-uart From 3eabc1a34b95c39c698fd659babdfd9af05ef845 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Fri, 25 Apr 2025 13:13:10 +0200 Subject: [PATCH 041/105] tty: simplify throttling using guard()s tty_throttle_safe() and tty_unthrottle_safe() can be made less convoluted using guard()s. Switch them. 
Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250425111315.1036184-2-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ioctl.c | 48 +++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 85de90eebc7b..90c70d8d14e3 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -122,21 +122,19 @@ EXPORT_SYMBOL(tty_unthrottle); */ bool tty_throttle_safe(struct tty_struct *tty) { - bool ret = true; + guard(mutex)(&tty->throttle_mutex); - mutex_lock(&tty->throttle_mutex); - if (!tty_throttled(tty)) { - if (tty->flow_change != TTY_THROTTLE_SAFE) - ret = false; - else { - set_bit(TTY_THROTTLED, &tty->flags); - if (tty->ops->throttle) - tty->ops->throttle(tty); - } - } - mutex_unlock(&tty->throttle_mutex); + if (tty_throttled(tty)) + return true; - return ret; + if (tty->flow_change != TTY_THROTTLE_SAFE) + return false; + + set_bit(TTY_THROTTLED, &tty->flags); + if (tty->ops->throttle) + tty->ops->throttle(tty); + + return true; } /** @@ -152,21 +150,19 @@ bool tty_throttle_safe(struct tty_struct *tty) */ bool tty_unthrottle_safe(struct tty_struct *tty) { - bool ret = true; + guard(mutex)(&tty->throttle_mutex); - mutex_lock(&tty->throttle_mutex); - if (tty_throttled(tty)) { - if (tty->flow_change != TTY_UNTHROTTLE_SAFE) - ret = false; - else { - clear_bit(TTY_THROTTLED, &tty->flags); - if (tty->ops->unthrottle) - tty->ops->unthrottle(tty); - } - } - mutex_unlock(&tty->throttle_mutex); + if (!tty_throttled(tty)) + return true; - return ret; + if (tty->flow_change != TTY_UNTHROTTLE_SAFE) + return false; + + clear_bit(TTY_THROTTLED, &tty->flags); + if (tty->ops->unthrottle) + tty->ops->unthrottle(tty); + + return true; } /** From f49573f2f53e0f6f74a58895437a46580d1a0033 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Fri, 25 Apr 2025 13:13:11 +0200 Subject: [PATCH 042/105] tty: use lock guard()s in tty_io 
guard()s and scoped_guard()s express more clearly what is protected by locks. And also makes the code cleaner as it can return immediately in case of short returns. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250425111315.1036184-3-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 96 ++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 56 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index ca9b7d7bad2b..e2d92cf70eb7 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -276,11 +276,10 @@ static void check_tty_count(struct tty_struct *tty, const char *routine) struct list_head *p; int count = 0, kopen_count = 0; - spin_lock(&tty->files_lock); - list_for_each(p, &tty->tty_files) { - count++; - } - spin_unlock(&tty->files_lock); + scoped_guard(spinlock, &tty->files_lock) + list_for_each(p, &tty->tty_files) + count++; + if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_SLAVE && tty->link && tty->link->count) @@ -378,7 +377,7 @@ EXPORT_SYMBOL_GPL(tty_dev_name_to_number); */ struct tty_driver *tty_find_polling_driver(char *name, int *line) { - struct tty_driver *p, *res = NULL; + struct tty_driver *p; int tty_line = 0; int len; char *str, *stp; @@ -392,7 +391,8 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line) len = str - name; tty_line = simple_strtoul(str, &str, 10); - mutex_lock(&tty_mutex); + guard(mutex)(&tty_mutex); + /* Search through the tty devices to look for a match */ list_for_each_entry(p, &tty_drivers, tty_drivers) { if (!len || strncmp(name, p->name, len) != 0) @@ -405,14 +405,12 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line) if (tty_line >= 0 && tty_line < p->num && p->ops && p->ops->poll_init && !p->ops->poll_init(p, tty_line, stp)) { - res = tty_driver_kref_get(p); *line = tty_line; - break; + return tty_driver_kref_get(p); } } - mutex_unlock(&tty_mutex); - return res; 
+ return NULL; } EXPORT_SYMBOL_GPL(tty_find_polling_driver); #endif @@ -531,16 +529,15 @@ EXPORT_SYMBOL_GPL(tty_wakeup); */ static struct file *tty_release_redirect(struct tty_struct *tty) { - struct file *f = NULL; + guard(spinlock)(&redirect_lock); - spin_lock(&redirect_lock); if (redirect && file_tty(redirect) == tty) { - f = redirect; + struct file *f = redirect; redirect = NULL; + return f; } - spin_unlock(&redirect_lock); - return f; + return NULL; } /** @@ -765,11 +762,8 @@ void __stop_tty(struct tty_struct *tty) */ void stop_tty(struct tty_struct *tty) { - unsigned long flags; - - spin_lock_irqsave(&tty->flow.lock, flags); + guard(spinlock_irqsave)(&tty->flow.lock); __stop_tty(tty); - spin_unlock_irqrestore(&tty->flow.lock, flags); } EXPORT_SYMBOL(stop_tty); @@ -796,11 +790,8 @@ void __start_tty(struct tty_struct *tty) */ void start_tty(struct tty_struct *tty) { - unsigned long flags; - - spin_lock_irqsave(&tty->flow.lock, flags); + guard(spinlock_irqsave)(&tty->flow.lock); __start_tty(tty); - spin_unlock_irqrestore(&tty->flow.lock, flags); } EXPORT_SYMBOL(start_tty); @@ -809,7 +800,8 @@ static void tty_update_time(struct tty_struct *tty, bool mtime) time64_t sec = ktime_get_real_seconds(); struct tty_file_private *priv; - spin_lock(&tty->files_lock); + guard(spinlock)(&tty->files_lock); + list_for_each_entry(priv, &tty->tty_files, list) { struct inode *inode = file_inode(priv->file); struct timespec64 time = mtime ? 
inode_get_mtime(inode) : inode_get_atime(inode); @@ -827,7 +819,6 @@ static void tty_update_time(struct tty_struct *tty, bool mtime) inode_set_atime(inode, sec, 0); } } - spin_unlock(&tty->files_lock); } /* @@ -2314,13 +2305,12 @@ static int tiocsti(struct tty_struct *tty, u8 __user *p) */ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user *arg) { - int err; + guard(mutex)(&tty->winsize_mutex); - mutex_lock(&tty->winsize_mutex); - err = copy_to_user(arg, &tty->winsize, sizeof(*arg)); - mutex_unlock(&tty->winsize_mutex); + if (copy_to_user(arg, &tty->winsize, sizeof(*arg))) + return -EFAULT; - return err ? -EFAULT : 0; + return 0; } /** @@ -2335,10 +2325,10 @@ int tty_do_resize(struct tty_struct *tty, struct winsize *ws) { struct pid *pgrp; - /* Lock the tty */ - mutex_lock(&tty->winsize_mutex); + guard(mutex)(&tty->winsize_mutex); + if (!memcmp(ws, &tty->winsize, sizeof(*ws))) - goto done; + return 0; /* Signal the foreground process group */ pgrp = tty_get_pgrp(tty); @@ -2347,8 +2337,7 @@ int tty_do_resize(struct tty_struct *tty, struct winsize *ws) put_pid(pgrp); tty->winsize = *ws; -done: - mutex_unlock(&tty->winsize_mutex); + return 0; } EXPORT_SYMBOL(tty_do_resize); @@ -2409,13 +2398,14 @@ static int tioccons(struct file *file) return -EBADF; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; - spin_lock(&redirect_lock); - if (redirect) { - spin_unlock(&redirect_lock); + + guard(spinlock)(&redirect_lock); + + if (redirect) return -EBUSY; - } + redirect = get_file(file); - spin_unlock(&redirect_lock); + return 0; } @@ -3028,11 +3018,9 @@ void __do_SAK(struct tty_struct *tty) struct task_struct *g, *p; struct pid *session; int i; - unsigned long flags; - spin_lock_irqsave(&tty->ctrl.lock, flags); - session = get_pid(tty->ctrl.session); - spin_unlock_irqrestore(&tty->ctrl.lock, flags); + scoped_guard(spinlock_irqsave, &tty->ctrl.lock) + session = get_pid(tty->ctrl.session); tty_ldisc_flush(tty); @@ -3055,7 +3043,7 @@ void __do_SAK(struct 
tty_struct *tty) PIDTYPE_SID); continue; } - task_lock(p); + guard(task_lock)(p); i = iterate_fd(p->files, 0, this_tty, tty); if (i != 0) { tty_notice(tty, "SAK: killed process %d (%s): by fd#%d\n", @@ -3063,7 +3051,6 @@ void __do_SAK(struct tty_struct *tty) group_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_SID); } - task_unlock(p); } read_unlock(&tasklist_lock); put_pid(session); @@ -3465,9 +3452,8 @@ int tty_register_driver(struct tty_driver *driver) goto err_unreg_char; } - mutex_lock(&tty_mutex); - list_add(&driver->tty_drivers, &tty_drivers); - mutex_unlock(&tty_mutex); + scoped_guard(mutex, &tty_mutex) + list_add(&driver->tty_drivers, &tty_drivers); if (!(driver->flags & TTY_DRIVER_DYNAMIC_DEV)) { for (i = 0; i < driver->num; i++) { @@ -3486,9 +3472,8 @@ err_unreg_devs: for (i--; i >= 0; i--) tty_unregister_device(driver, i); - mutex_lock(&tty_mutex); - list_del(&driver->tty_drivers); - mutex_unlock(&tty_mutex); + scoped_guard(mutex, &tty_mutex) + list_del(&driver->tty_drivers); err_unreg_char: unregister_chrdev_region(dev, driver->num); @@ -3507,9 +3492,8 @@ void tty_unregister_driver(struct tty_driver *driver) { unregister_chrdev_region(MKDEV(driver->major, driver->minor_start), driver->num); - mutex_lock(&tty_mutex); - list_del(&driver->tty_drivers); - mutex_unlock(&tty_mutex); + scoped_guard(mutex, &tty_mutex) + list_del(&driver->tty_drivers); } EXPORT_SYMBOL(tty_unregister_driver); From 1404d3509c768732be51d0acf8330689936a692a Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Fri, 25 Apr 2025 13:13:12 +0200 Subject: [PATCH 043/105] serial: switch uart_port::iotype to enum uart_iotype The inline-defined constants look weird. Instead, define a proper enum for them and type uart_port::iotype as that enum. This allows for proper checking in switch-case labels (somewhere, a default or UPIO_UNKNOWN label needs to be added/handled). 
Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250425111315.1036184-4-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 2 +- drivers/tty/serial/8250/8250_early.c | 2 ++ drivers/tty/serial/8250/8250_port.c | 4 ++++ drivers/tty/serial/8250/8250_rsa.c | 2 ++ drivers/tty/serial/amba-pl011.c | 2 +- drivers/tty/serial/fsl_lpuart.c | 5 ++++- drivers/tty/serial/samsung_tty.c | 4 ++++ drivers/tty/serial/serial_core.c | 8 ++++---- include/linux/serial_core.h | 30 +++++++++++++++------------- 9 files changed, 38 insertions(+), 21 deletions(-) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 5a56f853cf6d..68994a964321 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -461,7 +461,7 @@ static int univ8250_console_match(struct console *co, char *name, int idx, char *options) { char match[] = "uart"; /* 8250-specific earlycon name */ - unsigned char iotype; + enum uart_iotype iotype; resource_size_t addr; int i; diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c index 842422921765..dc0371857ecb 100644 --- a/drivers/tty/serial/8250/8250_early.c +++ b/drivers/tty/serial/8250/8250_early.c @@ -77,6 +77,8 @@ static void serial8250_early_out(struct uart_port *port, int offset, int value) outb(value, port->iobase + offset); break; #endif + default: + break; } } diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 8ac452cea36c..8d9bb91d4bae 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2993,6 +2993,8 @@ static int serial8250_request_std_resource(struct uart_8250_port *up) if (!request_region(port->iobase, size, "serial")) return -EBUSY; return 0; + case UPIO_UNKNOWN: + break; } return 0; @@ -3025,6 +3027,8 @@ static void serial8250_release_std_resource(struct uart_8250_port *up) case UPIO_PORT: 
release_region(port->iobase, size); break; + case UPIO_UNKNOWN: + break; } } diff --git a/drivers/tty/serial/8250/8250_rsa.c b/drivers/tty/serial/8250/8250_rsa.c index 82f2593b4c59..4c8b9671bd41 100644 --- a/drivers/tty/serial/8250/8250_rsa.c +++ b/drivers/tty/serial/8250/8250_rsa.c @@ -43,6 +43,8 @@ static void rsa8250_release_resource(struct uart_8250_port *up) case UPIO_PORT: release_region(port->iobase + offset, size); break; + default: + break; } } diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 11d65097578c..421ac22555df 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -2476,7 +2476,7 @@ static int pl011_console_setup(struct console *co, char *options) static int pl011_console_match(struct console *co, char *name, int idx, char *options) { - unsigned char iotype; + enum uart_iotype iotype; resource_size_t addr; int i; diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index fe5aed99d55a..dff6a6c57b5f 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -403,6 +403,8 @@ static inline void lpuart32_write(struct uart_port *port, u32 val, case UPIO_MEM32BE: iowrite32be(val, port->membase + off); break; + default: + break; } } @@ -563,8 +565,9 @@ static dma_addr_t lpuart_dma_datareg_addr(struct lpuart_port *sport) return sport->port.mapbase + UARTDATA; case UPIO_MEM32BE: return sport->port.mapbase + UARTDATA + sizeof(u32) - 1; + default: + return sport->port.mapbase + UARTDR; } - return sport->port.mapbase + UARTDR; } static int lpuart_dma_tx_request(struct uart_port *port) diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c index 210fff7164c1..73e2866febc1 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -190,6 +190,8 @@ static void wr_reg(const struct uart_port *port, u32 reg, u32 val) case UPIO_MEM32: writel_relaxed(val, portaddr(port, reg)); break; + default: + 
break; } } @@ -2713,6 +2715,8 @@ static void wr_reg_barrier(const struct uart_port *port, u32 reg, u32 val) case UPIO_MEM32: writel(val, portaddr(port, reg)); break; + default: + break; } } diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 88669972d9a0..5bc145643385 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2178,8 +2178,8 @@ uart_get_console(struct uart_port *ports, int nr, struct console *co) * * Returns: 0 on success or -%EINVAL on failure */ -int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, - char **options) +int uart_parse_earlycon(char *p, enum uart_iotype *iotype, + resource_size_t *addr, char **options) { if (strncmp(p, "mmio,", 5) == 0) { *iotype = UPIO_MEM; @@ -3289,9 +3289,9 @@ bool uart_match_port(const struct uart_port *port1, case UPIO_AU: case UPIO_TSI: return port1->mapbase == port2->mapbase; + default: + return false; } - - return false; } EXPORT_SYMBOL(uart_match_port); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 743b4afaad4c..914b5e97e056 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -427,6 +427,18 @@ struct uart_icount { typedef u64 __bitwise upf_t; typedef unsigned int __bitwise upstat_t; +enum uart_iotype { + UPIO_UNKNOWN = -1, + UPIO_PORT = SERIAL_IO_PORT, /* 8b I/O port access */ + UPIO_HUB6 = SERIAL_IO_HUB6, /* Hub6 ISA card */ + UPIO_MEM = SERIAL_IO_MEM, /* driver-specific */ + UPIO_MEM32 = SERIAL_IO_MEM32, /* 32b little endian */ + UPIO_AU = SERIAL_IO_AU, /* Au1x00 and RT288x type IO */ + UPIO_TSI = SERIAL_IO_TSI, /* Tsi108/109 type IO */ + UPIO_MEM32BE = SERIAL_IO_MEM32BE, /* 32b big endian */ + UPIO_MEM16 = SERIAL_IO_MEM16, /* 16b little endian */ +}; + struct uart_port { spinlock_t lock; /* port lock */ unsigned long iobase; /* in/out[bwl] */ @@ -469,23 +481,13 @@ struct uart_port { unsigned char x_char; /* xon/xoff char */ unsigned char regshift; /* reg offset 
shift */ - unsigned char iotype; /* io access style */ - -#define UPIO_UNKNOWN ((unsigned char)~0U) /* UCHAR_MAX */ -#define UPIO_PORT (SERIAL_IO_PORT) /* 8b I/O port access */ -#define UPIO_HUB6 (SERIAL_IO_HUB6) /* Hub6 ISA card */ -#define UPIO_MEM (SERIAL_IO_MEM) /* driver-specific */ -#define UPIO_MEM32 (SERIAL_IO_MEM32) /* 32b little endian */ -#define UPIO_AU (SERIAL_IO_AU) /* Au1x00 and RT288x type IO */ -#define UPIO_TSI (SERIAL_IO_TSI) /* Tsi108/109 type IO */ -#define UPIO_MEM32BE (SERIAL_IO_MEM32BE) /* 32b big endian */ -#define UPIO_MEM16 (SERIAL_IO_MEM16) /* 16b little endian */ - unsigned char quirks; /* internal quirks */ /* internal quirks must be updated while holding port mutex */ #define UPQ_NO_TXEN_TEST BIT(0) + enum uart_iotype iotype; /* io access style */ + unsigned int read_status_mask; /* driver specific */ unsigned int ignore_status_mask; /* driver specific */ struct uart_state *state; /* pointer to parent state */ @@ -1101,8 +1103,8 @@ static inline bool uart_console_registered(struct uart_port *port) struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); -int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, - char **options); +int uart_parse_earlycon(char *p, enum uart_iotype *iotype, + resource_size_t *addr, char **options); void uart_parse_options(const char *options, int *baud, int *parity, int *bits, int *flow); int uart_set_options(struct uart_port *port, struct console *co, int baud, From 31e0b7863c9bf97bc3a6d735cb4956c929134a80 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Fri, 25 Apr 2025 13:13:13 +0200 Subject: [PATCH 044/105] serial: rename local uart_port_lock() -> uart_port_ref_lock() uart_port_lock() and uart_port_unlock() are (at the same time) defined as: * functions in include/linux/serial_core.h * macros in drivers/tty/serial/serial_core.c The former are sane uart port lock wrappers. 
The latter _lock() does something completely different: it inspects a uart_state, obtains a uart_port from it, and increases its reference count. And if that all succeeded, the port is locked too. Similarly, the _unlock() counterpart first unlocks and then decrements the refcount too. This state is REALLY CONFUSING. So rename the latter (local .c macros): * uart_port_lock() -> uart_port_ref_lock(), and * uart_port_unlock() -> uart_port_unlock_deref(). Now, the forbidden while-at-it part: convert from a macro to an inline -- do it here as the passed 'flags' have to be pointer to ulong now. So we avoid doubled changes on identical LOCs. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250425111315.1036184-5-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 73 ++++++++++++++++---------------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 5bc145643385..52e764be42c4 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -75,22 +75,23 @@ static inline void uart_port_deref(struct uart_port *uport) wake_up(&uport->state->remove_wait); } -#define uart_port_lock(state, flags) \ - ({ \ - struct uart_port *__uport = uart_port_ref(state); \ - if (__uport) \ - uart_port_lock_irqsave(__uport, &flags); \ - __uport; \ - }) +static inline struct uart_port *uart_port_ref_lock(struct uart_state *state, unsigned long *flags) +{ + struct uart_port *uport = uart_port_ref(state); -#define uart_port_unlock(uport, flags) \ - ({ \ - struct uart_port *__uport = uport; \ - if (__uport) { \ - uart_port_unlock_irqrestore(__uport, flags); \ - uart_port_deref(__uport); \ - } \ - }) + if (uport) + uart_port_lock_irqsave(uport, flags); + + return uport; +} + +static inline void uart_port_unlock_deref(struct uart_port *uport, unsigned long flags) +{ + if (uport) { + uart_port_unlock_irqrestore(uport, flags); + 
uart_port_deref(uport); + } +} static inline struct uart_port *uart_port_check(struct uart_state *state) { @@ -127,10 +128,10 @@ static void uart_stop(struct tty_struct *tty) struct uart_port *port; unsigned long flags; - port = uart_port_lock(state, flags); + port = uart_port_ref_lock(state, &flags); if (port) port->ops->stop_tx(port); - uart_port_unlock(port, flags); + uart_port_unlock_deref(port, flags); } static void __uart_start(struct uart_state *state) @@ -168,9 +169,9 @@ static void uart_start(struct tty_struct *tty) struct uart_port *port; unsigned long flags; - port = uart_port_lock(state, flags); + port = uart_port_ref_lock(state, &flags); __uart_start(state); - uart_port_unlock(port, flags); + uart_port_unlock_deref(port, flags); } static void @@ -258,14 +259,14 @@ static int uart_alloc_xmit_buf(struct tty_port *port) if (!page) return -ENOMEM; - uport = uart_port_lock(state, flags); + uport = uart_port_ref_lock(state, &flags); if (!state->port.xmit_buf) { state->port.xmit_buf = (unsigned char *)page; kfifo_init(&state->port.xmit_fifo, state->port.xmit_buf, PAGE_SIZE); - uart_port_unlock(uport, flags); + uart_port_unlock_deref(uport, flags); } else { - uart_port_unlock(uport, flags); + uart_port_unlock_deref(uport, flags); /* * Do not free() the page under the port lock, see * uart_free_xmit_buf(). @@ -289,11 +290,11 @@ static void uart_free_xmit_buf(struct tty_port *port) * console driver may need to allocate/free a debug object, which * can end up in printk() recursion. 
*/ - uport = uart_port_lock(state, flags); + uport = uart_port_ref_lock(state, &flags); xmit_buf = port->xmit_buf; port->xmit_buf = NULL; INIT_KFIFO(port->xmit_fifo); - uart_port_unlock(uport, flags); + uart_port_unlock_deref(uport, flags); free_page((unsigned long)xmit_buf); } @@ -592,15 +593,15 @@ static int uart_put_char(struct tty_struct *tty, u8 c) unsigned long flags; int ret = 0; - port = uart_port_lock(state, flags); + port = uart_port_ref_lock(state, &flags); if (!state->port.xmit_buf) { - uart_port_unlock(port, flags); + uart_port_unlock_deref(port, flags); return 0; } if (port) ret = kfifo_put(&state->port.xmit_fifo, c); - uart_port_unlock(port, flags); + uart_port_unlock_deref(port, flags); return ret; } @@ -623,9 +624,9 @@ static ssize_t uart_write(struct tty_struct *tty, const u8 *buf, size_t count) if (WARN_ON(!state)) return -EL3HLT; - port = uart_port_lock(state, flags); + port = uart_port_ref_lock(state, &flags); if (!state->port.xmit_buf) { - uart_port_unlock(port, flags); + uart_port_unlock_deref(port, flags); return 0; } @@ -633,7 +634,7 @@ static ssize_t uart_write(struct tty_struct *tty, const u8 *buf, size_t count) ret = kfifo_in(&state->port.xmit_fifo, buf, count); __uart_start(state); - uart_port_unlock(port, flags); + uart_port_unlock_deref(port, flags); return ret; } @@ -644,9 +645,9 @@ static unsigned int uart_write_room(struct tty_struct *tty) unsigned long flags; unsigned int ret; - port = uart_port_lock(state, flags); + port = uart_port_ref_lock(state, &flags); ret = kfifo_avail(&state->port.xmit_fifo); - uart_port_unlock(port, flags); + uart_port_unlock_deref(port, flags); return ret; } @@ -657,9 +658,9 @@ static unsigned int uart_chars_in_buffer(struct tty_struct *tty) unsigned long flags; unsigned int ret; - port = uart_port_lock(state, flags); + port = uart_port_ref_lock(state, &flags); ret = kfifo_len(&state->port.xmit_fifo); - uart_port_unlock(port, flags); + uart_port_unlock_deref(port, flags); return ret; } @@ -678,13 +679,13 
@@ static void uart_flush_buffer(struct tty_struct *tty) pr_debug("uart_flush_buffer(%d) called\n", tty->index); - port = uart_port_lock(state, flags); + port = uart_port_ref_lock(state, &flags); if (!port) return; kfifo_reset(&state->port.xmit_fifo); if (port->ops->flush_buffer) port->ops->flush_buffer(port); - uart_port_unlock(port, flags); + uart_port_unlock_deref(port, flags); tty_port_tty_wakeup(&state->port); } From 2b369a1e9930ef6c13858b0ed082401d731e4936 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Fri, 25 Apr 2025 13:13:14 +0200 Subject: [PATCH 045/105] serial: use uart_port_ref_lock() helper uart_get_icount() and uart_carrier_raised() open code uart_port_ref_lock(). Use the helper instead. The difference is we use _irqsave() variants of a spinlock now. But that's "safer" than _irq(). Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250425111315.1036184-6-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 52e764be42c4..1f7708a91fc6 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1276,14 +1276,13 @@ static int uart_get_icount(struct tty_struct *tty, struct uart_state *state = tty->driver_data; struct uart_icount cnow; struct uart_port *uport; + unsigned long flags; - uport = uart_port_ref(state); + uport = uart_port_ref_lock(state, &flags); if (!uport) return -EIO; - uart_port_lock_irq(uport); memcpy(&cnow, &uport->icount, sizeof(struct uart_icount)); - uart_port_unlock_irq(uport); - uart_port_deref(uport); + uart_port_unlock_deref(uport, flags); icount->cts = cnow.cts; icount->dsr = cnow.dsr; @@ -1915,9 +1914,10 @@ static bool uart_carrier_raised(struct tty_port *port) { struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport; + unsigned long flags; int 
mctrl; - uport = uart_port_ref(state); + uport = uart_port_ref_lock(state, &flags); /* * Should never observe uport == NULL since checks for hangup should * abort the tty_port_block_til_ready() loop before checking for carrier @@ -1926,11 +1926,9 @@ static bool uart_carrier_raised(struct tty_port *port) */ if (WARN_ON(!uport)) return true; - uart_port_lock_irq(uport); uart_enable_ms(uport); mctrl = uport->ops->get_mctrl(uport); - uart_port_unlock_irq(uport); - uart_port_deref(uport); + uart_port_unlock_deref(uport, flags); return mctrl & TIOCM_CAR; } From 7ba4f02e12e6f2409c5b2afae2963089b5673482 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Fri, 25 Apr 2025 13:13:15 +0200 Subject: [PATCH 046/105] serial: 8250: unexport serial8250_rpm_*() functions Since commit 8700a7ea5519 (serial: 8250_omap: Drop pm_runtime_irq_safe()), all the serial8250_rpm_*() functions are used solely in 8250_port. Unexport them. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/r/20250425111315.1036184-7-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.h | 6 ------ drivers/tty/serial/8250/8250_port.c | 12 ++++-------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index b861585ca02a..18530c31a598 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -223,12 +223,6 @@ static inline bool serial8250_clear_THRI(struct uart_8250_port *up) struct uart_8250_port *serial8250_setup_port(int index); struct uart_8250_port *serial8250_get_port(int line); -void serial8250_rpm_get(struct uart_8250_port *p); -void serial8250_rpm_put(struct uart_8250_port *p); - -void serial8250_rpm_get_tx(struct uart_8250_port *p); -void serial8250_rpm_put_tx(struct uart_8250_port *p); - int serial8250_em485_config(struct uart_port *port, struct ktermios *termios, struct serial_rs485 *rs485); void serial8250_em485_start_tx(struct uart_8250_port *p, bool 
toggle_ier); diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 8d9bb91d4bae..6d7b8c4667c9 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -517,22 +517,20 @@ void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p) } EXPORT_SYMBOL_GPL(serial8250_clear_and_reinit_fifos); -void serial8250_rpm_get(struct uart_8250_port *p) +static void serial8250_rpm_get(struct uart_8250_port *p) { if (!(p->capabilities & UART_CAP_RPM)) return; pm_runtime_get_sync(p->port.dev); } -EXPORT_SYMBOL_GPL(serial8250_rpm_get); -void serial8250_rpm_put(struct uart_8250_port *p) +static void serial8250_rpm_put(struct uart_8250_port *p) { if (!(p->capabilities & UART_CAP_RPM)) return; pm_runtime_mark_last_busy(p->port.dev); pm_runtime_put_autosuspend(p->port.dev); } -EXPORT_SYMBOL_GPL(serial8250_rpm_put); /** * serial8250_em485_init() - put uart_8250_port into rs485 emulating @@ -647,7 +645,7 @@ EXPORT_SYMBOL_GPL(serial8250_em485_config); * once and disable_runtime_pm_tx() will still disable RPM because the fifo is * empty and the HW can idle again. */ -void serial8250_rpm_get_tx(struct uart_8250_port *p) +static void serial8250_rpm_get_tx(struct uart_8250_port *p) { unsigned char rpm_active; @@ -659,9 +657,8 @@ void serial8250_rpm_get_tx(struct uart_8250_port *p) return; pm_runtime_get_sync(p->port.dev); } -EXPORT_SYMBOL_GPL(serial8250_rpm_get_tx); -void serial8250_rpm_put_tx(struct uart_8250_port *p) +static void serial8250_rpm_put_tx(struct uart_8250_port *p) { unsigned char rpm_active; @@ -674,7 +671,6 @@ void serial8250_rpm_put_tx(struct uart_8250_port *p) pm_runtime_mark_last_busy(p->port.dev); pm_runtime_put_autosuspend(p->port.dev); } -EXPORT_SYMBOL_GPL(serial8250_rpm_put_tx); /* * IER sleep support. 
UARTs which have EFRs need the "extended From 366cf0c3af1aac03c2bf08cf4b11f4ab9cba73e4 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Fri, 21 Feb 2025 13:43:52 +0100 Subject: [PATCH 047/105] tty/vt: Use KVAL instead of use bit operation The K_HANDLERS always gets KVAL as an argument. It is better to use the KVAL macro itself instead of bit operation. Signed-off-by: Alexey Gladkov Link: https://lore.kernel.org/r/4f199d90c7f0bc86bcaafd2f25da4cd006adcc80.1740141518.git.legion@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/keyboard.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index ae92e6a50a65..ce2dcf3c824a 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -1519,7 +1519,7 @@ static void kbd_keycode(unsigned int keycode, int down, bool hw_raw) if ((raw_mode || kbd->kbdmode == VC_OFF) && type != KT_SPEC && type != KT_SHIFT) return; - (*k_handler[type])(vc, keysym & 0xff, !down); + (*k_handler[type])(vc, KVAL(keysym), !down); param.ledstate = kbd->ledflagstate; atomic_notifier_call_chain(&keyboard_notifier_list, KBD_POST_KEYSYM, &param); From f92217683a44f79759d805194d6d36af1bde6e10 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Fri, 21 Feb 2025 13:43:53 +0100 Subject: [PATCH 048/105] tty/vt: Gather the code that outputs char with utf8 in mind When we put a character to the tty, we take into account the keyboard mode to properly handle utf8. This code is duplicated a few times. 
Signed-off-by: Alexey Gladkov Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/c0d10193e61f977b518862d8f216bbaf234138fd.1740141518.git.legion@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/keyboard.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index ce2dcf3c824a..dc585079c2fb 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -376,6 +376,17 @@ static void to_utf8(struct vc_data *vc, uint c) } } +static void put_queue_utf8(struct vc_data *vc, u32 value) +{ + if (kbd->kbdmode == VC_UNICODE) + to_utf8(vc, value); + else { + int c = conv_uni_to_8bit(value); + if (c != -1) + put_queue(vc, c); + } +} + /* FIXME: review locking for vt.c callers */ static void set_leds(void) { @@ -454,13 +465,7 @@ static unsigned int handle_diacr(struct vc_data *vc, unsigned int ch) if (ch == ' ' || ch == (BRL_UC_ROW|0) || ch == d) return d; - if (kbd->kbdmode == VC_UNICODE) - to_utf8(vc, d); - else { - int c = conv_uni_to_8bit(d); - if (c != -1) - put_queue(vc, c); - } + put_queue_utf8(vc, d); return ch; } @@ -471,13 +476,7 @@ static unsigned int handle_diacr(struct vc_data *vc, unsigned int ch) static void fn_enter(struct vc_data *vc) { if (diacr) { - if (kbd->kbdmode == VC_UNICODE) - to_utf8(vc, diacr); - else { - int c = conv_uni_to_8bit(diacr); - if (c != -1) - put_queue(vc, c); - } + put_queue_utf8(vc, diacr); diacr = 0; } @@ -685,13 +684,7 @@ static void k_unicode(struct vc_data *vc, unsigned int value, char up_flag) diacr = value; return; } - if (kbd->kbdmode == VC_UNICODE) - to_utf8(vc, value); - else { - int c = conv_uni_to_8bit(value); - if (c != -1) - put_queue(vc, c); - } + put_queue_utf8(vc, value); } /* From cb0ce93c8ba6882c591e7776a258cfd483af5717 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:20:25 +0200 Subject: [PATCH 049/105] Revert "vt: fix comment vs definition mismatch" This 
reverts commit 8bfabff0bfff8fbbe90673d1a557d15c42b4494a. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes involved. Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/gen_ucs_recompose.py | 5 +++-- drivers/tty/vt/gen_ucs_width.py | 4 ++-- drivers/tty/vt/ucs_recompose.c | 5 +++-- drivers/tty/vt/ucs_width.c | 4 ++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/tty/vt/gen_ucs_recompose.py b/drivers/tty/vt/gen_ucs_recompose.py index dc176d32e225..64418803e49e 100755 --- a/drivers/tty/vt/gen_ucs_recompose.py +++ b/drivers/tty/vt/gen_ucs_recompose.py @@ -289,8 +289,8 @@ static int recomposition_compare(const void *key, const void *element) /** * Attempt to recompose two Unicode characters into a single character. * - * @param base: Base Unicode code point (UCS-4) - * @param combining: Combining mark Unicode code point (UCS-4) + * @param previous: Previous Unicode code point (UCS-4) + * @param current: Current Unicode code point (UCS-4) * Return: Recomposed Unicode code point, or 0 if no recomposition is possible */ uint32_t ucs_recompose(uint32_t base, uint32_t combining) @@ -301,6 +301,7 @@ uint32_t ucs_recompose(uint32_t base, uint32_t combining) return 0; struct compare_key key = {{ base, combining }}; + struct recomposition *result = __inline_bsearch(&key, recomposition_table, ARRAY_SIZE(recomposition_table), diff --git a/drivers/tty/vt/gen_ucs_width.py b/drivers/tty/vt/gen_ucs_width.py index e65f43e2080a..c6cbc93e83f2 100755 --- a/drivers/tty/vt/gen_ucs_width.py +++ b/drivers/tty/vt/gen_ucs_width.py @@ -292,7 +292,7 @@ static bool is_in_interval32(uint32_t cp, const struct interval32 *intervals, si /** * Determine if a Unicode code point is zero-width. 
* - * @param cp: Unicode code point (UCS-4) + * @param ucs: Unicode code point (UCS-4) * Return: true if the character is zero-width, false otherwise */ bool ucs_is_zero_width(uint32_t cp) @@ -305,7 +305,7 @@ bool ucs_is_zero_width(uint32_t cp) /** * Determine if a Unicode code point is double-width. * - * @param cp: Unicode code point (UCS-4) + * @param ucs: Unicode code point (UCS-4) * Return: true if the character is double-width, false otherwise */ bool ucs_is_double_width(uint32_t cp) diff --git a/drivers/tty/vt/ucs_recompose.c b/drivers/tty/vt/ucs_recompose.c index 52cde1517f89..5c30c989def3 100644 --- a/drivers/tty/vt/ucs_recompose.c +++ b/drivers/tty/vt/ucs_recompose.c @@ -147,8 +147,8 @@ static int recomposition_compare(const void *key, const void *element) /** * Attempt to recompose two Unicode characters into a single character. * - * @param base: Base Unicode code point (UCS-4) - * @param combining: Combining mark Unicode code point (UCS-4) + * @param previous: Previous Unicode code point (UCS-4) + * @param current: Current Unicode code point (UCS-4) * Return: Recomposed Unicode code point, or 0 if no recomposition is possible */ uint32_t ucs_recompose(uint32_t base, uint32_t combining) @@ -159,6 +159,7 @@ uint32_t ucs_recompose(uint32_t base, uint32_t combining) return 0; struct compare_key key = { base, combining }; + struct recomposition *result = __inline_bsearch(&key, recomposition_table, ARRAY_SIZE(recomposition_table), diff --git a/drivers/tty/vt/ucs_width.c b/drivers/tty/vt/ucs_width.c index 4d5a0021e33b..060aa8ae7f16 100644 --- a/drivers/tty/vt/ucs_width.c +++ b/drivers/tty/vt/ucs_width.c @@ -512,7 +512,7 @@ static bool is_in_interval32(uint32_t cp, const struct interval32 *intervals, si /** * Determine if a Unicode code point is zero-width. 
* - * @param cp: Unicode code point (UCS-4) + * @param ucs: Unicode code point (UCS-4) * Return: true if the character is zero-width, false otherwise */ bool ucs_is_zero_width(uint32_t cp) @@ -525,7 +525,7 @@ bool ucs_is_zero_width(uint32_t cp) /** * Determine if a Unicode code point is double-width. * - * @param cp: Unicode code point (UCS-4) + * @param ucs: Unicode code point (UCS-4) * Return: true if the character is double-width, false otherwise */ bool ucs_is_double_width(uint32_t cp) From ab67c4622c3618edc35e623b53a0fd1a0a1ef062 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:12 +0200 Subject: [PATCH 050/105] Revert "vt: remove zero-white-space handling from conv_uni_to_pc()" This reverts commit b35f7a773cbcbfea3bc87a33c7d0f39e34ed83ec. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes involved. Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/consolemap.c | 2 ++ drivers/tty/vt/vt.c | 4 +--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/tty/vt/consolemap.c b/drivers/tty/vt/consolemap.c index bb4bb272ebec..82d70083fead 100644 --- a/drivers/tty/vt/consolemap.c +++ b/drivers/tty/vt/consolemap.c @@ -870,6 +870,8 @@ int conv_uni_to_pc(struct vc_data *conp, long ucs) return -4; /* Not found */ else if (ucs < 0x20) return -1; /* Not a printable character */ + else if (ucs == 0xfeff || (ucs >= 0x200b && ucs <= 0x200f)) + return -2; /* Zero-width space */ /* * UNI_DIRECT_BASE indicates the start of the region in the User Zone * which always has a 1:1 mapping to the currently loaded font. 
The diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 0d1d663c7809..dc84f9c6b7c6 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2964,15 +2964,13 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, goto out; } } - /* padding for the legacy display like done below */ - tc = ' '; } } /* Now try to find out how to display it */ tc = conv_uni_to_pc(vc, tc); if (tc & ~charmask) { - if (tc == -1) + if (tc == -1 || tc == -2) return -1; /* nothing to display */ /* Glyph not found */ From 7eaf91626e1c02e46382ed154e389bb08074865b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:14 +0200 Subject: [PATCH 051/105] Revert "vt: pad double-width code points with a zero-white-space" This reverts commit 547f57b88d5f2ad4e9ab5e0d63a668467c10c736. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes involved. Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index dc84f9c6b7c6..e3d35c4f9204 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2937,13 +2937,12 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, width = 2; } else if (ucs_is_zero_width(c)) { prev_c = vc_uniscr_getc(vc, -1); - if (prev_c == 0x200B && + if (prev_c == ' ' && ucs_is_double_width(vc_uniscr_getc(vc, -2))) { /* * Let's merge this zero-width code point with * the preceding double-width code point by - * replacing the existing zero-white-space - * padding. + * replacing the existing whitespace padding. 
*/ vc_con_rewind(vc); } else if (c == 0xfe0f && prev_c != 0) { @@ -3041,11 +3040,7 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, tc = conv_uni_to_pc(vc, ' '); if (tc < 0) tc = ' '; - /* - * Store a zero-white-space in the Unicode screen given that - * the previous code point is semantically double-width. - */ - next_c = 0x200B; + next_c = ' '; } out: From a01caec7c60c4875c10c4363a05ce60506cb2daa Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:15 +0200 Subject: [PATCH 052/105] Revert "vt: update ucs_width.c following latest gen_ucs_width.py" This reverts commit c7cb5b0779d782c1bda10414af7a9fcadcc87e93. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes involved. Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/ucs_width.c | 902 ++++++++++++++++++------------------- 1 file changed, 432 insertions(+), 470 deletions(-) diff --git a/drivers/tty/vt/ucs_width.c b/drivers/tty/vt/ucs_width.c index 060aa8ae7f16..47b22583bd34 100644 --- a/drivers/tty/vt/ucs_width.c +++ b/drivers/tty/vt/ucs_width.c @@ -12,477 +12,452 @@ #include #include -struct interval16 { - uint16_t first; - uint16_t last; -}; - -struct interval32 { +struct interval { uint32_t first; uint32_t last; }; -/* Zero-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */ -static const struct interval16 zero_width_bmp[] = { - { 0x00AD, 0x00AD }, /* SOFT HYPHEN */ - { 0x0300, 0x036F }, /* COMBINING GRAVE ACCENT - COMBINING LATIN SMALL LETTER X */ - { 0x0483, 0x0489 }, /* COMBINING CYRILLIC TITLO - COMBINING CYRILLIC MILLIONS SIGN */ - { 0x0591, 0x05BD }, /* HEBREW ACCENT ETNAHTA - HEBREW POINT METEG */ - { 0x05BF, 0x05BF }, /* HEBREW POINT RAFE */ - { 0x05C1, 0x05C2 }, /* HEBREW POINT SHIN DOT - HEBREW POINT SIN DOT */ - { 0x05C4, 0x05C5 }, /* HEBREW MARK UPPER DOT - HEBREW MARK LOWER DOT */ - { 0x05C7, 0x05C7 }, /* HEBREW POINT QAMATS
QATAN */ - { 0x0600, 0x0605 }, /* ARABIC NUMBER SIGN - ARABIC NUMBER MARK ABOVE */ - { 0x0610, 0x061A }, /* ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM - ARABIC SMALL KASRA */ - { 0x064B, 0x065F }, /* ARABIC FATHATAN - ARABIC WAVY HAMZA BELOW */ - { 0x0670, 0x0670 }, /* ARABIC LETTER SUPERSCRIPT ALEF */ - { 0x06D6, 0x06DC }, /* ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA - ARABIC SMALL HIGH SEEN */ - { 0x06DF, 0x06E4 }, /* ARABIC SMALL HIGH ROUNDED ZERO - ARABIC SMALL HIGH MADDA */ - { 0x06E7, 0x06E8 }, /* ARABIC SMALL HIGH YEH - ARABIC SMALL HIGH NOON */ - { 0x06EA, 0x06ED }, /* ARABIC EMPTY CENTRE LOW STOP - ARABIC SMALL LOW MEEM */ - { 0x0711, 0x0711 }, /* SYRIAC LETTER SUPERSCRIPT ALAPH */ - { 0x0730, 0x074A }, /* SYRIAC PTHAHA ABOVE - SYRIAC BARREKH */ - { 0x07A6, 0x07B0 }, /* THAANA ABAFILI - THAANA SUKUN */ - { 0x07EB, 0x07F3 }, /* NKO COMBINING SHORT HIGH TONE - NKO COMBINING DOUBLE DOT ABOVE */ - { 0x07FD, 0x07FD }, /* NKO DANTAYALAN */ - { 0x0816, 0x0819 }, /* SAMARITAN MARK IN - SAMARITAN MARK DAGESH */ - { 0x081B, 0x0823 }, /* SAMARITAN MARK EPENTHETIC YUT - SAMARITAN VOWEL SIGN A */ - { 0x0825, 0x0827 }, /* SAMARITAN VOWEL SIGN SHORT A - SAMARITAN VOWEL SIGN U */ - { 0x0829, 0x082D }, /* SAMARITAN VOWEL SIGN LONG I - SAMARITAN MARK NEQUDAA */ - { 0x0859, 0x085B }, /* MANDAIC AFFRICATION MARK - MANDAIC GEMINATION MARK */ - { 0x0890, 0x0891 }, /* ARABIC POUND MARK ABOVE - ARABIC PIASTRE MARK ABOVE */ - { 0x0897, 0x089F }, /* ARABIC PEPET - ARABIC HALF MADDA OVER MADDA */ - { 0x08CA, 0x0903 }, /* ARABIC SMALL HIGH FARSI YEH - DEVANAGARI SIGN VISARGA */ - { 0x093A, 0x093C }, /* DEVANAGARI VOWEL SIGN OE - DEVANAGARI SIGN NUKTA */ - { 0x093E, 0x094F }, /* DEVANAGARI VOWEL SIGN AA - DEVANAGARI VOWEL SIGN AW */ - { 0x0951, 0x0957 }, /* DEVANAGARI STRESS SIGN UDATTA - DEVANAGARI VOWEL SIGN UUE */ - { 0x0962, 0x0963 }, /* DEVANAGARI VOWEL SIGN VOCALIC L - DEVANAGARI VOWEL SIGN VOCALIC LL */ - { 0x0981, 0x0983 }, /* BENGALI SIGN CANDRABINDU - BENGALI 
SIGN VISARGA */ - { 0x09BC, 0x09BC }, /* BENGALI SIGN NUKTA */ - { 0x09BE, 0x09C4 }, /* BENGALI VOWEL SIGN AA - BENGALI VOWEL SIGN VOCALIC RR */ - { 0x09C7, 0x09C8 }, /* BENGALI VOWEL SIGN E - BENGALI VOWEL SIGN AI */ - { 0x09CB, 0x09CD }, /* BENGALI VOWEL SIGN O - BENGALI SIGN VIRAMA */ - { 0x09D7, 0x09D7 }, /* BENGALI AU LENGTH MARK */ - { 0x09E2, 0x09E3 }, /* BENGALI VOWEL SIGN VOCALIC L - BENGALI VOWEL SIGN VOCALIC LL */ - { 0x09FE, 0x09FE }, /* BENGALI SANDHI MARK */ - { 0x0A01, 0x0A03 }, /* GURMUKHI SIGN ADAK BINDI - GURMUKHI SIGN VISARGA */ - { 0x0A3C, 0x0A3C }, /* GURMUKHI SIGN NUKTA */ - { 0x0A3E, 0x0A42 }, /* GURMUKHI VOWEL SIGN AA - GURMUKHI VOWEL SIGN UU */ - { 0x0A47, 0x0A48 }, /* GURMUKHI VOWEL SIGN EE - GURMUKHI VOWEL SIGN AI */ - { 0x0A4B, 0x0A4D }, /* GURMUKHI VOWEL SIGN OO - GURMUKHI SIGN VIRAMA */ - { 0x0A51, 0x0A51 }, /* GURMUKHI SIGN UDAAT */ - { 0x0A70, 0x0A71 }, /* GURMUKHI TIPPI - GURMUKHI ADDAK */ - { 0x0A75, 0x0A75 }, /* GURMUKHI SIGN YAKASH */ - { 0x0A81, 0x0A83 }, /* GUJARATI SIGN CANDRABINDU - GUJARATI SIGN VISARGA */ - { 0x0ABC, 0x0ABC }, /* GUJARATI SIGN NUKTA */ - { 0x0ABE, 0x0AC5 }, /* GUJARATI VOWEL SIGN AA - GUJARATI VOWEL SIGN CANDRA E */ - { 0x0AC7, 0x0AC9 }, /* GUJARATI VOWEL SIGN E - GUJARATI VOWEL SIGN CANDRA O */ - { 0x0ACB, 0x0ACD }, /* GUJARATI VOWEL SIGN O - GUJARATI SIGN VIRAMA */ - { 0x0AE2, 0x0AE3 }, /* GUJARATI VOWEL SIGN VOCALIC L - GUJARATI VOWEL SIGN VOCALIC LL */ - { 0x0AFA, 0x0AFF }, /* GUJARATI SIGN SUKUN - GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE */ - { 0x0B01, 0x0B03 }, /* ORIYA SIGN CANDRABINDU - ORIYA SIGN VISARGA */ - { 0x0B3C, 0x0B3C }, /* ORIYA SIGN NUKTA */ - { 0x0B3E, 0x0B44 }, /* ORIYA VOWEL SIGN AA - ORIYA VOWEL SIGN VOCALIC RR */ - { 0x0B47, 0x0B48 }, /* ORIYA VOWEL SIGN E - ORIYA VOWEL SIGN AI */ - { 0x0B4B, 0x0B4D }, /* ORIYA VOWEL SIGN O - ORIYA SIGN VIRAMA */ - { 0x0B55, 0x0B57 }, /* ORIYA SIGN OVERLINE - ORIYA AU LENGTH MARK */ - { 0x0B62, 0x0B63 }, /* ORIYA VOWEL SIGN VOCALIC L - ORIYA VOWEL SIGN 
VOCALIC LL */ - { 0x0B82, 0x0B82 }, /* TAMIL SIGN ANUSVARA */ - { 0x0BBE, 0x0BC2 }, /* TAMIL VOWEL SIGN AA - TAMIL VOWEL SIGN UU */ - { 0x0BC6, 0x0BC8 }, /* TAMIL VOWEL SIGN E - TAMIL VOWEL SIGN AI */ - { 0x0BCA, 0x0BCD }, /* TAMIL VOWEL SIGN O - TAMIL SIGN VIRAMA */ - { 0x0BD7, 0x0BD7 }, /* TAMIL AU LENGTH MARK */ - { 0x0C00, 0x0C04 }, /* TELUGU SIGN COMBINING CANDRABINDU ABOVE - TELUGU SIGN COMBINING ANUSVARA ABOVE */ - { 0x0C3C, 0x0C3C }, /* TELUGU SIGN NUKTA */ - { 0x0C3E, 0x0C44 }, /* TELUGU VOWEL SIGN AA - TELUGU VOWEL SIGN VOCALIC RR */ - { 0x0C46, 0x0C48 }, /* TELUGU VOWEL SIGN E - TELUGU VOWEL SIGN AI */ - { 0x0C4A, 0x0C4D }, /* TELUGU VOWEL SIGN O - TELUGU SIGN VIRAMA */ - { 0x0C55, 0x0C56 }, /* TELUGU LENGTH MARK - TELUGU AI LENGTH MARK */ - { 0x0C62, 0x0C63 }, /* TELUGU VOWEL SIGN VOCALIC L - TELUGU VOWEL SIGN VOCALIC LL */ - { 0x0C81, 0x0C83 }, /* KANNADA SIGN CANDRABINDU - KANNADA SIGN VISARGA */ - { 0x0CBC, 0x0CBC }, /* KANNADA SIGN NUKTA */ - { 0x0CBE, 0x0CC4 }, /* KANNADA VOWEL SIGN AA - KANNADA VOWEL SIGN VOCALIC RR */ - { 0x0CC6, 0x0CC8 }, /* KANNADA VOWEL SIGN E - KANNADA VOWEL SIGN AI */ - { 0x0CCA, 0x0CCD }, /* KANNADA VOWEL SIGN O - KANNADA SIGN VIRAMA */ - { 0x0CD5, 0x0CD6 }, /* KANNADA LENGTH MARK - KANNADA AI LENGTH MARK */ - { 0x0CE2, 0x0CE3 }, /* KANNADA VOWEL SIGN VOCALIC L - KANNADA VOWEL SIGN VOCALIC LL */ - { 0x0CF3, 0x0CF3 }, /* KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT */ - { 0x0D00, 0x0D03 }, /* MALAYALAM SIGN COMBINING ANUSVARA ABOVE - MALAYALAM SIGN VISARGA */ - { 0x0D3B, 0x0D3C }, /* MALAYALAM SIGN VERTICAL BAR VIRAMA - MALAYALAM SIGN CIRCULAR VIRAMA */ - { 0x0D3E, 0x0D44 }, /* MALAYALAM VOWEL SIGN AA - MALAYALAM VOWEL SIGN VOCALIC RR */ - { 0x0D46, 0x0D48 }, /* MALAYALAM VOWEL SIGN E - MALAYALAM VOWEL SIGN AI */ - { 0x0D4A, 0x0D4D }, /* MALAYALAM VOWEL SIGN O - MALAYALAM SIGN VIRAMA */ - { 0x0D57, 0x0D57 }, /* MALAYALAM AU LENGTH MARK */ - { 0x0D62, 0x0D63 }, /* MALAYALAM VOWEL SIGN VOCALIC L - MALAYALAM VOWEL SIGN VOCALIC 
LL */ - { 0x0D81, 0x0D83 }, /* SINHALA SIGN CANDRABINDU - SINHALA SIGN VISARGAYA */ - { 0x0DCA, 0x0DCA }, /* SINHALA SIGN AL-LAKUNA */ - { 0x0DCF, 0x0DD4 }, /* SINHALA VOWEL SIGN AELA-PILLA - SINHALA VOWEL SIGN KETTI PAA-PILLA */ - { 0x0DD6, 0x0DD6 }, /* SINHALA VOWEL SIGN DIGA PAA-PILLA */ - { 0x0DD8, 0x0DDF }, /* SINHALA VOWEL SIGN GAETTA-PILLA - SINHALA VOWEL SIGN GAYANUKITTA */ - { 0x0DF2, 0x0DF3 }, /* SINHALA VOWEL SIGN DIGA GAETTA-PILLA - SINHALA VOWEL SIGN DIGA GAYANUKITTA */ - { 0x0E31, 0x0E31 }, /* THAI CHARACTER MAI HAN-AKAT */ - { 0x0E34, 0x0E3A }, /* THAI CHARACTER SARA I - THAI CHARACTER PHINTHU */ - { 0x0E47, 0x0E4E }, /* THAI CHARACTER MAITAIKHU - THAI CHARACTER YAMAKKAN */ - { 0x0EB1, 0x0EB1 }, /* LAO VOWEL SIGN MAI KAN */ - { 0x0EB4, 0x0EBC }, /* LAO VOWEL SIGN I - LAO SEMIVOWEL SIGN LO */ - { 0x0EC8, 0x0ECE }, /* LAO TONE MAI EK - LAO YAMAKKAN */ - { 0x0F18, 0x0F19 }, /* TIBETAN ASTROLOGICAL SIGN -KHYUD PA - TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS */ - { 0x0F35, 0x0F35 }, /* TIBETAN MARK NGAS BZUNG NYI ZLA */ - { 0x0F37, 0x0F37 }, /* TIBETAN MARK NGAS BZUNG SGOR RTAGS */ - { 0x0F39, 0x0F39 }, /* TIBETAN MARK TSA -PHRU */ - { 0x0F3E, 0x0F3F }, /* TIBETAN SIGN YAR TSHES - TIBETAN SIGN MAR TSHES */ - { 0x0F71, 0x0F84 }, /* TIBETAN VOWEL SIGN AA - TIBETAN MARK HALANTA */ - { 0x0F86, 0x0F87 }, /* TIBETAN SIGN LCI RTAGS - TIBETAN SIGN YANG RTAGS */ - { 0x0F8D, 0x0F97 }, /* TIBETAN SUBJOINED SIGN LCE TSA CAN - TIBETAN SUBJOINED LETTER JA */ - { 0x0F99, 0x0FBC }, /* TIBETAN SUBJOINED LETTER NYA - TIBETAN SUBJOINED LETTER FIXED-FORM RA */ - { 0x0FC6, 0x0FC6 }, /* TIBETAN SYMBOL PADMA GDAN */ - { 0x102B, 0x103E }, /* MYANMAR VOWEL SIGN TALL AA - MYANMAR CONSONANT SIGN MEDIAL HA */ - { 0x1056, 0x1059 }, /* MYANMAR VOWEL SIGN VOCALIC R - MYANMAR VOWEL SIGN VOCALIC LL */ - { 0x105E, 0x1060 }, /* MYANMAR CONSONANT SIGN MON MEDIAL NA - MYANMAR CONSONANT SIGN MON MEDIAL LA */ - { 0x1062, 0x1064 }, /* MYANMAR VOWEL SIGN SGAW KAREN EU - MYANMAR TONE MARK SGAW KAREN 
KE PHO */ - { 0x1067, 0x106D }, /* MYANMAR VOWEL SIGN WESTERN PWO KAREN EU - MYANMAR SIGN WESTERN PWO KAREN TONE-5 */ - { 0x1071, 0x1074 }, /* MYANMAR VOWEL SIGN GEBA KAREN I - MYANMAR VOWEL SIGN KAYAH EE */ - { 0x1082, 0x108D }, /* MYANMAR CONSONANT SIGN SHAN MEDIAL WA - MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE */ - { 0x108F, 0x108F }, /* MYANMAR SIGN RUMAI PALAUNG TONE-5 */ - { 0x109A, 0x109D }, /* MYANMAR SIGN KHAMTI TONE-1 - MYANMAR VOWEL SIGN AITON AI */ - { 0x135D, 0x135F }, /* ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK - ETHIOPIC COMBINING GEMINATION MARK */ - { 0x1712, 0x1715 }, /* TAGALOG VOWEL SIGN I - TAGALOG SIGN PAMUDPOD */ - { 0x1732, 0x1734 }, /* HANUNOO VOWEL SIGN I - HANUNOO SIGN PAMUDPOD */ - { 0x1752, 0x1753 }, /* BUHID VOWEL SIGN I - BUHID VOWEL SIGN U */ - { 0x1772, 0x1773 }, /* TAGBANWA VOWEL SIGN I - TAGBANWA VOWEL SIGN U */ - { 0x17B4, 0x17D3 }, /* KHMER VOWEL INHERENT AQ - KHMER SIGN BATHAMASAT */ - { 0x17DD, 0x17DD }, /* KHMER SIGN ATTHACAN */ - { 0x180B, 0x180D }, /* MONGOLIAN FREE VARIATION SELECTOR ONE - MONGOLIAN FREE VARIATION SELECTOR THREE */ - { 0x180F, 0x180F }, /* MONGOLIAN FREE VARIATION SELECTOR FOUR */ - { 0x1885, 0x1886 }, /* MONGOLIAN LETTER ALI GALI BALUDA - MONGOLIAN LETTER ALI GALI THREE BALUDA */ - { 0x18A9, 0x18A9 }, /* MONGOLIAN LETTER ALI GALI DAGALGA */ - { 0x1920, 0x192B }, /* LIMBU VOWEL SIGN A - LIMBU SUBJOINED LETTER WA */ - { 0x1930, 0x193B }, /* LIMBU SMALL LETTER KA - LIMBU SIGN SA-I */ - { 0x1A17, 0x1A1B }, /* BUGINESE VOWEL SIGN I - BUGINESE VOWEL SIGN AE */ - { 0x1A55, 0x1A5E }, /* TAI THAM CONSONANT SIGN MEDIAL RA - TAI THAM CONSONANT SIGN SA */ - { 0x1A60, 0x1A7C }, /* TAI THAM SIGN SAKOT - TAI THAM SIGN KHUEN-LUE KARAN */ - { 0x1A7F, 0x1A7F }, /* TAI THAM COMBINING CRYPTOGRAMMIC DOT */ - { 0x1AB0, 0x1ACE }, /* COMBINING DOUBLED CIRCUMFLEX ACCENT - COMBINING LATIN SMALL LETTER INSULAR T */ - { 0x1B00, 0x1B04 }, /* BALINESE SIGN ULU RICEM - BALINESE SIGN BISAH */ - { 0x1B34, 0x1B44 }, /* BALINESE 
SIGN REREKAN - BALINESE ADEG ADEG */ - { 0x1B6B, 0x1B73 }, /* BALINESE MUSICAL SYMBOL COMBINING TEGEH - BALINESE MUSICAL SYMBOL COMBINING GONG */ - { 0x1B80, 0x1B82 }, /* SUNDANESE SIGN PANYECEK - SUNDANESE SIGN PANGWISAD */ - { 0x1BA1, 0x1BAD }, /* SUNDANESE CONSONANT SIGN PAMINGKAL - SUNDANESE CONSONANT SIGN PASANGAN WA */ - { 0x1BE6, 0x1BF3 }, /* BATAK SIGN TOMPI - BATAK PANONGONAN */ - { 0x1C24, 0x1C37 }, /* LEPCHA SUBJOINED LETTER YA - LEPCHA SIGN NUKTA */ - { 0x1CD0, 0x1CD2 }, /* VEDIC TONE KARSHANA - VEDIC TONE PRENKHA */ - { 0x1CD4, 0x1CE8 }, /* VEDIC SIGN YAJURVEDIC MIDLINE SVARITA - VEDIC SIGN VISARGA ANUDATTA WITH TAIL */ - { 0x1CED, 0x1CED }, /* VEDIC SIGN TIRYAK */ - { 0x1CF4, 0x1CF4 }, /* VEDIC TONE CANDRA ABOVE */ - { 0x1CF7, 0x1CF9 }, /* VEDIC SIGN ATIKRAMA - VEDIC TONE DOUBLE RING ABOVE */ - { 0x1DC0, 0x1DFF }, /* COMBINING DOTTED GRAVE ACCENT - COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW */ - { 0x200B, 0x200E }, /* ZERO WIDTH SPACE - LEFT-TO-RIGHT MARK */ - { 0x202A, 0x202D }, /* LEFT-TO-RIGHT EMBEDDING - LEFT-TO-RIGHT OVERRIDE */ - { 0x2060, 0x2064 }, /* WORD JOINER - INVISIBLE PLUS */ - { 0x206A, 0x206F }, /* INHIBIT SYMMETRIC SWAPPING - NOMINAL DIGIT SHAPES */ - { 0x20D0, 0x20F0 }, /* COMBINING LEFT HARPOON ABOVE - COMBINING ASTERISK ABOVE */ - { 0x2640, 0x2640 }, /* FEMALE SIGN */ - { 0x2642, 0x2642 }, /* MALE SIGN */ - { 0x26A7, 0x26A7 }, /* MALE WITH STROKE AND MALE AND FEMALE SIGN */ - { 0x2CEF, 0x2CF1 }, /* COPTIC COMBINING NI ABOVE - COPTIC COMBINING SPIRITUS LENIS */ - { 0x2D7F, 0x2D7F }, /* TIFINAGH CONSONANT JOINER */ - { 0x2DE0, 0x2DFF }, /* COMBINING CYRILLIC LETTER BE - COMBINING CYRILLIC LETTER IOTIFIED BIG YUS */ - { 0x302A, 0x302F }, /* IDEOGRAPHIC LEVEL TONE MARK - HANGUL DOUBLE DOT TONE MARK */ - { 0x3099, 0x309A }, /* COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK - COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */ - { 0xA66F, 0xA672 }, /* COMBINING CYRILLIC VZMET - COMBINING CYRILLIC THOUSAND MILLIONS SIGN */ - { 
0xA674, 0xA67D }, /* COMBINING CYRILLIC LETTER UKRAINIAN IE - COMBINING CYRILLIC PAYEROK */ - { 0xA69E, 0xA69F }, /* COMBINING CYRILLIC LETTER EF - COMBINING CYRILLIC LETTER IOTIFIED E */ - { 0xA6F0, 0xA6F1 }, /* BAMUM COMBINING MARK KOQNDON - BAMUM COMBINING MARK TUKWENTIS */ - { 0xA802, 0xA802 }, /* SYLOTI NAGRI SIGN DVISVARA */ - { 0xA806, 0xA806 }, /* SYLOTI NAGRI SIGN HASANTA */ - { 0xA80B, 0xA80B }, /* SYLOTI NAGRI SIGN ANUSVARA */ - { 0xA823, 0xA827 }, /* SYLOTI NAGRI VOWEL SIGN A - SYLOTI NAGRI VOWEL SIGN OO */ - { 0xA82C, 0xA82C }, /* SYLOTI NAGRI SIGN ALTERNATE HASANTA */ - { 0xA880, 0xA881 }, /* SAURASHTRA SIGN ANUSVARA - SAURASHTRA SIGN VISARGA */ - { 0xA8B4, 0xA8C5 }, /* SAURASHTRA CONSONANT SIGN HAARU - SAURASHTRA SIGN CANDRABINDU */ - { 0xA8E0, 0xA8F1 }, /* COMBINING DEVANAGARI DIGIT ZERO - COMBINING DEVANAGARI SIGN AVAGRAHA */ - { 0xA8FF, 0xA8FF }, /* DEVANAGARI VOWEL SIGN AY */ - { 0xA926, 0xA92D }, /* KAYAH LI VOWEL UE - KAYAH LI TONE CALYA PLOPHU */ - { 0xA947, 0xA953 }, /* REJANG VOWEL SIGN I - REJANG VIRAMA */ - { 0xA980, 0xA983 }, /* JAVANESE SIGN PANYANGGA - JAVANESE SIGN WIGNYAN */ - { 0xA9B3, 0xA9C0 }, /* JAVANESE SIGN CECAK TELU - JAVANESE PANGKON */ - { 0xA9E5, 0xA9E5 }, /* MYANMAR SIGN SHAN SAW */ - { 0xAA29, 0xAA36 }, /* CHAM VOWEL SIGN AA - CHAM CONSONANT SIGN WA */ - { 0xAA43, 0xAA43 }, /* CHAM CONSONANT SIGN FINAL NG */ - { 0xAA4C, 0xAA4D }, /* CHAM CONSONANT SIGN FINAL M - CHAM CONSONANT SIGN FINAL H */ - { 0xAA7B, 0xAA7D }, /* MYANMAR SIGN PAO KAREN TONE - MYANMAR SIGN TAI LAING TONE-5 */ - { 0xAAB0, 0xAAB0 }, /* TAI VIET MAI KANG */ - { 0xAAB2, 0xAAB4 }, /* TAI VIET VOWEL I - TAI VIET VOWEL U */ - { 0xAAB7, 0xAAB8 }, /* TAI VIET MAI KHIT - TAI VIET VOWEL IA */ - { 0xAABE, 0xAABF }, /* TAI VIET VOWEL AM - TAI VIET TONE MAI EK */ - { 0xAAC1, 0xAAC1 }, /* TAI VIET TONE MAI THO */ - { 0xAAEB, 0xAAEF }, /* MEETEI MAYEK VOWEL SIGN II - MEETEI MAYEK VOWEL SIGN AAU */ - { 0xAAF5, 0xAAF6 }, /* MEETEI MAYEK VOWEL SIGN VISARGA - MEETEI MAYEK 
VIRAMA */ - { 0xABE3, 0xABEA }, /* MEETEI MAYEK VOWEL SIGN ONAP - MEETEI MAYEK VOWEL SIGN NUNG */ - { 0xABEC, 0xABED }, /* MEETEI MAYEK LUM IYEK - MEETEI MAYEK APUN IYEK */ - { 0xFB1E, 0xFB1E }, /* HEBREW POINT JUDEO-SPANISH VARIKA */ - { 0xFE00, 0xFE0F }, /* VARIATION SELECTOR-1 - VARIATION SELECTOR-16 */ - { 0xFE20, 0xFE2F }, /* COMBINING LIGATURE LEFT HALF - COMBINING CYRILLIC TITLO RIGHT HALF */ - { 0xFEFF, 0xFEFF }, /* ZERO WIDTH NO-BREAK SPACE */ - { 0xFFF9, 0xFFFB }, /* INTERLINEAR ANNOTATION ANCHOR - INTERLINEAR ANNOTATION TERMINATOR */ +/* Zero-width character ranges */ +static const struct interval zero_width_ranges[] = { + { 0x000AD, 0x000AD }, /* SOFT HYPHEN */ + { 0x00300, 0x0036F }, /* COMBINING GRAVE ACCENT - COMBINING LATIN SMALL LETTER X */ + { 0x00483, 0x00489 }, /* COMBINING CYRILLIC TITLO - COMBINING CYRILLIC MILLIONS SIGN */ + { 0x00591, 0x005BD }, /* HEBREW ACCENT ETNAHTA - HEBREW POINT METEG */ + { 0x005BF, 0x005BF }, /* HEBREW POINT RAFE */ + { 0x005C1, 0x005C2 }, /* HEBREW POINT SHIN DOT - HEBREW POINT SIN DOT */ + { 0x005C4, 0x005C5 }, /* HEBREW MARK UPPER DOT - HEBREW MARK LOWER DOT */ + { 0x005C7, 0x005C7 }, /* HEBREW POINT QAMATS QATAN */ + { 0x00600, 0x00605 }, /* ARABIC NUMBER SIGN - ARABIC NUMBER MARK ABOVE */ + { 0x00610, 0x0061A }, /* ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM - ARABIC SMALL KASRA */ + { 0x0064B, 0x0065F }, /* ARABIC FATHATAN - ARABIC WAVY HAMZA BELOW */ + { 0x00670, 0x00670 }, /* ARABIC LETTER SUPERSCRIPT ALEF */ + { 0x006D6, 0x006DC }, /* ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA - ARABIC SMALL HIGH SEEN */ + { 0x006DF, 0x006E4 }, /* ARABIC SMALL HIGH ROUNDED ZERO - ARABIC SMALL HIGH MADDA */ + { 0x006E7, 0x006E8 }, /* ARABIC SMALL HIGH YEH - ARABIC SMALL HIGH NOON */ + { 0x006EA, 0x006ED }, /* ARABIC EMPTY CENTRE LOW STOP - ARABIC SMALL LOW MEEM */ + { 0x00711, 0x00711 }, /* SYRIAC LETTER SUPERSCRIPT ALAPH */ + { 0x00730, 0x0074A }, /* SYRIAC PTHAHA ABOVE - SYRIAC BARREKH */ + { 0x007A6, 0x007B0 }, 
/* THAANA ABAFILI - THAANA SUKUN */ + { 0x007EB, 0x007F3 }, /* NKO COMBINING SHORT HIGH TONE - NKO COMBINING DOUBLE DOT ABOVE */ + { 0x007FD, 0x007FD }, /* NKO DANTAYALAN */ + { 0x00816, 0x00819 }, /* SAMARITAN MARK IN - SAMARITAN MARK DAGESH */ + { 0x0081B, 0x00823 }, /* SAMARITAN MARK EPENTHETIC YUT - SAMARITAN VOWEL SIGN A */ + { 0x00825, 0x00827 }, /* SAMARITAN VOWEL SIGN SHORT A - SAMARITAN VOWEL SIGN U */ + { 0x00829, 0x0082D }, /* SAMARITAN VOWEL SIGN LONG I - SAMARITAN MARK NEQUDAA */ + { 0x00859, 0x0085B }, /* MANDAIC AFFRICATION MARK - MANDAIC GEMINATION MARK */ + { 0x00890, 0x00891 }, /* ARABIC POUND MARK ABOVE - ARABIC PIASTRE MARK ABOVE */ + { 0x00897, 0x0089F }, /* ARABIC PEPET - ARABIC HALF MADDA OVER MADDA */ + { 0x008CA, 0x00903 }, /* ARABIC SMALL HIGH FARSI YEH - DEVANAGARI SIGN VISARGA */ + { 0x0093A, 0x0093C }, /* DEVANAGARI VOWEL SIGN OE - DEVANAGARI SIGN NUKTA */ + { 0x0093E, 0x0094F }, /* DEVANAGARI VOWEL SIGN AA - DEVANAGARI VOWEL SIGN AW */ + { 0x00951, 0x00957 }, /* DEVANAGARI STRESS SIGN UDATTA - DEVANAGARI VOWEL SIGN UUE */ + { 0x00962, 0x00963 }, /* DEVANAGARI VOWEL SIGN VOCALIC L - DEVANAGARI VOWEL SIGN VOCALIC LL */ + { 0x00981, 0x00983 }, /* BENGALI SIGN CANDRABINDU - BENGALI SIGN VISARGA */ + { 0x009BC, 0x009BC }, /* BENGALI SIGN NUKTA */ + { 0x009BE, 0x009C4 }, /* BENGALI VOWEL SIGN AA - BENGALI VOWEL SIGN VOCALIC RR */ + { 0x009C7, 0x009C8 }, /* BENGALI VOWEL SIGN E - BENGALI VOWEL SIGN AI */ + { 0x009CB, 0x009CD }, /* BENGALI VOWEL SIGN O - BENGALI SIGN VIRAMA */ + { 0x009D7, 0x009D7 }, /* BENGALI AU LENGTH MARK */ + { 0x009E2, 0x009E3 }, /* BENGALI VOWEL SIGN VOCALIC L - BENGALI VOWEL SIGN VOCALIC LL */ + { 0x009FE, 0x009FE }, /* BENGALI SANDHI MARK */ + { 0x00A01, 0x00A03 }, /* GURMUKHI SIGN ADAK BINDI - GURMUKHI SIGN VISARGA */ + { 0x00A3C, 0x00A3C }, /* GURMUKHI SIGN NUKTA */ + { 0x00A3E, 0x00A42 }, /* GURMUKHI VOWEL SIGN AA - GURMUKHI VOWEL SIGN UU */ + { 0x00A47, 0x00A48 }, /* GURMUKHI VOWEL SIGN EE - GURMUKHI VOWEL SIGN AI 
*/ + { 0x00A4B, 0x00A4D }, /* GURMUKHI VOWEL SIGN OO - GURMUKHI SIGN VIRAMA */ + { 0x00A51, 0x00A51 }, /* GURMUKHI SIGN UDAAT */ + { 0x00A70, 0x00A71 }, /* GURMUKHI TIPPI - GURMUKHI ADDAK */ + { 0x00A75, 0x00A75 }, /* GURMUKHI SIGN YAKASH */ + { 0x00A81, 0x00A83 }, /* GUJARATI SIGN CANDRABINDU - GUJARATI SIGN VISARGA */ + { 0x00ABC, 0x00ABC }, /* GUJARATI SIGN NUKTA */ + { 0x00ABE, 0x00AC5 }, /* GUJARATI VOWEL SIGN AA - GUJARATI VOWEL SIGN CANDRA E */ + { 0x00AC7, 0x00AC9 }, /* GUJARATI VOWEL SIGN E - GUJARATI VOWEL SIGN CANDRA O */ + { 0x00ACB, 0x00ACD }, /* GUJARATI VOWEL SIGN O - GUJARATI SIGN VIRAMA */ + { 0x00AE2, 0x00AE3 }, /* GUJARATI VOWEL SIGN VOCALIC L - GUJARATI VOWEL SIGN VOCALIC LL */ + { 0x00AFA, 0x00AFF }, /* GUJARATI SIGN SUKUN - GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE */ + { 0x00B01, 0x00B03 }, /* ORIYA SIGN CANDRABINDU - ORIYA SIGN VISARGA */ + { 0x00B3C, 0x00B3C }, /* ORIYA SIGN NUKTA */ + { 0x00B3E, 0x00B44 }, /* ORIYA VOWEL SIGN AA - ORIYA VOWEL SIGN VOCALIC RR */ + { 0x00B47, 0x00B48 }, /* ORIYA VOWEL SIGN E - ORIYA VOWEL SIGN AI */ + { 0x00B4B, 0x00B4D }, /* ORIYA VOWEL SIGN O - ORIYA SIGN VIRAMA */ + { 0x00B55, 0x00B57 }, /* ORIYA SIGN OVERLINE - ORIYA AU LENGTH MARK */ + { 0x00B62, 0x00B63 }, /* ORIYA VOWEL SIGN VOCALIC L - ORIYA VOWEL SIGN VOCALIC LL */ + { 0x00B82, 0x00B82 }, /* TAMIL SIGN ANUSVARA */ + { 0x00BBE, 0x00BC2 }, /* TAMIL VOWEL SIGN AA - TAMIL VOWEL SIGN UU */ + { 0x00BC6, 0x00BC8 }, /* TAMIL VOWEL SIGN E - TAMIL VOWEL SIGN AI */ + { 0x00BCA, 0x00BCD }, /* TAMIL VOWEL SIGN O - TAMIL SIGN VIRAMA */ + { 0x00BD7, 0x00BD7 }, /* TAMIL AU LENGTH MARK */ + { 0x00C00, 0x00C04 }, /* TELUGU SIGN COMBINING CANDRABINDU ABOVE - TELUGU SIGN COMBINING ANUSVARA ABOVE */ + { 0x00C3C, 0x00C3C }, /* TELUGU SIGN NUKTA */ + { 0x00C3E, 0x00C44 }, /* TELUGU VOWEL SIGN AA - TELUGU VOWEL SIGN VOCALIC RR */ + { 0x00C46, 0x00C48 }, /* TELUGU VOWEL SIGN E - TELUGU VOWEL SIGN AI */ + { 0x00C4A, 0x00C4D }, /* TELUGU VOWEL SIGN O - TELUGU SIGN VIRAMA */ + { 
0x00C55, 0x00C56 }, /* TELUGU LENGTH MARK - TELUGU AI LENGTH MARK */ + { 0x00C62, 0x00C63 }, /* TELUGU VOWEL SIGN VOCALIC L - TELUGU VOWEL SIGN VOCALIC LL */ + { 0x00C81, 0x00C83 }, /* KANNADA SIGN CANDRABINDU - KANNADA SIGN VISARGA */ + { 0x00CBC, 0x00CBC }, /* KANNADA SIGN NUKTA */ + { 0x00CBE, 0x00CC4 }, /* KANNADA VOWEL SIGN AA - KANNADA VOWEL SIGN VOCALIC RR */ + { 0x00CC6, 0x00CC8 }, /* KANNADA VOWEL SIGN E - KANNADA VOWEL SIGN AI */ + { 0x00CCA, 0x00CCD }, /* KANNADA VOWEL SIGN O - KANNADA SIGN VIRAMA */ + { 0x00CD5, 0x00CD6 }, /* KANNADA LENGTH MARK - KANNADA AI LENGTH MARK */ + { 0x00CE2, 0x00CE3 }, /* KANNADA VOWEL SIGN VOCALIC L - KANNADA VOWEL SIGN VOCALIC LL */ + { 0x00CF3, 0x00CF3 }, /* KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT */ + { 0x00D00, 0x00D03 }, /* MALAYALAM SIGN COMBINING ANUSVARA ABOVE - MALAYALAM SIGN VISARGA */ + { 0x00D3B, 0x00D3C }, /* MALAYALAM SIGN VERTICAL BAR VIRAMA - MALAYALAM SIGN CIRCULAR VIRAMA */ + { 0x00D3E, 0x00D44 }, /* MALAYALAM VOWEL SIGN AA - MALAYALAM VOWEL SIGN VOCALIC RR */ + { 0x00D46, 0x00D48 }, /* MALAYALAM VOWEL SIGN E - MALAYALAM VOWEL SIGN AI */ + { 0x00D4A, 0x00D4D }, /* MALAYALAM VOWEL SIGN O - MALAYALAM SIGN VIRAMA */ + { 0x00D57, 0x00D57 }, /* MALAYALAM AU LENGTH MARK */ + { 0x00D62, 0x00D63 }, /* MALAYALAM VOWEL SIGN VOCALIC L - MALAYALAM VOWEL SIGN VOCALIC LL */ + { 0x00D81, 0x00D83 }, /* SINHALA SIGN CANDRABINDU - SINHALA SIGN VISARGAYA */ + { 0x00DCA, 0x00DCA }, /* SINHALA SIGN AL-LAKUNA */ + { 0x00DCF, 0x00DD4 }, /* SINHALA VOWEL SIGN AELA-PILLA - SINHALA VOWEL SIGN KETTI PAA-PILLA */ + { 0x00DD6, 0x00DD6 }, /* SINHALA VOWEL SIGN DIGA PAA-PILLA */ + { 0x00DD8, 0x00DDF }, /* SINHALA VOWEL SIGN GAETTA-PILLA - SINHALA VOWEL SIGN GAYANUKITTA */ + { 0x00DF2, 0x00DF3 }, /* SINHALA VOWEL SIGN DIGA GAETTA-PILLA - SINHALA VOWEL SIGN DIGA GAYANUKITTA */ + { 0x00E31, 0x00E31 }, /* THAI CHARACTER MAI HAN-AKAT */ + { 0x00E34, 0x00E3A }, /* THAI CHARACTER SARA I - THAI CHARACTER PHINTHU */ + { 0x00E47, 0x00E4E }, /* 
THAI CHARACTER MAITAIKHU - THAI CHARACTER YAMAKKAN */ + { 0x00EB1, 0x00EB1 }, /* LAO VOWEL SIGN MAI KAN */ + { 0x00EB4, 0x00EBC }, /* LAO VOWEL SIGN I - LAO SEMIVOWEL SIGN LO */ + { 0x00EC8, 0x00ECE }, /* LAO TONE MAI EK - LAO YAMAKKAN */ + { 0x00F18, 0x00F19 }, /* TIBETAN ASTROLOGICAL SIGN -KHYUD PA - TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS */ + { 0x00F35, 0x00F35 }, /* TIBETAN MARK NGAS BZUNG NYI ZLA */ + { 0x00F37, 0x00F37 }, /* TIBETAN MARK NGAS BZUNG SGOR RTAGS */ + { 0x00F39, 0x00F39 }, /* TIBETAN MARK TSA -PHRU */ + { 0x00F3E, 0x00F3F }, /* TIBETAN SIGN YAR TSHES - TIBETAN SIGN MAR TSHES */ + { 0x00F71, 0x00F84 }, /* TIBETAN VOWEL SIGN AA - TIBETAN MARK HALANTA */ + { 0x00F86, 0x00F87 }, /* TIBETAN SIGN LCI RTAGS - TIBETAN SIGN YANG RTAGS */ + { 0x00F8D, 0x00F97 }, /* TIBETAN SUBJOINED SIGN LCE TSA CAN - TIBETAN SUBJOINED LETTER JA */ + { 0x00F99, 0x00FBC }, /* TIBETAN SUBJOINED LETTER NYA - TIBETAN SUBJOINED LETTER FIXED-FORM RA */ + { 0x00FC6, 0x00FC6 }, /* TIBETAN SYMBOL PADMA GDAN */ + { 0x0102B, 0x0103E }, /* MYANMAR VOWEL SIGN TALL AA - MYANMAR CONSONANT SIGN MEDIAL HA */ + { 0x01056, 0x01059 }, /* MYANMAR VOWEL SIGN VOCALIC R - MYANMAR VOWEL SIGN VOCALIC LL */ + { 0x0105E, 0x01060 }, /* MYANMAR CONSONANT SIGN MON MEDIAL NA - MYANMAR CONSONANT SIGN MON MEDIAL LA */ + { 0x01062, 0x01064 }, /* MYANMAR VOWEL SIGN SGAW KAREN EU - MYANMAR TONE MARK SGAW KAREN KE PHO */ + { 0x01067, 0x0106D }, /* MYANMAR VOWEL SIGN WESTERN PWO KAREN EU - MYANMAR SIGN WESTERN PWO KAREN TONE-5 */ + { 0x01071, 0x01074 }, /* MYANMAR VOWEL SIGN GEBA KAREN I - MYANMAR VOWEL SIGN KAYAH EE */ + { 0x01082, 0x0108D }, /* MYANMAR CONSONANT SIGN SHAN MEDIAL WA - MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE */ + { 0x0108F, 0x0108F }, /* MYANMAR SIGN RUMAI PALAUNG TONE-5 */ + { 0x0109A, 0x0109D }, /* MYANMAR SIGN KHAMTI TONE-1 - MYANMAR VOWEL SIGN AITON AI */ + { 0x0135D, 0x0135F }, /* ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK - ETHIOPIC COMBINING GEMINATION MARK */ + { 0x01712, 
0x01715 }, /* TAGALOG VOWEL SIGN I - TAGALOG SIGN PAMUDPOD */ + { 0x01732, 0x01734 }, /* HANUNOO VOWEL SIGN I - HANUNOO SIGN PAMUDPOD */ + { 0x01752, 0x01753 }, /* BUHID VOWEL SIGN I - BUHID VOWEL SIGN U */ + { 0x01772, 0x01773 }, /* TAGBANWA VOWEL SIGN I - TAGBANWA VOWEL SIGN U */ + { 0x017B4, 0x017D3 }, /* KHMER VOWEL INHERENT AQ - KHMER SIGN BATHAMASAT */ + { 0x017DD, 0x017DD }, /* KHMER SIGN ATTHACAN */ + { 0x0180B, 0x0180D }, /* MONGOLIAN FREE VARIATION SELECTOR ONE - MONGOLIAN FREE VARIATION SELECTOR THREE */ + { 0x0180F, 0x0180F }, /* MONGOLIAN FREE VARIATION SELECTOR FOUR */ + { 0x01885, 0x01886 }, /* MONGOLIAN LETTER ALI GALI BALUDA - MONGOLIAN LETTER ALI GALI THREE BALUDA */ + { 0x018A9, 0x018A9 }, /* MONGOLIAN LETTER ALI GALI DAGALGA */ + { 0x01920, 0x0192B }, /* LIMBU VOWEL SIGN A - LIMBU SUBJOINED LETTER WA */ + { 0x01930, 0x0193B }, /* LIMBU SMALL LETTER KA - LIMBU SIGN SA-I */ + { 0x01A17, 0x01A1B }, /* BUGINESE VOWEL SIGN I - BUGINESE VOWEL SIGN AE */ + { 0x01A55, 0x01A5E }, /* TAI THAM CONSONANT SIGN MEDIAL RA - TAI THAM CONSONANT SIGN SA */ + { 0x01A60, 0x01A7C }, /* TAI THAM SIGN SAKOT - TAI THAM SIGN KHUEN-LUE KARAN */ + { 0x01A7F, 0x01A7F }, /* TAI THAM COMBINING CRYPTOGRAMMIC DOT */ + { 0x01AB0, 0x01ACE }, /* COMBINING DOUBLED CIRCUMFLEX ACCENT - COMBINING LATIN SMALL LETTER INSULAR T */ + { 0x01B00, 0x01B04 }, /* BALINESE SIGN ULU RICEM - BALINESE SIGN BISAH */ + { 0x01B34, 0x01B44 }, /* BALINESE SIGN REREKAN - BALINESE ADEG ADEG */ + { 0x01B6B, 0x01B73 }, /* BALINESE MUSICAL SYMBOL COMBINING TEGEH - BALINESE MUSICAL SYMBOL COMBINING GONG */ + { 0x01B80, 0x01B82 }, /* SUNDANESE SIGN PANYECEK - SUNDANESE SIGN PANGWISAD */ + { 0x01BA1, 0x01BAD }, /* SUNDANESE CONSONANT SIGN PAMINGKAL - SUNDANESE CONSONANT SIGN PASANGAN WA */ + { 0x01BE6, 0x01BF3 }, /* BATAK SIGN TOMPI - BATAK PANONGONAN */ + { 0x01C24, 0x01C37 }, /* LEPCHA SUBJOINED LETTER YA - LEPCHA SIGN NUKTA */ + { 0x01CD0, 0x01CD2 }, /* VEDIC TONE KARSHANA - VEDIC TONE PRENKHA */ + { 
0x01CD4, 0x01CE8 }, /* VEDIC SIGN YAJURVEDIC MIDLINE SVARITA - VEDIC SIGN VISARGA ANUDATTA WITH TAIL */ + { 0x01CED, 0x01CED }, /* VEDIC SIGN TIRYAK */ + { 0x01CF4, 0x01CF4 }, /* VEDIC TONE CANDRA ABOVE */ + { 0x01CF7, 0x01CF9 }, /* VEDIC SIGN ATIKRAMA - VEDIC TONE DOUBLE RING ABOVE */ + { 0x01DC0, 0x01DFF }, /* COMBINING DOTTED GRAVE ACCENT - COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW */ + { 0x0200B, 0x0200E }, /* ZERO WIDTH SPACE - LEFT-TO-RIGHT MARK */ + { 0x0202A, 0x0202D }, /* LEFT-TO-RIGHT EMBEDDING - LEFT-TO-RIGHT OVERRIDE */ + { 0x02060, 0x02064 }, /* WORD JOINER - INVISIBLE PLUS */ + { 0x0206A, 0x0206F }, /* INHIBIT SYMMETRIC SWAPPING - NOMINAL DIGIT SHAPES */ + { 0x020D0, 0x020F0 }, /* COMBINING LEFT HARPOON ABOVE - COMBINING ASTERISK ABOVE */ + { 0x02640, 0x02640 }, /* FEMALE SIGN */ + { 0x02642, 0x02642 }, /* MALE SIGN */ + { 0x026A7, 0x026A7 }, /* MALE WITH STROKE AND MALE AND FEMALE SIGN */ + { 0x02CEF, 0x02CF1 }, /* COPTIC COMBINING NI ABOVE - COPTIC COMBINING SPIRITUS LENIS */ + { 0x02D7F, 0x02D7F }, /* TIFINAGH CONSONANT JOINER */ + { 0x02DE0, 0x02DFF }, /* COMBINING CYRILLIC LETTER BE - COMBINING CYRILLIC LETTER IOTIFIED BIG YUS */ + { 0x0302A, 0x0302F }, /* IDEOGRAPHIC LEVEL TONE MARK - HANGUL DOUBLE DOT TONE MARK */ + { 0x03099, 0x0309A }, /* COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK - COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */ + { 0x0A66F, 0x0A672 }, /* COMBINING CYRILLIC VZMET - COMBINING CYRILLIC THOUSAND MILLIONS SIGN */ + { 0x0A674, 0x0A67D }, /* COMBINING CYRILLIC LETTER UKRAINIAN IE - COMBINING CYRILLIC PAYEROK */ + { 0x0A69E, 0x0A69F }, /* COMBINING CYRILLIC LETTER EF - COMBINING CYRILLIC LETTER IOTIFIED E */ + { 0x0A6F0, 0x0A6F1 }, /* BAMUM COMBINING MARK KOQNDON - BAMUM COMBINING MARK TUKWENTIS */ + { 0x0A802, 0x0A802 }, /* SYLOTI NAGRI SIGN DVISVARA */ + { 0x0A806, 0x0A806 }, /* SYLOTI NAGRI SIGN HASANTA */ + { 0x0A80B, 0x0A80B }, /* SYLOTI NAGRI SIGN ANUSVARA */ + { 0x0A823, 0x0A827 }, /* SYLOTI NAGRI VOWEL 
SIGN A - SYLOTI NAGRI VOWEL SIGN OO */ + { 0x0A82C, 0x0A82C }, /* SYLOTI NAGRI SIGN ALTERNATE HASANTA */ + { 0x0A880, 0x0A881 }, /* SAURASHTRA SIGN ANUSVARA - SAURASHTRA SIGN VISARGA */ + { 0x0A8B4, 0x0A8C5 }, /* SAURASHTRA CONSONANT SIGN HAARU - SAURASHTRA SIGN CANDRABINDU */ + { 0x0A8E0, 0x0A8F1 }, /* COMBINING DEVANAGARI DIGIT ZERO - COMBINING DEVANAGARI SIGN AVAGRAHA */ + { 0x0A8FF, 0x0A8FF }, /* DEVANAGARI VOWEL SIGN AY */ + { 0x0A926, 0x0A92D }, /* KAYAH LI VOWEL UE - KAYAH LI TONE CALYA PLOPHU */ + { 0x0A947, 0x0A953 }, /* REJANG VOWEL SIGN I - REJANG VIRAMA */ + { 0x0A980, 0x0A983 }, /* JAVANESE SIGN PANYANGGA - JAVANESE SIGN WIGNYAN */ + { 0x0A9B3, 0x0A9C0 }, /* JAVANESE SIGN CECAK TELU - JAVANESE PANGKON */ + { 0x0A9E5, 0x0A9E5 }, /* MYANMAR SIGN SHAN SAW */ + { 0x0AA29, 0x0AA36 }, /* CHAM VOWEL SIGN AA - CHAM CONSONANT SIGN WA */ + { 0x0AA43, 0x0AA43 }, /* CHAM CONSONANT SIGN FINAL NG */ + { 0x0AA4C, 0x0AA4D }, /* CHAM CONSONANT SIGN FINAL M - CHAM CONSONANT SIGN FINAL H */ + { 0x0AA7B, 0x0AA7D }, /* MYANMAR SIGN PAO KAREN TONE - MYANMAR SIGN TAI LAING TONE-5 */ + { 0x0AAB0, 0x0AAB0 }, /* TAI VIET MAI KANG */ + { 0x0AAB2, 0x0AAB4 }, /* TAI VIET VOWEL I - TAI VIET VOWEL U */ + { 0x0AAB7, 0x0AAB8 }, /* TAI VIET MAI KHIT - TAI VIET VOWEL IA */ + { 0x0AABE, 0x0AABF }, /* TAI VIET VOWEL AM - TAI VIET TONE MAI EK */ + { 0x0AAC1, 0x0AAC1 }, /* TAI VIET TONE MAI THO */ + { 0x0AAEB, 0x0AAEF }, /* MEETEI MAYEK VOWEL SIGN II - MEETEI MAYEK VOWEL SIGN AAU */ + { 0x0AAF5, 0x0AAF6 }, /* MEETEI MAYEK VOWEL SIGN VISARGA - MEETEI MAYEK VIRAMA */ + { 0x0ABE3, 0x0ABEA }, /* MEETEI MAYEK VOWEL SIGN ONAP - MEETEI MAYEK VOWEL SIGN NUNG */ + { 0x0ABEC, 0x0ABED }, /* MEETEI MAYEK LUM IYEK - MEETEI MAYEK APUN IYEK */ + { 0x0FB1E, 0x0FB1E }, /* HEBREW POINT JUDEO-SPANISH VARIKA */ + { 0x0FE00, 0x0FE0F }, /* VARIATION SELECTOR-1 - VARIATION SELECTOR-16 */ + { 0x0FE20, 0x0FE2F }, /* COMBINING LIGATURE LEFT HALF - COMBINING CYRILLIC TITLO RIGHT HALF */ + { 0x0FEFF, 0x0FEFF }, /* 
ZERO WIDTH NO-BREAK SPACE */ + { 0x0FFF9, 0x0FFFB }, /* INTERLINEAR ANNOTATION ANCHOR - INTERLINEAR ANNOTATION TERMINATOR */ + { 0x101FD, 0x101FD }, /* U+101FD */ + { 0x102E0, 0x102E0 }, /* U+102E0 */ + { 0x10376, 0x1037A }, /* U+10376 - U+1037A */ + { 0x10A01, 0x10A03 }, /* U+10A01 - U+10A03 */ + { 0x10A05, 0x10A06 }, /* U+10A05 - U+10A06 */ + { 0x10A0C, 0x10A0F }, /* U+10A0C - U+10A0F */ + { 0x10A38, 0x10A3A }, /* U+10A38 - U+10A3A */ + { 0x10A3F, 0x10A3F }, /* U+10A3F */ + { 0x10AE5, 0x10AE6 }, /* U+10AE5 - U+10AE6 */ + { 0x10D24, 0x10D27 }, /* U+10D24 - U+10D27 */ + { 0x10D69, 0x10D6D }, /* U+10D69 - U+10D6D */ + { 0x10EAB, 0x10EAC }, /* U+10EAB - U+10EAC */ + { 0x10EFC, 0x10EFF }, /* U+10EFC - U+10EFF */ + { 0x10F46, 0x10F50 }, /* U+10F46 - U+10F50 */ + { 0x10F82, 0x10F85 }, /* U+10F82 - U+10F85 */ + { 0x11000, 0x11002 }, /* U+11000 - U+11002 */ + { 0x11038, 0x11046 }, /* U+11038 - U+11046 */ + { 0x11070, 0x11070 }, /* U+11070 */ + { 0x11073, 0x11074 }, /* U+11073 - U+11074 */ + { 0x1107F, 0x11082 }, /* U+1107F - U+11082 */ + { 0x110B0, 0x110BA }, /* U+110B0 - U+110BA */ + { 0x110BD, 0x110BD }, /* U+110BD */ + { 0x110C2, 0x110C2 }, /* U+110C2 */ + { 0x110CD, 0x110CD }, /* U+110CD */ + { 0x11100, 0x11102 }, /* U+11100 - U+11102 */ + { 0x11127, 0x11134 }, /* U+11127 - U+11134 */ + { 0x11145, 0x11146 }, /* U+11145 - U+11146 */ + { 0x11173, 0x11173 }, /* U+11173 */ + { 0x11180, 0x11182 }, /* U+11180 - U+11182 */ + { 0x111B3, 0x111C0 }, /* U+111B3 - U+111C0 */ + { 0x111C9, 0x111CC }, /* U+111C9 - U+111CC */ + { 0x111CE, 0x111CF }, /* U+111CE - U+111CF */ + { 0x1122C, 0x11237 }, /* U+1122C - U+11237 */ + { 0x1123E, 0x1123E }, /* U+1123E */ + { 0x11241, 0x11241 }, /* U+11241 */ + { 0x112DF, 0x112EA }, /* U+112DF - U+112EA */ + { 0x11300, 0x11303 }, /* U+11300 - U+11303 */ + { 0x1133B, 0x1133C }, /* U+1133B - U+1133C */ + { 0x1133E, 0x11344 }, /* U+1133E - U+11344 */ + { 0x11347, 0x11348 }, /* U+11347 - U+11348 */ + { 0x1134B, 0x1134D }, /* U+1134B - U+1134D */ + { 
0x11357, 0x11357 }, /* U+11357 */ + { 0x11362, 0x11363 }, /* U+11362 - U+11363 */ + { 0x11366, 0x1136C }, /* U+11366 - U+1136C */ + { 0x11370, 0x11374 }, /* U+11370 - U+11374 */ + { 0x113B8, 0x113C0 }, /* U+113B8 - U+113C0 */ + { 0x113C2, 0x113C2 }, /* U+113C2 */ + { 0x113C5, 0x113C5 }, /* U+113C5 */ + { 0x113C7, 0x113CA }, /* U+113C7 - U+113CA */ + { 0x113CC, 0x113D0 }, /* U+113CC - U+113D0 */ + { 0x113D2, 0x113D2 }, /* U+113D2 */ + { 0x113E1, 0x113E2 }, /* U+113E1 - U+113E2 */ + { 0x11435, 0x11446 }, /* U+11435 - U+11446 */ + { 0x1145E, 0x1145E }, /* U+1145E */ + { 0x114B0, 0x114C3 }, /* U+114B0 - U+114C3 */ + { 0x115AF, 0x115B5 }, /* U+115AF - U+115B5 */ + { 0x115B8, 0x115C0 }, /* U+115B8 - U+115C0 */ + { 0x115DC, 0x115DD }, /* U+115DC - U+115DD */ + { 0x11630, 0x11640 }, /* U+11630 - U+11640 */ + { 0x116AB, 0x116B7 }, /* U+116AB - U+116B7 */ + { 0x1171D, 0x1172B }, /* U+1171D - U+1172B */ + { 0x1182C, 0x1183A }, /* U+1182C - U+1183A */ + { 0x11930, 0x11935 }, /* U+11930 - U+11935 */ + { 0x11937, 0x11938 }, /* U+11937 - U+11938 */ + { 0x1193B, 0x1193E }, /* U+1193B - U+1193E */ + { 0x11940, 0x11940 }, /* U+11940 */ + { 0x11942, 0x11943 }, /* U+11942 - U+11943 */ + { 0x119D1, 0x119D7 }, /* U+119D1 - U+119D7 */ + { 0x119DA, 0x119E0 }, /* U+119DA - U+119E0 */ + { 0x119E4, 0x119E4 }, /* U+119E4 */ + { 0x11A01, 0x11A0A }, /* U+11A01 - U+11A0A */ + { 0x11A33, 0x11A39 }, /* U+11A33 - U+11A39 */ + { 0x11A3B, 0x11A3E }, /* U+11A3B - U+11A3E */ + { 0x11A47, 0x11A47 }, /* U+11A47 */ + { 0x11A51, 0x11A5B }, /* U+11A51 - U+11A5B */ + { 0x11A8A, 0x11A99 }, /* U+11A8A - U+11A99 */ + { 0x11C2F, 0x11C36 }, /* U+11C2F - U+11C36 */ + { 0x11C38, 0x11C3F }, /* U+11C38 - U+11C3F */ + { 0x11C92, 0x11CA7 }, /* U+11C92 - U+11CA7 */ + { 0x11CA9, 0x11CB6 }, /* U+11CA9 - U+11CB6 */ + { 0x11D31, 0x11D36 }, /* U+11D31 - U+11D36 */ + { 0x11D3A, 0x11D3A }, /* U+11D3A */ + { 0x11D3C, 0x11D3D }, /* U+11D3C - U+11D3D */ + { 0x11D3F, 0x11D45 }, /* U+11D3F - U+11D45 */ + { 0x11D47, 0x11D47 }, /* 
U+11D47 */ + { 0x11D8A, 0x11D8E }, /* U+11D8A - U+11D8E */ + { 0x11D90, 0x11D91 }, /* U+11D90 - U+11D91 */ + { 0x11D93, 0x11D97 }, /* U+11D93 - U+11D97 */ + { 0x11EF3, 0x11EF6 }, /* U+11EF3 - U+11EF6 */ + { 0x11F00, 0x11F01 }, /* U+11F00 - U+11F01 */ + { 0x11F03, 0x11F03 }, /* U+11F03 */ + { 0x11F34, 0x11F3A }, /* U+11F34 - U+11F3A */ + { 0x11F3E, 0x11F42 }, /* U+11F3E - U+11F42 */ + { 0x11F5A, 0x11F5A }, /* U+11F5A */ + { 0x13430, 0x13440 }, /* U+13430 - U+13440 */ + { 0x13447, 0x13455 }, /* U+13447 - U+13455 */ + { 0x1611E, 0x1612F }, /* U+1611E - U+1612F */ + { 0x16AF0, 0x16AF4 }, /* U+16AF0 - U+16AF4 */ + { 0x16B30, 0x16B36 }, /* U+16B30 - U+16B36 */ + { 0x16F4F, 0x16F4F }, /* U+16F4F */ + { 0x16F51, 0x16F87 }, /* U+16F51 - U+16F87 */ + { 0x16F8F, 0x16F92 }, /* U+16F8F - U+16F92 */ + { 0x16FE4, 0x16FE4 }, /* U+16FE4 */ + { 0x16FF0, 0x16FF1 }, /* U+16FF0 - U+16FF1 */ + { 0x1BC9D, 0x1BC9E }, /* U+1BC9D - U+1BC9E */ + { 0x1BCA0, 0x1BCA3 }, /* U+1BCA0 - U+1BCA3 */ + { 0x1CF00, 0x1CF2D }, /* U+1CF00 - U+1CF2D */ + { 0x1CF30, 0x1CF46 }, /* U+1CF30 - U+1CF46 */ + { 0x1D165, 0x1D169 }, /* U+1D165 - U+1D169 */ + { 0x1D16D, 0x1D182 }, /* U+1D16D - U+1D182 */ + { 0x1D185, 0x1D18B }, /* U+1D185 - U+1D18B */ + { 0x1D1AA, 0x1D1AD }, /* U+1D1AA - U+1D1AD */ + { 0x1D242, 0x1D244 }, /* U+1D242 - U+1D244 */ + { 0x1DA00, 0x1DA36 }, /* U+1DA00 - U+1DA36 */ + { 0x1DA3B, 0x1DA6C }, /* U+1DA3B - U+1DA6C */ + { 0x1DA75, 0x1DA75 }, /* U+1DA75 */ + { 0x1DA84, 0x1DA84 }, /* U+1DA84 */ + { 0x1DA9B, 0x1DA9F }, /* U+1DA9B - U+1DA9F */ + { 0x1DAA1, 0x1DAAF }, /* U+1DAA1 - U+1DAAF */ + { 0x1E000, 0x1E006 }, /* U+1E000 - U+1E006 */ + { 0x1E008, 0x1E018 }, /* U+1E008 - U+1E018 */ + { 0x1E01B, 0x1E021 }, /* U+1E01B - U+1E021 */ + { 0x1E023, 0x1E024 }, /* U+1E023 - U+1E024 */ + { 0x1E026, 0x1E02A }, /* U+1E026 - U+1E02A */ + { 0x1E08F, 0x1E08F }, /* U+1E08F */ + { 0x1E130, 0x1E136 }, /* U+1E130 - U+1E136 */ + { 0x1E2AE, 0x1E2AE }, /* U+1E2AE */ + { 0x1E2EC, 0x1E2EF }, /* U+1E2EC - U+1E2EF */ + { 
0x1E4EC, 0x1E4EF }, /* U+1E4EC - U+1E4EF */ + { 0x1E5EE, 0x1E5EF }, /* U+1E5EE - U+1E5EF */ + { 0x1E8D0, 0x1E8D6 }, /* U+1E8D0 - U+1E8D6 */ + { 0x1E944, 0x1E94A }, /* U+1E944 - U+1E94A */ + { 0x1F3FB, 0x1F3FF }, /* U+1F3FB - U+1F3FF */ + { 0x1F9B0, 0x1F9B3 }, /* U+1F9B0 - U+1F9B3 */ + { 0xE0001, 0xE0001 }, /* U+E0001 */ + { 0xE0020, 0xE007F }, /* U+E0020 - U+E007F */ + { 0xE0100, 0xE01EF }, /* U+E0100 - U+E01EF */ }; -/* Zero-width character ranges (non-BMP, U+10000 and above) */ -static const struct interval32 zero_width_non_bmp[] = { - { 0x101FD, 0x101FD }, /* PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE */ - { 0x102E0, 0x102E0 }, /* COPTIC EPACT THOUSANDS MARK */ - { 0x10376, 0x1037A }, /* COMBINING OLD PERMIC LETTER AN - COMBINING OLD PERMIC LETTER SII */ - { 0x10A01, 0x10A03 }, /* KHAROSHTHI VOWEL SIGN I - KHAROSHTHI VOWEL SIGN VOCALIC R */ - { 0x10A05, 0x10A06 }, /* KHAROSHTHI VOWEL SIGN E - KHAROSHTHI VOWEL SIGN O */ - { 0x10A0C, 0x10A0F }, /* KHAROSHTHI VOWEL LENGTH MARK - KHAROSHTHI SIGN VISARGA */ - { 0x10A38, 0x10A3A }, /* KHAROSHTHI SIGN BAR ABOVE - KHAROSHTHI SIGN DOT BELOW */ - { 0x10A3F, 0x10A3F }, /* KHAROSHTHI VIRAMA */ - { 0x10AE5, 0x10AE6 }, /* MANICHAEAN ABBREVIATION MARK ABOVE - MANICHAEAN ABBREVIATION MARK BELOW */ - { 0x10D24, 0x10D27 }, /* HANIFI ROHINGYA SIGN HARBAHAY - HANIFI ROHINGYA SIGN TASSI */ - { 0x10D69, 0x10D6D }, /* GARAY VOWEL SIGN E - GARAY CONSONANT NASALIZATION MARK */ - { 0x10EAB, 0x10EAC }, /* YEZIDI COMBINING HAMZA MARK - YEZIDI COMBINING MADDA MARK */ - { 0x10EFC, 0x10EFF }, /* ARABIC COMBINING ALEF OVERLAY - ARABIC SMALL LOW WORD MADDA */ - { 0x10F46, 0x10F50 }, /* SOGDIAN COMBINING DOT BELOW - SOGDIAN COMBINING STROKE BELOW */ - { 0x10F82, 0x10F85 }, /* OLD UYGHUR COMBINING DOT ABOVE - OLD UYGHUR COMBINING TWO DOTS BELOW */ - { 0x11000, 0x11002 }, /* BRAHMI SIGN CANDRABINDU - BRAHMI SIGN VISARGA */ - { 0x11038, 0x11046 }, /* BRAHMI VOWEL SIGN AA - BRAHMI VIRAMA */ - { 0x11070, 0x11070 }, /* BRAHMI SIGN OLD TAMIL VIRAMA */ 
- { 0x11073, 0x11074 }, /* BRAHMI VOWEL SIGN OLD TAMIL SHORT E - BRAHMI VOWEL SIGN OLD TAMIL SHORT O */ - { 0x1107F, 0x11082 }, /* BRAHMI NUMBER JOINER - KAITHI SIGN VISARGA */ - { 0x110B0, 0x110BA }, /* KAITHI VOWEL SIGN AA - KAITHI SIGN NUKTA */ - { 0x110BD, 0x110BD }, /* KAITHI NUMBER SIGN */ - { 0x110C2, 0x110C2 }, /* KAITHI VOWEL SIGN VOCALIC R */ - { 0x110CD, 0x110CD }, /* KAITHI NUMBER SIGN ABOVE */ - { 0x11100, 0x11102 }, /* CHAKMA SIGN CANDRABINDU - CHAKMA SIGN VISARGA */ - { 0x11127, 0x11134 }, /* CHAKMA VOWEL SIGN A - CHAKMA MAAYYAA */ - { 0x11145, 0x11146 }, /* CHAKMA VOWEL SIGN AA - CHAKMA VOWEL SIGN EI */ - { 0x11173, 0x11173 }, /* MAHAJANI SIGN NUKTA */ - { 0x11180, 0x11182 }, /* SHARADA SIGN CANDRABINDU - SHARADA SIGN VISARGA */ - { 0x111B3, 0x111C0 }, /* SHARADA VOWEL SIGN AA - SHARADA SIGN VIRAMA */ - { 0x111C9, 0x111CC }, /* SHARADA SANDHI MARK - SHARADA EXTRA SHORT VOWEL MARK */ - { 0x111CE, 0x111CF }, /* SHARADA VOWEL SIGN PRISHTHAMATRA E - SHARADA SIGN INVERTED CANDRABINDU */ - { 0x1122C, 0x11237 }, /* KHOJKI VOWEL SIGN AA - KHOJKI SIGN SHADDA */ - { 0x1123E, 0x1123E }, /* KHOJKI SIGN SUKUN */ - { 0x11241, 0x11241 }, /* KHOJKI VOWEL SIGN VOCALIC R */ - { 0x112DF, 0x112EA }, /* KHUDAWADI SIGN ANUSVARA - KHUDAWADI SIGN VIRAMA */ - { 0x11300, 0x11303 }, /* GRANTHA SIGN COMBINING ANUSVARA ABOVE - GRANTHA SIGN VISARGA */ - { 0x1133B, 0x1133C }, /* COMBINING BINDU BELOW - GRANTHA SIGN NUKTA */ - { 0x1133E, 0x11344 }, /* GRANTHA VOWEL SIGN AA - GRANTHA VOWEL SIGN VOCALIC RR */ - { 0x11347, 0x11348 }, /* GRANTHA VOWEL SIGN EE - GRANTHA VOWEL SIGN AI */ - { 0x1134B, 0x1134D }, /* GRANTHA VOWEL SIGN OO - GRANTHA SIGN VIRAMA */ - { 0x11357, 0x11357 }, /* GRANTHA AU LENGTH MARK */ - { 0x11362, 0x11363 }, /* GRANTHA VOWEL SIGN VOCALIC L - GRANTHA VOWEL SIGN VOCALIC LL */ - { 0x11366, 0x1136C }, /* COMBINING GRANTHA DIGIT ZERO - COMBINING GRANTHA DIGIT SIX */ - { 0x11370, 0x11374 }, /* COMBINING GRANTHA LETTER A - COMBINING GRANTHA LETTER PA */ - { 0x113B8, 
0x113C0 }, /* TULU-TIGALARI VOWEL SIGN AA - TULU-TIGALARI VOWEL SIGN VOCALIC LL */ - { 0x113C2, 0x113C2 }, /* TULU-TIGALARI VOWEL SIGN EE */ - { 0x113C5, 0x113C5 }, /* TULU-TIGALARI VOWEL SIGN AI */ - { 0x113C7, 0x113CA }, /* TULU-TIGALARI VOWEL SIGN OO - TULU-TIGALARI SIGN CANDRA ANUNASIKA */ - { 0x113CC, 0x113D0 }, /* TULU-TIGALARI SIGN ANUSVARA - TULU-TIGALARI CONJOINER */ - { 0x113D2, 0x113D2 }, /* TULU-TIGALARI GEMINATION MARK */ - { 0x113E1, 0x113E2 }, /* TULU-TIGALARI VEDIC TONE SVARITA - TULU-TIGALARI VEDIC TONE ANUDATTA */ - { 0x11435, 0x11446 }, /* NEWA VOWEL SIGN AA - NEWA SIGN NUKTA */ - { 0x1145E, 0x1145E }, /* NEWA SANDHI MARK */ - { 0x114B0, 0x114C3 }, /* TIRHUTA VOWEL SIGN AA - TIRHUTA SIGN NUKTA */ - { 0x115AF, 0x115B5 }, /* SIDDHAM VOWEL SIGN AA - SIDDHAM VOWEL SIGN VOCALIC RR */ - { 0x115B8, 0x115C0 }, /* SIDDHAM VOWEL SIGN E - SIDDHAM SIGN NUKTA */ - { 0x115DC, 0x115DD }, /* SIDDHAM VOWEL SIGN ALTERNATE U - SIDDHAM VOWEL SIGN ALTERNATE UU */ - { 0x11630, 0x11640 }, /* MODI VOWEL SIGN AA - MODI SIGN ARDHACANDRA */ - { 0x116AB, 0x116B7 }, /* TAKRI SIGN ANUSVARA - TAKRI SIGN NUKTA */ - { 0x1171D, 0x1172B }, /* AHOM CONSONANT SIGN MEDIAL LA - AHOM SIGN KILLER */ - { 0x1182C, 0x1183A }, /* DOGRA VOWEL SIGN AA - DOGRA SIGN NUKTA */ - { 0x11930, 0x11935 }, /* DIVES AKURU VOWEL SIGN AA - DIVES AKURU VOWEL SIGN E */ - { 0x11937, 0x11938 }, /* DIVES AKURU VOWEL SIGN AI - DIVES AKURU VOWEL SIGN O */ - { 0x1193B, 0x1193E }, /* DIVES AKURU SIGN ANUSVARA - DIVES AKURU VIRAMA */ - { 0x11940, 0x11940 }, /* DIVES AKURU MEDIAL YA */ - { 0x11942, 0x11943 }, /* DIVES AKURU MEDIAL RA - DIVES AKURU SIGN NUKTA */ - { 0x119D1, 0x119D7 }, /* NANDINAGARI VOWEL SIGN AA - NANDINAGARI VOWEL SIGN VOCALIC RR */ - { 0x119DA, 0x119E0 }, /* NANDINAGARI VOWEL SIGN E - NANDINAGARI SIGN VIRAMA */ - { 0x119E4, 0x119E4 }, /* NANDINAGARI VOWEL SIGN PRISHTHAMATRA E */ - { 0x11A01, 0x11A0A }, /* ZANABAZAR SQUARE VOWEL SIGN I - ZANABAZAR SQUARE VOWEL LENGTH MARK */ - { 0x11A33, 0x11A39 
}, /* ZANABAZAR SQUARE FINAL CONSONANT MARK - ZANABAZAR SQUARE SIGN VISARGA */ - { 0x11A3B, 0x11A3E }, /* ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA - ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA */ - { 0x11A47, 0x11A47 }, /* ZANABAZAR SQUARE SUBJOINER */ - { 0x11A51, 0x11A5B }, /* SOYOMBO VOWEL SIGN I - SOYOMBO VOWEL LENGTH MARK */ - { 0x11A8A, 0x11A99 }, /* SOYOMBO FINAL CONSONANT SIGN G - SOYOMBO SUBJOINER */ - { 0x11C2F, 0x11C36 }, /* BHAIKSUKI VOWEL SIGN AA - BHAIKSUKI VOWEL SIGN VOCALIC L */ - { 0x11C38, 0x11C3F }, /* BHAIKSUKI VOWEL SIGN E - BHAIKSUKI SIGN VIRAMA */ - { 0x11C92, 0x11CA7 }, /* MARCHEN SUBJOINED LETTER KA - MARCHEN SUBJOINED LETTER ZA */ - { 0x11CA9, 0x11CB6 }, /* MARCHEN SUBJOINED LETTER YA - MARCHEN SIGN CANDRABINDU */ - { 0x11D31, 0x11D36 }, /* MASARAM GONDI VOWEL SIGN AA - MASARAM GONDI VOWEL SIGN VOCALIC R */ - { 0x11D3A, 0x11D3A }, /* MASARAM GONDI VOWEL SIGN E */ - { 0x11D3C, 0x11D3D }, /* MASARAM GONDI VOWEL SIGN AI - MASARAM GONDI VOWEL SIGN O */ - { 0x11D3F, 0x11D45 }, /* MASARAM GONDI VOWEL SIGN AU - MASARAM GONDI VIRAMA */ - { 0x11D47, 0x11D47 }, /* MASARAM GONDI RA-KARA */ - { 0x11D8A, 0x11D8E }, /* GUNJALA GONDI VOWEL SIGN AA - GUNJALA GONDI VOWEL SIGN UU */ - { 0x11D90, 0x11D91 }, /* GUNJALA GONDI VOWEL SIGN EE - GUNJALA GONDI VOWEL SIGN AI */ - { 0x11D93, 0x11D97 }, /* GUNJALA GONDI VOWEL SIGN OO - GUNJALA GONDI VIRAMA */ - { 0x11EF3, 0x11EF6 }, /* MAKASAR VOWEL SIGN I - MAKASAR VOWEL SIGN O */ - { 0x11F00, 0x11F01 }, /* KAWI SIGN CANDRABINDU - KAWI SIGN ANUSVARA */ - { 0x11F03, 0x11F03 }, /* KAWI SIGN VISARGA */ - { 0x11F34, 0x11F3A }, /* KAWI VOWEL SIGN AA - KAWI VOWEL SIGN VOCALIC R */ - { 0x11F3E, 0x11F42 }, /* KAWI VOWEL SIGN E - KAWI CONJOINER */ - { 0x11F5A, 0x11F5A }, /* KAWI SIGN NUKTA */ - { 0x13430, 0x13440 }, /* EGYPTIAN HIEROGLYPH VERTICAL JOINER - EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY */ - { 0x13447, 0x13455 }, /* EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START - EGYPTIAN HIEROGLYPH MODIFIER DAMAGED */ - { 0x1611E, 
0x1612F }, /* GURUNG KHEMA VOWEL SIGN AA - GURUNG KHEMA SIGN THOLHOMA */ - { 0x16AF0, 0x16AF4 }, /* BASSA VAH COMBINING HIGH TONE - BASSA VAH COMBINING HIGH-LOW TONE */ - { 0x16B30, 0x16B36 }, /* PAHAWH HMONG MARK CIM TUB - PAHAWH HMONG MARK CIM TAUM */ - { 0x16F4F, 0x16F4F }, /* MIAO SIGN CONSONANT MODIFIER BAR */ - { 0x16F51, 0x16F87 }, /* MIAO SIGN ASPIRATION - MIAO VOWEL SIGN UI */ - { 0x16F8F, 0x16F92 }, /* MIAO TONE RIGHT - MIAO TONE BELOW */ - { 0x16FE4, 0x16FE4 }, /* KHITAN SMALL SCRIPT FILLER */ - { 0x16FF0, 0x16FF1 }, /* VIETNAMESE ALTERNATE READING MARK CA - VIETNAMESE ALTERNATE READING MARK NHAY */ - { 0x1BC9D, 0x1BC9E }, /* DUPLOYAN THICK LETTER SELECTOR - DUPLOYAN DOUBLE MARK */ - { 0x1BCA0, 0x1BCA3 }, /* SHORTHAND FORMAT LETTER OVERLAP - SHORTHAND FORMAT UP STEP */ - { 0x1CF00, 0x1CF2D }, /* ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT - ZNAMENNY COMBINING MARK KRYZH ON LEFT */ - { 0x1CF30, 0x1CF46 }, /* ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO - ZNAMENNY PRIZNAK MODIFIER ROG */ - { 0x1D165, 0x1D169 }, /* MUSICAL SYMBOL COMBINING STEM - MUSICAL SYMBOL COMBINING TREMOLO-3 */ - { 0x1D16D, 0x1D182 }, /* MUSICAL SYMBOL COMBINING AUGMENTATION DOT - MUSICAL SYMBOL COMBINING LOURE */ - { 0x1D185, 0x1D18B }, /* MUSICAL SYMBOL COMBINING DOIT - MUSICAL SYMBOL COMBINING TRIPLE TONGUE */ - { 0x1D1AA, 0x1D1AD }, /* MUSICAL SYMBOL COMBINING DOWN BOW - MUSICAL SYMBOL COMBINING SNAP PIZZICATO */ - { 0x1D242, 0x1D244 }, /* COMBINING GREEK MUSICAL TRISEME - COMBINING GREEK MUSICAL PENTASEME */ - { 0x1DA00, 0x1DA36 }, /* SIGNWRITING HEAD RIM - SIGNWRITING AIR SUCKING IN */ - { 0x1DA3B, 0x1DA6C }, /* SIGNWRITING MOUTH CLOSED NEUTRAL - SIGNWRITING EXCITEMENT */ - { 0x1DA75, 0x1DA75 }, /* SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS */ - { 0x1DA84, 0x1DA84 }, /* SIGNWRITING LOCATION HEAD NECK */ - { 0x1DA9B, 0x1DA9F }, /* SIGNWRITING FILL MODIFIER-2 - SIGNWRITING FILL MODIFIER-6 */ - { 0x1DAA1, 0x1DAAF }, /* SIGNWRITING ROTATION MODIFIER-2 - SIGNWRITING 
ROTATION MODIFIER-16 */ - { 0x1E000, 0x1E006 }, /* COMBINING GLAGOLITIC LETTER AZU - COMBINING GLAGOLITIC LETTER ZHIVETE */ - { 0x1E008, 0x1E018 }, /* COMBINING GLAGOLITIC LETTER ZEMLJA - COMBINING GLAGOLITIC LETTER HERU */ - { 0x1E01B, 0x1E021 }, /* COMBINING GLAGOLITIC LETTER SHTA - COMBINING GLAGOLITIC LETTER YATI */ - { 0x1E023, 0x1E024 }, /* COMBINING GLAGOLITIC LETTER YU - COMBINING GLAGOLITIC LETTER SMALL YUS */ - { 0x1E026, 0x1E02A }, /* COMBINING GLAGOLITIC LETTER YO - COMBINING GLAGOLITIC LETTER FITA */ - { 0x1E08F, 0x1E08F }, /* COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I */ - { 0x1E130, 0x1E136 }, /* NYIAKENG PUACHUE HMONG TONE-B - NYIAKENG PUACHUE HMONG TONE-D */ - { 0x1E2AE, 0x1E2AE }, /* TOTO SIGN RISING TONE */ - { 0x1E2EC, 0x1E2EF }, /* WANCHO TONE TUP - WANCHO TONE KOINI */ - { 0x1E4EC, 0x1E4EF }, /* NAG MUNDARI SIGN MUHOR - NAG MUNDARI SIGN SUTUH */ - { 0x1E5EE, 0x1E5EF }, /* OL ONAL SIGN MU - OL ONAL SIGN IKIR */ - { 0x1E8D0, 0x1E8D6 }, /* MENDE KIKAKUI COMBINING NUMBER TEENS - MENDE KIKAKUI COMBINING NUMBER MILLIONS */ - { 0x1E944, 0x1E94A }, /* ADLAM ALIF LENGTHENER - ADLAM NUKTA */ - { 0x1F3FB, 0x1F3FF }, /* EMOJI MODIFIER FITZPATRICK TYPE-1-2 - EMOJI MODIFIER FITZPATRICK TYPE-6 */ - { 0x1F9B0, 0x1F9B3 }, /* EMOJI COMPONENT RED HAIR - EMOJI COMPONENT WHITE HAIR */ - { 0xE0001, 0xE0001 }, /* LANGUAGE TAG */ - { 0xE0020, 0xE007F }, /* TAG SPACE - CANCEL TAG */ - { 0xE0100, 0xE01EF }, /* VARIATION SELECTOR-17 - VARIATION SELECTOR-256 */ -}; - -/* Double-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */ -static const struct interval16 double_width_bmp[] = { - { 0x1100, 0x115F }, /* HANGUL CHOSEONG KIYEOK - HANGUL CHOSEONG FILLER */ - { 0x231A, 0x231B }, /* WATCH - HOURGLASS */ - { 0x2329, 0x232A }, /* LEFT-POINTING ANGLE BRACKET - RIGHT-POINTING ANGLE BRACKET */ - { 0x23E9, 0x23EC }, /* BLACK RIGHT-POINTING DOUBLE TRIANGLE - BLACK DOWN-POINTING DOUBLE TRIANGLE */ - { 0x23F0, 0x23F0 }, /* ALARM CLOCK */ - { 
0x23F3, 0x23F3 }, /* HOURGLASS WITH FLOWING SAND */ - { 0x25FD, 0x25FE }, /* WHITE MEDIUM SMALL SQUARE - BLACK MEDIUM SMALL SQUARE */ - { 0x2614, 0x2615 }, /* UMBRELLA WITH RAIN DROPS - HOT BEVERAGE */ - { 0x2630, 0x2637 }, /* TRIGRAM FOR HEAVEN - TRIGRAM FOR EARTH */ - { 0x2648, 0x2653 }, /* ARIES - PISCES */ - { 0x267F, 0x267F }, /* WHEELCHAIR SYMBOL */ - { 0x268A, 0x268F }, /* MONOGRAM FOR YANG - DIGRAM FOR GREATER YIN */ - { 0x2693, 0x2693 }, /* ANCHOR */ - { 0x26A1, 0x26A1 }, /* HIGH VOLTAGE SIGN */ - { 0x26AA, 0x26AB }, /* MEDIUM WHITE CIRCLE - MEDIUM BLACK CIRCLE */ - { 0x26BD, 0x26BE }, /* SOCCER BALL - BASEBALL */ - { 0x26C4, 0x26C5 }, /* SNOWMAN WITHOUT SNOW - SUN BEHIND CLOUD */ - { 0x26CE, 0x26CE }, /* OPHIUCHUS */ - { 0x26D4, 0x26D4 }, /* NO ENTRY */ - { 0x26EA, 0x26EA }, /* CHURCH */ - { 0x26F2, 0x26F3 }, /* FOUNTAIN - FLAG IN HOLE */ - { 0x26F5, 0x26F5 }, /* SAILBOAT */ - { 0x26FA, 0x26FA }, /* TENT */ - { 0x26FD, 0x26FD }, /* FUEL PUMP */ - { 0x2705, 0x2705 }, /* WHITE HEAVY CHECK MARK */ - { 0x270A, 0x270B }, /* RAISED FIST - RAISED HAND */ - { 0x2728, 0x2728 }, /* SPARKLES */ - { 0x274C, 0x274C }, /* CROSS MARK */ - { 0x274E, 0x274E }, /* NEGATIVE SQUARED CROSS MARK */ - { 0x2753, 0x2755 }, /* BLACK QUESTION MARK ORNAMENT - WHITE EXCLAMATION MARK ORNAMENT */ - { 0x2757, 0x2757 }, /* HEAVY EXCLAMATION MARK SYMBOL */ - { 0x2795, 0x2797 }, /* HEAVY PLUS SIGN - HEAVY DIVISION SIGN */ - { 0x27B0, 0x27B0 }, /* CURLY LOOP */ - { 0x27BF, 0x27BF }, /* DOUBLE CURLY LOOP */ - { 0x2B1B, 0x2B1C }, /* BLACK LARGE SQUARE - WHITE LARGE SQUARE */ - { 0x2B50, 0x2B50 }, /* WHITE MEDIUM STAR */ - { 0x2B55, 0x2B55 }, /* HEAVY LARGE CIRCLE */ - { 0x2E80, 0x2E99 }, /* CJK RADICAL REPEAT - CJK RADICAL RAP */ - { 0x2E9B, 0x2EF3 }, /* CJK RADICAL CHOKE - CJK RADICAL C-SIMPLIFIED TURTLE */ - { 0x2F00, 0x2FD5 }, /* KANGXI RADICAL ONE - KANGXI RADICAL FLUTE */ - { 0x2FF0, 0x3029 }, /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT - HANGZHOU NUMERAL NINE */ - { 0x3030, 
0x303E }, /* WAVY DASH - IDEOGRAPHIC VARIATION INDICATOR */ - { 0x3041, 0x3096 }, /* HIRAGANA LETTER SMALL A - HIRAGANA LETTER SMALL KE */ - { 0x309B, 0x30FF }, /* KATAKANA-HIRAGANA VOICED SOUND MARK - KATAKANA DIGRAPH KOTO */ - { 0x3105, 0x312F }, /* BOPOMOFO LETTER B - BOPOMOFO LETTER NN */ - { 0x3131, 0x318E }, /* HANGUL LETTER KIYEOK - HANGUL LETTER ARAEAE */ - { 0x3190, 0x31E5 }, /* IDEOGRAPHIC ANNOTATION LINKING MARK - CJK STROKE SZP */ - { 0x31EF, 0x321E }, /* IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION - PARENTHESIZED KOREAN CHARACTER O HU */ - { 0x3220, 0x3247 }, /* PARENTHESIZED IDEOGRAPH ONE - CIRCLED IDEOGRAPH KOTO */ - { 0x3250, 0xA48C }, /* PARTNERSHIP SIGN - YI SYLLABLE YYR */ - { 0xA490, 0xA4C6 }, /* YI RADICAL QOT - YI RADICAL KE */ - { 0xA960, 0xA97C }, /* HANGUL CHOSEONG TIKEUT-MIEUM - HANGUL CHOSEONG SSANGYEORINHIEUH */ - { 0xAC00, 0xD7A3 }, /* HANGUL SYLLABLE GA - HANGUL SYLLABLE HIH */ - { 0xF900, 0xFAFF }, /* U+F900 - U+FAFF */ - { 0xFE10, 0xFE19 }, /* PRESENTATION FORM FOR VERTICAL COMMA - PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS */ - { 0xFE30, 0xFE52 }, /* PRESENTATION FORM FOR VERTICAL TWO DOT LEADER - SMALL FULL STOP */ - { 0xFE54, 0xFE66 }, /* SMALL SEMICOLON - SMALL EQUALS SIGN */ - { 0xFE68, 0xFE6B }, /* SMALL REVERSE SOLIDUS - SMALL COMMERCIAL AT */ - { 0xFF01, 0xFF60 }, /* FULLWIDTH EXCLAMATION MARK - FULLWIDTH RIGHT WHITE PARENTHESIS */ - { 0xFFE0, 0xFFE6 }, /* FULLWIDTH CENT SIGN - FULLWIDTH WON SIGN */ -}; - -/* Double-width character ranges (non-BMP, U+10000 and above) */ -static const struct interval32 double_width_non_bmp[] = { - { 0x16FE0, 0x16FE3 }, /* TANGUT ITERATION MARK - OLD CHINESE ITERATION MARK */ +/* Double-width character ranges */ +static const struct interval double_width_ranges[] = { + { 0x01100, 0x0115F }, /* HANGUL CHOSEONG KIYEOK - HANGUL CHOSEONG FILLER */ + { 0x0231A, 0x0231B }, /* WATCH - HOURGLASS */ + { 0x02329, 0x0232A }, /* LEFT-POINTING ANGLE BRACKET - RIGHT-POINTING ANGLE BRACKET */ + { 
0x023E9, 0x023EC }, /* BLACK RIGHT-POINTING DOUBLE TRIANGLE - BLACK DOWN-POINTING DOUBLE TRIANGLE */ + { 0x023F0, 0x023F0 }, /* ALARM CLOCK */ + { 0x023F3, 0x023F3 }, /* HOURGLASS WITH FLOWING SAND */ + { 0x025FD, 0x025FE }, /* WHITE MEDIUM SMALL SQUARE - BLACK MEDIUM SMALL SQUARE */ + { 0x02614, 0x02615 }, /* UMBRELLA WITH RAIN DROPS - HOT BEVERAGE */ + { 0x02630, 0x02637 }, /* TRIGRAM FOR HEAVEN - TRIGRAM FOR EARTH */ + { 0x02648, 0x02653 }, /* ARIES - PISCES */ + { 0x0267F, 0x0267F }, /* WHEELCHAIR SYMBOL */ + { 0x0268A, 0x0268F }, /* MONOGRAM FOR YANG - DIGRAM FOR GREATER YIN */ + { 0x02693, 0x02693 }, /* ANCHOR */ + { 0x026A1, 0x026A1 }, /* HIGH VOLTAGE SIGN */ + { 0x026AA, 0x026AB }, /* MEDIUM WHITE CIRCLE - MEDIUM BLACK CIRCLE */ + { 0x026BD, 0x026BE }, /* SOCCER BALL - BASEBALL */ + { 0x026C4, 0x026C5 }, /* SNOWMAN WITHOUT SNOW - SUN BEHIND CLOUD */ + { 0x026CE, 0x026CE }, /* OPHIUCHUS */ + { 0x026D4, 0x026D4 }, /* NO ENTRY */ + { 0x026EA, 0x026EA }, /* CHURCH */ + { 0x026F2, 0x026F3 }, /* FOUNTAIN - FLAG IN HOLE */ + { 0x026F5, 0x026F5 }, /* SAILBOAT */ + { 0x026FA, 0x026FA }, /* TENT */ + { 0x026FD, 0x026FD }, /* FUEL PUMP */ + { 0x02705, 0x02705 }, /* WHITE HEAVY CHECK MARK */ + { 0x0270A, 0x0270B }, /* RAISED FIST - RAISED HAND */ + { 0x02728, 0x02728 }, /* SPARKLES */ + { 0x0274C, 0x0274C }, /* CROSS MARK */ + { 0x0274E, 0x0274E }, /* NEGATIVE SQUARED CROSS MARK */ + { 0x02753, 0x02755 }, /* BLACK QUESTION MARK ORNAMENT - WHITE EXCLAMATION MARK ORNAMENT */ + { 0x02757, 0x02757 }, /* HEAVY EXCLAMATION MARK SYMBOL */ + { 0x02795, 0x02797 }, /* HEAVY PLUS SIGN - HEAVY DIVISION SIGN */ + { 0x027B0, 0x027B0 }, /* CURLY LOOP */ + { 0x027BF, 0x027BF }, /* DOUBLE CURLY LOOP */ + { 0x02B1B, 0x02B1C }, /* BLACK LARGE SQUARE - WHITE LARGE SQUARE */ + { 0x02B50, 0x02B50 }, /* WHITE MEDIUM STAR */ + { 0x02B55, 0x02B55 }, /* HEAVY LARGE CIRCLE */ + { 0x02E80, 0x02E99 }, /* CJK RADICAL REPEAT - CJK RADICAL RAP */ + { 0x02E9B, 0x02EF3 }, /* CJK RADICAL CHOKE - CJK 
RADICAL C-SIMPLIFIED TURTLE */ + { 0x02F00, 0x02FD5 }, /* KANGXI RADICAL ONE - KANGXI RADICAL FLUTE */ + { 0x02FF0, 0x03029 }, /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT - HANGZHOU NUMERAL NINE */ + { 0x03030, 0x0303E }, /* WAVY DASH - IDEOGRAPHIC VARIATION INDICATOR */ + { 0x03041, 0x03096 }, /* HIRAGANA LETTER SMALL A - HIRAGANA LETTER SMALL KE */ + { 0x0309B, 0x030FF }, /* KATAKANA-HIRAGANA VOICED SOUND MARK - KATAKANA DIGRAPH KOTO */ + { 0x03105, 0x0312F }, /* BOPOMOFO LETTER B - BOPOMOFO LETTER NN */ + { 0x03131, 0x0318E }, /* HANGUL LETTER KIYEOK - HANGUL LETTER ARAEAE */ + { 0x03190, 0x031E5 }, /* IDEOGRAPHIC ANNOTATION LINKING MARK - CJK STROKE SZP */ + { 0x031EF, 0x0321E }, /* IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION - PARENTHESIZED KOREAN CHARACTER O HU */ + { 0x03220, 0x03247 }, /* PARENTHESIZED IDEOGRAPH ONE - CIRCLED IDEOGRAPH KOTO */ + { 0x03250, 0x0A48C }, /* PARTNERSHIP SIGN - YI SYLLABLE YYR */ + { 0x0A490, 0x0A4C6 }, /* YI RADICAL QOT - YI RADICAL KE */ + { 0x0A960, 0x0A97C }, /* HANGUL CHOSEONG TIKEUT-MIEUM - HANGUL CHOSEONG SSANGYEORINHIEUH */ + { 0x0AC00, 0x0D7A3 }, /* HANGUL SYLLABLE GA - HANGUL SYLLABLE HIH */ + { 0x0F900, 0x0FAFF }, /* U+0F900 - U+0FAFF */ + { 0x0FE10, 0x0FE19 }, /* PRESENTATION FORM FOR VERTICAL COMMA - PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS */ + { 0x0FE30, 0x0FE52 }, /* PRESENTATION FORM FOR VERTICAL TWO DOT LEADER - SMALL FULL STOP */ + { 0x0FE54, 0x0FE66 }, /* SMALL SEMICOLON - SMALL EQUALS SIGN */ + { 0x0FE68, 0x0FE6B }, /* SMALL REVERSE SOLIDUS - SMALL COMMERCIAL AT */ + { 0x0FF01, 0x0FF60 }, /* FULLWIDTH EXCLAMATION MARK - FULLWIDTH RIGHT WHITE PARENTHESIS */ + { 0x0FFE0, 0x0FFE6 }, /* FULLWIDTH CENT SIGN - FULLWIDTH WON SIGN */ + { 0x16FE0, 0x16FE3 }, /* U+16FE0 - U+16FE3 */ { 0x17000, 0x187F7 }, /* U+17000 - U+187F7 */ - { 0x18800, 0x18CD5 }, /* TANGUT COMPONENT-001 - KHITAN SMALL SCRIPT CHARACTER-18CD5 */ + { 0x18800, 0x18CD5 }, /* U+18800 - U+18CD5 */ { 0x18CFF, 0x18D08 }, /* U+18CFF - 
U+18D08 */ - { 0x1AFF0, 0x1AFF3 }, /* KATAKANA LETTER MINNAN TONE-2 - KATAKANA LETTER MINNAN TONE-5 */ - { 0x1AFF5, 0x1AFFB }, /* KATAKANA LETTER MINNAN TONE-7 - KATAKANA LETTER MINNAN NASALIZED TONE-5 */ - { 0x1AFFD, 0x1AFFE }, /* KATAKANA LETTER MINNAN NASALIZED TONE-7 - KATAKANA LETTER MINNAN NASALIZED TONE-8 */ - { 0x1B000, 0x1B122 }, /* KATAKANA LETTER ARCHAIC E - KATAKANA LETTER ARCHAIC WU */ - { 0x1B132, 0x1B132 }, /* HIRAGANA LETTER SMALL KO */ - { 0x1B150, 0x1B152 }, /* HIRAGANA LETTER SMALL WI - HIRAGANA LETTER SMALL WO */ - { 0x1B155, 0x1B155 }, /* KATAKANA LETTER SMALL KO */ - { 0x1B164, 0x1B167 }, /* KATAKANA LETTER SMALL WI - KATAKANA LETTER SMALL N */ - { 0x1B170, 0x1B2FB }, /* NUSHU CHARACTER-1B170 - NUSHU CHARACTER-1B2FB */ - { 0x1D300, 0x1D356 }, /* MONOGRAM FOR EARTH - TETRAGRAM FOR FOSTERING */ - { 0x1D360, 0x1D376 }, /* COUNTING ROD UNIT DIGIT ONE - IDEOGRAPHIC TALLY MARK FIVE */ + { 0x1AFF0, 0x1AFF3 }, /* U+1AFF0 - U+1AFF3 */ + { 0x1AFF5, 0x1AFFB }, /* U+1AFF5 - U+1AFFB */ + { 0x1AFFD, 0x1AFFE }, /* U+1AFFD - U+1AFFE */ + { 0x1B000, 0x1B122 }, /* U+1B000 - U+1B122 */ + { 0x1B132, 0x1B132 }, /* U+1B132 */ + { 0x1B150, 0x1B152 }, /* U+1B150 - U+1B152 */ + { 0x1B155, 0x1B155 }, /* U+1B155 */ + { 0x1B164, 0x1B167 }, /* U+1B164 - U+1B167 */ + { 0x1B170, 0x1B2FB }, /* U+1B170 - U+1B2FB */ + { 0x1D300, 0x1D356 }, /* U+1D300 - U+1D356 */ + { 0x1D360, 0x1D376 }, /* U+1D360 - U+1D376 */ { 0x1F000, 0x1F02F }, /* U+1F000 - U+1F02F */ { 0x1F0A0, 0x1F0FF }, /* U+1F0A0 - U+1F0FF */ - { 0x1F18E, 0x1F18E }, /* NEGATIVE SQUARED AB */ - { 0x1F191, 0x1F19A }, /* SQUARED CL - SQUARED VS */ - { 0x1F200, 0x1F202 }, /* SQUARE HIRAGANA HOKA - SQUARED KATAKANA SA */ - { 0x1F210, 0x1F23B }, /* SQUARED CJK UNIFIED IDEOGRAPH-624B - SQUARED CJK UNIFIED IDEOGRAPH-914D */ - { 0x1F240, 0x1F248 }, /* TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C - TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 */ - { 0x1F250, 0x1F251 }, /* CIRCLED IDEOGRAPH ADVANTAGE - CIRCLED 
IDEOGRAPH ACCEPT */ - { 0x1F260, 0x1F265 }, /* ROUNDED SYMBOL FOR FU - ROUNDED SYMBOL FOR CAI */ - { 0x1F300, 0x1F3FA }, /* CYCLONE - AMPHORA */ - { 0x1F400, 0x1F64F }, /* RAT - PERSON WITH FOLDED HANDS */ - { 0x1F680, 0x1F9AF }, /* ROCKET - PROBING CANE */ + { 0x1F18E, 0x1F18E }, /* U+1F18E */ + { 0x1F191, 0x1F19A }, /* U+1F191 - U+1F19A */ + { 0x1F200, 0x1F202 }, /* U+1F200 - U+1F202 */ + { 0x1F210, 0x1F23B }, /* U+1F210 - U+1F23B */ + { 0x1F240, 0x1F248 }, /* U+1F240 - U+1F248 */ + { 0x1F250, 0x1F251 }, /* U+1F250 - U+1F251 */ + { 0x1F260, 0x1F265 }, /* U+1F260 - U+1F265 */ + { 0x1F300, 0x1F3FA }, /* U+1F300 - U+1F3FA */ + { 0x1F400, 0x1F64F }, /* U+1F400 - U+1F64F */ + { 0x1F680, 0x1F9AF }, /* U+1F680 - U+1F9AF */ { 0x1F9B4, 0x1FAFF }, /* U+1F9B4 - U+1FAFF */ { 0x20000, 0x2FFFD }, /* U+20000 - U+2FFFD */ { 0x30000, 0x3FFFD }, /* U+30000 - U+3FFFD */ }; -static int ucs_cmp16(const void *key, const void *element) -{ - uint16_t cp = *(uint16_t *)key; - const struct interval16 *e = element; - - if (cp > e->last) - return 1; - if (cp < e->first) - return -1; - return 0; -} - -static int ucs_cmp32(const void *key, const void *element) +static int ucs_cmp(const void *key, const void *element) { uint32_t cp = *(uint32_t *)key; - const struct interval32 *e = element; + const struct interval *e = element; if (cp > e->last) return 1; @@ -491,22 +466,13 @@ static int ucs_cmp32(const void *key, const void *element) return 0; } -static bool is_in_interval16(uint16_t cp, const struct interval16 *intervals, size_t count) +static bool is_in_interval(uint32_t cp, const struct interval *intervals, size_t count) { if (cp < intervals[0].first || cp > intervals[count - 1].last) return false; return __inline_bsearch(&cp, intervals, count, - sizeof(*intervals), ucs_cmp16) != NULL; -} - -static bool is_in_interval32(uint32_t cp, const struct interval32 *intervals, size_t count) -{ - if (cp < intervals[0].first || cp > intervals[count - 1].last) - return false; - - return 
__inline_bsearch(&cp, intervals, count, - sizeof(*intervals), ucs_cmp32) != NULL; + sizeof(*intervals), ucs_cmp) != NULL; } /** @@ -517,9 +483,7 @@ static bool is_in_interval32(uint32_t cp, const struct interval32 *intervals, si */ bool ucs_is_zero_width(uint32_t cp) { - return (cp <= 0xFFFF) - ? is_in_interval16(cp, zero_width_bmp, ARRAY_SIZE(zero_width_bmp)) - : is_in_interval32(cp, zero_width_non_bmp, ARRAY_SIZE(zero_width_non_bmp)); + return is_in_interval(cp, zero_width_ranges, ARRAY_SIZE(zero_width_ranges)); } /** @@ -530,7 +494,5 @@ bool ucs_is_zero_width(uint32_t cp) */ bool ucs_is_double_width(uint32_t cp) { - return (cp <= 0xFFFF) - ? is_in_interval16(cp, double_width_bmp, ARRAY_SIZE(double_width_bmp)) - : is_in_interval32(cp, double_width_non_bmp, ARRAY_SIZE(double_width_non_bmp)); + return is_in_interval(cp, double_width_ranges, ARRAY_SIZE(double_width_ranges)); } From 7a149499f6b6b7789a45752c7bbf1669b69cdd0b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:16 +0200 Subject: [PATCH 053/105] Revert "vt: update gen_ucs_width.py to produce more space efficient tables" This reverts commit 119ff0b0f4541972d829da606599441dace2444d. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes involved. 
Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/gen_ucs_width.py | 154 +++++++++----------------------- 1 file changed, 41 insertions(+), 113 deletions(-) diff --git a/drivers/tty/vt/gen_ucs_width.py b/drivers/tty/vt/gen_ucs_width.py index c6cbc93e83f2..41997fe00129 100755 --- a/drivers/tty/vt/gen_ucs_width.py +++ b/drivers/tty/vt/gen_ucs_width.py @@ -132,49 +132,13 @@ def generate_ucs_width(): ranges.append((start, prev)) return ranges - # Function to split ranges into BMP (16-bit) and non-BMP (above 16-bit) - def split_ranges_by_size(ranges): - bmp_ranges = [] - non_bmp_ranges = [] - - for start, end in ranges: - if end <= 0xFFFF: - bmp_ranges.append((start, end)) - elif start > 0xFFFF: - non_bmp_ranges.append((start, end)) - else: - # Split the range at 0xFFFF - bmp_ranges.append((start, 0xFFFF)) - non_bmp_ranges.append((0x10000, end)) - - return bmp_ranges, non_bmp_ranges - # Extract ranges for each width zero_width_ranges = ranges_optimize(width_map, 0) double_width_ranges = ranges_optimize(width_map, 2) - # Split ranges into BMP and non-BMP - zero_width_bmp, zero_width_non_bmp = split_ranges_by_size(zero_width_ranges) - double_width_bmp, double_width_non_bmp = split_ranges_by_size(double_width_ranges) - # Get Unicode version information unicode_version = unicodedata.unidata_version - # Function to generate code point description comments - def get_code_point_comment(start, end): - try: - start_char_desc = unicodedata.name(chr(start)) - if start == end: - return f"/* {start_char_desc} */" - else: - end_char_desc = unicodedata.name(chr(end)) - return f"/* {start_char_desc} - {end_char_desc} */" - except: - if start == end: - return f"/* U+{start:04X} */" - else: - return f"/* U+{start:04X} - U+{end:04X} */" - # Generate C implementation file with open(c_file, 'w') as f: f.write(f"""\ @@ -192,77 +156,62 @@ def generate_ucs_width(): #include #include -struct interval16 {{ - uint16_t first; - uint16_t last; -}}; - -struct 
interval32 {{ +struct interval {{ uint32_t first; uint32_t last; }}; -/* Zero-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */ -static const struct interval16 zero_width_bmp[] = {{ +/* Zero-width character ranges */ +static const struct interval zero_width_ranges[] = {{ """) - for start, end in zero_width_bmp: - comment = get_code_point_comment(start, end) - f.write(f"\t{{ 0x{start:04X}, 0x{end:04X} }}, {comment}\n") + for start, end in zero_width_ranges: + try: + start_char_desc = unicodedata.name(chr(start)) if start < 0x10000 else f"U+{start:05X}" + if start == end: + comment = f"/* {start_char_desc} */" + else: + end_char_desc = unicodedata.name(chr(end)) if end < 0x10000 else f"U+{end:05X}" + comment = f"/* {start_char_desc} - {end_char_desc} */" + except: + if start == end: + comment = f"/* U+{start:05X} */" + else: + comment = f"/* U+{start:05X} - U+{end:05X} */" - f.write("""\ -}; - -/* Zero-width character ranges (non-BMP, U+10000 and above) */ -static const struct interval32 zero_width_non_bmp[] = { -""") - - for start, end in zero_width_non_bmp: - comment = get_code_point_comment(start, end) f.write(f"\t{{ 0x{start:05X}, 0x{end:05X} }}, {comment}\n") f.write("""\ }; -/* Double-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */ -static const struct interval16 double_width_bmp[] = { +/* Double-width character ranges */ +static const struct interval double_width_ranges[] = { """) - for start, end in double_width_bmp: - comment = get_code_point_comment(start, end) - f.write(f"\t{{ 0x{start:04X}, 0x{end:04X} }}, {comment}\n") + for start, end in double_width_ranges: + try: + start_char_desc = unicodedata.name(chr(start)) if start < 0x10000 else f"U+{start:05X}" + if start == end: + comment = f"/* {start_char_desc} */" + else: + end_char_desc = unicodedata.name(chr(end)) if end < 0x10000 else f"U+{end:05X}" + comment = f"/* {start_char_desc} - {end_char_desc} */" + except: + if start == end: + comment = 
f"/* U+{start:05X} */" + else: + comment = f"/* U+{start:05X} - U+{end:05X} */" - f.write("""\ -}; - -/* Double-width character ranges (non-BMP, U+10000 and above) */ -static const struct interval32 double_width_non_bmp[] = { -""") - - for start, end in double_width_non_bmp: - comment = get_code_point_comment(start, end) f.write(f"\t{{ 0x{start:05X}, 0x{end:05X} }}, {comment}\n") f.write("""\ }; -static int ucs_cmp16(const void *key, const void *element) -{ - uint16_t cp = *(uint16_t *)key; - const struct interval16 *e = element; - - if (cp > e->last) - return 1; - if (cp < e->first) - return -1; - return 0; -} - -static int ucs_cmp32(const void *key, const void *element) +static int ucs_cmp(const void *key, const void *element) { uint32_t cp = *(uint32_t *)key; - const struct interval32 *e = element; + const struct interval *e = element; if (cp > e->last) return 1; @@ -271,22 +220,13 @@ static int ucs_cmp32(const void *key, const void *element) return 0; } -static bool is_in_interval16(uint16_t cp, const struct interval16 *intervals, size_t count) +static bool is_in_interval(uint32_t cp, const struct interval *intervals, size_t count) { if (cp < intervals[0].first || cp > intervals[count - 1].last) return false; return __inline_bsearch(&cp, intervals, count, - sizeof(*intervals), ucs_cmp16) != NULL; -} - -static bool is_in_interval32(uint32_t cp, const struct interval32 *intervals, size_t count) -{ - if (cp < intervals[0].first || cp > intervals[count - 1].last) - return false; - - return __inline_bsearch(&cp, intervals, count, - sizeof(*intervals), ucs_cmp32) != NULL; + sizeof(*intervals), ucs_cmp) != NULL; } /** @@ -297,9 +237,7 @@ static bool is_in_interval32(uint32_t cp, const struct interval32 *intervals, si */ bool ucs_is_zero_width(uint32_t cp) { - return (cp <= 0xFFFF) - ? 
is_in_interval16(cp, zero_width_bmp, ARRAY_SIZE(zero_width_bmp)) - : is_in_interval32(cp, zero_width_non_bmp, ARRAY_SIZE(zero_width_non_bmp)); + return is_in_interval(cp, zero_width_ranges, ARRAY_SIZE(zero_width_ranges)); } /** @@ -310,27 +248,17 @@ bool ucs_is_zero_width(uint32_t cp) */ bool ucs_is_double_width(uint32_t cp) { - return (cp <= 0xFFFF) - ? is_in_interval16(cp, double_width_bmp, ARRAY_SIZE(double_width_bmp)) - : is_in_interval32(cp, double_width_non_bmp, ARRAY_SIZE(double_width_non_bmp)); + return is_in_interval(cp, double_width_ranges, ARRAY_SIZE(double_width_ranges)); } """) # Print summary - zero_width_bmp_count = sum(end - start + 1 for start, end in zero_width_bmp) - zero_width_non_bmp_count = sum(end - start + 1 for start, end in zero_width_non_bmp) - double_width_bmp_count = sum(end - start + 1 for start, end in double_width_bmp) - double_width_non_bmp_count = sum(end - start + 1 for start, end in double_width_non_bmp) - - total_zero_width = zero_width_bmp_count + zero_width_non_bmp_count - total_double_width = double_width_bmp_count + double_width_non_bmp_count + zero_width_count = sum(end - start + 1 for start, end in zero_width_ranges) + double_width_count = sum(end - start + 1 for start, end in double_width_ranges) print(f"Generated {c_file} with:") - print(f"- {len(zero_width_bmp)} zero-width BMP ranges (16-bit) covering ~{zero_width_bmp_count} code points") - print(f"- {len(zero_width_non_bmp)} zero-width non-BMP ranges (32-bit) covering ~{zero_width_non_bmp_count} code points") - print(f"- {len(double_width_bmp)} double-width BMP ranges (16-bit) covering ~{double_width_bmp_count} code points") - print(f"- {len(double_width_non_bmp)} double-width non-BMP ranges (32-bit) covering ~{double_width_non_bmp_count} code points") - print(f"Total: {len(zero_width_bmp) + len(zero_width_non_bmp) + len(double_width_bmp) + len(double_width_non_bmp)} ranges covering ~{total_zero_width + total_double_width} code points") + print(f"- 
{len(zero_width_ranges)} zero-width ranges covering ~{zero_width_count} code points") + print(f"- {len(double_width_ranges)} double-width ranges covering ~{double_width_count} code points") if __name__ == "__main__": generate_ucs_width() From 3cf3987b572f71ee609d73601ccfe785dd4ffd50 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:18 +0200 Subject: [PATCH 054/105] Revert "vt: support Unicode recomposition" This reverts commit cd6937d42bca46f2143544918e535d6fd22b71b7. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes involved. Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index e3d35c4f9204..5d53feeb5d2b 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2953,15 +2953,8 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, * double-width. */ } else { - /* try recomposition */ - prev_c = ucs_recompose(prev_c, c); - if (prev_c != 0) { - vc_con_rewind(vc); - c = prev_c; - } else { - /* Otherwise zero-width code points are ignored */ - goto out; - } + /* Otherwise zero-width code points are ignored */ + goto out; } } } From 6cccf837ac8d72e13c651f60d93545f9fb4e84ed Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:19 +0200 Subject: [PATCH 055/105] Revert "vt: create ucs_recompose.c using gen_ucs_recompose.py" This reverts commit 54af55b990eda5a6a0140a3cded8094b42c0c3b7. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes involved. 
Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/Makefile | 2 +- drivers/tty/vt/ucs_recompose.c | 170 --------------------------------- include/linux/consolemap.h | 6 -- 3 files changed, 1 insertion(+), 177 deletions(-) delete mode 100644 drivers/tty/vt/ucs_recompose.c diff --git a/drivers/tty/vt/Makefile b/drivers/tty/vt/Makefile index a63f6c9438da..bee69277bbc3 100644 --- a/drivers/tty/vt/Makefile +++ b/drivers/tty/vt/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_VT) += vt_ioctl.o vc_screen.o \ selection.o keyboard.o \ vt.o defkeymap.o obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o \ - ucs_width.o ucs_recompose.o + ucs_width.o # Files generated that shall be removed upon make clean clean-files := consolemap_deftbl.c defkeymap.c diff --git a/drivers/tty/vt/ucs_recompose.c b/drivers/tty/vt/ucs_recompose.c deleted file mode 100644 index 5c30c989def3..000000000000 --- a/drivers/tty/vt/ucs_recompose.c +++ /dev/null @@ -1,170 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ucs_recompose.c - Unicode character recomposition - * - * Auto-generated by gen_ucs_recompose.py - * - * Unicode Version: 16.0.0 - * - * This file contains a table with most commonly used Latin, Greek, and - * Cyrillic recomposition pairs only (71 entries). To generate a table with - * all possible recomposition pairs from the Unicode BMP (1000 entries) - * instead, run: - * - * python gen_ucs_recompose.py --full - */ - -#include -#include -#include -#include - -/* - * Structure for recomposition pairs. - * First element is the base character, second is the combining mark, - * third is the recomposed character. - * Using uint16_t to save space since all values are within BMP range. 
- */ -struct recomposition { - uint16_t base; - uint16_t combining; - uint16_t recomposed; -}; - -/* - * Table of most commonly used Latin, Greek, and Cyrillic recomposition pairs only - * Sorted by base character and then combining character for binary search - */ -static const struct recomposition recomposition_table[] = { - { 0x0041, 0x0300, 0x00C0 }, /* LATIN CAPITAL LETTER A + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER A WITH GRAVE */ - { 0x0041, 0x0301, 0x00C1 }, /* LATIN CAPITAL LETTER A + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER A WITH ACUTE */ - { 0x0041, 0x0302, 0x00C2 }, /* LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER A WITH CIRCUMFLEX */ - { 0x0041, 0x0303, 0x00C3 }, /* LATIN CAPITAL LETTER A + COMBINING TILDE = LATIN CAPITAL LETTER A WITH TILDE */ - { 0x0041, 0x0308, 0x00C4 }, /* LATIN CAPITAL LETTER A + COMBINING DIAERESIS = LATIN CAPITAL LETTER A WITH DIAERESIS */ - { 0x0041, 0x030A, 0x00C5 }, /* LATIN CAPITAL LETTER A + COMBINING RING ABOVE = LATIN CAPITAL LETTER A WITH RING ABOVE */ - { 0x0043, 0x0327, 0x00C7 }, /* LATIN CAPITAL LETTER C + COMBINING CEDILLA = LATIN CAPITAL LETTER C WITH CEDILLA */ - { 0x0045, 0x0300, 0x00C8 }, /* LATIN CAPITAL LETTER E + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER E WITH GRAVE */ - { 0x0045, 0x0301, 0x00C9 }, /* LATIN CAPITAL LETTER E + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER E WITH ACUTE */ - { 0x0045, 0x0302, 0x00CA }, /* LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER E WITH CIRCUMFLEX */ - { 0x0045, 0x0308, 0x00CB }, /* LATIN CAPITAL LETTER E + COMBINING DIAERESIS = LATIN CAPITAL LETTER E WITH DIAERESIS */ - { 0x0049, 0x0300, 0x00CC }, /* LATIN CAPITAL LETTER I + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER I WITH GRAVE */ - { 0x0049, 0x0301, 0x00CD }, /* LATIN CAPITAL LETTER I + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER I WITH ACUTE */ - { 0x0049, 0x0302, 0x00CE }, /* LATIN CAPITAL LETTER I + COMBINING CIRCUMFLEX ACCENT = LATIN 
CAPITAL LETTER I WITH CIRCUMFLEX */ - { 0x0049, 0x0308, 0x00CF }, /* LATIN CAPITAL LETTER I + COMBINING DIAERESIS = LATIN CAPITAL LETTER I WITH DIAERESIS */ - { 0x004E, 0x0303, 0x00D1 }, /* LATIN CAPITAL LETTER N + COMBINING TILDE = LATIN CAPITAL LETTER N WITH TILDE */ - { 0x004F, 0x0300, 0x00D2 }, /* LATIN CAPITAL LETTER O + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER O WITH GRAVE */ - { 0x004F, 0x0301, 0x00D3 }, /* LATIN CAPITAL LETTER O + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER O WITH ACUTE */ - { 0x004F, 0x0302, 0x00D4 }, /* LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER O WITH CIRCUMFLEX */ - { 0x004F, 0x0303, 0x00D5 }, /* LATIN CAPITAL LETTER O + COMBINING TILDE = LATIN CAPITAL LETTER O WITH TILDE */ - { 0x004F, 0x0308, 0x00D6 }, /* LATIN CAPITAL LETTER O + COMBINING DIAERESIS = LATIN CAPITAL LETTER O WITH DIAERESIS */ - { 0x0055, 0x0300, 0x00D9 }, /* LATIN CAPITAL LETTER U + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER U WITH GRAVE */ - { 0x0055, 0x0301, 0x00DA }, /* LATIN CAPITAL LETTER U + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER U WITH ACUTE */ - { 0x0055, 0x0302, 0x00DB }, /* LATIN CAPITAL LETTER U + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER U WITH CIRCUMFLEX */ - { 0x0055, 0x0308, 0x00DC }, /* LATIN CAPITAL LETTER U + COMBINING DIAERESIS = LATIN CAPITAL LETTER U WITH DIAERESIS */ - { 0x0059, 0x0301, 0x00DD }, /* LATIN CAPITAL LETTER Y + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER Y WITH ACUTE */ - { 0x0061, 0x0300, 0x00E0 }, /* LATIN SMALL LETTER A + COMBINING GRAVE ACCENT = LATIN SMALL LETTER A WITH GRAVE */ - { 0x0061, 0x0301, 0x00E1 }, /* LATIN SMALL LETTER A + COMBINING ACUTE ACCENT = LATIN SMALL LETTER A WITH ACUTE */ - { 0x0061, 0x0302, 0x00E2 }, /* LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER A WITH CIRCUMFLEX */ - { 0x0061, 0x0303, 0x00E3 }, /* LATIN SMALL LETTER A + COMBINING TILDE = LATIN SMALL LETTER A WITH TILDE */ - { 0x0061, 0x0308, 0x00E4 }, /* LATIN SMALL 
LETTER A + COMBINING DIAERESIS = LATIN SMALL LETTER A WITH DIAERESIS */ - { 0x0061, 0x030A, 0x00E5 }, /* LATIN SMALL LETTER A + COMBINING RING ABOVE = LATIN SMALL LETTER A WITH RING ABOVE */ - { 0x0063, 0x0327, 0x00E7 }, /* LATIN SMALL LETTER C + COMBINING CEDILLA = LATIN SMALL LETTER C WITH CEDILLA */ - { 0x0065, 0x0300, 0x00E8 }, /* LATIN SMALL LETTER E + COMBINING GRAVE ACCENT = LATIN SMALL LETTER E WITH GRAVE */ - { 0x0065, 0x0301, 0x00E9 }, /* LATIN SMALL LETTER E + COMBINING ACUTE ACCENT = LATIN SMALL LETTER E WITH ACUTE */ - { 0x0065, 0x0302, 0x00EA }, /* LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER E WITH CIRCUMFLEX */ - { 0x0065, 0x0308, 0x00EB }, /* LATIN SMALL LETTER E + COMBINING DIAERESIS = LATIN SMALL LETTER E WITH DIAERESIS */ - { 0x0069, 0x0300, 0x00EC }, /* LATIN SMALL LETTER I + COMBINING GRAVE ACCENT = LATIN SMALL LETTER I WITH GRAVE */ - { 0x0069, 0x0301, 0x00ED }, /* LATIN SMALL LETTER I + COMBINING ACUTE ACCENT = LATIN SMALL LETTER I WITH ACUTE */ - { 0x0069, 0x0302, 0x00EE }, /* LATIN SMALL LETTER I + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER I WITH CIRCUMFLEX */ - { 0x0069, 0x0308, 0x00EF }, /* LATIN SMALL LETTER I + COMBINING DIAERESIS = LATIN SMALL LETTER I WITH DIAERESIS */ - { 0x006E, 0x0303, 0x00F1 }, /* LATIN SMALL LETTER N + COMBINING TILDE = LATIN SMALL LETTER N WITH TILDE */ - { 0x006F, 0x0300, 0x00F2 }, /* LATIN SMALL LETTER O + COMBINING GRAVE ACCENT = LATIN SMALL LETTER O WITH GRAVE */ - { 0x006F, 0x0301, 0x00F3 }, /* LATIN SMALL LETTER O + COMBINING ACUTE ACCENT = LATIN SMALL LETTER O WITH ACUTE */ - { 0x006F, 0x0302, 0x00F4 }, /* LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER O WITH CIRCUMFLEX */ - { 0x006F, 0x0303, 0x00F5 }, /* LATIN SMALL LETTER O + COMBINING TILDE = LATIN SMALL LETTER O WITH TILDE */ - { 0x006F, 0x0308, 0x00F6 }, /* LATIN SMALL LETTER O + COMBINING DIAERESIS = LATIN SMALL LETTER O WITH DIAERESIS */ - { 0x0075, 0x0300, 0x00F9 }, /* LATIN SMALL LETTER 
U + COMBINING GRAVE ACCENT = LATIN SMALL LETTER U WITH GRAVE */ - { 0x0075, 0x0301, 0x00FA }, /* LATIN SMALL LETTER U + COMBINING ACUTE ACCENT = LATIN SMALL LETTER U WITH ACUTE */ - { 0x0075, 0x0302, 0x00FB }, /* LATIN SMALL LETTER U + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER U WITH CIRCUMFLEX */ - { 0x0075, 0x0308, 0x00FC }, /* LATIN SMALL LETTER U + COMBINING DIAERESIS = LATIN SMALL LETTER U WITH DIAERESIS */ - { 0x0079, 0x0301, 0x00FD }, /* LATIN SMALL LETTER Y + COMBINING ACUTE ACCENT = LATIN SMALL LETTER Y WITH ACUTE */ - { 0x0079, 0x0308, 0x00FF }, /* LATIN SMALL LETTER Y + COMBINING DIAERESIS = LATIN SMALL LETTER Y WITH DIAERESIS */ - { 0x0391, 0x0301, 0x0386 }, /* GREEK CAPITAL LETTER ALPHA + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER ALPHA WITH TONOS */ - { 0x0395, 0x0301, 0x0388 }, /* GREEK CAPITAL LETTER EPSILON + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER EPSILON WITH TONOS */ - { 0x0397, 0x0301, 0x0389 }, /* GREEK CAPITAL LETTER ETA + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER ETA WITH TONOS */ - { 0x0399, 0x0301, 0x038A }, /* GREEK CAPITAL LETTER IOTA + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER IOTA WITH TONOS */ - { 0x039F, 0x0301, 0x038C }, /* GREEK CAPITAL LETTER OMICRON + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER OMICRON WITH TONOS */ - { 0x03A5, 0x0301, 0x038E }, /* GREEK CAPITAL LETTER UPSILON + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER UPSILON WITH TONOS */ - { 0x03A9, 0x0301, 0x038F }, /* GREEK CAPITAL LETTER OMEGA + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER OMEGA WITH TONOS */ - { 0x03B1, 0x0301, 0x03AC }, /* GREEK SMALL LETTER ALPHA + COMBINING ACUTE ACCENT = GREEK SMALL LETTER ALPHA WITH TONOS */ - { 0x03B5, 0x0301, 0x03AD }, /* GREEK SMALL LETTER EPSILON + COMBINING ACUTE ACCENT = GREEK SMALL LETTER EPSILON WITH TONOS */ - { 0x03B7, 0x0301, 0x03AE }, /* GREEK SMALL LETTER ETA + COMBINING ACUTE ACCENT = GREEK SMALL LETTER ETA WITH TONOS */ - { 0x03B9, 0x0301, 0x03AF }, /* GREEK SMALL LETTER IOTA + COMBINING 
ACUTE ACCENT = GREEK SMALL LETTER IOTA WITH TONOS */ - { 0x03BF, 0x0301, 0x03CC }, /* GREEK SMALL LETTER OMICRON + COMBINING ACUTE ACCENT = GREEK SMALL LETTER OMICRON WITH TONOS */ - { 0x03C5, 0x0301, 0x03CD }, /* GREEK SMALL LETTER UPSILON + COMBINING ACUTE ACCENT = GREEK SMALL LETTER UPSILON WITH TONOS */ - { 0x03C9, 0x0301, 0x03CE }, /* GREEK SMALL LETTER OMEGA + COMBINING ACUTE ACCENT = GREEK SMALL LETTER OMEGA WITH TONOS */ - { 0x0418, 0x0306, 0x0419 }, /* CYRILLIC CAPITAL LETTER I + COMBINING BREVE = CYRILLIC CAPITAL LETTER SHORT I */ - { 0x0423, 0x0306, 0x040E }, /* CYRILLIC CAPITAL LETTER U + COMBINING BREVE = CYRILLIC CAPITAL LETTER SHORT U */ - { 0x0438, 0x0306, 0x0439 }, /* CYRILLIC SMALL LETTER I + COMBINING BREVE = CYRILLIC SMALL LETTER SHORT I */ - { 0x0443, 0x0306, 0x045E }, /* CYRILLIC SMALL LETTER U + COMBINING BREVE = CYRILLIC SMALL LETTER SHORT U */ -}; - -/* - * Boundary values for quick rejection - * These are calculated by analyzing the table during generation - */ -#define MIN_BASE_CHAR 0x0041 -#define MAX_BASE_CHAR 0x0443 -#define MIN_COMBINING_CHAR 0x0300 -#define MAX_COMBINING_CHAR 0x0327 - -struct compare_key { - uint16_t base; - uint16_t combining; -}; - -static int recomposition_compare(const void *key, const void *element) -{ - const struct compare_key *search_key = key; - const struct recomposition *table_entry = element; - - /* Compare base character first */ - if (search_key->base < table_entry->base) - return -1; - if (search_key->base > table_entry->base) - return 1; - - /* Base characters match, now compare combining character */ - if (search_key->combining < table_entry->combining) - return -1; - if (search_key->combining > table_entry->combining) - return 1; - - /* Both match */ - return 0; -} - -/** - * Attempt to recompose two Unicode characters into a single character. 
- * - * @param previous: Previous Unicode code point (UCS-4) - * @param current: Current Unicode code point (UCS-4) - * Return: Recomposed Unicode code point, or 0 if no recomposition is possible - */ -uint32_t ucs_recompose(uint32_t base, uint32_t combining) -{ - /* Check if characters are within the range of our table */ - if (base < MIN_BASE_CHAR || base > MAX_BASE_CHAR || - combining < MIN_COMBINING_CHAR || combining > MAX_COMBINING_CHAR) - return 0; - - struct compare_key key = { base, combining }; - - struct recomposition *result = - __inline_bsearch(&key, recomposition_table, - ARRAY_SIZE(recomposition_table), - sizeof(*recomposition_table), - recomposition_compare); - - return result ? result->recomposed : 0; -} diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 4d3a34c288e5..b3a911866662 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -30,7 +30,6 @@ int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); bool ucs_is_zero_width(uint32_t cp); -uint32_t ucs_recompose(uint32_t base, uint32_t combining); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -70,11 +69,6 @@ static inline bool ucs_is_zero_width(uint32_t cp) { return false; } - -static inline uint32_t ucs_recompose(uint32_t base, uint32_t combining) -{ - return 0; -} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ From 06df3bcefa1ef8d0b36164880163f9e2d9349246 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:20 +0200 Subject: [PATCH 056/105] Revert "vt: introduce gen_ucs_recompose.py to create ucs_recompose.c" This reverts commit f2347b0cdf65e614732c2307863c95304f72d9d9. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes involved. 
Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/gen_ucs_recompose.py | 321 ---------------------------- 1 file changed, 321 deletions(-) delete mode 100755 drivers/tty/vt/gen_ucs_recompose.py diff --git a/drivers/tty/vt/gen_ucs_recompose.py b/drivers/tty/vt/gen_ucs_recompose.py deleted file mode 100755 index 64418803e49e..000000000000 --- a/drivers/tty/vt/gen_ucs_recompose.py +++ /dev/null @@ -1,321 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# -# This script uses Python's unicodedata module to generate ucs_recompose.c. -# The generated code maps base character + combining mark pairs to their -# precomposed equivalents. -# -# Usage: -# python gen_ucs_recompose.py # Generate with common recomposition pairs -# python gen_ucs_recompose.py --full # Generate with all recomposition pairs - -import unicodedata -import sys -import argparse -import textwrap - -common_recompose_description = "most commonly used Latin, Greek, and Cyrillic recomposition pairs only" -COMMON_RECOMPOSITION_PAIRS = [ - # Latin letters with accents - uppercase - (0x0041, 0x0300, 0x00C0), # A + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER A WITH GRAVE - (0x0041, 0x0301, 0x00C1), # A + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER A WITH ACUTE - (0x0041, 0x0302, 0x00C2), # A + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER A WITH CIRCUMFLEX - (0x0041, 0x0303, 0x00C3), # A + COMBINING TILDE = LATIN CAPITAL LETTER A WITH TILDE - (0x0041, 0x0308, 0x00C4), # A + COMBINING DIAERESIS = LATIN CAPITAL LETTER A WITH DIAERESIS - (0x0041, 0x030A, 0x00C5), # A + COMBINING RING ABOVE = LATIN CAPITAL LETTER A WITH RING ABOVE - (0x0043, 0x0327, 0x00C7), # C + COMBINING CEDILLA = LATIN CAPITAL LETTER C WITH CEDILLA - (0x0045, 0x0300, 0x00C8), # E + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER E WITH GRAVE - (0x0045, 0x0301, 0x00C9), # E + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER E WITH ACUTE - (0x0045, 0x0302, 0x00CA), # E + COMBINING 
CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER E WITH CIRCUMFLEX - (0x0045, 0x0308, 0x00CB), # E + COMBINING DIAERESIS = LATIN CAPITAL LETTER E WITH DIAERESIS - (0x0049, 0x0300, 0x00CC), # I + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER I WITH GRAVE - (0x0049, 0x0301, 0x00CD), # I + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER I WITH ACUTE - (0x0049, 0x0302, 0x00CE), # I + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER I WITH CIRCUMFLEX - (0x0049, 0x0308, 0x00CF), # I + COMBINING DIAERESIS = LATIN CAPITAL LETTER I WITH DIAERESIS - (0x004E, 0x0303, 0x00D1), # N + COMBINING TILDE = LATIN CAPITAL LETTER N WITH TILDE - (0x004F, 0x0300, 0x00D2), # O + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER O WITH GRAVE - (0x004F, 0x0301, 0x00D3), # O + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER O WITH ACUTE - (0x004F, 0x0302, 0x00D4), # O + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER O WITH CIRCUMFLEX - (0x004F, 0x0303, 0x00D5), # O + COMBINING TILDE = LATIN CAPITAL LETTER O WITH TILDE - (0x004F, 0x0308, 0x00D6), # O + COMBINING DIAERESIS = LATIN CAPITAL LETTER O WITH DIAERESIS - (0x0055, 0x0300, 0x00D9), # U + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER U WITH GRAVE - (0x0055, 0x0301, 0x00DA), # U + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER U WITH ACUTE - (0x0055, 0x0302, 0x00DB), # U + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER U WITH CIRCUMFLEX - (0x0055, 0x0308, 0x00DC), # U + COMBINING DIAERESIS = LATIN CAPITAL LETTER U WITH DIAERESIS - (0x0059, 0x0301, 0x00DD), # Y + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER Y WITH ACUTE - - # Latin letters with accents - lowercase - (0x0061, 0x0300, 0x00E0), # a + COMBINING GRAVE ACCENT = LATIN SMALL LETTER A WITH GRAVE - (0x0061, 0x0301, 0x00E1), # a + COMBINING ACUTE ACCENT = LATIN SMALL LETTER A WITH ACUTE - (0x0061, 0x0302, 0x00E2), # a + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER A WITH CIRCUMFLEX - (0x0061, 0x0303, 0x00E3), # a + COMBINING TILDE = LATIN SMALL LETTER A WITH TILDE - (0x0061, 0x0308, 
0x00E4), # a + COMBINING DIAERESIS = LATIN SMALL LETTER A WITH DIAERESIS - (0x0061, 0x030A, 0x00E5), # a + COMBINING RING ABOVE = LATIN SMALL LETTER A WITH RING ABOVE - (0x0063, 0x0327, 0x00E7), # c + COMBINING CEDILLA = LATIN SMALL LETTER C WITH CEDILLA - (0x0065, 0x0300, 0x00E8), # e + COMBINING GRAVE ACCENT = LATIN SMALL LETTER E WITH GRAVE - (0x0065, 0x0301, 0x00E9), # e + COMBINING ACUTE ACCENT = LATIN SMALL LETTER E WITH ACUTE - (0x0065, 0x0302, 0x00EA), # e + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER E WITH CIRCUMFLEX - (0x0065, 0x0308, 0x00EB), # e + COMBINING DIAERESIS = LATIN SMALL LETTER E WITH DIAERESIS - (0x0069, 0x0300, 0x00EC), # i + COMBINING GRAVE ACCENT = LATIN SMALL LETTER I WITH GRAVE - (0x0069, 0x0301, 0x00ED), # i + COMBINING ACUTE ACCENT = LATIN SMALL LETTER I WITH ACUTE - (0x0069, 0x0302, 0x00EE), # i + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER I WITH CIRCUMFLEX - (0x0069, 0x0308, 0x00EF), # i + COMBINING DIAERESIS = LATIN SMALL LETTER I WITH DIAERESIS - (0x006E, 0x0303, 0x00F1), # n + COMBINING TILDE = LATIN SMALL LETTER N WITH TILDE - (0x006F, 0x0300, 0x00F2), # o + COMBINING GRAVE ACCENT = LATIN SMALL LETTER O WITH GRAVE - (0x006F, 0x0301, 0x00F3), # o + COMBINING ACUTE ACCENT = LATIN SMALL LETTER O WITH ACUTE - (0x006F, 0x0302, 0x00F4), # o + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER O WITH CIRCUMFLEX - (0x006F, 0x0303, 0x00F5), # o + COMBINING TILDE = LATIN SMALL LETTER O WITH TILDE - (0x006F, 0x0308, 0x00F6), # o + COMBINING DIAERESIS = LATIN SMALL LETTER O WITH DIAERESIS - (0x0075, 0x0300, 0x00F9), # u + COMBINING GRAVE ACCENT = LATIN SMALL LETTER U WITH GRAVE - (0x0075, 0x0301, 0x00FA), # u + COMBINING ACUTE ACCENT = LATIN SMALL LETTER U WITH ACUTE - (0x0075, 0x0302, 0x00FB), # u + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER U WITH CIRCUMFLEX - (0x0075, 0x0308, 0x00FC), # u + COMBINING DIAERESIS = LATIN SMALL LETTER U WITH DIAERESIS - (0x0079, 0x0301, 0x00FD), # y + COMBINING ACUTE ACCENT = LATIN SMALL 
LETTER Y WITH ACUTE - (0x0079, 0x0308, 0x00FF), # y + COMBINING DIAERESIS = LATIN SMALL LETTER Y WITH DIAERESIS - - # Common Greek characters - (0x0391, 0x0301, 0x0386), # Α + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER ALPHA WITH TONOS - (0x0395, 0x0301, 0x0388), # Ε + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER EPSILON WITH TONOS - (0x0397, 0x0301, 0x0389), # Η + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER ETA WITH TONOS - (0x0399, 0x0301, 0x038A), # Ι + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER IOTA WITH TONOS - (0x039F, 0x0301, 0x038C), # Ο + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER OMICRON WITH TONOS - (0x03A5, 0x0301, 0x038E), # Υ + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER UPSILON WITH TONOS - (0x03A9, 0x0301, 0x038F), # Ω + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER OMEGA WITH TONOS - (0x03B1, 0x0301, 0x03AC), # α + COMBINING ACUTE ACCENT = GREEK SMALL LETTER ALPHA WITH TONOS - (0x03B5, 0x0301, 0x03AD), # ε + COMBINING ACUTE ACCENT = GREEK SMALL LETTER EPSILON WITH TONOS - (0x03B7, 0x0301, 0x03AE), # η + COMBINING ACUTE ACCENT = GREEK SMALL LETTER ETA WITH TONOS - (0x03B9, 0x0301, 0x03AF), # ι + COMBINING ACUTE ACCENT = GREEK SMALL LETTER IOTA WITH TONOS - (0x03BF, 0x0301, 0x03CC), # ο + COMBINING ACUTE ACCENT = GREEK SMALL LETTER OMICRON WITH TONOS - (0x03C5, 0x0301, 0x03CD), # υ + COMBINING ACUTE ACCENT = GREEK SMALL LETTER UPSILON WITH TONOS - (0x03C9, 0x0301, 0x03CE), # ω + COMBINING ACUTE ACCENT = GREEK SMALL LETTER OMEGA WITH TONOS - - # Common Cyrillic characters - (0x0418, 0x0306, 0x0419), # И + COMBINING BREVE = CYRILLIC CAPITAL LETTER SHORT I - (0x0438, 0x0306, 0x0439), # и + COMBINING BREVE = CYRILLIC SMALL LETTER SHORT I - (0x0423, 0x0306, 0x040E), # У + COMBINING BREVE = CYRILLIC CAPITAL LETTER SHORT U - (0x0443, 0x0306, 0x045E), # у + COMBINING BREVE = CYRILLIC SMALL LETTER SHORT U -] - -full_recompose_description = "all possible recomposition pairs from the Unicode BMP" -def collect_all_recomposition_pairs(): - 
"""Collect all possible recomposition pairs from the Unicode data.""" - # Map to store recomposition pairs: (base, combining) -> recomposed - recompose_map = {} - - # Process all assigned Unicode code points in BMP (Basic Multilingual Plane) - # We limit to BMP (0x0000-0xFFFF) to keep our table smaller with uint16_t - for cp in range(0, 0x10000): - try: - char = chr(cp) - - # Skip unassigned or control characters - if not unicodedata.name(char, ''): - continue - - # Find decomposition - decomp = unicodedata.decomposition(char) - if not decomp or '<' in decomp: # Skip compatibility decompositions - continue - - # Parse the decomposition - parts = decomp.split() - if len(parts) == 2: # Simple base + combining mark - base = int(parts[0], 16) - combining = int(parts[1], 16) - - # Only store if both are in BMP - if base < 0x10000 and combining < 0x10000: - recompose_map[(base, combining)] = cp - - except (ValueError, TypeError): - continue - - # Convert to a list of tuples and sort for binary search - recompose_list = [(base, combining, recomposed) - for (base, combining), recomposed in recompose_map.items()] - recompose_list.sort() - - return recompose_list - -def validate_common_pairs(full_list): - """Validate that all common pairs are in the full list. - - Raises: - ValueError: If any common pair is missing or has a different recomposition - value than what's in the full table. 
- """ - full_pairs = {(base, combining): recomposed for base, combining, recomposed in full_list} - for base, combining, recomposed in COMMON_RECOMPOSITION_PAIRS: - full_recomposed = full_pairs.get((base, combining)) - if full_recomposed is None: - error_msg = f"Error: Common pair (0x{base:04X}, 0x{combining:04X}) not found in full data" - print(error_msg) - raise ValueError(error_msg) - elif full_recomposed != recomposed: - error_msg = (f"Error: Common pair (0x{base:04X}, 0x{combining:04X}) has different recomposition: " - f"0x{recomposed:04X} vs 0x{full_recomposed:04X}") - print(error_msg) - raise ValueError(error_msg) - -def generate_recomposition_table(use_full_list=False): - """Generate the recomposition table C code.""" - # Output file name - c_file = "ucs_recompose.c" - - # Get Unicode version information - unicode_version = unicodedata.unidata_version - - # Collect all recomposition pairs for validation - full_recompose_list = collect_all_recomposition_pairs() - - # Decide which list to use - if use_full_list: - print("Using full recomposition list...") - recompose_list = full_recompose_list - table_description = full_recompose_description - alt_list = COMMON_RECOMPOSITION_PAIRS - alt_description = common_recompose_description - else: - print("Using common recomposition list...") - # Validate that all common pairs are in the full list - validate_common_pairs(full_recompose_list) - recompose_list = sorted(COMMON_RECOMPOSITION_PAIRS) - table_description = common_recompose_description - alt_list = full_recompose_list - alt_description = full_recompose_description - generation_mode = " --full" if use_full_list else "" - alternative_mode = " --full" if not use_full_list else "" - table_description_detail = f"{table_description} ({len(recompose_list)} entries)" - alt_description_detail = f"{alt_description} ({len(alt_list)} entries)" - - # Calculate min/max values for boundary checks - min_base = min(base for base, _, _ in recompose_list) - max_base = max(base 
for base, _, _ in recompose_list) - min_combining = min(combining for _, combining, _ in recompose_list) - max_combining = max(combining for _, combining, _ in recompose_list) - - # Generate implementation file - with open(c_file, 'w') as f: - f.write(f"""\ -// SPDX-License-Identifier: GPL-2.0 -/* - * ucs_recompose.c - Unicode character recomposition - * - * Auto-generated by gen_ucs_recompose.py{generation_mode} - * - * Unicode Version: {unicode_version} - * -{textwrap.fill( - f"This file contains a table with {table_description_detail}. " + - f"To generate a table with {alt_description_detail} instead, run:", - width=75, initial_indent=" * ", subsequent_indent=" * ")} - * - * python gen_ucs_recompose.py{alternative_mode} - */ - -#include -#include -#include -#include - -/* - * Structure for recomposition pairs. - * First element is the base character, second is the combining mark, - * third is the recomposed character. - * Using uint16_t to save space since all values are within BMP range. 
- */ -struct recomposition {{ - uint16_t base; - uint16_t combining; - uint16_t recomposed; -}}; - -/* - * Table of {table_description} - * Sorted by base character and then combining character for binary search - */ -static const struct recomposition recomposition_table[] = {{ -""") - - # Write the recomposition table with comments - for base, combining, recomposed in recompose_list: - try: - base_name = unicodedata.name(chr(base)) - combining_name = unicodedata.name(chr(combining)) - recomposed_name = unicodedata.name(chr(recomposed)) - comment = f"/* {base_name} + {combining_name} = {recomposed_name} */" - except ValueError: - comment = f"/* U+{base:04X} + U+{combining:04X} = U+{recomposed:04X} */" - f.write(f"\t{{ 0x{base:04X}, 0x{combining:04X}, 0x{recomposed:04X} }}, {comment}\n") - - f.write(f"""\ -}}; - -/* - * Boundary values for quick rejection - * These are calculated by analyzing the table during generation - */ -#define MIN_BASE_CHAR 0x{min_base:04X} -#define MAX_BASE_CHAR 0x{max_base:04X} -#define MIN_COMBINING_CHAR 0x{min_combining:04X} -#define MAX_COMBINING_CHAR 0x{max_combining:04X} - -struct compare_key {{ - uint16_t base; - uint16_t combining; -}}; - -static int recomposition_compare(const void *key, const void *element) -{{ - const struct compare_key *search_key = key; - const struct recomposition *table_entry = element; - - /* Compare base character first */ - if (search_key->base < table_entry->base) - return -1; - if (search_key->base > table_entry->base) - return 1; - - /* Base characters match, now compare combining character */ - if (search_key->combining < table_entry->combining) - return -1; - if (search_key->combining > table_entry->combining) - return 1; - - /* Both match */ - return 0; -}} - -/** - * Attempt to recompose two Unicode characters into a single character. 
- * - * @param previous: Previous Unicode code point (UCS-4) - * @param current: Current Unicode code point (UCS-4) - * Return: Recomposed Unicode code point, or 0 if no recomposition is possible - */ -uint32_t ucs_recompose(uint32_t base, uint32_t combining) -{{ - /* Check if characters are within the range of our table */ - if (base < MIN_BASE_CHAR || base > MAX_BASE_CHAR || - combining < MIN_COMBINING_CHAR || combining > MAX_COMBINING_CHAR) - return 0; - - struct compare_key key = {{ base, combining }}; - - struct recomposition *result = - __inline_bsearch(&key, recomposition_table, - ARRAY_SIZE(recomposition_table), - sizeof(*recomposition_table), - recomposition_compare); - - return result ? result->recomposed : 0; -}} -""") - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generate Unicode recomposition table") - parser.add_argument("--full", action="store_true", - help="Generate a full recomposition table (default: common pairs only)") - args = parser.parse_args() - - generate_recomposition_table(use_full_list=args.full) From 67a4bb27461b0e3b39b27db688b5981b6eb62175 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:21 +0200 Subject: [PATCH 057/105] Revert "vt: update ucs_width.c using gen_ucs_width.py" This reverts commit 3a1ab63aa05b4736a7d30ae0a769385662f13def. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes involved. 
Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/ucs_width.c | 495 ++----------------------------------- include/linux/consolemap.h | 6 +- 2 files changed, 26 insertions(+), 475 deletions(-) diff --git a/drivers/tty/vt/ucs_width.c b/drivers/tty/vt/ucs_width.c index 47b22583bd34..5f0bde30a1fb 100644 --- a/drivers/tty/vt/ucs_width.c +++ b/drivers/tty/vt/ucs_width.c @@ -1,498 +1,45 @@ // SPDX-License-Identifier: GPL-2.0 -/* - * ucs_width.c - Unicode character width lookup - * - * Auto-generated by gen_ucs_width.py - * - * Unicode Version: 16.0.0 - */ #include #include #include #include +/* ucs_is_double_width() is based on the wcwidth() implementation by + * Markus Kuhn -- 2007-05-26 (Unicode 5.0) + * Latest version: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c + */ + struct interval { uint32_t first; uint32_t last; }; -/* Zero-width character ranges */ -static const struct interval zero_width_ranges[] = { - { 0x000AD, 0x000AD }, /* SOFT HYPHEN */ - { 0x00300, 0x0036F }, /* COMBINING GRAVE ACCENT - COMBINING LATIN SMALL LETTER X */ - { 0x00483, 0x00489 }, /* COMBINING CYRILLIC TITLO - COMBINING CYRILLIC MILLIONS SIGN */ - { 0x00591, 0x005BD }, /* HEBREW ACCENT ETNAHTA - HEBREW POINT METEG */ - { 0x005BF, 0x005BF }, /* HEBREW POINT RAFE */ - { 0x005C1, 0x005C2 }, /* HEBREW POINT SHIN DOT - HEBREW POINT SIN DOT */ - { 0x005C4, 0x005C5 }, /* HEBREW MARK UPPER DOT - HEBREW MARK LOWER DOT */ - { 0x005C7, 0x005C7 }, /* HEBREW POINT QAMATS QATAN */ - { 0x00600, 0x00605 }, /* ARABIC NUMBER SIGN - ARABIC NUMBER MARK ABOVE */ - { 0x00610, 0x0061A }, /* ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM - ARABIC SMALL KASRA */ - { 0x0064B, 0x0065F }, /* ARABIC FATHATAN - ARABIC WAVY HAMZA BELOW */ - { 0x00670, 0x00670 }, /* ARABIC LETTER SUPERSCRIPT ALEF */ - { 0x006D6, 0x006DC }, /* ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA - ARABIC SMALL HIGH SEEN */ - { 0x006DF, 0x006E4 }, /* ARABIC SMALL HIGH ROUNDED ZERO - ARABIC SMALL HIGH MADDA 
*/ - { 0x006E7, 0x006E8 }, /* ARABIC SMALL HIGH YEH - ARABIC SMALL HIGH NOON */ - { 0x006EA, 0x006ED }, /* ARABIC EMPTY CENTRE LOW STOP - ARABIC SMALL LOW MEEM */ - { 0x00711, 0x00711 }, /* SYRIAC LETTER SUPERSCRIPT ALAPH */ - { 0x00730, 0x0074A }, /* SYRIAC PTHAHA ABOVE - SYRIAC BARREKH */ - { 0x007A6, 0x007B0 }, /* THAANA ABAFILI - THAANA SUKUN */ - { 0x007EB, 0x007F3 }, /* NKO COMBINING SHORT HIGH TONE - NKO COMBINING DOUBLE DOT ABOVE */ - { 0x007FD, 0x007FD }, /* NKO DANTAYALAN */ - { 0x00816, 0x00819 }, /* SAMARITAN MARK IN - SAMARITAN MARK DAGESH */ - { 0x0081B, 0x00823 }, /* SAMARITAN MARK EPENTHETIC YUT - SAMARITAN VOWEL SIGN A */ - { 0x00825, 0x00827 }, /* SAMARITAN VOWEL SIGN SHORT A - SAMARITAN VOWEL SIGN U */ - { 0x00829, 0x0082D }, /* SAMARITAN VOWEL SIGN LONG I - SAMARITAN MARK NEQUDAA */ - { 0x00859, 0x0085B }, /* MANDAIC AFFRICATION MARK - MANDAIC GEMINATION MARK */ - { 0x00890, 0x00891 }, /* ARABIC POUND MARK ABOVE - ARABIC PIASTRE MARK ABOVE */ - { 0x00897, 0x0089F }, /* ARABIC PEPET - ARABIC HALF MADDA OVER MADDA */ - { 0x008CA, 0x00903 }, /* ARABIC SMALL HIGH FARSI YEH - DEVANAGARI SIGN VISARGA */ - { 0x0093A, 0x0093C }, /* DEVANAGARI VOWEL SIGN OE - DEVANAGARI SIGN NUKTA */ - { 0x0093E, 0x0094F }, /* DEVANAGARI VOWEL SIGN AA - DEVANAGARI VOWEL SIGN AW */ - { 0x00951, 0x00957 }, /* DEVANAGARI STRESS SIGN UDATTA - DEVANAGARI VOWEL SIGN UUE */ - { 0x00962, 0x00963 }, /* DEVANAGARI VOWEL SIGN VOCALIC L - DEVANAGARI VOWEL SIGN VOCALIC LL */ - { 0x00981, 0x00983 }, /* BENGALI SIGN CANDRABINDU - BENGALI SIGN VISARGA */ - { 0x009BC, 0x009BC }, /* BENGALI SIGN NUKTA */ - { 0x009BE, 0x009C4 }, /* BENGALI VOWEL SIGN AA - BENGALI VOWEL SIGN VOCALIC RR */ - { 0x009C7, 0x009C8 }, /* BENGALI VOWEL SIGN E - BENGALI VOWEL SIGN AI */ - { 0x009CB, 0x009CD }, /* BENGALI VOWEL SIGN O - BENGALI SIGN VIRAMA */ - { 0x009D7, 0x009D7 }, /* BENGALI AU LENGTH MARK */ - { 0x009E2, 0x009E3 }, /* BENGALI VOWEL SIGN VOCALIC L - BENGALI VOWEL SIGN VOCALIC LL */ - { 0x009FE, 
0x009FE }, /* BENGALI SANDHI MARK */ - { 0x00A01, 0x00A03 }, /* GURMUKHI SIGN ADAK BINDI - GURMUKHI SIGN VISARGA */ - { 0x00A3C, 0x00A3C }, /* GURMUKHI SIGN NUKTA */ - { 0x00A3E, 0x00A42 }, /* GURMUKHI VOWEL SIGN AA - GURMUKHI VOWEL SIGN UU */ - { 0x00A47, 0x00A48 }, /* GURMUKHI VOWEL SIGN EE - GURMUKHI VOWEL SIGN AI */ - { 0x00A4B, 0x00A4D }, /* GURMUKHI VOWEL SIGN OO - GURMUKHI SIGN VIRAMA */ - { 0x00A51, 0x00A51 }, /* GURMUKHI SIGN UDAAT */ - { 0x00A70, 0x00A71 }, /* GURMUKHI TIPPI - GURMUKHI ADDAK */ - { 0x00A75, 0x00A75 }, /* GURMUKHI SIGN YAKASH */ - { 0x00A81, 0x00A83 }, /* GUJARATI SIGN CANDRABINDU - GUJARATI SIGN VISARGA */ - { 0x00ABC, 0x00ABC }, /* GUJARATI SIGN NUKTA */ - { 0x00ABE, 0x00AC5 }, /* GUJARATI VOWEL SIGN AA - GUJARATI VOWEL SIGN CANDRA E */ - { 0x00AC7, 0x00AC9 }, /* GUJARATI VOWEL SIGN E - GUJARATI VOWEL SIGN CANDRA O */ - { 0x00ACB, 0x00ACD }, /* GUJARATI VOWEL SIGN O - GUJARATI SIGN VIRAMA */ - { 0x00AE2, 0x00AE3 }, /* GUJARATI VOWEL SIGN VOCALIC L - GUJARATI VOWEL SIGN VOCALIC LL */ - { 0x00AFA, 0x00AFF }, /* GUJARATI SIGN SUKUN - GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE */ - { 0x00B01, 0x00B03 }, /* ORIYA SIGN CANDRABINDU - ORIYA SIGN VISARGA */ - { 0x00B3C, 0x00B3C }, /* ORIYA SIGN NUKTA */ - { 0x00B3E, 0x00B44 }, /* ORIYA VOWEL SIGN AA - ORIYA VOWEL SIGN VOCALIC RR */ - { 0x00B47, 0x00B48 }, /* ORIYA VOWEL SIGN E - ORIYA VOWEL SIGN AI */ - { 0x00B4B, 0x00B4D }, /* ORIYA VOWEL SIGN O - ORIYA SIGN VIRAMA */ - { 0x00B55, 0x00B57 }, /* ORIYA SIGN OVERLINE - ORIYA AU LENGTH MARK */ - { 0x00B62, 0x00B63 }, /* ORIYA VOWEL SIGN VOCALIC L - ORIYA VOWEL SIGN VOCALIC LL */ - { 0x00B82, 0x00B82 }, /* TAMIL SIGN ANUSVARA */ - { 0x00BBE, 0x00BC2 }, /* TAMIL VOWEL SIGN AA - TAMIL VOWEL SIGN UU */ - { 0x00BC6, 0x00BC8 }, /* TAMIL VOWEL SIGN E - TAMIL VOWEL SIGN AI */ - { 0x00BCA, 0x00BCD }, /* TAMIL VOWEL SIGN O - TAMIL SIGN VIRAMA */ - { 0x00BD7, 0x00BD7 }, /* TAMIL AU LENGTH MARK */ - { 0x00C00, 0x00C04 }, /* TELUGU SIGN COMBINING CANDRABINDU ABOVE - 
TELUGU SIGN COMBINING ANUSVARA ABOVE */ - { 0x00C3C, 0x00C3C }, /* TELUGU SIGN NUKTA */ - { 0x00C3E, 0x00C44 }, /* TELUGU VOWEL SIGN AA - TELUGU VOWEL SIGN VOCALIC RR */ - { 0x00C46, 0x00C48 }, /* TELUGU VOWEL SIGN E - TELUGU VOWEL SIGN AI */ - { 0x00C4A, 0x00C4D }, /* TELUGU VOWEL SIGN O - TELUGU SIGN VIRAMA */ - { 0x00C55, 0x00C56 }, /* TELUGU LENGTH MARK - TELUGU AI LENGTH MARK */ - { 0x00C62, 0x00C63 }, /* TELUGU VOWEL SIGN VOCALIC L - TELUGU VOWEL SIGN VOCALIC LL */ - { 0x00C81, 0x00C83 }, /* KANNADA SIGN CANDRABINDU - KANNADA SIGN VISARGA */ - { 0x00CBC, 0x00CBC }, /* KANNADA SIGN NUKTA */ - { 0x00CBE, 0x00CC4 }, /* KANNADA VOWEL SIGN AA - KANNADA VOWEL SIGN VOCALIC RR */ - { 0x00CC6, 0x00CC8 }, /* KANNADA VOWEL SIGN E - KANNADA VOWEL SIGN AI */ - { 0x00CCA, 0x00CCD }, /* KANNADA VOWEL SIGN O - KANNADA SIGN VIRAMA */ - { 0x00CD5, 0x00CD6 }, /* KANNADA LENGTH MARK - KANNADA AI LENGTH MARK */ - { 0x00CE2, 0x00CE3 }, /* KANNADA VOWEL SIGN VOCALIC L - KANNADA VOWEL SIGN VOCALIC LL */ - { 0x00CF3, 0x00CF3 }, /* KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT */ - { 0x00D00, 0x00D03 }, /* MALAYALAM SIGN COMBINING ANUSVARA ABOVE - MALAYALAM SIGN VISARGA */ - { 0x00D3B, 0x00D3C }, /* MALAYALAM SIGN VERTICAL BAR VIRAMA - MALAYALAM SIGN CIRCULAR VIRAMA */ - { 0x00D3E, 0x00D44 }, /* MALAYALAM VOWEL SIGN AA - MALAYALAM VOWEL SIGN VOCALIC RR */ - { 0x00D46, 0x00D48 }, /* MALAYALAM VOWEL SIGN E - MALAYALAM VOWEL SIGN AI */ - { 0x00D4A, 0x00D4D }, /* MALAYALAM VOWEL SIGN O - MALAYALAM SIGN VIRAMA */ - { 0x00D57, 0x00D57 }, /* MALAYALAM AU LENGTH MARK */ - { 0x00D62, 0x00D63 }, /* MALAYALAM VOWEL SIGN VOCALIC L - MALAYALAM VOWEL SIGN VOCALIC LL */ - { 0x00D81, 0x00D83 }, /* SINHALA SIGN CANDRABINDU - SINHALA SIGN VISARGAYA */ - { 0x00DCA, 0x00DCA }, /* SINHALA SIGN AL-LAKUNA */ - { 0x00DCF, 0x00DD4 }, /* SINHALA VOWEL SIGN AELA-PILLA - SINHALA VOWEL SIGN KETTI PAA-PILLA */ - { 0x00DD6, 0x00DD6 }, /* SINHALA VOWEL SIGN DIGA PAA-PILLA */ - { 0x00DD8, 0x00DDF }, /* SINHALA VOWEL 
SIGN GAETTA-PILLA - SINHALA VOWEL SIGN GAYANUKITTA */ - { 0x00DF2, 0x00DF3 }, /* SINHALA VOWEL SIGN DIGA GAETTA-PILLA - SINHALA VOWEL SIGN DIGA GAYANUKITTA */ - { 0x00E31, 0x00E31 }, /* THAI CHARACTER MAI HAN-AKAT */ - { 0x00E34, 0x00E3A }, /* THAI CHARACTER SARA I - THAI CHARACTER PHINTHU */ - { 0x00E47, 0x00E4E }, /* THAI CHARACTER MAITAIKHU - THAI CHARACTER YAMAKKAN */ - { 0x00EB1, 0x00EB1 }, /* LAO VOWEL SIGN MAI KAN */ - { 0x00EB4, 0x00EBC }, /* LAO VOWEL SIGN I - LAO SEMIVOWEL SIGN LO */ - { 0x00EC8, 0x00ECE }, /* LAO TONE MAI EK - LAO YAMAKKAN */ - { 0x00F18, 0x00F19 }, /* TIBETAN ASTROLOGICAL SIGN -KHYUD PA - TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS */ - { 0x00F35, 0x00F35 }, /* TIBETAN MARK NGAS BZUNG NYI ZLA */ - { 0x00F37, 0x00F37 }, /* TIBETAN MARK NGAS BZUNG SGOR RTAGS */ - { 0x00F39, 0x00F39 }, /* TIBETAN MARK TSA -PHRU */ - { 0x00F3E, 0x00F3F }, /* TIBETAN SIGN YAR TSHES - TIBETAN SIGN MAR TSHES */ - { 0x00F71, 0x00F84 }, /* TIBETAN VOWEL SIGN AA - TIBETAN MARK HALANTA */ - { 0x00F86, 0x00F87 }, /* TIBETAN SIGN LCI RTAGS - TIBETAN SIGN YANG RTAGS */ - { 0x00F8D, 0x00F97 }, /* TIBETAN SUBJOINED SIGN LCE TSA CAN - TIBETAN SUBJOINED LETTER JA */ - { 0x00F99, 0x00FBC }, /* TIBETAN SUBJOINED LETTER NYA - TIBETAN SUBJOINED LETTER FIXED-FORM RA */ - { 0x00FC6, 0x00FC6 }, /* TIBETAN SYMBOL PADMA GDAN */ - { 0x0102B, 0x0103E }, /* MYANMAR VOWEL SIGN TALL AA - MYANMAR CONSONANT SIGN MEDIAL HA */ - { 0x01056, 0x01059 }, /* MYANMAR VOWEL SIGN VOCALIC R - MYANMAR VOWEL SIGN VOCALIC LL */ - { 0x0105E, 0x01060 }, /* MYANMAR CONSONANT SIGN MON MEDIAL NA - MYANMAR CONSONANT SIGN MON MEDIAL LA */ - { 0x01062, 0x01064 }, /* MYANMAR VOWEL SIGN SGAW KAREN EU - MYANMAR TONE MARK SGAW KAREN KE PHO */ - { 0x01067, 0x0106D }, /* MYANMAR VOWEL SIGN WESTERN PWO KAREN EU - MYANMAR SIGN WESTERN PWO KAREN TONE-5 */ - { 0x01071, 0x01074 }, /* MYANMAR VOWEL SIGN GEBA KAREN I - MYANMAR VOWEL SIGN KAYAH EE */ - { 0x01082, 0x0108D }, /* MYANMAR CONSONANT SIGN SHAN MEDIAL WA - MYANMAR 
SIGN SHAN COUNCIL EMPHATIC TONE */ - { 0x0108F, 0x0108F }, /* MYANMAR SIGN RUMAI PALAUNG TONE-5 */ - { 0x0109A, 0x0109D }, /* MYANMAR SIGN KHAMTI TONE-1 - MYANMAR VOWEL SIGN AITON AI */ - { 0x0135D, 0x0135F }, /* ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK - ETHIOPIC COMBINING GEMINATION MARK */ - { 0x01712, 0x01715 }, /* TAGALOG VOWEL SIGN I - TAGALOG SIGN PAMUDPOD */ - { 0x01732, 0x01734 }, /* HANUNOO VOWEL SIGN I - HANUNOO SIGN PAMUDPOD */ - { 0x01752, 0x01753 }, /* BUHID VOWEL SIGN I - BUHID VOWEL SIGN U */ - { 0x01772, 0x01773 }, /* TAGBANWA VOWEL SIGN I - TAGBANWA VOWEL SIGN U */ - { 0x017B4, 0x017D3 }, /* KHMER VOWEL INHERENT AQ - KHMER SIGN BATHAMASAT */ - { 0x017DD, 0x017DD }, /* KHMER SIGN ATTHACAN */ - { 0x0180B, 0x0180D }, /* MONGOLIAN FREE VARIATION SELECTOR ONE - MONGOLIAN FREE VARIATION SELECTOR THREE */ - { 0x0180F, 0x0180F }, /* MONGOLIAN FREE VARIATION SELECTOR FOUR */ - { 0x01885, 0x01886 }, /* MONGOLIAN LETTER ALI GALI BALUDA - MONGOLIAN LETTER ALI GALI THREE BALUDA */ - { 0x018A9, 0x018A9 }, /* MONGOLIAN LETTER ALI GALI DAGALGA */ - { 0x01920, 0x0192B }, /* LIMBU VOWEL SIGN A - LIMBU SUBJOINED LETTER WA */ - { 0x01930, 0x0193B }, /* LIMBU SMALL LETTER KA - LIMBU SIGN SA-I */ - { 0x01A17, 0x01A1B }, /* BUGINESE VOWEL SIGN I - BUGINESE VOWEL SIGN AE */ - { 0x01A55, 0x01A5E }, /* TAI THAM CONSONANT SIGN MEDIAL RA - TAI THAM CONSONANT SIGN SA */ - { 0x01A60, 0x01A7C }, /* TAI THAM SIGN SAKOT - TAI THAM SIGN KHUEN-LUE KARAN */ - { 0x01A7F, 0x01A7F }, /* TAI THAM COMBINING CRYPTOGRAMMIC DOT */ - { 0x01AB0, 0x01ACE }, /* COMBINING DOUBLED CIRCUMFLEX ACCENT - COMBINING LATIN SMALL LETTER INSULAR T */ - { 0x01B00, 0x01B04 }, /* BALINESE SIGN ULU RICEM - BALINESE SIGN BISAH */ - { 0x01B34, 0x01B44 }, /* BALINESE SIGN REREKAN - BALINESE ADEG ADEG */ - { 0x01B6B, 0x01B73 }, /* BALINESE MUSICAL SYMBOL COMBINING TEGEH - BALINESE MUSICAL SYMBOL COMBINING GONG */ - { 0x01B80, 0x01B82 }, /* SUNDANESE SIGN PANYECEK - SUNDANESE SIGN PANGWISAD */ - { 
0x01BA1, 0x01BAD }, /* SUNDANESE CONSONANT SIGN PAMINGKAL - SUNDANESE CONSONANT SIGN PASANGAN WA */ - { 0x01BE6, 0x01BF3 }, /* BATAK SIGN TOMPI - BATAK PANONGONAN */ - { 0x01C24, 0x01C37 }, /* LEPCHA SUBJOINED LETTER YA - LEPCHA SIGN NUKTA */ - { 0x01CD0, 0x01CD2 }, /* VEDIC TONE KARSHANA - VEDIC TONE PRENKHA */ - { 0x01CD4, 0x01CE8 }, /* VEDIC SIGN YAJURVEDIC MIDLINE SVARITA - VEDIC SIGN VISARGA ANUDATTA WITH TAIL */ - { 0x01CED, 0x01CED }, /* VEDIC SIGN TIRYAK */ - { 0x01CF4, 0x01CF4 }, /* VEDIC TONE CANDRA ABOVE */ - { 0x01CF7, 0x01CF9 }, /* VEDIC SIGN ATIKRAMA - VEDIC TONE DOUBLE RING ABOVE */ - { 0x01DC0, 0x01DFF }, /* COMBINING DOTTED GRAVE ACCENT - COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW */ - { 0x0200B, 0x0200E }, /* ZERO WIDTH SPACE - LEFT-TO-RIGHT MARK */ - { 0x0202A, 0x0202D }, /* LEFT-TO-RIGHT EMBEDDING - LEFT-TO-RIGHT OVERRIDE */ - { 0x02060, 0x02064 }, /* WORD JOINER - INVISIBLE PLUS */ - { 0x0206A, 0x0206F }, /* INHIBIT SYMMETRIC SWAPPING - NOMINAL DIGIT SHAPES */ - { 0x020D0, 0x020F0 }, /* COMBINING LEFT HARPOON ABOVE - COMBINING ASTERISK ABOVE */ - { 0x02640, 0x02640 }, /* FEMALE SIGN */ - { 0x02642, 0x02642 }, /* MALE SIGN */ - { 0x026A7, 0x026A7 }, /* MALE WITH STROKE AND MALE AND FEMALE SIGN */ - { 0x02CEF, 0x02CF1 }, /* COPTIC COMBINING NI ABOVE - COPTIC COMBINING SPIRITUS LENIS */ - { 0x02D7F, 0x02D7F }, /* TIFINAGH CONSONANT JOINER */ - { 0x02DE0, 0x02DFF }, /* COMBINING CYRILLIC LETTER BE - COMBINING CYRILLIC LETTER IOTIFIED BIG YUS */ - { 0x0302A, 0x0302F }, /* IDEOGRAPHIC LEVEL TONE MARK - HANGUL DOUBLE DOT TONE MARK */ - { 0x03099, 0x0309A }, /* COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK - COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */ - { 0x0A66F, 0x0A672 }, /* COMBINING CYRILLIC VZMET - COMBINING CYRILLIC THOUSAND MILLIONS SIGN */ - { 0x0A674, 0x0A67D }, /* COMBINING CYRILLIC LETTER UKRAINIAN IE - COMBINING CYRILLIC PAYEROK */ - { 0x0A69E, 0x0A69F }, /* COMBINING CYRILLIC LETTER EF - COMBINING CYRILLIC LETTER 
IOTIFIED E */ - { 0x0A6F0, 0x0A6F1 }, /* BAMUM COMBINING MARK KOQNDON - BAMUM COMBINING MARK TUKWENTIS */ - { 0x0A802, 0x0A802 }, /* SYLOTI NAGRI SIGN DVISVARA */ - { 0x0A806, 0x0A806 }, /* SYLOTI NAGRI SIGN HASANTA */ - { 0x0A80B, 0x0A80B }, /* SYLOTI NAGRI SIGN ANUSVARA */ - { 0x0A823, 0x0A827 }, /* SYLOTI NAGRI VOWEL SIGN A - SYLOTI NAGRI VOWEL SIGN OO */ - { 0x0A82C, 0x0A82C }, /* SYLOTI NAGRI SIGN ALTERNATE HASANTA */ - { 0x0A880, 0x0A881 }, /* SAURASHTRA SIGN ANUSVARA - SAURASHTRA SIGN VISARGA */ - { 0x0A8B4, 0x0A8C5 }, /* SAURASHTRA CONSONANT SIGN HAARU - SAURASHTRA SIGN CANDRABINDU */ - { 0x0A8E0, 0x0A8F1 }, /* COMBINING DEVANAGARI DIGIT ZERO - COMBINING DEVANAGARI SIGN AVAGRAHA */ - { 0x0A8FF, 0x0A8FF }, /* DEVANAGARI VOWEL SIGN AY */ - { 0x0A926, 0x0A92D }, /* KAYAH LI VOWEL UE - KAYAH LI TONE CALYA PLOPHU */ - { 0x0A947, 0x0A953 }, /* REJANG VOWEL SIGN I - REJANG VIRAMA */ - { 0x0A980, 0x0A983 }, /* JAVANESE SIGN PANYANGGA - JAVANESE SIGN WIGNYAN */ - { 0x0A9B3, 0x0A9C0 }, /* JAVANESE SIGN CECAK TELU - JAVANESE PANGKON */ - { 0x0A9E5, 0x0A9E5 }, /* MYANMAR SIGN SHAN SAW */ - { 0x0AA29, 0x0AA36 }, /* CHAM VOWEL SIGN AA - CHAM CONSONANT SIGN WA */ - { 0x0AA43, 0x0AA43 }, /* CHAM CONSONANT SIGN FINAL NG */ - { 0x0AA4C, 0x0AA4D }, /* CHAM CONSONANT SIGN FINAL M - CHAM CONSONANT SIGN FINAL H */ - { 0x0AA7B, 0x0AA7D }, /* MYANMAR SIGN PAO KAREN TONE - MYANMAR SIGN TAI LAING TONE-5 */ - { 0x0AAB0, 0x0AAB0 }, /* TAI VIET MAI KANG */ - { 0x0AAB2, 0x0AAB4 }, /* TAI VIET VOWEL I - TAI VIET VOWEL U */ - { 0x0AAB7, 0x0AAB8 }, /* TAI VIET MAI KHIT - TAI VIET VOWEL IA */ - { 0x0AABE, 0x0AABF }, /* TAI VIET VOWEL AM - TAI VIET TONE MAI EK */ - { 0x0AAC1, 0x0AAC1 }, /* TAI VIET TONE MAI THO */ - { 0x0AAEB, 0x0AAEF }, /* MEETEI MAYEK VOWEL SIGN II - MEETEI MAYEK VOWEL SIGN AAU */ - { 0x0AAF5, 0x0AAF6 }, /* MEETEI MAYEK VOWEL SIGN VISARGA - MEETEI MAYEK VIRAMA */ - { 0x0ABE3, 0x0ABEA }, /* MEETEI MAYEK VOWEL SIGN ONAP - MEETEI MAYEK VOWEL SIGN NUNG */ - { 0x0ABEC, 0x0ABED 
}, /* MEETEI MAYEK LUM IYEK - MEETEI MAYEK APUN IYEK */ - { 0x0FB1E, 0x0FB1E }, /* HEBREW POINT JUDEO-SPANISH VARIKA */ - { 0x0FE00, 0x0FE0F }, /* VARIATION SELECTOR-1 - VARIATION SELECTOR-16 */ - { 0x0FE20, 0x0FE2F }, /* COMBINING LIGATURE LEFT HALF - COMBINING CYRILLIC TITLO RIGHT HALF */ - { 0x0FEFF, 0x0FEFF }, /* ZERO WIDTH NO-BREAK SPACE */ - { 0x0FFF9, 0x0FFFB }, /* INTERLINEAR ANNOTATION ANCHOR - INTERLINEAR ANNOTATION TERMINATOR */ - { 0x101FD, 0x101FD }, /* U+101FD */ - { 0x102E0, 0x102E0 }, /* U+102E0 */ - { 0x10376, 0x1037A }, /* U+10376 - U+1037A */ - { 0x10A01, 0x10A03 }, /* U+10A01 - U+10A03 */ - { 0x10A05, 0x10A06 }, /* U+10A05 - U+10A06 */ - { 0x10A0C, 0x10A0F }, /* U+10A0C - U+10A0F */ - { 0x10A38, 0x10A3A }, /* U+10A38 - U+10A3A */ - { 0x10A3F, 0x10A3F }, /* U+10A3F */ - { 0x10AE5, 0x10AE6 }, /* U+10AE5 - U+10AE6 */ - { 0x10D24, 0x10D27 }, /* U+10D24 - U+10D27 */ - { 0x10D69, 0x10D6D }, /* U+10D69 - U+10D6D */ - { 0x10EAB, 0x10EAC }, /* U+10EAB - U+10EAC */ - { 0x10EFC, 0x10EFF }, /* U+10EFC - U+10EFF */ - { 0x10F46, 0x10F50 }, /* U+10F46 - U+10F50 */ - { 0x10F82, 0x10F85 }, /* U+10F82 - U+10F85 */ - { 0x11000, 0x11002 }, /* U+11000 - U+11002 */ - { 0x11038, 0x11046 }, /* U+11038 - U+11046 */ - { 0x11070, 0x11070 }, /* U+11070 */ - { 0x11073, 0x11074 }, /* U+11073 - U+11074 */ - { 0x1107F, 0x11082 }, /* U+1107F - U+11082 */ - { 0x110B0, 0x110BA }, /* U+110B0 - U+110BA */ - { 0x110BD, 0x110BD }, /* U+110BD */ - { 0x110C2, 0x110C2 }, /* U+110C2 */ - { 0x110CD, 0x110CD }, /* U+110CD */ - { 0x11100, 0x11102 }, /* U+11100 - U+11102 */ - { 0x11127, 0x11134 }, /* U+11127 - U+11134 */ - { 0x11145, 0x11146 }, /* U+11145 - U+11146 */ - { 0x11173, 0x11173 }, /* U+11173 */ - { 0x11180, 0x11182 }, /* U+11180 - U+11182 */ - { 0x111B3, 0x111C0 }, /* U+111B3 - U+111C0 */ - { 0x111C9, 0x111CC }, /* U+111C9 - U+111CC */ - { 0x111CE, 0x111CF }, /* U+111CE - U+111CF */ - { 0x1122C, 0x11237 }, /* U+1122C - U+11237 */ - { 0x1123E, 0x1123E }, /* U+1123E */ - { 0x11241, 
0x11241 }, /* U+11241 */ - { 0x112DF, 0x112EA }, /* U+112DF - U+112EA */ - { 0x11300, 0x11303 }, /* U+11300 - U+11303 */ - { 0x1133B, 0x1133C }, /* U+1133B - U+1133C */ - { 0x1133E, 0x11344 }, /* U+1133E - U+11344 */ - { 0x11347, 0x11348 }, /* U+11347 - U+11348 */ - { 0x1134B, 0x1134D }, /* U+1134B - U+1134D */ - { 0x11357, 0x11357 }, /* U+11357 */ - { 0x11362, 0x11363 }, /* U+11362 - U+11363 */ - { 0x11366, 0x1136C }, /* U+11366 - U+1136C */ - { 0x11370, 0x11374 }, /* U+11370 - U+11374 */ - { 0x113B8, 0x113C0 }, /* U+113B8 - U+113C0 */ - { 0x113C2, 0x113C2 }, /* U+113C2 */ - { 0x113C5, 0x113C5 }, /* U+113C5 */ - { 0x113C7, 0x113CA }, /* U+113C7 - U+113CA */ - { 0x113CC, 0x113D0 }, /* U+113CC - U+113D0 */ - { 0x113D2, 0x113D2 }, /* U+113D2 */ - { 0x113E1, 0x113E2 }, /* U+113E1 - U+113E2 */ - { 0x11435, 0x11446 }, /* U+11435 - U+11446 */ - { 0x1145E, 0x1145E }, /* U+1145E */ - { 0x114B0, 0x114C3 }, /* U+114B0 - U+114C3 */ - { 0x115AF, 0x115B5 }, /* U+115AF - U+115B5 */ - { 0x115B8, 0x115C0 }, /* U+115B8 - U+115C0 */ - { 0x115DC, 0x115DD }, /* U+115DC - U+115DD */ - { 0x11630, 0x11640 }, /* U+11630 - U+11640 */ - { 0x116AB, 0x116B7 }, /* U+116AB - U+116B7 */ - { 0x1171D, 0x1172B }, /* U+1171D - U+1172B */ - { 0x1182C, 0x1183A }, /* U+1182C - U+1183A */ - { 0x11930, 0x11935 }, /* U+11930 - U+11935 */ - { 0x11937, 0x11938 }, /* U+11937 - U+11938 */ - { 0x1193B, 0x1193E }, /* U+1193B - U+1193E */ - { 0x11940, 0x11940 }, /* U+11940 */ - { 0x11942, 0x11943 }, /* U+11942 - U+11943 */ - { 0x119D1, 0x119D7 }, /* U+119D1 - U+119D7 */ - { 0x119DA, 0x119E0 }, /* U+119DA - U+119E0 */ - { 0x119E4, 0x119E4 }, /* U+119E4 */ - { 0x11A01, 0x11A0A }, /* U+11A01 - U+11A0A */ - { 0x11A33, 0x11A39 }, /* U+11A33 - U+11A39 */ - { 0x11A3B, 0x11A3E }, /* U+11A3B - U+11A3E */ - { 0x11A47, 0x11A47 }, /* U+11A47 */ - { 0x11A51, 0x11A5B }, /* U+11A51 - U+11A5B */ - { 0x11A8A, 0x11A99 }, /* U+11A8A - U+11A99 */ - { 0x11C2F, 0x11C36 }, /* U+11C2F - U+11C36 */ - { 0x11C38, 0x11C3F }, /* U+11C38 - 
U+11C3F */ - { 0x11C92, 0x11CA7 }, /* U+11C92 - U+11CA7 */ - { 0x11CA9, 0x11CB6 }, /* U+11CA9 - U+11CB6 */ - { 0x11D31, 0x11D36 }, /* U+11D31 - U+11D36 */ - { 0x11D3A, 0x11D3A }, /* U+11D3A */ - { 0x11D3C, 0x11D3D }, /* U+11D3C - U+11D3D */ - { 0x11D3F, 0x11D45 }, /* U+11D3F - U+11D45 */ - { 0x11D47, 0x11D47 }, /* U+11D47 */ - { 0x11D8A, 0x11D8E }, /* U+11D8A - U+11D8E */ - { 0x11D90, 0x11D91 }, /* U+11D90 - U+11D91 */ - { 0x11D93, 0x11D97 }, /* U+11D93 - U+11D97 */ - { 0x11EF3, 0x11EF6 }, /* U+11EF3 - U+11EF6 */ - { 0x11F00, 0x11F01 }, /* U+11F00 - U+11F01 */ - { 0x11F03, 0x11F03 }, /* U+11F03 */ - { 0x11F34, 0x11F3A }, /* U+11F34 - U+11F3A */ - { 0x11F3E, 0x11F42 }, /* U+11F3E - U+11F42 */ - { 0x11F5A, 0x11F5A }, /* U+11F5A */ - { 0x13430, 0x13440 }, /* U+13430 - U+13440 */ - { 0x13447, 0x13455 }, /* U+13447 - U+13455 */ - { 0x1611E, 0x1612F }, /* U+1611E - U+1612F */ - { 0x16AF0, 0x16AF4 }, /* U+16AF0 - U+16AF4 */ - { 0x16B30, 0x16B36 }, /* U+16B30 - U+16B36 */ - { 0x16F4F, 0x16F4F }, /* U+16F4F */ - { 0x16F51, 0x16F87 }, /* U+16F51 - U+16F87 */ - { 0x16F8F, 0x16F92 }, /* U+16F8F - U+16F92 */ - { 0x16FE4, 0x16FE4 }, /* U+16FE4 */ - { 0x16FF0, 0x16FF1 }, /* U+16FF0 - U+16FF1 */ - { 0x1BC9D, 0x1BC9E }, /* U+1BC9D - U+1BC9E */ - { 0x1BCA0, 0x1BCA3 }, /* U+1BCA0 - U+1BCA3 */ - { 0x1CF00, 0x1CF2D }, /* U+1CF00 - U+1CF2D */ - { 0x1CF30, 0x1CF46 }, /* U+1CF30 - U+1CF46 */ - { 0x1D165, 0x1D169 }, /* U+1D165 - U+1D169 */ - { 0x1D16D, 0x1D182 }, /* U+1D16D - U+1D182 */ - { 0x1D185, 0x1D18B }, /* U+1D185 - U+1D18B */ - { 0x1D1AA, 0x1D1AD }, /* U+1D1AA - U+1D1AD */ - { 0x1D242, 0x1D244 }, /* U+1D242 - U+1D244 */ - { 0x1DA00, 0x1DA36 }, /* U+1DA00 - U+1DA36 */ - { 0x1DA3B, 0x1DA6C }, /* U+1DA3B - U+1DA6C */ - { 0x1DA75, 0x1DA75 }, /* U+1DA75 */ - { 0x1DA84, 0x1DA84 }, /* U+1DA84 */ - { 0x1DA9B, 0x1DA9F }, /* U+1DA9B - U+1DA9F */ - { 0x1DAA1, 0x1DAAF }, /* U+1DAA1 - U+1DAAF */ - { 0x1E000, 0x1E006 }, /* U+1E000 - U+1E006 */ - { 0x1E008, 0x1E018 }, /* U+1E008 - U+1E018 */ - { 
0x1E01B, 0x1E021 }, /* U+1E01B - U+1E021 */ - { 0x1E023, 0x1E024 }, /* U+1E023 - U+1E024 */ - { 0x1E026, 0x1E02A }, /* U+1E026 - U+1E02A */ - { 0x1E08F, 0x1E08F }, /* U+1E08F */ - { 0x1E130, 0x1E136 }, /* U+1E130 - U+1E136 */ - { 0x1E2AE, 0x1E2AE }, /* U+1E2AE */ - { 0x1E2EC, 0x1E2EF }, /* U+1E2EC - U+1E2EF */ - { 0x1E4EC, 0x1E4EF }, /* U+1E4EC - U+1E4EF */ - { 0x1E5EE, 0x1E5EF }, /* U+1E5EE - U+1E5EF */ - { 0x1E8D0, 0x1E8D6 }, /* U+1E8D0 - U+1E8D6 */ - { 0x1E944, 0x1E94A }, /* U+1E944 - U+1E94A */ - { 0x1F3FB, 0x1F3FF }, /* U+1F3FB - U+1F3FF */ - { 0x1F9B0, 0x1F9B3 }, /* U+1F9B0 - U+1F9B3 */ - { 0xE0001, 0xE0001 }, /* U+E0001 */ - { 0xE0020, 0xE007F }, /* U+E0020 - U+E007F */ - { 0xE0100, 0xE01EF }, /* U+E0100 - U+E01EF */ -}; - -/* Double-width character ranges */ -static const struct interval double_width_ranges[] = { - { 0x01100, 0x0115F }, /* HANGUL CHOSEONG KIYEOK - HANGUL CHOSEONG FILLER */ - { 0x0231A, 0x0231B }, /* WATCH - HOURGLASS */ - { 0x02329, 0x0232A }, /* LEFT-POINTING ANGLE BRACKET - RIGHT-POINTING ANGLE BRACKET */ - { 0x023E9, 0x023EC }, /* BLACK RIGHT-POINTING DOUBLE TRIANGLE - BLACK DOWN-POINTING DOUBLE TRIANGLE */ - { 0x023F0, 0x023F0 }, /* ALARM CLOCK */ - { 0x023F3, 0x023F3 }, /* HOURGLASS WITH FLOWING SAND */ - { 0x025FD, 0x025FE }, /* WHITE MEDIUM SMALL SQUARE - BLACK MEDIUM SMALL SQUARE */ - { 0x02614, 0x02615 }, /* UMBRELLA WITH RAIN DROPS - HOT BEVERAGE */ - { 0x02630, 0x02637 }, /* TRIGRAM FOR HEAVEN - TRIGRAM FOR EARTH */ - { 0x02648, 0x02653 }, /* ARIES - PISCES */ - { 0x0267F, 0x0267F }, /* WHEELCHAIR SYMBOL */ - { 0x0268A, 0x0268F }, /* MONOGRAM FOR YANG - DIGRAM FOR GREATER YIN */ - { 0x02693, 0x02693 }, /* ANCHOR */ - { 0x026A1, 0x026A1 }, /* HIGH VOLTAGE SIGN */ - { 0x026AA, 0x026AB }, /* MEDIUM WHITE CIRCLE - MEDIUM BLACK CIRCLE */ - { 0x026BD, 0x026BE }, /* SOCCER BALL - BASEBALL */ - { 0x026C4, 0x026C5 }, /* SNOWMAN WITHOUT SNOW - SUN BEHIND CLOUD */ - { 0x026CE, 0x026CE }, /* OPHIUCHUS */ - { 0x026D4, 0x026D4 }, /* NO ENTRY 
*/ - { 0x026EA, 0x026EA }, /* CHURCH */ - { 0x026F2, 0x026F3 }, /* FOUNTAIN - FLAG IN HOLE */ - { 0x026F5, 0x026F5 }, /* SAILBOAT */ - { 0x026FA, 0x026FA }, /* TENT */ - { 0x026FD, 0x026FD }, /* FUEL PUMP */ - { 0x02705, 0x02705 }, /* WHITE HEAVY CHECK MARK */ - { 0x0270A, 0x0270B }, /* RAISED FIST - RAISED HAND */ - { 0x02728, 0x02728 }, /* SPARKLES */ - { 0x0274C, 0x0274C }, /* CROSS MARK */ - { 0x0274E, 0x0274E }, /* NEGATIVE SQUARED CROSS MARK */ - { 0x02753, 0x02755 }, /* BLACK QUESTION MARK ORNAMENT - WHITE EXCLAMATION MARK ORNAMENT */ - { 0x02757, 0x02757 }, /* HEAVY EXCLAMATION MARK SYMBOL */ - { 0x02795, 0x02797 }, /* HEAVY PLUS SIGN - HEAVY DIVISION SIGN */ - { 0x027B0, 0x027B0 }, /* CURLY LOOP */ - { 0x027BF, 0x027BF }, /* DOUBLE CURLY LOOP */ - { 0x02B1B, 0x02B1C }, /* BLACK LARGE SQUARE - WHITE LARGE SQUARE */ - { 0x02B50, 0x02B50 }, /* WHITE MEDIUM STAR */ - { 0x02B55, 0x02B55 }, /* HEAVY LARGE CIRCLE */ - { 0x02E80, 0x02E99 }, /* CJK RADICAL REPEAT - CJK RADICAL RAP */ - { 0x02E9B, 0x02EF3 }, /* CJK RADICAL CHOKE - CJK RADICAL C-SIMPLIFIED TURTLE */ - { 0x02F00, 0x02FD5 }, /* KANGXI RADICAL ONE - KANGXI RADICAL FLUTE */ - { 0x02FF0, 0x03029 }, /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT - HANGZHOU NUMERAL NINE */ - { 0x03030, 0x0303E }, /* WAVY DASH - IDEOGRAPHIC VARIATION INDICATOR */ - { 0x03041, 0x03096 }, /* HIRAGANA LETTER SMALL A - HIRAGANA LETTER SMALL KE */ - { 0x0309B, 0x030FF }, /* KATAKANA-HIRAGANA VOICED SOUND MARK - KATAKANA DIGRAPH KOTO */ - { 0x03105, 0x0312F }, /* BOPOMOFO LETTER B - BOPOMOFO LETTER NN */ - { 0x03131, 0x0318E }, /* HANGUL LETTER KIYEOK - HANGUL LETTER ARAEAE */ - { 0x03190, 0x031E5 }, /* IDEOGRAPHIC ANNOTATION LINKING MARK - CJK STROKE SZP */ - { 0x031EF, 0x0321E }, /* IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION - PARENTHESIZED KOREAN CHARACTER O HU */ - { 0x03220, 0x03247 }, /* PARENTHESIZED IDEOGRAPH ONE - CIRCLED IDEOGRAPH KOTO */ - { 0x03250, 0x0A48C }, /* PARTNERSHIP SIGN - YI SYLLABLE YYR */ - { 
0x0A490, 0x0A4C6 }, /* YI RADICAL QOT - YI RADICAL KE */ - { 0x0A960, 0x0A97C }, /* HANGUL CHOSEONG TIKEUT-MIEUM - HANGUL CHOSEONG SSANGYEORINHIEUH */ - { 0x0AC00, 0x0D7A3 }, /* HANGUL SYLLABLE GA - HANGUL SYLLABLE HIH */ - { 0x0F900, 0x0FAFF }, /* U+0F900 - U+0FAFF */ - { 0x0FE10, 0x0FE19 }, /* PRESENTATION FORM FOR VERTICAL COMMA - PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS */ - { 0x0FE30, 0x0FE52 }, /* PRESENTATION FORM FOR VERTICAL TWO DOT LEADER - SMALL FULL STOP */ - { 0x0FE54, 0x0FE66 }, /* SMALL SEMICOLON - SMALL EQUALS SIGN */ - { 0x0FE68, 0x0FE6B }, /* SMALL REVERSE SOLIDUS - SMALL COMMERCIAL AT */ - { 0x0FF01, 0x0FF60 }, /* FULLWIDTH EXCLAMATION MARK - FULLWIDTH RIGHT WHITE PARENTHESIS */ - { 0x0FFE0, 0x0FFE6 }, /* FULLWIDTH CENT SIGN - FULLWIDTH WON SIGN */ - { 0x16FE0, 0x16FE3 }, /* U+16FE0 - U+16FE3 */ - { 0x17000, 0x187F7 }, /* U+17000 - U+187F7 */ - { 0x18800, 0x18CD5 }, /* U+18800 - U+18CD5 */ - { 0x18CFF, 0x18D08 }, /* U+18CFF - U+18D08 */ - { 0x1AFF0, 0x1AFF3 }, /* U+1AFF0 - U+1AFF3 */ - { 0x1AFF5, 0x1AFFB }, /* U+1AFF5 - U+1AFFB */ - { 0x1AFFD, 0x1AFFE }, /* U+1AFFD - U+1AFFE */ - { 0x1B000, 0x1B122 }, /* U+1B000 - U+1B122 */ - { 0x1B132, 0x1B132 }, /* U+1B132 */ - { 0x1B150, 0x1B152 }, /* U+1B150 - U+1B152 */ - { 0x1B155, 0x1B155 }, /* U+1B155 */ - { 0x1B164, 0x1B167 }, /* U+1B164 - U+1B167 */ - { 0x1B170, 0x1B2FB }, /* U+1B170 - U+1B2FB */ - { 0x1D300, 0x1D356 }, /* U+1D300 - U+1D356 */ - { 0x1D360, 0x1D376 }, /* U+1D360 - U+1D376 */ - { 0x1F000, 0x1F02F }, /* U+1F000 - U+1F02F */ - { 0x1F0A0, 0x1F0FF }, /* U+1F0A0 - U+1F0FF */ - { 0x1F18E, 0x1F18E }, /* U+1F18E */ - { 0x1F191, 0x1F19A }, /* U+1F191 - U+1F19A */ - { 0x1F200, 0x1F202 }, /* U+1F200 - U+1F202 */ - { 0x1F210, 0x1F23B }, /* U+1F210 - U+1F23B */ - { 0x1F240, 0x1F248 }, /* U+1F240 - U+1F248 */ - { 0x1F250, 0x1F251 }, /* U+1F250 - U+1F251 */ - { 0x1F260, 0x1F265 }, /* U+1F260 - U+1F265 */ - { 0x1F300, 0x1F3FA }, /* U+1F300 - U+1F3FA */ - { 0x1F400, 0x1F64F }, /* U+1F400 - 
U+1F64F */ - { 0x1F680, 0x1F9AF }, /* U+1F680 - U+1F9AF */ - { 0x1F9B4, 0x1FAFF }, /* U+1F9B4 - U+1FAFF */ - { 0x20000, 0x2FFFD }, /* U+20000 - U+2FFFD */ - { 0x30000, 0x3FFFD }, /* U+30000 - U+3FFFD */ -}; - - -static int ucs_cmp(const void *key, const void *element) +static int ucs_cmp(const void *key, const void *elt) { uint32_t cp = *(uint32_t *)key; - const struct interval *e = element; + struct interval e = *(struct interval *) elt; - if (cp > e->last) + if (cp > e.last) return 1; - if (cp < e->first) + else if (cp < e.first) return -1; return 0; } -static bool is_in_interval(uint32_t cp, const struct interval *intervals, size_t count) -{ - if (cp < intervals[0].first || cp > intervals[count - 1].last) - return false; +static const struct interval double_width[] = { + { 0x1100, 0x115F }, { 0x2329, 0x232A }, { 0x2E80, 0x303E }, + { 0x3040, 0xA4CF }, { 0xAC00, 0xD7A3 }, { 0xF900, 0xFAFF }, + { 0xFE10, 0xFE19 }, { 0xFE30, 0xFE6F }, { 0xFF00, 0xFF60 }, + { 0xFFE0, 0xFFE6 }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD } +}; - return __inline_bsearch(&cp, intervals, count, - sizeof(*intervals), ucs_cmp) != NULL; -} - -/** - * Determine if a Unicode code point is zero-width. - * - * @param ucs: Unicode code point (UCS-4) - * Return: true if the character is zero-width, false otherwise - */ -bool ucs_is_zero_width(uint32_t cp) -{ - return is_in_interval(cp, zero_width_ranges, ARRAY_SIZE(zero_width_ranges)); -} - -/** - * Determine if a Unicode code point is double-width. 
- * - * @param ucs: Unicode code point (UCS-4) - * Return: true if the character is double-width, false otherwise - */ bool ucs_is_double_width(uint32_t cp) { - return is_in_interval(cp, double_width_ranges, ARRAY_SIZE(double_width_ranges)); + if (cp < double_width[0].first || + cp > double_width[ARRAY_SIZE(double_width) - 1].last) + return false; + + return bsearch(&cp, double_width, ARRAY_SIZE(double_width), + sizeof(struct interval), ucs_cmp) != NULL; } diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index b3a911866662..7d778752dcef 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,7 +29,11 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); -bool ucs_is_zero_width(uint32_t cp); +static inline bool ucs_is_zero_width(uint32_t cp) +{ + /* coming soon */ + return false; +} #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) From b1614dd1aef4bb5a37cf422fc6d7403d68a397c1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:23 +0200 Subject: [PATCH 058/105] Revert "vt: introduce gen_ucs_width.py to create ucs_width.c" This reverts commit 26c94eb4842ada96f9709b43ef225417a6b4df63. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes involved. 
Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/gen_ucs_width.py | 264 -------------------------------- 1 file changed, 264 deletions(-) delete mode 100755 drivers/tty/vt/gen_ucs_width.py diff --git a/drivers/tty/vt/gen_ucs_width.py b/drivers/tty/vt/gen_ucs_width.py deleted file mode 100755 index 41997fe00129..000000000000 --- a/drivers/tty/vt/gen_ucs_width.py +++ /dev/null @@ -1,264 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# -# This script uses Python's unicodedata module to generate ucs_width.c - -import unicodedata -import sys - -def generate_ucs_width(): - # Output file name - c_file = "ucs_width.c" - - # Width data mapping - width_map = {} # Maps code points to width (0, 1, 2) - - # Define emoji modifiers and components that should have zero width - emoji_zero_width = [ - # Skin tone modifiers - (0x1F3FB, 0x1F3FF), # Emoji modifiers (skin tones) - - # Variation selectors (note: VS16 is treated specially in vt.c) - (0xFE00, 0xFE0F), # Variation Selectors 1-16 - - # Gender and hair style modifiers - (0x2640, 0x2640), # Female sign - (0x2642, 0x2642), # Male sign - (0x26A7, 0x26A7), # Transgender symbol - (0x1F9B0, 0x1F9B3), # Hair components (red, curly, white, bald) - - # Tag characters - (0xE0020, 0xE007E), # Tags - ] - - # Mark these emoji modifiers as zero-width - for start, end in emoji_zero_width: - for cp in range(start, end + 1): - try: - width_map[cp] = 0 - except (ValueError, OverflowError): - continue - - # Mark all regional indicators as single-width as they are usually paired - # providing a combined with of 2. 
- regional_indicators = (0x1F1E6, 0x1F1FF) # Regional indicator symbols A-Z - start, end = regional_indicators - for cp in range(start, end + 1): - try: - width_map[cp] = 1 - except (ValueError, OverflowError): - continue - - # Process all assigned Unicode code points (Basic Multilingual Plane + Supplementary Planes) - # Range 0x0 to 0x10FFFF (the full Unicode range) - for block_start in range(0, 0x110000, 0x1000): - block_end = block_start + 0x1000 - for cp in range(block_start, block_end): - try: - char = chr(cp) - - # Skip if already processed - if cp in width_map: - continue - - # Check if the character is a combining mark - category = unicodedata.category(char) - - # Combining marks, format characters, zero-width characters - if (category.startswith('M') or # Mark (combining) - (category == 'Cf' and cp not in (0x061C, 0x06DD, 0x070F, 0x180E, 0x200F, 0x202E, 0x2066, 0x2067, 0x2068, 0x2069)) or - cp in (0x200B, 0x200C, 0x200D, 0x2060, 0xFEFF)): # Known zero-width characters - width_map[cp] = 0 - continue - - # Use East Asian Width property - eaw = unicodedata.east_asian_width(char) - - if eaw in ('F', 'W'): # Fullwidth or Wide - width_map[cp] = 2 - elif eaw in ('Na', 'H', 'N', 'A'): # Narrow, Halfwidth, Neutral, Ambiguous - width_map[cp] = 1 - else: - # Default to single-width for unknown - width_map[cp] = 1 - - except (ValueError, OverflowError): - # Skip invalid code points - continue - - # Process Emoji - generally double-width - # Ranges according to Unicode Emoji standard - emoji_ranges = [ - (0x1F000, 0x1F02F), # Mahjong Tiles - (0x1F0A0, 0x1F0FF), # Playing Cards - (0x1F300, 0x1F5FF), # Miscellaneous Symbols and Pictographs - (0x1F600, 0x1F64F), # Emoticons - (0x1F680, 0x1F6FF), # Transport and Map Symbols - (0x1F700, 0x1F77F), # Alchemical Symbols - (0x1F780, 0x1F7FF), # Geometric Shapes Extended - (0x1F800, 0x1F8FF), # Supplemental Arrows-C - (0x1F900, 0x1F9FF), # Supplemental Symbols and Pictographs - (0x1FA00, 0x1FA6F), # Chess Symbols - (0x1FA70, 
0x1FAFF), # Symbols and Pictographs Extended-A - ] - - for start, end in emoji_ranges: - for cp in range(start, end + 1): - if cp not in width_map or width_map[cp] != 0: # Don't override zero-width - try: - char = chr(cp) - width_map[cp] = 2 - except (ValueError, OverflowError): - continue - - # Optimize to create range tables - def ranges_optimize(width_data, target_width): - points = sorted([cp for cp, width in width_data.items() if width == target_width]) - if not points: - return [] - - # Group consecutive code points into ranges - ranges = [] - start = points[0] - prev = start - - for cp in points[1:]: - if cp > prev + 1: - ranges.append((start, prev)) - start = cp - prev = cp - - # Add the last range - ranges.append((start, prev)) - return ranges - - # Extract ranges for each width - zero_width_ranges = ranges_optimize(width_map, 0) - double_width_ranges = ranges_optimize(width_map, 2) - - # Get Unicode version information - unicode_version = unicodedata.unidata_version - - # Generate C implementation file - with open(c_file, 'w') as f: - f.write(f"""\ -// SPDX-License-Identifier: GPL-2.0 -/* - * ucs_width.c - Unicode character width lookup - * - * Auto-generated by gen_ucs_width.py - * - * Unicode Version: {unicode_version} - */ - -#include -#include -#include -#include - -struct interval {{ - uint32_t first; - uint32_t last; -}}; - -/* Zero-width character ranges */ -static const struct interval zero_width_ranges[] = {{ -""") - - for start, end in zero_width_ranges: - try: - start_char_desc = unicodedata.name(chr(start)) if start < 0x10000 else f"U+{start:05X}" - if start == end: - comment = f"/* {start_char_desc} */" - else: - end_char_desc = unicodedata.name(chr(end)) if end < 0x10000 else f"U+{end:05X}" - comment = f"/* {start_char_desc} - {end_char_desc} */" - except: - if start == end: - comment = f"/* U+{start:05X} */" - else: - comment = f"/* U+{start:05X} - U+{end:05X} */" - - f.write(f"\t{{ 0x{start:05X}, 0x{end:05X} }}, {comment}\n") - - 
f.write("""\ -}; - -/* Double-width character ranges */ -static const struct interval double_width_ranges[] = { -""") - - for start, end in double_width_ranges: - try: - start_char_desc = unicodedata.name(chr(start)) if start < 0x10000 else f"U+{start:05X}" - if start == end: - comment = f"/* {start_char_desc} */" - else: - end_char_desc = unicodedata.name(chr(end)) if end < 0x10000 else f"U+{end:05X}" - comment = f"/* {start_char_desc} - {end_char_desc} */" - except: - if start == end: - comment = f"/* U+{start:05X} */" - else: - comment = f"/* U+{start:05X} - U+{end:05X} */" - - f.write(f"\t{{ 0x{start:05X}, 0x{end:05X} }}, {comment}\n") - - f.write("""\ -}; - - -static int ucs_cmp(const void *key, const void *element) -{ - uint32_t cp = *(uint32_t *)key; - const struct interval *e = element; - - if (cp > e->last) - return 1; - if (cp < e->first) - return -1; - return 0; -} - -static bool is_in_interval(uint32_t cp, const struct interval *intervals, size_t count) -{ - if (cp < intervals[0].first || cp > intervals[count - 1].last) - return false; - - return __inline_bsearch(&cp, intervals, count, - sizeof(*intervals), ucs_cmp) != NULL; -} - -/** - * Determine if a Unicode code point is zero-width. - * - * @param ucs: Unicode code point (UCS-4) - * Return: true if the character is zero-width, false otherwise - */ -bool ucs_is_zero_width(uint32_t cp) -{ - return is_in_interval(cp, zero_width_ranges, ARRAY_SIZE(zero_width_ranges)); -} - -/** - * Determine if a Unicode code point is double-width. 
- * - * @param ucs: Unicode code point (UCS-4) - * Return: true if the character is double-width, false otherwise - */ -bool ucs_is_double_width(uint32_t cp) -{ - return is_in_interval(cp, double_width_ranges, ARRAY_SIZE(double_width_ranges)); -} -""") - - # Print summary - zero_width_count = sum(end - start + 1 for start, end in zero_width_ranges) - double_width_count = sum(end - start + 1 for start, end in double_width_ranges) - - print(f"Generated {c_file} with:") - print(f"- {len(zero_width_ranges)} zero-width ranges covering ~{zero_width_count} code points") - print(f"- {len(double_width_ranges)} double-width ranges covering ~{double_width_count} code points") - -if __name__ == "__main__": - generate_ucs_width() From d3e92076c1af713e65edac109499c25c37f38c16 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:24 +0200 Subject: [PATCH 059/105] Revert "vt: properly support zero-width Unicode code points" This reverts commit e88391f730e46d208b7fb37b02611d24137af1ef. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes involved. 
Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 46 ++------------------------------------ include/linux/consolemap.h | 10 --------- 2 files changed, 2 insertions(+), 54 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 5d53feeb5d2b..bcb508bc15ab 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -443,15 +443,6 @@ static void vc_uniscr_scroll(struct vc_data *vc, unsigned int top, } } -static u32 vc_uniscr_getc(struct vc_data *vc, int relative_pos) -{ - int pos = vc->state.x + vc->vc_need_wrap + relative_pos; - - if (vc->vc_uni_lines && pos >= 0 && pos < vc->vc_cols) - return vc->vc_uni_lines[vc->state.y][pos]; - return 0; -} - static void vc_uniscr_copy_area(u32 **dst_lines, unsigned int dst_cols, unsigned int dst_rows, @@ -2914,49 +2905,18 @@ static bool vc_is_control(struct vc_data *vc, int tc, int c) return false; } -static void vc_con_rewind(struct vc_data *vc) -{ - if (vc->state.x && !vc->vc_need_wrap) { - vc->vc_pos -= 2; - vc->state.x--; - } - vc->vc_need_wrap = 0; -} - static int vc_con_write_normal(struct vc_data *vc, int tc, int c, struct vc_draw_region *draw) { - int next_c, prev_c; + int next_c; unsigned char vc_attr = vc->vc_attr; u16 himask = vc->vc_hi_font_mask, charmask = himask ? 0x1ff : 0xff; u8 width = 1; bool inverse = false; if (vc->vc_utf && !vc->vc_disp_ctrl) { - if (ucs_is_double_width(c)) { + if (ucs_is_double_width(c)) width = 2; - } else if (ucs_is_zero_width(c)) { - prev_c = vc_uniscr_getc(vc, -1); - if (prev_c == ' ' && - ucs_is_double_width(vc_uniscr_getc(vc, -2))) { - /* - * Let's merge this zero-width code point with - * the preceding double-width code point by - * replacing the existing whitespace padding. - */ - vc_con_rewind(vc); - } else if (c == 0xfe0f && prev_c != 0) { - /* - * VS16 (U+FE0F) is special. Let it have a - * width of 1 when preceded by a single-width - * code point effectively making the later - * double-width. 
- */ - } else { - /* Otherwise zero-width code points are ignored */ - goto out; - } - } } /* Now try to find out how to display it */ @@ -3035,8 +2995,6 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, tc = ' '; next_c = ' '; } - -out: notify_write(vc, c); if (inverse) diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 7d778752dcef..caf079bcb8c9 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,11 +29,6 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); -static inline bool ucs_is_zero_width(uint32_t cp) -{ - /* coming soon */ - return false; -} #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -68,11 +63,6 @@ static inline bool ucs_is_double_width(uint32_t cp) { return false; } - -static inline bool ucs_is_zero_width(uint32_t cp) -{ - return false; -} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ From e42e607aefc4132d508a0e5724b5d0975d0a53e8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:25 +0200 Subject: [PATCH 060/105] Revert "vt: move unicode processing to a separate file" This reverts commit 2acaf27cd7f4f32bfe8bf7335690618e2417e744. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes involved. 
Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/Makefile | 3 +-- drivers/tty/vt/ucs_width.c | 45 -------------------------------------- drivers/tty/vt/vt.c | 40 ++++++++++++++++++++++++++++++++- include/linux/consolemap.h | 6 ----- 4 files changed, 40 insertions(+), 54 deletions(-) delete mode 100644 drivers/tty/vt/ucs_width.c diff --git a/drivers/tty/vt/Makefile b/drivers/tty/vt/Makefile index bee69277bbc3..2c8ce8b592ed 100644 --- a/drivers/tty/vt/Makefile +++ b/drivers/tty/vt/Makefile @@ -7,8 +7,7 @@ FONTMAPFILE = cp437.uni obj-$(CONFIG_VT) += vt_ioctl.o vc_screen.o \ selection.o keyboard.o \ vt.o defkeymap.o -obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o \ - ucs_width.o +obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o # Files generated that shall be removed upon make clean clean-files := consolemap_deftbl.c defkeymap.c diff --git a/drivers/tty/vt/ucs_width.c b/drivers/tty/vt/ucs_width.c deleted file mode 100644 index 5f0bde30a1fb..000000000000 --- a/drivers/tty/vt/ucs_width.c +++ /dev/null @@ -1,45 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include -#include -#include - -/* ucs_is_double_width() is based on the wcwidth() implementation by - * Markus Kuhn -- 2007-05-26 (Unicode 5.0) - * Latest version: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c - */ - -struct interval { - uint32_t first; - uint32_t last; -}; - -static int ucs_cmp(const void *key, const void *elt) -{ - uint32_t cp = *(uint32_t *)key; - struct interval e = *(struct interval *) elt; - - if (cp > e.last) - return 1; - else if (cp < e.first) - return -1; - return 0; -} - -static const struct interval double_width[] = { - { 0x1100, 0x115F }, { 0x2329, 0x232A }, { 0x2E80, 0x303E }, - { 0x3040, 0xA4CF }, { 0xAC00, 0xD7A3 }, { 0xF900, 0xFAFF }, - { 0xFE10, 0xFE19 }, { 0xFE30, 0xFE6F }, { 0xFF00, 0xFF60 }, - { 0xFFE0, 0xFFE6 }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD } -}; - -bool 
ucs_is_double_width(uint32_t cp) -{ - if (cp < double_width[0].first || - cp > double_width[ARRAY_SIZE(double_width) - 1].last) - return false; - - return bsearch(&cp, double_width, ARRAY_SIZE(double_width), - sizeof(struct interval), ucs_cmp) != NULL; -} diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index bcb508bc15ab..b5f3c8a818ed 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -104,6 +104,7 @@ #include #include #include +#include #include #define MAX_NR_CON_DRIVER 16 @@ -2711,6 +2712,43 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, u8 c) } } +/* is_double_width() is based on the wcwidth() implementation by + * Markus Kuhn -- 2007-05-26 (Unicode 5.0) + * Latest version: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c + */ +struct interval { + uint32_t first; + uint32_t last; +}; + +static int ucs_cmp(const void *key, const void *elt) +{ + uint32_t ucs = *(uint32_t *)key; + struct interval e = *(struct interval *) elt; + + if (ucs > e.last) + return 1; + else if (ucs < e.first) + return -1; + return 0; +} + +static int is_double_width(uint32_t ucs) +{ + static const struct interval double_width[] = { + { 0x1100, 0x115F }, { 0x2329, 0x232A }, { 0x2E80, 0x303E }, + { 0x3040, 0xA4CF }, { 0xAC00, 0xD7A3 }, { 0xF900, 0xFAFF }, + { 0xFE10, 0xFE19 }, { 0xFE30, 0xFE6F }, { 0xFF00, 0xFF60 }, + { 0xFFE0, 0xFFE6 }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD } + }; + if (ucs < double_width[0].first || + ucs > double_width[ARRAY_SIZE(double_width) - 1].last) + return 0; + + return bsearch(&ucs, double_width, ARRAY_SIZE(double_width), + sizeof(struct interval), ucs_cmp) != NULL; +} + struct vc_draw_region { unsigned long from, to; int x; @@ -2915,7 +2953,7 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, bool inverse = false; if (vc->vc_utf && !vc->vc_disp_ctrl) { - if (ucs_is_double_width(c)) + if (is_double_width(c)) width = 2; } diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 
caf079bcb8c9..c35db4896c37 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -28,7 +28,6 @@ int conv_uni_to_pc(struct vc_data *conp, long ucs); u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); -bool ucs_is_double_width(uint32_t cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -58,11 +57,6 @@ static inline int conv_uni_to_8bit(u32 uni) } static inline void console_map_init(void) { } - -static inline bool ucs_is_double_width(uint32_t cp) -{ - return false; -} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ From 3702f72748b2cf91f5b7aefa4038e226f1a5fc81 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Apr 2025 11:21:26 +0200 Subject: [PATCH 061/105] Revert "vt: minor cleanup to vc_translate_unicode()" This reverts commit 74045f6658f11241a09d93404d79828cc99e94dc. A new version of the series was submitted, so it's easier to revert the old one and add the new one due to the changes involved. Cc: Nicolas Pitre Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index b5f3c8a818ed..f5642b3038e4 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2817,7 +2817,7 @@ static int vc_translate_unicode(struct vc_data *vc, int c, bool *rescan) if ((c & 0xc0) == 0x80) { /* Unexpected continuation byte? 
*/ if (!vc->vc_utf_count) - goto bad_sequence; + return 0xfffd; vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f); vc->vc_npar++; @@ -2829,17 +2829,17 @@ static int vc_translate_unicode(struct vc_data *vc, int c, bool *rescan) /* Reject overlong sequences */ if (c <= utf8_length_changes[vc->vc_npar - 1] || c > utf8_length_changes[vc->vc_npar]) - goto bad_sequence; + return 0xfffd; return vc_sanitize_unicode(c); } /* Single ASCII byte or first byte of a sequence received */ if (vc->vc_utf_count) { - /* A continuation byte was expected */ + /* Continuation byte expected */ *rescan = true; vc->vc_utf_count = 0; - goto bad_sequence; + return 0xfffd; } /* Nothing to do if an ASCII byte was received */ @@ -2858,14 +2858,11 @@ static int vc_translate_unicode(struct vc_data *vc, int c, bool *rescan) vc->vc_utf_count = 3; vc->vc_utf_char = (c & 0x07); } else { - goto bad_sequence; + return 0xfffd; } need_more_bytes: return -1; - -bad_sequence: - return 0xfffd; } static int vc_translate(struct vc_data *vc, int *c, bool *rescan) From d066989a3d41bc75c537b86bcdb2911fc5ffdb07 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:03 -0400 Subject: [PATCH 062/105] vt: minor cleanup to vc_translate_unicode() Make it clearer when a sequence is bad. Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-2-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index f5642b3038e4..b5f3c8a818ed 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2817,7 +2817,7 @@ static int vc_translate_unicode(struct vc_data *vc, int c, bool *rescan) if ((c & 0xc0) == 0x80) { /* Unexpected continuation byte? 
*/ if (!vc->vc_utf_count) - return 0xfffd; + goto bad_sequence; vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f); vc->vc_npar++; @@ -2829,17 +2829,17 @@ static int vc_translate_unicode(struct vc_data *vc, int c, bool *rescan) /* Reject overlong sequences */ if (c <= utf8_length_changes[vc->vc_npar - 1] || c > utf8_length_changes[vc->vc_npar]) - return 0xfffd; + goto bad_sequence; return vc_sanitize_unicode(c); } /* Single ASCII byte or first byte of a sequence received */ if (vc->vc_utf_count) { - /* Continuation byte expected */ + /* A continuation byte was expected */ *rescan = true; vc->vc_utf_count = 0; - return 0xfffd; + goto bad_sequence; } /* Nothing to do if an ASCII byte was received */ @@ -2858,11 +2858,14 @@ static int vc_translate_unicode(struct vc_data *vc, int c, bool *rescan) vc->vc_utf_count = 3; vc->vc_utf_char = (c & 0x07); } else { - return 0xfffd; + goto bad_sequence; } need_more_bytes: return -1; + +bad_sequence: + return 0xfffd; } static int vc_translate(struct vc_data *vc, int *c, bool *rescan) From 07bc3f442f47b4d158468c2e0146475bdf009091 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:04 -0400 Subject: [PATCH 063/105] vt: move unicode processing to a separate file This will make it easier to maintain. Also make it depend on CONFIG_CONSOLE_TRANSLATIONS. 
Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-3-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/Makefile | 3 +- drivers/tty/vt/ucs.c | 57 ++++++++++++++++++++++++++++++++++++++ drivers/tty/vt/vt.c | 40 +------------------------- include/linux/consolemap.h | 6 ++++ 4 files changed, 66 insertions(+), 40 deletions(-) create mode 100644 drivers/tty/vt/ucs.c diff --git a/drivers/tty/vt/Makefile b/drivers/tty/vt/Makefile index 2c8ce8b592ed..e24c8546ac12 100644 --- a/drivers/tty/vt/Makefile +++ b/drivers/tty/vt/Makefile @@ -7,7 +7,8 @@ FONTMAPFILE = cp437.uni obj-$(CONFIG_VT) += vt_ioctl.o vc_screen.o \ selection.o keyboard.o \ vt.o defkeymap.o -obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o +obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o \ + ucs.o # Files generated that shall be removed upon make clean clean-files := consolemap_deftbl.c defkeymap.c diff --git a/drivers/tty/vt/ucs.c b/drivers/tty/vt/ucs.c new file mode 100644 index 000000000000..dc4a6e794531 --- /dev/null +++ b/drivers/tty/vt/ucs.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucs.c - Universal Character Set processing + */ + +#include +#include +#include +#include + +/* ucs_is_double_width() is based on the wcwidth() implementation by + * Markus Kuhn -- 2007-05-26 (Unicode 5.0) + * Latest version: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c + */ + +struct ucs_interval { + u32 first; + u32 last; +}; + +static const struct ucs_interval ucs_double_width_ranges[] = { + { 0x1100, 0x115F }, { 0x2329, 0x232A }, { 0x2E80, 0x303E }, + { 0x3040, 0xA4CF }, { 0xAC00, 0xD7A3 }, { 0xF900, 0xFAFF }, + { 0xFE10, 0xFE19 }, { 0xFE30, 0xFE6F }, { 0xFF00, 0xFF60 }, + { 0xFFE0, 0xFFE6 }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD } +}; + +static int interval_cmp(const void *key, const void *element) +{ + u32 cp = *(u32 *)key; + const struct ucs_interval *entry = element; + + if 
(cp < entry->first) + return -1; + if (cp > entry->last) + return 1; + return 0; +} + +/** + * ucs_is_double_width() - Determine if a Unicode code point is double-width. + * @cp: Unicode code point (UCS-4) + * + * Return: true if the character is double-width, false otherwise + */ +bool ucs_is_double_width(u32 cp) +{ + size_t size = ARRAY_SIZE(ucs_double_width_ranges); + + if (!in_range(cp, ucs_double_width_ranges[0].first, + ucs_double_width_ranges[size - 1].last)) + return false; + + return __inline_bsearch(&cp, ucs_double_width_ranges, size, + sizeof(*ucs_double_width_ranges), + interval_cmp) != NULL; +} diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index b5f3c8a818ed..bcb508bc15ab 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -104,7 +104,6 @@ #include #include #include -#include #include #define MAX_NR_CON_DRIVER 16 @@ -2712,43 +2711,6 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, u8 c) } } -/* is_double_width() is based on the wcwidth() implementation by - * Markus Kuhn -- 2007-05-26 (Unicode 5.0) - * Latest version: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c - */ -struct interval { - uint32_t first; - uint32_t last; -}; - -static int ucs_cmp(const void *key, const void *elt) -{ - uint32_t ucs = *(uint32_t *)key; - struct interval e = *(struct interval *) elt; - - if (ucs > e.last) - return 1; - else if (ucs < e.first) - return -1; - return 0; -} - -static int is_double_width(uint32_t ucs) -{ - static const struct interval double_width[] = { - { 0x1100, 0x115F }, { 0x2329, 0x232A }, { 0x2E80, 0x303E }, - { 0x3040, 0xA4CF }, { 0xAC00, 0xD7A3 }, { 0xF900, 0xFAFF }, - { 0xFE10, 0xFE19 }, { 0xFE30, 0xFE6F }, { 0xFF00, 0xFF60 }, - { 0xFFE0, 0xFFE6 }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD } - }; - if (ucs < double_width[0].first || - ucs > double_width[ARRAY_SIZE(double_width) - 1].last) - return 0; - - return bsearch(&ucs, double_width, ARRAY_SIZE(double_width), - sizeof(struct interval), ucs_cmp) != NULL; -} 
- struct vc_draw_region { unsigned long from, to; int x; @@ -2953,7 +2915,7 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, bool inverse = false; if (vc->vc_utf && !vc->vc_disp_ctrl) { - if (is_double_width(c)) + if (ucs_is_double_width(c)) width = 2; } diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index c35db4896c37..caf079bcb8c9 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -28,6 +28,7 @@ int conv_uni_to_pc(struct vc_data *conp, long ucs); u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); +bool ucs_is_double_width(uint32_t cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -57,6 +58,11 @@ static inline int conv_uni_to_8bit(u32 uni) } static inline void console_map_init(void) { } + +static inline bool ucs_is_double_width(uint32_t cp) +{ + return false; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ From 95b05de0a56699392e67590d000df76fedec609a Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:05 -0400 Subject: [PATCH 064/105] vt: properly support zero-width Unicode code points Zero-width Unicode code points are causing misalignment in vertically aligned content, disrupting the visual layout. Let's handle zero-width code points more intelligently. Double-width code points are stored in the screen grid followed by a white space code point to create the expected screen layout. When a double-width code point is followed by a zero-width code point in the console incoming bytestream (e.g., an emoji with a presentation selector) then we may replace the white space padding by that zero-width code point instead of dropping it. This maximizes screen content information while preserving proper layout. 
If a zero-width code point is preceded by a single-width code point then the above trick is not possible and such zero-width code point must be dropped. VS16 (Variation Selector 16, U+FE0F) is special as it typically doubles the width of the preceding single-width code point. We handle that case by giving VS16 a width of 1 instead of 0 when that happens. Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-4-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 70 ++++++++++++++++++++++++++++++++++++-- include/linux/consolemap.h | 10 ++++++ 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index bcb508bc15ab..a989feffad5e 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -443,6 +443,15 @@ static void vc_uniscr_scroll(struct vc_data *vc, unsigned int top, } } +static u32 vc_uniscr_getc(struct vc_data *vc, int relative_pos) +{ + int pos = vc->state.x + vc->vc_need_wrap + relative_pos; + + if (vc->vc_uni_lines && in_range(pos, 0, vc->vc_cols)) + return vc->vc_uni_lines[vc->state.y][pos]; + return 0; +} + static void vc_uniscr_copy_area(u32 **dst_lines, unsigned int dst_cols, unsigned int dst_rows, @@ -2905,6 +2914,60 @@ static bool vc_is_control(struct vc_data *vc, int tc, int c) return false; } +static void vc_con_rewind(struct vc_data *vc) +{ + if (vc->state.x && !vc->vc_need_wrap) { + vc->vc_pos -= 2; + vc->state.x--; + } + vc->vc_need_wrap = 0; +} + +#define UCS_VS16 0xfe0f /* Variation Selector 16 */ + +static int vc_process_ucs(struct vc_data *vc, int c, int *tc) +{ + u32 prev_c, curr_c = c; + + if (ucs_is_double_width(curr_c)) + return 2; + + if (!ucs_is_zero_width(curr_c)) + return 1; + + /* From here curr_c is known to be zero-width. 
*/ + + if (ucs_is_double_width(vc_uniscr_getc(vc, -2))) { + /* + * Let's merge this zero-width code point with the preceding + * double-width code point by replacing the existing + * whitespace padding. To do so we rewind one column and + * pretend this has a width of 1. + * We give the legacy display the same initial space padding. + */ + vc_con_rewind(vc); + *tc = ' '; + return 1; + } + + /* From here the preceding character, if any, must be single-width. */ + prev_c = vc_uniscr_getc(vc, -1); + + if (curr_c == UCS_VS16 && prev_c != 0) { + /* + * VS16 (U+FE0F) is special. It typically turns the preceding + * single-width character into a double-width one. Let it + * have a width of 1 effectively making the combination with + * the preceding character double-width. + */ + *tc = ' '; + return 1; + } + + /* Otherwise zero-width code points are ignored. */ + return 0; +} + static int vc_con_write_normal(struct vc_data *vc, int tc, int c, struct vc_draw_region *draw) { @@ -2915,8 +2978,9 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, bool inverse = false; if (vc->vc_utf && !vc->vc_disp_ctrl) { - if (ucs_is_double_width(c)) - width = 2; + width = vc_process_ucs(vc, c, &tc); + if (!width) + goto out; } /* Now try to find out how to display it */ @@ -2995,6 +3059,8 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, tc = ' '; next_c = ' '; } + +out: notify_write(vc, c); if (inverse) diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index caf079bcb8c9..7d778752dcef 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,6 +29,11 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); +static inline bool ucs_is_zero_width(uint32_t cp) +{ + /* coming soon */ + return false; +} #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -63,6 +68,11 @@ static inline bool 
ucs_is_double_width(uint32_t cp) { return false; } + +static inline bool ucs_is_zero_width(uint32_t cp) +{ + return false; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ From b11a041179e70abac27e0e4a6a3cb1f8781b9750 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:06 -0400 Subject: [PATCH 065/105] vt: introduce gen_ucs_width_table.py to create ucs_width_table.h The table in ucs.c is terribly out of date and incomplete. We also need a second table to store zero-width code points. Properly maintaining those tables manually is impossible. So here's a script to generate them. Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-5-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/gen_ucs_width_table.py | 256 ++++++++++++++++++++++++++ 1 file changed, 256 insertions(+) create mode 100755 drivers/tty/vt/gen_ucs_width_table.py diff --git a/drivers/tty/vt/gen_ucs_width_table.py b/drivers/tty/vt/gen_ucs_width_table.py new file mode 100755 index 000000000000..00510444a727 --- /dev/null +++ b/drivers/tty/vt/gen_ucs_width_table.py @@ -0,0 +1,256 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Leverage Python's unicodedata module to generate ucs_width_table.h + +import unicodedata +import sys + +# This script's file name +from pathlib import Path +this_file = Path(__file__).name + +# Output file name +out_file = "ucs_width_table.h" + +# --- Global Constants for Width Assignments --- + +# Known zero-width characters +KNOWN_ZERO_WIDTH = ( + 0x200B, # ZERO WIDTH SPACE + 0x200C, # ZERO WIDTH NON-JOINER + 0x200D, # ZERO WIDTH JOINER + 0x2060, # WORD JOINER + 0xFEFF # ZERO WIDTH NO-BREAK SPACE (BOM) +) + +# Zero-width emoji modifiers and components +# NOTE: Some of these characters would normally be single-width according to +# East Asian Width properties, but we deliberately override them to be +# zero-width because they function as 
modifiers in emoji sequences. +EMOJI_ZERO_WIDTH = [ + # Skin tone modifiers + (0x1F3FB, 0x1F3FF), # Emoji modifiers (skin tones) + + # Variation selectors (note: VS16 is treated specially in vt.c) + (0xFE00, 0xFE0F), # Variation Selectors 1-16 + + # Gender and hair style modifiers + # These would be single-width by Unicode properties, but are zero-width + # when part of emoji + (0x2640, 0x2640), # Female sign + (0x2642, 0x2642), # Male sign + (0x26A7, 0x26A7), # Transgender symbol + (0x1F9B0, 0x1F9B3), # Hair components (red, curly, white, bald) + + # Tag characters + (0xE0020, 0xE007E), # Tags +] + +# Regional indicators (flag components) +REGIONAL_INDICATORS = (0x1F1E6, 0x1F1FF) # Regional indicator symbols A-Z + +# Double-width emoji ranges +# +# Many emoji characters are classified as single-width according to Unicode +# Standard Annex #11 East Asian Width property (N or Neutral), but we +# deliberately override them to be double-width. References: +# 1. Unicode Technical Standard #51: Unicode Emoji +# (https://www.unicode.org/reports/tr51/) +# 2. Principle of "emoji presentation" in WHATWG CSS Text specification +# (https://drafts.csswg.org/css-text-3/#character-properties) +# 3. Terminal emulator implementations (iTerm2, Windows Terminal, etc.) which +# universally render emoji as double-width characters regardless of their +# Unicode EAW property +# 4. 
W3C Work Item: Requirements for Japanese Text Layout - Section 3.8.1 +# Emoji width (https://www.w3.org/TR/jlreq/) +EMOJI_RANGES = [ + (0x1F000, 0x1F02F), # Mahjong Tiles (EAW: N, but displayed as double-width) + (0x1F0A0, 0x1F0FF), # Playing Cards (EAW: N, but displayed as double-width) + (0x1F300, 0x1F5FF), # Miscellaneous Symbols and Pictographs + (0x1F600, 0x1F64F), # Emoticons + (0x1F680, 0x1F6FF), # Transport and Map Symbols + (0x1F700, 0x1F77F), # Alchemical Symbols + (0x1F780, 0x1F7FF), # Geometric Shapes Extended + (0x1F800, 0x1F8FF), # Supplemental Arrows-C + (0x1F900, 0x1F9FF), # Supplemental Symbols and Pictographs + (0x1FA00, 0x1FA6F), # Chess Symbols + (0x1FA70, 0x1FAFF), # Symbols and Pictographs Extended-A +] + +def create_width_tables(): + """ + Creates Unicode character width tables and returns the data structures. + + Returns: + tuple: (zero_width_ranges, double_width_ranges) + """ + + # Width data mapping + width_map = {} # Maps code points to width (0, 1, 2) + + # Mark emoji modifiers as zero-width + for start, end in EMOJI_ZERO_WIDTH: + for cp in range(start, end + 1): + width_map[cp] = 0 + + # Mark all regional indicators as single-width as they are usually paired + # providing a combined width of 2 when displayed together. 
+ start, end = REGIONAL_INDICATORS + for cp in range(start, end + 1): + width_map[cp] = 1 + + # Process all assigned Unicode code points (Basic Multilingual Plane + + # Supplementary Planes) Range 0x0 to 0x10FFFF (the full Unicode range) + for block_start in range(0, 0x110000, 0x1000): + block_end = block_start + 0x1000 + for cp in range(block_start, block_end): + try: + char = chr(cp) + + # Skip if already processed + if cp in width_map: + continue + + # Check for combining marks and format characters + category = unicodedata.category(char) + + # Combining marks + if category.startswith('M'): + width_map[cp] = 0 + continue + + # Format characters + # Since we have no support for bidirectional text, all format + # characters (category Cf) can be treated with width 0 (zero) + # for simplicity, as they don't need to occupy visual space + # in a non-bidirectional text environment. + if category == 'Cf': + width_map[cp] = 0 + continue + + # Known zero-width characters + if cp in KNOWN_ZERO_WIDTH: + width_map[cp] = 0 + continue + + # Use East Asian Width property + eaw = unicodedata.east_asian_width(char) + if eaw in ('F', 'W'): # Fullwidth or Wide + width_map[cp] = 2 + elif eaw in ('Na', 'H', 'N', 'A'): # Narrow, Halfwidth, Neutral, Ambiguous + width_map[cp] = 1 + else: + # Default to single-width for unknown + width_map[cp] = 1 + + except (ValueError, OverflowError): + # Skip invalid code points + continue + + # Process Emoji - generally double-width + for start, end in EMOJI_RANGES: + for cp in range(start, end + 1): + if cp not in width_map or width_map[cp] != 0: # Don't override zero-width + try: + char = chr(cp) + width_map[cp] = 2 + except (ValueError, OverflowError): + continue + + # Optimize to create range tables + def ranges_optimize(width_data, target_width): + points = sorted([cp for cp, width in width_data.items() if width == target_width]) + if not points: + return [] + + # Group consecutive code points into ranges + ranges = [] + start = points[0] + 
prev = start + + for cp in points[1:]: + if cp > prev + 1: + ranges.append((start, prev)) + start = cp + prev = cp + + # Add the last range + ranges.append((start, prev)) + return ranges + + # Extract ranges for each width + zero_width_ranges = ranges_optimize(width_map, 0) + double_width_ranges = ranges_optimize(width_map, 2) + + return zero_width_ranges, double_width_ranges + +def write_tables(zero_width_ranges, double_width_ranges): + """ + Write the generated tables to C header file. + + Args: + zero_width_ranges: List of (start, end) ranges for zero-width characters + double_width_ranges: List of (start, end) ranges for double-width characters + """ + + # Function to generate code point description comments + def get_code_point_comment(start, end): + try: + start_char_desc = unicodedata.name(chr(start)) + if start == end: + return f"/* {start_char_desc} */" + else: + end_char_desc = unicodedata.name(chr(end)) + return f"/* {start_char_desc} - {end_char_desc} */" + except: + if start == end: + return f"/* U+{start:04X} */" + else: + return f"/* U+{start:04X} - U+{end:04X} */" + + # Generate C tables + with open(out_file, 'w') as f: + f.write(f"""\ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * {out_file} - Unicode character width + * + * Auto-generated by {this_file} + * + * Unicode Version: {unicodedata.unidata_version} + */ + +/* Zero-width character ranges */ +static const struct ucs_interval ucs_zero_width_ranges[] = {{ +""") + + for start, end in zero_width_ranges: + comment = get_code_point_comment(start, end) + f.write(f"\t{{ 0x{start:05X}, 0x{end:05X} }}, {comment}\n") + + f.write("""\ +}; + +/* Double-width character ranges */ +static const struct ucs_interval ucs_double_width_ranges[] = { +""") + + for start, end in double_width_ranges: + comment = get_code_point_comment(start, end) + f.write(f"\t{{ 0x{start:05X}, 0x{end:05X} }}, {comment}\n") + + f.write("};\n") + +if __name__ == "__main__": + # Write tables to header file + zero_width_ranges, 
double_width_ranges = create_width_tables() + write_tables(zero_width_ranges, double_width_ranges) + + # Print summary + zero_width_count = sum(end - start + 1 for start, end in zero_width_ranges) + double_width_count = sum(end - start + 1 for start, end in double_width_ranges) + print(f"Generated {out_file} with:") + print(f"- {len(zero_width_ranges)} zero-width ranges covering ~{zero_width_count} code points") + print(f"- {len(double_width_ranges)} double-width ranges covering ~{double_width_count} code points") + print(f"- Unicode Version: {unicodedata.unidata_version}") From 05ea6d71aa7a4f42c773c96dcd1519ac0dcdec86 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:07 -0400 Subject: [PATCH 066/105] vt: create ucs_width_table.h with gen_ucs_width_table.py Provide comprehensive ranges for double-width and zero-width Unicode code points. Note: scripts/checkpatch.pl complains about "... exceeds 100 columns". Please ignore. Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-6-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/ucs_width_table.h | 445 +++++++++++++++++++++++++++++++ 1 file changed, 445 insertions(+) create mode 100644 drivers/tty/vt/ucs_width_table.h diff --git a/drivers/tty/vt/ucs_width_table.h b/drivers/tty/vt/ucs_width_table.h new file mode 100644 index 000000000000..9cc86b5cdf92 --- /dev/null +++ b/drivers/tty/vt/ucs_width_table.h @@ -0,0 +1,445 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ucs_width_table.h - Unicode character width + * + * Auto-generated by gen_ucs_width_table.py + * + * Unicode Version: 16.0.0 + */ + +/* Zero-width character ranges */ +static const struct ucs_interval ucs_zero_width_ranges[] = { + { 0x000AD, 0x000AD }, /* SOFT HYPHEN */ + { 0x00300, 0x0036F }, /* COMBINING GRAVE ACCENT - COMBINING LATIN SMALL LETTER X */ + { 0x00483, 0x00489 }, /* COMBINING CYRILLIC TITLO - COMBINING CYRILLIC MILLIONS SIGN */ + { 0x00591, 
0x005BD }, /* HEBREW ACCENT ETNAHTA - HEBREW POINT METEG */ + { 0x005BF, 0x005BF }, /* HEBREW POINT RAFE */ + { 0x005C1, 0x005C2 }, /* HEBREW POINT SHIN DOT - HEBREW POINT SIN DOT */ + { 0x005C4, 0x005C5 }, /* HEBREW MARK UPPER DOT - HEBREW MARK LOWER DOT */ + { 0x005C7, 0x005C7 }, /* HEBREW POINT QAMATS QATAN */ + { 0x00600, 0x00605 }, /* ARABIC NUMBER SIGN - ARABIC NUMBER MARK ABOVE */ + { 0x00610, 0x0061A }, /* ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM - ARABIC SMALL KASRA */ + { 0x0061C, 0x0061C }, /* ARABIC LETTER MARK */ + { 0x0064B, 0x0065F }, /* ARABIC FATHATAN - ARABIC WAVY HAMZA BELOW */ + { 0x00670, 0x00670 }, /* ARABIC LETTER SUPERSCRIPT ALEF */ + { 0x006D6, 0x006DD }, /* ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA - ARABIC END OF AYAH */ + { 0x006DF, 0x006E4 }, /* ARABIC SMALL HIGH ROUNDED ZERO - ARABIC SMALL HIGH MADDA */ + { 0x006E7, 0x006E8 }, /* ARABIC SMALL HIGH YEH - ARABIC SMALL HIGH NOON */ + { 0x006EA, 0x006ED }, /* ARABIC EMPTY CENTRE LOW STOP - ARABIC SMALL LOW MEEM */ + { 0x0070F, 0x0070F }, /* SYRIAC ABBREVIATION MARK */ + { 0x00711, 0x00711 }, /* SYRIAC LETTER SUPERSCRIPT ALAPH */ + { 0x00730, 0x0074A }, /* SYRIAC PTHAHA ABOVE - SYRIAC BARREKH */ + { 0x007A6, 0x007B0 }, /* THAANA ABAFILI - THAANA SUKUN */ + { 0x007EB, 0x007F3 }, /* NKO COMBINING SHORT HIGH TONE - NKO COMBINING DOUBLE DOT ABOVE */ + { 0x007FD, 0x007FD }, /* NKO DANTAYALAN */ + { 0x00816, 0x00819 }, /* SAMARITAN MARK IN - SAMARITAN MARK DAGESH */ + { 0x0081B, 0x00823 }, /* SAMARITAN MARK EPENTHETIC YUT - SAMARITAN VOWEL SIGN A */ + { 0x00825, 0x00827 }, /* SAMARITAN VOWEL SIGN SHORT A - SAMARITAN VOWEL SIGN U */ + { 0x00829, 0x0082D }, /* SAMARITAN VOWEL SIGN LONG I - SAMARITAN MARK NEQUDAA */ + { 0x00859, 0x0085B }, /* MANDAIC AFFRICATION MARK - MANDAIC GEMINATION MARK */ + { 0x00890, 0x00891 }, /* ARABIC POUND MARK ABOVE - ARABIC PIASTRE MARK ABOVE */ + { 0x00897, 0x0089F }, /* ARABIC PEPET - ARABIC HALF MADDA OVER MADDA */ + { 0x008CA, 0x00903 }, /* ARABIC 
SMALL HIGH FARSI YEH - DEVANAGARI SIGN VISARGA */ + { 0x0093A, 0x0093C }, /* DEVANAGARI VOWEL SIGN OE - DEVANAGARI SIGN NUKTA */ + { 0x0093E, 0x0094F }, /* DEVANAGARI VOWEL SIGN AA - DEVANAGARI VOWEL SIGN AW */ + { 0x00951, 0x00957 }, /* DEVANAGARI STRESS SIGN UDATTA - DEVANAGARI VOWEL SIGN UUE */ + { 0x00962, 0x00963 }, /* DEVANAGARI VOWEL SIGN VOCALIC L - DEVANAGARI VOWEL SIGN VOCALIC LL */ + { 0x00981, 0x00983 }, /* BENGALI SIGN CANDRABINDU - BENGALI SIGN VISARGA */ + { 0x009BC, 0x009BC }, /* BENGALI SIGN NUKTA */ + { 0x009BE, 0x009C4 }, /* BENGALI VOWEL SIGN AA - BENGALI VOWEL SIGN VOCALIC RR */ + { 0x009C7, 0x009C8 }, /* BENGALI VOWEL SIGN E - BENGALI VOWEL SIGN AI */ + { 0x009CB, 0x009CD }, /* BENGALI VOWEL SIGN O - BENGALI SIGN VIRAMA */ + { 0x009D7, 0x009D7 }, /* BENGALI AU LENGTH MARK */ + { 0x009E2, 0x009E3 }, /* BENGALI VOWEL SIGN VOCALIC L - BENGALI VOWEL SIGN VOCALIC LL */ + { 0x009FE, 0x009FE }, /* BENGALI SANDHI MARK */ + { 0x00A01, 0x00A03 }, /* GURMUKHI SIGN ADAK BINDI - GURMUKHI SIGN VISARGA */ + { 0x00A3C, 0x00A3C }, /* GURMUKHI SIGN NUKTA */ + { 0x00A3E, 0x00A42 }, /* GURMUKHI VOWEL SIGN AA - GURMUKHI VOWEL SIGN UU */ + { 0x00A47, 0x00A48 }, /* GURMUKHI VOWEL SIGN EE - GURMUKHI VOWEL SIGN AI */ + { 0x00A4B, 0x00A4D }, /* GURMUKHI VOWEL SIGN OO - GURMUKHI SIGN VIRAMA */ + { 0x00A51, 0x00A51 }, /* GURMUKHI SIGN UDAAT */ + { 0x00A70, 0x00A71 }, /* GURMUKHI TIPPI - GURMUKHI ADDAK */ + { 0x00A75, 0x00A75 }, /* GURMUKHI SIGN YAKASH */ + { 0x00A81, 0x00A83 }, /* GUJARATI SIGN CANDRABINDU - GUJARATI SIGN VISARGA */ + { 0x00ABC, 0x00ABC }, /* GUJARATI SIGN NUKTA */ + { 0x00ABE, 0x00AC5 }, /* GUJARATI VOWEL SIGN AA - GUJARATI VOWEL SIGN CANDRA E */ + { 0x00AC7, 0x00AC9 }, /* GUJARATI VOWEL SIGN E - GUJARATI VOWEL SIGN CANDRA O */ + { 0x00ACB, 0x00ACD }, /* GUJARATI VOWEL SIGN O - GUJARATI SIGN VIRAMA */ + { 0x00AE2, 0x00AE3 }, /* GUJARATI VOWEL SIGN VOCALIC L - GUJARATI VOWEL SIGN VOCALIC LL */ + { 0x00AFA, 0x00AFF }, /* GUJARATI SIGN SUKUN - GUJARATI 
SIGN TWO-CIRCLE NUKTA ABOVE */ + { 0x00B01, 0x00B03 }, /* ORIYA SIGN CANDRABINDU - ORIYA SIGN VISARGA */ + { 0x00B3C, 0x00B3C }, /* ORIYA SIGN NUKTA */ + { 0x00B3E, 0x00B44 }, /* ORIYA VOWEL SIGN AA - ORIYA VOWEL SIGN VOCALIC RR */ + { 0x00B47, 0x00B48 }, /* ORIYA VOWEL SIGN E - ORIYA VOWEL SIGN AI */ + { 0x00B4B, 0x00B4D }, /* ORIYA VOWEL SIGN O - ORIYA SIGN VIRAMA */ + { 0x00B55, 0x00B57 }, /* ORIYA SIGN OVERLINE - ORIYA AU LENGTH MARK */ + { 0x00B62, 0x00B63 }, /* ORIYA VOWEL SIGN VOCALIC L - ORIYA VOWEL SIGN VOCALIC LL */ + { 0x00B82, 0x00B82 }, /* TAMIL SIGN ANUSVARA */ + { 0x00BBE, 0x00BC2 }, /* TAMIL VOWEL SIGN AA - TAMIL VOWEL SIGN UU */ + { 0x00BC6, 0x00BC8 }, /* TAMIL VOWEL SIGN E - TAMIL VOWEL SIGN AI */ + { 0x00BCA, 0x00BCD }, /* TAMIL VOWEL SIGN O - TAMIL SIGN VIRAMA */ + { 0x00BD7, 0x00BD7 }, /* TAMIL AU LENGTH MARK */ + { 0x00C00, 0x00C04 }, /* TELUGU SIGN COMBINING CANDRABINDU ABOVE - TELUGU SIGN COMBINING ANUSVARA ABOVE */ + { 0x00C3C, 0x00C3C }, /* TELUGU SIGN NUKTA */ + { 0x00C3E, 0x00C44 }, /* TELUGU VOWEL SIGN AA - TELUGU VOWEL SIGN VOCALIC RR */ + { 0x00C46, 0x00C48 }, /* TELUGU VOWEL SIGN E - TELUGU VOWEL SIGN AI */ + { 0x00C4A, 0x00C4D }, /* TELUGU VOWEL SIGN O - TELUGU SIGN VIRAMA */ + { 0x00C55, 0x00C56 }, /* TELUGU LENGTH MARK - TELUGU AI LENGTH MARK */ + { 0x00C62, 0x00C63 }, /* TELUGU VOWEL SIGN VOCALIC L - TELUGU VOWEL SIGN VOCALIC LL */ + { 0x00C81, 0x00C83 }, /* KANNADA SIGN CANDRABINDU - KANNADA SIGN VISARGA */ + { 0x00CBC, 0x00CBC }, /* KANNADA SIGN NUKTA */ + { 0x00CBE, 0x00CC4 }, /* KANNADA VOWEL SIGN AA - KANNADA VOWEL SIGN VOCALIC RR */ + { 0x00CC6, 0x00CC8 }, /* KANNADA VOWEL SIGN E - KANNADA VOWEL SIGN AI */ + { 0x00CCA, 0x00CCD }, /* KANNADA VOWEL SIGN O - KANNADA SIGN VIRAMA */ + { 0x00CD5, 0x00CD6 }, /* KANNADA LENGTH MARK - KANNADA AI LENGTH MARK */ + { 0x00CE2, 0x00CE3 }, /* KANNADA VOWEL SIGN VOCALIC L - KANNADA VOWEL SIGN VOCALIC LL */ + { 0x00CF3, 0x00CF3 }, /* KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT */ + { 
0x00D00, 0x00D03 }, /* MALAYALAM SIGN COMBINING ANUSVARA ABOVE - MALAYALAM SIGN VISARGA */ + { 0x00D3B, 0x00D3C }, /* MALAYALAM SIGN VERTICAL BAR VIRAMA - MALAYALAM SIGN CIRCULAR VIRAMA */ + { 0x00D3E, 0x00D44 }, /* MALAYALAM VOWEL SIGN AA - MALAYALAM VOWEL SIGN VOCALIC RR */ + { 0x00D46, 0x00D48 }, /* MALAYALAM VOWEL SIGN E - MALAYALAM VOWEL SIGN AI */ + { 0x00D4A, 0x00D4D }, /* MALAYALAM VOWEL SIGN O - MALAYALAM SIGN VIRAMA */ + { 0x00D57, 0x00D57 }, /* MALAYALAM AU LENGTH MARK */ + { 0x00D62, 0x00D63 }, /* MALAYALAM VOWEL SIGN VOCALIC L - MALAYALAM VOWEL SIGN VOCALIC LL */ + { 0x00D81, 0x00D83 }, /* SINHALA SIGN CANDRABINDU - SINHALA SIGN VISARGAYA */ + { 0x00DCA, 0x00DCA }, /* SINHALA SIGN AL-LAKUNA */ + { 0x00DCF, 0x00DD4 }, /* SINHALA VOWEL SIGN AELA-PILLA - SINHALA VOWEL SIGN KETTI PAA-PILLA */ + { 0x00DD6, 0x00DD6 }, /* SINHALA VOWEL SIGN DIGA PAA-PILLA */ + { 0x00DD8, 0x00DDF }, /* SINHALA VOWEL SIGN GAETTA-PILLA - SINHALA VOWEL SIGN GAYANUKITTA */ + { 0x00DF2, 0x00DF3 }, /* SINHALA VOWEL SIGN DIGA GAETTA-PILLA - SINHALA VOWEL SIGN DIGA GAYANUKITTA */ + { 0x00E31, 0x00E31 }, /* THAI CHARACTER MAI HAN-AKAT */ + { 0x00E34, 0x00E3A }, /* THAI CHARACTER SARA I - THAI CHARACTER PHINTHU */ + { 0x00E47, 0x00E4E }, /* THAI CHARACTER MAITAIKHU - THAI CHARACTER YAMAKKAN */ + { 0x00EB1, 0x00EB1 }, /* LAO VOWEL SIGN MAI KAN */ + { 0x00EB4, 0x00EBC }, /* LAO VOWEL SIGN I - LAO SEMIVOWEL SIGN LO */ + { 0x00EC8, 0x00ECE }, /* LAO TONE MAI EK - LAO YAMAKKAN */ + { 0x00F18, 0x00F19 }, /* TIBETAN ASTROLOGICAL SIGN -KHYUD PA - TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS */ + { 0x00F35, 0x00F35 }, /* TIBETAN MARK NGAS BZUNG NYI ZLA */ + { 0x00F37, 0x00F37 }, /* TIBETAN MARK NGAS BZUNG SGOR RTAGS */ + { 0x00F39, 0x00F39 }, /* TIBETAN MARK TSA -PHRU */ + { 0x00F3E, 0x00F3F }, /* TIBETAN SIGN YAR TSHES - TIBETAN SIGN MAR TSHES */ + { 0x00F71, 0x00F84 }, /* TIBETAN VOWEL SIGN AA - TIBETAN MARK HALANTA */ + { 0x00F86, 0x00F87 }, /* TIBETAN SIGN LCI RTAGS - TIBETAN SIGN YANG RTAGS */ + 
{ 0x00F8D, 0x00F97 }, /* TIBETAN SUBJOINED SIGN LCE TSA CAN - TIBETAN SUBJOINED LETTER JA */ + { 0x00F99, 0x00FBC }, /* TIBETAN SUBJOINED LETTER NYA - TIBETAN SUBJOINED LETTER FIXED-FORM RA */ + { 0x00FC6, 0x00FC6 }, /* TIBETAN SYMBOL PADMA GDAN */ + { 0x0102B, 0x0103E }, /* MYANMAR VOWEL SIGN TALL AA - MYANMAR CONSONANT SIGN MEDIAL HA */ + { 0x01056, 0x01059 }, /* MYANMAR VOWEL SIGN VOCALIC R - MYANMAR VOWEL SIGN VOCALIC LL */ + { 0x0105E, 0x01060 }, /* MYANMAR CONSONANT SIGN MON MEDIAL NA - MYANMAR CONSONANT SIGN MON MEDIAL LA */ + { 0x01062, 0x01064 }, /* MYANMAR VOWEL SIGN SGAW KAREN EU - MYANMAR TONE MARK SGAW KAREN KE PHO */ + { 0x01067, 0x0106D }, /* MYANMAR VOWEL SIGN WESTERN PWO KAREN EU - MYANMAR SIGN WESTERN PWO KAREN TONE-5 */ + { 0x01071, 0x01074 }, /* MYANMAR VOWEL SIGN GEBA KAREN I - MYANMAR VOWEL SIGN KAYAH EE */ + { 0x01082, 0x0108D }, /* MYANMAR CONSONANT SIGN SHAN MEDIAL WA - MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE */ + { 0x0108F, 0x0108F }, /* MYANMAR SIGN RUMAI PALAUNG TONE-5 */ + { 0x0109A, 0x0109D }, /* MYANMAR SIGN KHAMTI TONE-1 - MYANMAR VOWEL SIGN AITON AI */ + { 0x0135D, 0x0135F }, /* ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK - ETHIOPIC COMBINING GEMINATION MARK */ + { 0x01712, 0x01715 }, /* TAGALOG VOWEL SIGN I - TAGALOG SIGN PAMUDPOD */ + { 0x01732, 0x01734 }, /* HANUNOO VOWEL SIGN I - HANUNOO SIGN PAMUDPOD */ + { 0x01752, 0x01753 }, /* BUHID VOWEL SIGN I - BUHID VOWEL SIGN U */ + { 0x01772, 0x01773 }, /* TAGBANWA VOWEL SIGN I - TAGBANWA VOWEL SIGN U */ + { 0x017B4, 0x017D3 }, /* KHMER VOWEL INHERENT AQ - KHMER SIGN BATHAMASAT */ + { 0x017DD, 0x017DD }, /* KHMER SIGN ATTHACAN */ + { 0x0180B, 0x0180F }, /* MONGOLIAN FREE VARIATION SELECTOR ONE - MONGOLIAN FREE VARIATION SELECTOR FOUR */ + { 0x01885, 0x01886 }, /* MONGOLIAN LETTER ALI GALI BALUDA - MONGOLIAN LETTER ALI GALI THREE BALUDA */ + { 0x018A9, 0x018A9 }, /* MONGOLIAN LETTER ALI GALI DAGALGA */ + { 0x01920, 0x0192B }, /* LIMBU VOWEL SIGN A - LIMBU SUBJOINED LETTER WA */ 
+ { 0x01930, 0x0193B }, /* LIMBU SMALL LETTER KA - LIMBU SIGN SA-I */ + { 0x01A17, 0x01A1B }, /* BUGINESE VOWEL SIGN I - BUGINESE VOWEL SIGN AE */ + { 0x01A55, 0x01A5E }, /* TAI THAM CONSONANT SIGN MEDIAL RA - TAI THAM CONSONANT SIGN SA */ + { 0x01A60, 0x01A7C }, /* TAI THAM SIGN SAKOT - TAI THAM SIGN KHUEN-LUE KARAN */ + { 0x01A7F, 0x01A7F }, /* TAI THAM COMBINING CRYPTOGRAMMIC DOT */ + { 0x01AB0, 0x01ACE }, /* COMBINING DOUBLED CIRCUMFLEX ACCENT - COMBINING LATIN SMALL LETTER INSULAR T */ + { 0x01B00, 0x01B04 }, /* BALINESE SIGN ULU RICEM - BALINESE SIGN BISAH */ + { 0x01B34, 0x01B44 }, /* BALINESE SIGN REREKAN - BALINESE ADEG ADEG */ + { 0x01B6B, 0x01B73 }, /* BALINESE MUSICAL SYMBOL COMBINING TEGEH - BALINESE MUSICAL SYMBOL COMBINING GONG */ + { 0x01B80, 0x01B82 }, /* SUNDANESE SIGN PANYECEK - SUNDANESE SIGN PANGWISAD */ + { 0x01BA1, 0x01BAD }, /* SUNDANESE CONSONANT SIGN PAMINGKAL - SUNDANESE CONSONANT SIGN PASANGAN WA */ + { 0x01BE6, 0x01BF3 }, /* BATAK SIGN TOMPI - BATAK PANONGONAN */ + { 0x01C24, 0x01C37 }, /* LEPCHA SUBJOINED LETTER YA - LEPCHA SIGN NUKTA */ + { 0x01CD0, 0x01CD2 }, /* VEDIC TONE KARSHANA - VEDIC TONE PRENKHA */ + { 0x01CD4, 0x01CE8 }, /* VEDIC SIGN YAJURVEDIC MIDLINE SVARITA - VEDIC SIGN VISARGA ANUDATTA WITH TAIL */ + { 0x01CED, 0x01CED }, /* VEDIC SIGN TIRYAK */ + { 0x01CF4, 0x01CF4 }, /* VEDIC TONE CANDRA ABOVE */ + { 0x01CF7, 0x01CF9 }, /* VEDIC SIGN ATIKRAMA - VEDIC TONE DOUBLE RING ABOVE */ + { 0x01DC0, 0x01DFF }, /* COMBINING DOTTED GRAVE ACCENT - COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW */ + { 0x0200B, 0x0200F }, /* ZERO WIDTH SPACE - RIGHT-TO-LEFT MARK */ + { 0x0202A, 0x0202E }, /* LEFT-TO-RIGHT EMBEDDING - RIGHT-TO-LEFT OVERRIDE */ + { 0x02060, 0x02064 }, /* WORD JOINER - INVISIBLE PLUS */ + { 0x02066, 0x0206F }, /* LEFT-TO-RIGHT ISOLATE - NOMINAL DIGIT SHAPES */ + { 0x020D0, 0x020F0 }, /* COMBINING LEFT HARPOON ABOVE - COMBINING ASTERISK ABOVE */ + { 0x02640, 0x02640 }, /* FEMALE SIGN */ + { 0x02642, 0x02642 }, /* MALE 
SIGN */ + { 0x026A7, 0x026A7 }, /* MALE WITH STROKE AND MALE AND FEMALE SIGN */ + { 0x02CEF, 0x02CF1 }, /* COPTIC COMBINING NI ABOVE - COPTIC COMBINING SPIRITUS LENIS */ + { 0x02D7F, 0x02D7F }, /* TIFINAGH CONSONANT JOINER */ + { 0x02DE0, 0x02DFF }, /* COMBINING CYRILLIC LETTER BE - COMBINING CYRILLIC LETTER IOTIFIED BIG YUS */ + { 0x0302A, 0x0302F }, /* IDEOGRAPHIC LEVEL TONE MARK - HANGUL DOUBLE DOT TONE MARK */ + { 0x03099, 0x0309A }, /* COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK - COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */ + { 0x0A66F, 0x0A672 }, /* COMBINING CYRILLIC VZMET - COMBINING CYRILLIC THOUSAND MILLIONS SIGN */ + { 0x0A674, 0x0A67D }, /* COMBINING CYRILLIC LETTER UKRAINIAN IE - COMBINING CYRILLIC PAYEROK */ + { 0x0A69E, 0x0A69F }, /* COMBINING CYRILLIC LETTER EF - COMBINING CYRILLIC LETTER IOTIFIED E */ + { 0x0A6F0, 0x0A6F1 }, /* BAMUM COMBINING MARK KOQNDON - BAMUM COMBINING MARK TUKWENTIS */ + { 0x0A802, 0x0A802 }, /* SYLOTI NAGRI SIGN DVISVARA */ + { 0x0A806, 0x0A806 }, /* SYLOTI NAGRI SIGN HASANTA */ + { 0x0A80B, 0x0A80B }, /* SYLOTI NAGRI SIGN ANUSVARA */ + { 0x0A823, 0x0A827 }, /* SYLOTI NAGRI VOWEL SIGN A - SYLOTI NAGRI VOWEL SIGN OO */ + { 0x0A82C, 0x0A82C }, /* SYLOTI NAGRI SIGN ALTERNATE HASANTA */ + { 0x0A880, 0x0A881 }, /* SAURASHTRA SIGN ANUSVARA - SAURASHTRA SIGN VISARGA */ + { 0x0A8B4, 0x0A8C5 }, /* SAURASHTRA CONSONANT SIGN HAARU - SAURASHTRA SIGN CANDRABINDU */ + { 0x0A8E0, 0x0A8F1 }, /* COMBINING DEVANAGARI DIGIT ZERO - COMBINING DEVANAGARI SIGN AVAGRAHA */ + { 0x0A8FF, 0x0A8FF }, /* DEVANAGARI VOWEL SIGN AY */ + { 0x0A926, 0x0A92D }, /* KAYAH LI VOWEL UE - KAYAH LI TONE CALYA PLOPHU */ + { 0x0A947, 0x0A953 }, /* REJANG VOWEL SIGN I - REJANG VIRAMA */ + { 0x0A980, 0x0A983 }, /* JAVANESE SIGN PANYANGGA - JAVANESE SIGN WIGNYAN */ + { 0x0A9B3, 0x0A9C0 }, /* JAVANESE SIGN CECAK TELU - JAVANESE PANGKON */ + { 0x0A9E5, 0x0A9E5 }, /* MYANMAR SIGN SHAN SAW */ + { 0x0AA29, 0x0AA36 }, /* CHAM VOWEL SIGN AA - CHAM CONSONANT SIGN WA 
*/ + { 0x0AA43, 0x0AA43 }, /* CHAM CONSONANT SIGN FINAL NG */ + { 0x0AA4C, 0x0AA4D }, /* CHAM CONSONANT SIGN FINAL M - CHAM CONSONANT SIGN FINAL H */ + { 0x0AA7B, 0x0AA7D }, /* MYANMAR SIGN PAO KAREN TONE - MYANMAR SIGN TAI LAING TONE-5 */ + { 0x0AAB0, 0x0AAB0 }, /* TAI VIET MAI KANG */ + { 0x0AAB2, 0x0AAB4 }, /* TAI VIET VOWEL I - TAI VIET VOWEL U */ + { 0x0AAB7, 0x0AAB8 }, /* TAI VIET MAI KHIT - TAI VIET VOWEL IA */ + { 0x0AABE, 0x0AABF }, /* TAI VIET VOWEL AM - TAI VIET TONE MAI EK */ + { 0x0AAC1, 0x0AAC1 }, /* TAI VIET TONE MAI THO */ + { 0x0AAEB, 0x0AAEF }, /* MEETEI MAYEK VOWEL SIGN II - MEETEI MAYEK VOWEL SIGN AAU */ + { 0x0AAF5, 0x0AAF6 }, /* MEETEI MAYEK VOWEL SIGN VISARGA - MEETEI MAYEK VIRAMA */ + { 0x0ABE3, 0x0ABEA }, /* MEETEI MAYEK VOWEL SIGN ONAP - MEETEI MAYEK VOWEL SIGN NUNG */ + { 0x0ABEC, 0x0ABED }, /* MEETEI MAYEK LUM IYEK - MEETEI MAYEK APUN IYEK */ + { 0x0FB1E, 0x0FB1E }, /* HEBREW POINT JUDEO-SPANISH VARIKA */ + { 0x0FE00, 0x0FE0F }, /* VARIATION SELECTOR-1 - VARIATION SELECTOR-16 */ + { 0x0FE20, 0x0FE2F }, /* COMBINING LIGATURE LEFT HALF - COMBINING CYRILLIC TITLO RIGHT HALF */ + { 0x0FEFF, 0x0FEFF }, /* ZERO WIDTH NO-BREAK SPACE */ + { 0x0FFF9, 0x0FFFB }, /* INTERLINEAR ANNOTATION ANCHOR - INTERLINEAR ANNOTATION TERMINATOR */ + { 0x101FD, 0x101FD }, /* PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE */ + { 0x102E0, 0x102E0 }, /* COPTIC EPACT THOUSANDS MARK */ + { 0x10376, 0x1037A }, /* COMBINING OLD PERMIC LETTER AN - COMBINING OLD PERMIC LETTER SII */ + { 0x10A01, 0x10A03 }, /* KHAROSHTHI VOWEL SIGN I - KHAROSHTHI VOWEL SIGN VOCALIC R */ + { 0x10A05, 0x10A06 }, /* KHAROSHTHI VOWEL SIGN E - KHAROSHTHI VOWEL SIGN O */ + { 0x10A0C, 0x10A0F }, /* KHAROSHTHI VOWEL LENGTH MARK - KHAROSHTHI SIGN VISARGA */ + { 0x10A38, 0x10A3A }, /* KHAROSHTHI SIGN BAR ABOVE - KHAROSHTHI SIGN DOT BELOW */ + { 0x10A3F, 0x10A3F }, /* KHAROSHTHI VIRAMA */ + { 0x10AE5, 0x10AE6 }, /* MANICHAEAN ABBREVIATION MARK ABOVE - MANICHAEAN ABBREVIATION MARK BELOW */ + { 0x10D24, 
0x10D27 }, /* HANIFI ROHINGYA SIGN HARBAHAY - HANIFI ROHINGYA SIGN TASSI */ + { 0x10D69, 0x10D6D }, /* GARAY VOWEL SIGN E - GARAY CONSONANT NASALIZATION MARK */ + { 0x10EAB, 0x10EAC }, /* YEZIDI COMBINING HAMZA MARK - YEZIDI COMBINING MADDA MARK */ + { 0x10EFC, 0x10EFF }, /* ARABIC COMBINING ALEF OVERLAY - ARABIC SMALL LOW WORD MADDA */ + { 0x10F46, 0x10F50 }, /* SOGDIAN COMBINING DOT BELOW - SOGDIAN COMBINING STROKE BELOW */ + { 0x10F82, 0x10F85 }, /* OLD UYGHUR COMBINING DOT ABOVE - OLD UYGHUR COMBINING TWO DOTS BELOW */ + { 0x11000, 0x11002 }, /* BRAHMI SIGN CANDRABINDU - BRAHMI SIGN VISARGA */ + { 0x11038, 0x11046 }, /* BRAHMI VOWEL SIGN AA - BRAHMI VIRAMA */ + { 0x11070, 0x11070 }, /* BRAHMI SIGN OLD TAMIL VIRAMA */ + { 0x11073, 0x11074 }, /* BRAHMI VOWEL SIGN OLD TAMIL SHORT E - BRAHMI VOWEL SIGN OLD TAMIL SHORT O */ + { 0x1107F, 0x11082 }, /* BRAHMI NUMBER JOINER - KAITHI SIGN VISARGA */ + { 0x110B0, 0x110BA }, /* KAITHI VOWEL SIGN AA - KAITHI SIGN NUKTA */ + { 0x110BD, 0x110BD }, /* KAITHI NUMBER SIGN */ + { 0x110C2, 0x110C2 }, /* KAITHI VOWEL SIGN VOCALIC R */ + { 0x110CD, 0x110CD }, /* KAITHI NUMBER SIGN ABOVE */ + { 0x11100, 0x11102 }, /* CHAKMA SIGN CANDRABINDU - CHAKMA SIGN VISARGA */ + { 0x11127, 0x11134 }, /* CHAKMA VOWEL SIGN A - CHAKMA MAAYYAA */ + { 0x11145, 0x11146 }, /* CHAKMA VOWEL SIGN AA - CHAKMA VOWEL SIGN EI */ + { 0x11173, 0x11173 }, /* MAHAJANI SIGN NUKTA */ + { 0x11180, 0x11182 }, /* SHARADA SIGN CANDRABINDU - SHARADA SIGN VISARGA */ + { 0x111B3, 0x111C0 }, /* SHARADA VOWEL SIGN AA - SHARADA SIGN VIRAMA */ + { 0x111C9, 0x111CC }, /* SHARADA SANDHI MARK - SHARADA EXTRA SHORT VOWEL MARK */ + { 0x111CE, 0x111CF }, /* SHARADA VOWEL SIGN PRISHTHAMATRA E - SHARADA SIGN INVERTED CANDRABINDU */ + { 0x1122C, 0x11237 }, /* KHOJKI VOWEL SIGN AA - KHOJKI SIGN SHADDA */ + { 0x1123E, 0x1123E }, /* KHOJKI SIGN SUKUN */ + { 0x11241, 0x11241 }, /* KHOJKI VOWEL SIGN VOCALIC R */ + { 0x112DF, 0x112EA }, /* KHUDAWADI SIGN ANUSVARA - KHUDAWADI SIGN VIRAMA */ 
+ { 0x11300, 0x11303 }, /* GRANTHA SIGN COMBINING ANUSVARA ABOVE - GRANTHA SIGN VISARGA */ + { 0x1133B, 0x1133C }, /* COMBINING BINDU BELOW - GRANTHA SIGN NUKTA */ + { 0x1133E, 0x11344 }, /* GRANTHA VOWEL SIGN AA - GRANTHA VOWEL SIGN VOCALIC RR */ + { 0x11347, 0x11348 }, /* GRANTHA VOWEL SIGN EE - GRANTHA VOWEL SIGN AI */ + { 0x1134B, 0x1134D }, /* GRANTHA VOWEL SIGN OO - GRANTHA SIGN VIRAMA */ + { 0x11357, 0x11357 }, /* GRANTHA AU LENGTH MARK */ + { 0x11362, 0x11363 }, /* GRANTHA VOWEL SIGN VOCALIC L - GRANTHA VOWEL SIGN VOCALIC LL */ + { 0x11366, 0x1136C }, /* COMBINING GRANTHA DIGIT ZERO - COMBINING GRANTHA DIGIT SIX */ + { 0x11370, 0x11374 }, /* COMBINING GRANTHA LETTER A - COMBINING GRANTHA LETTER PA */ + { 0x113B8, 0x113C0 }, /* TULU-TIGALARI VOWEL SIGN AA - TULU-TIGALARI VOWEL SIGN VOCALIC LL */ + { 0x113C2, 0x113C2 }, /* TULU-TIGALARI VOWEL SIGN EE */ + { 0x113C5, 0x113C5 }, /* TULU-TIGALARI VOWEL SIGN AI */ + { 0x113C7, 0x113CA }, /* TULU-TIGALARI VOWEL SIGN OO - TULU-TIGALARI SIGN CANDRA ANUNASIKA */ + { 0x113CC, 0x113D0 }, /* TULU-TIGALARI SIGN ANUSVARA - TULU-TIGALARI CONJOINER */ + { 0x113D2, 0x113D2 }, /* TULU-TIGALARI GEMINATION MARK */ + { 0x113E1, 0x113E2 }, /* TULU-TIGALARI VEDIC TONE SVARITA - TULU-TIGALARI VEDIC TONE ANUDATTA */ + { 0x11435, 0x11446 }, /* NEWA VOWEL SIGN AA - NEWA SIGN NUKTA */ + { 0x1145E, 0x1145E }, /* NEWA SANDHI MARK */ + { 0x114B0, 0x114C3 }, /* TIRHUTA VOWEL SIGN AA - TIRHUTA SIGN NUKTA */ + { 0x115AF, 0x115B5 }, /* SIDDHAM VOWEL SIGN AA - SIDDHAM VOWEL SIGN VOCALIC RR */ + { 0x115B8, 0x115C0 }, /* SIDDHAM VOWEL SIGN E - SIDDHAM SIGN NUKTA */ + { 0x115DC, 0x115DD }, /* SIDDHAM VOWEL SIGN ALTERNATE U - SIDDHAM VOWEL SIGN ALTERNATE UU */ + { 0x11630, 0x11640 }, /* MODI VOWEL SIGN AA - MODI SIGN ARDHACANDRA */ + { 0x116AB, 0x116B7 }, /* TAKRI SIGN ANUSVARA - TAKRI SIGN NUKTA */ + { 0x1171D, 0x1172B }, /* AHOM CONSONANT SIGN MEDIAL LA - AHOM SIGN KILLER */ + { 0x1182C, 0x1183A }, /* DOGRA VOWEL SIGN AA - DOGRA SIGN NUKTA */ + 
{ 0x11930, 0x11935 }, /* DIVES AKURU VOWEL SIGN AA - DIVES AKURU VOWEL SIGN E */ + { 0x11937, 0x11938 }, /* DIVES AKURU VOWEL SIGN AI - DIVES AKURU VOWEL SIGN O */ + { 0x1193B, 0x1193E }, /* DIVES AKURU SIGN ANUSVARA - DIVES AKURU VIRAMA */ + { 0x11940, 0x11940 }, /* DIVES AKURU MEDIAL YA */ + { 0x11942, 0x11943 }, /* DIVES AKURU MEDIAL RA - DIVES AKURU SIGN NUKTA */ + { 0x119D1, 0x119D7 }, /* NANDINAGARI VOWEL SIGN AA - NANDINAGARI VOWEL SIGN VOCALIC RR */ + { 0x119DA, 0x119E0 }, /* NANDINAGARI VOWEL SIGN E - NANDINAGARI SIGN VIRAMA */ + { 0x119E4, 0x119E4 }, /* NANDINAGARI VOWEL SIGN PRISHTHAMATRA E */ + { 0x11A01, 0x11A0A }, /* ZANABAZAR SQUARE VOWEL SIGN I - ZANABAZAR SQUARE VOWEL LENGTH MARK */ + { 0x11A33, 0x11A39 }, /* ZANABAZAR SQUARE FINAL CONSONANT MARK - ZANABAZAR SQUARE SIGN VISARGA */ + { 0x11A3B, 0x11A3E }, /* ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA - ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA */ + { 0x11A47, 0x11A47 }, /* ZANABAZAR SQUARE SUBJOINER */ + { 0x11A51, 0x11A5B }, /* SOYOMBO VOWEL SIGN I - SOYOMBO VOWEL LENGTH MARK */ + { 0x11A8A, 0x11A99 }, /* SOYOMBO FINAL CONSONANT SIGN G - SOYOMBO SUBJOINER */ + { 0x11C2F, 0x11C36 }, /* BHAIKSUKI VOWEL SIGN AA - BHAIKSUKI VOWEL SIGN VOCALIC L */ + { 0x11C38, 0x11C3F }, /* BHAIKSUKI VOWEL SIGN E - BHAIKSUKI SIGN VIRAMA */ + { 0x11C92, 0x11CA7 }, /* MARCHEN SUBJOINED LETTER KA - MARCHEN SUBJOINED LETTER ZA */ + { 0x11CA9, 0x11CB6 }, /* MARCHEN SUBJOINED LETTER YA - MARCHEN SIGN CANDRABINDU */ + { 0x11D31, 0x11D36 }, /* MASARAM GONDI VOWEL SIGN AA - MASARAM GONDI VOWEL SIGN VOCALIC R */ + { 0x11D3A, 0x11D3A }, /* MASARAM GONDI VOWEL SIGN E */ + { 0x11D3C, 0x11D3D }, /* MASARAM GONDI VOWEL SIGN AI - MASARAM GONDI VOWEL SIGN O */ + { 0x11D3F, 0x11D45 }, /* MASARAM GONDI VOWEL SIGN AU - MASARAM GONDI VIRAMA */ + { 0x11D47, 0x11D47 }, /* MASARAM GONDI RA-KARA */ + { 0x11D8A, 0x11D8E }, /* GUNJALA GONDI VOWEL SIGN AA - GUNJALA GONDI VOWEL SIGN UU */ + { 0x11D90, 0x11D91 }, /* GUNJALA GONDI VOWEL SIGN EE - 
GUNJALA GONDI VOWEL SIGN AI */ + { 0x11D93, 0x11D97 }, /* GUNJALA GONDI VOWEL SIGN OO - GUNJALA GONDI VIRAMA */ + { 0x11EF3, 0x11EF6 }, /* MAKASAR VOWEL SIGN I - MAKASAR VOWEL SIGN O */ + { 0x11F00, 0x11F01 }, /* KAWI SIGN CANDRABINDU - KAWI SIGN ANUSVARA */ + { 0x11F03, 0x11F03 }, /* KAWI SIGN VISARGA */ + { 0x11F34, 0x11F3A }, /* KAWI VOWEL SIGN AA - KAWI VOWEL SIGN VOCALIC R */ + { 0x11F3E, 0x11F42 }, /* KAWI VOWEL SIGN E - KAWI CONJOINER */ + { 0x11F5A, 0x11F5A }, /* KAWI SIGN NUKTA */ + { 0x13430, 0x13440 }, /* EGYPTIAN HIEROGLYPH VERTICAL JOINER - EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY */ + { 0x13447, 0x13455 }, /* EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START - EGYPTIAN HIEROGLYPH MODIFIER DAMAGED */ + { 0x1611E, 0x1612F }, /* GURUNG KHEMA VOWEL SIGN AA - GURUNG KHEMA SIGN THOLHOMA */ + { 0x16AF0, 0x16AF4 }, /* BASSA VAH COMBINING HIGH TONE - BASSA VAH COMBINING HIGH-LOW TONE */ + { 0x16B30, 0x16B36 }, /* PAHAWH HMONG MARK CIM TUB - PAHAWH HMONG MARK CIM TAUM */ + { 0x16F4F, 0x16F4F }, /* MIAO SIGN CONSONANT MODIFIER BAR */ + { 0x16F51, 0x16F87 }, /* MIAO SIGN ASPIRATION - MIAO VOWEL SIGN UI */ + { 0x16F8F, 0x16F92 }, /* MIAO TONE RIGHT - MIAO TONE BELOW */ + { 0x16FE4, 0x16FE4 }, /* KHITAN SMALL SCRIPT FILLER */ + { 0x16FF0, 0x16FF1 }, /* VIETNAMESE ALTERNATE READING MARK CA - VIETNAMESE ALTERNATE READING MARK NHAY */ + { 0x1BC9D, 0x1BC9E }, /* DUPLOYAN THICK LETTER SELECTOR - DUPLOYAN DOUBLE MARK */ + { 0x1BCA0, 0x1BCA3 }, /* SHORTHAND FORMAT LETTER OVERLAP - SHORTHAND FORMAT UP STEP */ + { 0x1CF00, 0x1CF2D }, /* ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT - ZNAMENNY COMBINING MARK KRYZH ON LEFT */ + { 0x1CF30, 0x1CF46 }, /* ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO - ZNAMENNY PRIZNAK MODIFIER ROG */ + { 0x1D165, 0x1D169 }, /* MUSICAL SYMBOL COMBINING STEM - MUSICAL SYMBOL COMBINING TREMOLO-3 */ + { 0x1D16D, 0x1D182 }, /* MUSICAL SYMBOL COMBINING AUGMENTATION DOT - MUSICAL SYMBOL COMBINING LOURE */ + { 0x1D185, 0x1D18B }, /* MUSICAL 
SYMBOL COMBINING DOIT - MUSICAL SYMBOL COMBINING TRIPLE TONGUE */ + { 0x1D1AA, 0x1D1AD }, /* MUSICAL SYMBOL COMBINING DOWN BOW - MUSICAL SYMBOL COMBINING SNAP PIZZICATO */ + { 0x1D242, 0x1D244 }, /* COMBINING GREEK MUSICAL TRISEME - COMBINING GREEK MUSICAL PENTASEME */ + { 0x1DA00, 0x1DA36 }, /* SIGNWRITING HEAD RIM - SIGNWRITING AIR SUCKING IN */ + { 0x1DA3B, 0x1DA6C }, /* SIGNWRITING MOUTH CLOSED NEUTRAL - SIGNWRITING EXCITEMENT */ + { 0x1DA75, 0x1DA75 }, /* SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS */ + { 0x1DA84, 0x1DA84 }, /* SIGNWRITING LOCATION HEAD NECK */ + { 0x1DA9B, 0x1DA9F }, /* SIGNWRITING FILL MODIFIER-2 - SIGNWRITING FILL MODIFIER-6 */ + { 0x1DAA1, 0x1DAAF }, /* SIGNWRITING ROTATION MODIFIER-2 - SIGNWRITING ROTATION MODIFIER-16 */ + { 0x1E000, 0x1E006 }, /* COMBINING GLAGOLITIC LETTER AZU - COMBINING GLAGOLITIC LETTER ZHIVETE */ + { 0x1E008, 0x1E018 }, /* COMBINING GLAGOLITIC LETTER ZEMLJA - COMBINING GLAGOLITIC LETTER HERU */ + { 0x1E01B, 0x1E021 }, /* COMBINING GLAGOLITIC LETTER SHTA - COMBINING GLAGOLITIC LETTER YATI */ + { 0x1E023, 0x1E024 }, /* COMBINING GLAGOLITIC LETTER YU - COMBINING GLAGOLITIC LETTER SMALL YUS */ + { 0x1E026, 0x1E02A }, /* COMBINING GLAGOLITIC LETTER YO - COMBINING GLAGOLITIC LETTER FITA */ + { 0x1E08F, 0x1E08F }, /* COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I */ + { 0x1E130, 0x1E136 }, /* NYIAKENG PUACHUE HMONG TONE-B - NYIAKENG PUACHUE HMONG TONE-D */ + { 0x1E2AE, 0x1E2AE }, /* TOTO SIGN RISING TONE */ + { 0x1E2EC, 0x1E2EF }, /* WANCHO TONE TUP - WANCHO TONE KOINI */ + { 0x1E4EC, 0x1E4EF }, /* NAG MUNDARI SIGN MUHOR - NAG MUNDARI SIGN SUTUH */ + { 0x1E5EE, 0x1E5EF }, /* OL ONAL SIGN MU - OL ONAL SIGN IKIR */ + { 0x1E8D0, 0x1E8D6 }, /* MENDE KIKAKUI COMBINING NUMBER TEENS - MENDE KIKAKUI COMBINING NUMBER MILLIONS */ + { 0x1E944, 0x1E94A }, /* ADLAM ALIF LENGTHENER - ADLAM NUKTA */ + { 0x1F3FB, 0x1F3FF }, /* EMOJI MODIFIER FITZPATRICK TYPE-1-2 - EMOJI MODIFIER FITZPATRICK TYPE-6 */ + { 0x1F9B0, 0x1F9B3 }, 
/* EMOJI COMPONENT RED HAIR - EMOJI COMPONENT WHITE HAIR */ + { 0xE0001, 0xE0001 }, /* LANGUAGE TAG */ + { 0xE0020, 0xE007F }, /* TAG SPACE - CANCEL TAG */ + { 0xE0100, 0xE01EF }, /* VARIATION SELECTOR-17 - VARIATION SELECTOR-256 */ +}; + +/* Double-width character ranges */ +static const struct ucs_interval ucs_double_width_ranges[] = { + { 0x01100, 0x0115F }, /* HANGUL CHOSEONG KIYEOK - HANGUL CHOSEONG FILLER */ + { 0x0231A, 0x0231B }, /* WATCH - HOURGLASS */ + { 0x02329, 0x0232A }, /* LEFT-POINTING ANGLE BRACKET - RIGHT-POINTING ANGLE BRACKET */ + { 0x023E9, 0x023EC }, /* BLACK RIGHT-POINTING DOUBLE TRIANGLE - BLACK DOWN-POINTING DOUBLE TRIANGLE */ + { 0x023F0, 0x023F0 }, /* ALARM CLOCK */ + { 0x023F3, 0x023F3 }, /* HOURGLASS WITH FLOWING SAND */ + { 0x025FD, 0x025FE }, /* WHITE MEDIUM SMALL SQUARE - BLACK MEDIUM SMALL SQUARE */ + { 0x02614, 0x02615 }, /* UMBRELLA WITH RAIN DROPS - HOT BEVERAGE */ + { 0x02630, 0x02637 }, /* TRIGRAM FOR HEAVEN - TRIGRAM FOR EARTH */ + { 0x02648, 0x02653 }, /* ARIES - PISCES */ + { 0x0267F, 0x0267F }, /* WHEELCHAIR SYMBOL */ + { 0x0268A, 0x0268F }, /* MONOGRAM FOR YANG - DIGRAM FOR GREATER YIN */ + { 0x02693, 0x02693 }, /* ANCHOR */ + { 0x026A1, 0x026A1 }, /* HIGH VOLTAGE SIGN */ + { 0x026AA, 0x026AB }, /* MEDIUM WHITE CIRCLE - MEDIUM BLACK CIRCLE */ + { 0x026BD, 0x026BE }, /* SOCCER BALL - BASEBALL */ + { 0x026C4, 0x026C5 }, /* SNOWMAN WITHOUT SNOW - SUN BEHIND CLOUD */ + { 0x026CE, 0x026CE }, /* OPHIUCHUS */ + { 0x026D4, 0x026D4 }, /* NO ENTRY */ + { 0x026EA, 0x026EA }, /* CHURCH */ + { 0x026F2, 0x026F3 }, /* FOUNTAIN - FLAG IN HOLE */ + { 0x026F5, 0x026F5 }, /* SAILBOAT */ + { 0x026FA, 0x026FA }, /* TENT */ + { 0x026FD, 0x026FD }, /* FUEL PUMP */ + { 0x02705, 0x02705 }, /* WHITE HEAVY CHECK MARK */ + { 0x0270A, 0x0270B }, /* RAISED FIST - RAISED HAND */ + { 0x02728, 0x02728 }, /* SPARKLES */ + { 0x0274C, 0x0274C }, /* CROSS MARK */ + { 0x0274E, 0x0274E }, /* NEGATIVE SQUARED CROSS MARK */ + { 0x02753, 0x02755 }, /* BLACK 
QUESTION MARK ORNAMENT - WHITE EXCLAMATION MARK ORNAMENT */ + { 0x02757, 0x02757 }, /* HEAVY EXCLAMATION MARK SYMBOL */ + { 0x02795, 0x02797 }, /* HEAVY PLUS SIGN - HEAVY DIVISION SIGN */ + { 0x027B0, 0x027B0 }, /* CURLY LOOP */ + { 0x027BF, 0x027BF }, /* DOUBLE CURLY LOOP */ + { 0x02B1B, 0x02B1C }, /* BLACK LARGE SQUARE - WHITE LARGE SQUARE */ + { 0x02B50, 0x02B50 }, /* WHITE MEDIUM STAR */ + { 0x02B55, 0x02B55 }, /* HEAVY LARGE CIRCLE */ + { 0x02E80, 0x02E99 }, /* CJK RADICAL REPEAT - CJK RADICAL RAP */ + { 0x02E9B, 0x02EF3 }, /* CJK RADICAL CHOKE - CJK RADICAL C-SIMPLIFIED TURTLE */ + { 0x02F00, 0x02FD5 }, /* KANGXI RADICAL ONE - KANGXI RADICAL FLUTE */ + { 0x02FF0, 0x03029 }, /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT - HANGZHOU NUMERAL NINE */ + { 0x03030, 0x0303E }, /* WAVY DASH - IDEOGRAPHIC VARIATION INDICATOR */ + { 0x03041, 0x03096 }, /* HIRAGANA LETTER SMALL A - HIRAGANA LETTER SMALL KE */ + { 0x0309B, 0x030FF }, /* KATAKANA-HIRAGANA VOICED SOUND MARK - KATAKANA DIGRAPH KOTO */ + { 0x03105, 0x0312F }, /* BOPOMOFO LETTER B - BOPOMOFO LETTER NN */ + { 0x03131, 0x0318E }, /* HANGUL LETTER KIYEOK - HANGUL LETTER ARAEAE */ + { 0x03190, 0x031E5 }, /* IDEOGRAPHIC ANNOTATION LINKING MARK - CJK STROKE SZP */ + { 0x031EF, 0x0321E }, /* IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION - PARENTHESIZED KOREAN CHARACTER O HU */ + { 0x03220, 0x03247 }, /* PARENTHESIZED IDEOGRAPH ONE - CIRCLED IDEOGRAPH KOTO */ + { 0x03250, 0x0A48C }, /* PARTNERSHIP SIGN - YI SYLLABLE YYR */ + { 0x0A490, 0x0A4C6 }, /* YI RADICAL QOT - YI RADICAL KE */ + { 0x0A960, 0x0A97C }, /* HANGUL CHOSEONG TIKEUT-MIEUM - HANGUL CHOSEONG SSANGYEORINHIEUH */ + { 0x0AC00, 0x0D7A3 }, /* HANGUL SYLLABLE GA - HANGUL SYLLABLE HIH */ + { 0x0F900, 0x0FAFF }, /* U+F900 - U+FAFF */ + { 0x0FE10, 0x0FE19 }, /* PRESENTATION FORM FOR VERTICAL COMMA - PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS */ + { 0x0FE30, 0x0FE52 }, /* PRESENTATION FORM FOR VERTICAL TWO DOT LEADER - SMALL FULL STOP */ + { 0x0FE54, 
0x0FE66 }, /* SMALL SEMICOLON - SMALL EQUALS SIGN */ + { 0x0FE68, 0x0FE6B }, /* SMALL REVERSE SOLIDUS - SMALL COMMERCIAL AT */ + { 0x0FF01, 0x0FF60 }, /* FULLWIDTH EXCLAMATION MARK - FULLWIDTH RIGHT WHITE PARENTHESIS */ + { 0x0FFE0, 0x0FFE6 }, /* FULLWIDTH CENT SIGN - FULLWIDTH WON SIGN */ + { 0x16FE0, 0x16FE3 }, /* TANGUT ITERATION MARK - OLD CHINESE ITERATION MARK */ + { 0x17000, 0x187F7 }, /* U+17000 - U+187F7 */ + { 0x18800, 0x18CD5 }, /* TANGUT COMPONENT-001 - KHITAN SMALL SCRIPT CHARACTER-18CD5 */ + { 0x18CFF, 0x18D08 }, /* U+18CFF - U+18D08 */ + { 0x1AFF0, 0x1AFF3 }, /* KATAKANA LETTER MINNAN TONE-2 - KATAKANA LETTER MINNAN TONE-5 */ + { 0x1AFF5, 0x1AFFB }, /* KATAKANA LETTER MINNAN TONE-7 - KATAKANA LETTER MINNAN NASALIZED TONE-5 */ + { 0x1AFFD, 0x1AFFE }, /* KATAKANA LETTER MINNAN NASALIZED TONE-7 - KATAKANA LETTER MINNAN NASALIZED TONE-8 */ + { 0x1B000, 0x1B122 }, /* KATAKANA LETTER ARCHAIC E - KATAKANA LETTER ARCHAIC WU */ + { 0x1B132, 0x1B132 }, /* HIRAGANA LETTER SMALL KO */ + { 0x1B150, 0x1B152 }, /* HIRAGANA LETTER SMALL WI - HIRAGANA LETTER SMALL WO */ + { 0x1B155, 0x1B155 }, /* KATAKANA LETTER SMALL KO */ + { 0x1B164, 0x1B167 }, /* KATAKANA LETTER SMALL WI - KATAKANA LETTER SMALL N */ + { 0x1B170, 0x1B2FB }, /* NUSHU CHARACTER-1B170 - NUSHU CHARACTER-1B2FB */ + { 0x1D300, 0x1D356 }, /* MONOGRAM FOR EARTH - TETRAGRAM FOR FOSTERING */ + { 0x1D360, 0x1D376 }, /* COUNTING ROD UNIT DIGIT ONE - IDEOGRAPHIC TALLY MARK FIVE */ + { 0x1F000, 0x1F02F }, /* U+1F000 - U+1F02F */ + { 0x1F0A0, 0x1F0FF }, /* U+1F0A0 - U+1F0FF */ + { 0x1F18E, 0x1F18E }, /* NEGATIVE SQUARED AB */ + { 0x1F191, 0x1F19A }, /* SQUARED CL - SQUARED VS */ + { 0x1F200, 0x1F202 }, /* SQUARE HIRAGANA HOKA - SQUARED KATAKANA SA */ + { 0x1F210, 0x1F23B }, /* SQUARED CJK UNIFIED IDEOGRAPH-624B - SQUARED CJK UNIFIED IDEOGRAPH-914D */ + { 0x1F240, 0x1F248 }, /* TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C - TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 */ + { 0x1F250, 0x1F251 }, /* 
CIRCLED IDEOGRAPH ADVANTAGE - CIRCLED IDEOGRAPH ACCEPT */ + { 0x1F260, 0x1F265 }, /* ROUNDED SYMBOL FOR FU - ROUNDED SYMBOL FOR CAI */ + { 0x1F300, 0x1F3FA }, /* CYCLONE - AMPHORA */ + { 0x1F400, 0x1F64F }, /* RAT - PERSON WITH FOLDED HANDS */ + { 0x1F680, 0x1F9AF }, /* ROCKET - PROBING CANE */ + { 0x1F9B4, 0x1FAFF }, /* U+1F9B4 - U+1FAFF */ + { 0x20000, 0x2FFFD }, /* U+20000 - U+2FFFD */ + { 0x30000, 0x3FFFD }, /* U+30000 - U+3FFFD */ +}; From 54cda9201c673fd1c5de189d961670999232e49d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:08 -0400 Subject: [PATCH 067/105] vt: use new tables in ucs.c This removes the table from ucs.c and substitutes the generated tables from ucs_width_table.h providing comprehensive ranges for double-width and zero-width Unicode code points. Also implements ucs_is_zero_width() to query the new zero-width table. Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-7-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/ucs.c | 44 +++++++++++++++++++++----------------- include/linux/consolemap.h | 6 +----- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/tty/vt/ucs.c b/drivers/tty/vt/ucs.c index dc4a6e794531..5f9f25bd201b 100644 --- a/drivers/tty/vt/ucs.c +++ b/drivers/tty/vt/ucs.c @@ -8,22 +8,12 @@ #include #include -/* ucs_is_double_width() is based on the wcwidth() implementation by - * Markus Kuhn -- 2007-05-26 (Unicode 5.0) - * Latest version: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c - */ - struct ucs_interval { u32 first; u32 last; }; -static const struct ucs_interval ucs_double_width_ranges[] = { - { 0x1100, 0x115F }, { 0x2329, 0x232A }, { 0x2E80, 0x303E }, - { 0x3040, 0xA4CF }, { 0xAC00, 0xD7A3 }, { 0xF900, 0xFAFF }, - { 0xFE10, 0xFE19 }, { 0xFE30, 0xFE6F }, { 0xFF00, 0xFF60 }, - { 0xFFE0, 0xFFE6 }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD } -}; +#include "ucs_width_table.h" static int interval_cmp(const void 
*key, const void *element) { @@ -37,6 +27,27 @@ static int interval_cmp(const void *key, const void *element) return 0; } +static bool cp_in_range(u32 cp, const struct ucs_interval *ranges, size_t size) +{ + if (cp < ranges[0].first || cp > ranges[size - 1].last) + return false; /* cp lies outside [first, last] of the whole table */ + + return __inline_bsearch(&cp, ranges, size, sizeof(*ranges), + interval_cmp) != NULL; +} + +/** + * ucs_is_zero_width() - Determine if a Unicode code point is zero-width. + * @cp: Unicode code point (UCS-4) + * + * Return: true if the character is zero-width, false otherwise + */ +bool ucs_is_zero_width(u32 cp) +{ + return cp_in_range(cp, ucs_zero_width_ranges, + ARRAY_SIZE(ucs_zero_width_ranges)); +} + /** * ucs_is_double_width() - Determine if a Unicode code point is double-width. * @cp: Unicode code point (UCS-4) @@ -45,13 +56,6 @@ static int interval_cmp(const void *key, const void *element) */ bool ucs_is_double_width(u32 cp) { - size_t size = ARRAY_SIZE(ucs_double_width_ranges); - - if (!in_range(cp, ucs_double_width_ranges[0].first, - ucs_double_width_ranges[size - 1].last)) - return false; - - return __inline_bsearch(&cp, ucs_double_width_ranges, size, - sizeof(*ucs_double_width_ranges), - interval_cmp) != NULL; + return cp_in_range(cp, ucs_double_width_ranges, + ARRAY_SIZE(ucs_double_width_ranges)); } diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 7d778752dcef..b3a911866662 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -29,11 +29,7 @@ u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); -static inline bool ucs_is_zero_width(uint32_t cp) -{ - /* coming soon */ - return false; -} +bool ucs_is_zero_width(uint32_t cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) From 03c6de017b2a9eaa1062566c6e70cf0ed72e24d9 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:09
-0400 Subject: [PATCH 068/105] vt: introduce gen_ucs_recompose_table.py to create ucs_recompose_table.h The generated table maps base character + combining mark pairs to their precomposed equivalents using Python's unicodedata module. The default script behavior is to create a table with most commonly used Latin, Greek, and Cyrillic recomposition pairs only. It is much smaller than the table with all possible recomposition pairs (71 entries vs 1000 entries). But if one needs/wants the full table then simply running the script with the --full argument will generate it. Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-8-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/gen_ucs_recompose_table.py | 255 ++++++++++++++++++++++ 1 file changed, 255 insertions(+) create mode 100755 drivers/tty/vt/gen_ucs_recompose_table.py diff --git a/drivers/tty/vt/gen_ucs_recompose_table.py b/drivers/tty/vt/gen_ucs_recompose_table.py new file mode 100755 index 000000000000..d30f8f5242d2 --- /dev/null +++ b/drivers/tty/vt/gen_ucs_recompose_table.py @@ -0,0 +1,255 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Leverage Python's unicodedata module to generate ucs_recompose_table.h +# +# The generated table maps base character + combining mark pairs to their +# precomposed equivalents. 
+# +# Usage: +# python3 gen_ucs_recompose_table.py # Generate with common recomposition pairs +# python3 gen_ucs_recompose_table.py --full # Generate with all recomposition pairs + +import unicodedata +import sys +import argparse +import textwrap + +# This script's file name +from pathlib import Path +this_file = Path(__file__).name + +# Output file name +out_file = "ucs_recompose_table.h" + +common_recompose_description = "most commonly used Latin, Greek, and Cyrillic recomposition pairs only" +COMMON_RECOMPOSITION_PAIRS = [ + # Latin letters with accents - uppercase + (0x0041, 0x0300, 0x00C0), # A + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER A WITH GRAVE + (0x0041, 0x0301, 0x00C1), # A + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER A WITH ACUTE + (0x0041, 0x0302, 0x00C2), # A + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER A WITH CIRCUMFLEX + (0x0041, 0x0303, 0x00C3), # A + COMBINING TILDE = LATIN CAPITAL LETTER A WITH TILDE + (0x0041, 0x0308, 0x00C4), # A + COMBINING DIAERESIS = LATIN CAPITAL LETTER A WITH DIAERESIS + (0x0041, 0x030A, 0x00C5), # A + COMBINING RING ABOVE = LATIN CAPITAL LETTER A WITH RING ABOVE + (0x0043, 0x0327, 0x00C7), # C + COMBINING CEDILLA = LATIN CAPITAL LETTER C WITH CEDILLA + (0x0045, 0x0300, 0x00C8), # E + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER E WITH GRAVE + (0x0045, 0x0301, 0x00C9), # E + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER E WITH ACUTE + (0x0045, 0x0302, 0x00CA), # E + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER E WITH CIRCUMFLEX + (0x0045, 0x0308, 0x00CB), # E + COMBINING DIAERESIS = LATIN CAPITAL LETTER E WITH DIAERESIS + (0x0049, 0x0300, 0x00CC), # I + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER I WITH GRAVE + (0x0049, 0x0301, 0x00CD), # I + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER I WITH ACUTE + (0x0049, 0x0302, 0x00CE), # I + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER I WITH CIRCUMFLEX + (0x0049, 0x0308, 0x00CF), # I + COMBINING DIAERESIS = LATIN CAPITAL LETTER I WITH DIAERESIS + 
(0x004E, 0x0303, 0x00D1), # N + COMBINING TILDE = LATIN CAPITAL LETTER N WITH TILDE + (0x004F, 0x0300, 0x00D2), # O + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER O WITH GRAVE + (0x004F, 0x0301, 0x00D3), # O + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER O WITH ACUTE + (0x004F, 0x0302, 0x00D4), # O + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER O WITH CIRCUMFLEX + (0x004F, 0x0303, 0x00D5), # O + COMBINING TILDE = LATIN CAPITAL LETTER O WITH TILDE + (0x004F, 0x0308, 0x00D6), # O + COMBINING DIAERESIS = LATIN CAPITAL LETTER O WITH DIAERESIS + (0x0055, 0x0300, 0x00D9), # U + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER U WITH GRAVE + (0x0055, 0x0301, 0x00DA), # U + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER U WITH ACUTE + (0x0055, 0x0302, 0x00DB), # U + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER U WITH CIRCUMFLEX + (0x0055, 0x0308, 0x00DC), # U + COMBINING DIAERESIS = LATIN CAPITAL LETTER U WITH DIAERESIS + (0x0059, 0x0301, 0x00DD), # Y + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER Y WITH ACUTE + + # Latin letters with accents - lowercase + (0x0061, 0x0300, 0x00E0), # a + COMBINING GRAVE ACCENT = LATIN SMALL LETTER A WITH GRAVE + (0x0061, 0x0301, 0x00E1), # a + COMBINING ACUTE ACCENT = LATIN SMALL LETTER A WITH ACUTE + (0x0061, 0x0302, 0x00E2), # a + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER A WITH CIRCUMFLEX + (0x0061, 0x0303, 0x00E3), # a + COMBINING TILDE = LATIN SMALL LETTER A WITH TILDE + (0x0061, 0x0308, 0x00E4), # a + COMBINING DIAERESIS = LATIN SMALL LETTER A WITH DIAERESIS + (0x0061, 0x030A, 0x00E5), # a + COMBINING RING ABOVE = LATIN SMALL LETTER A WITH RING ABOVE + (0x0063, 0x0327, 0x00E7), # c + COMBINING CEDILLA = LATIN SMALL LETTER C WITH CEDILLA + (0x0065, 0x0300, 0x00E8), # e + COMBINING GRAVE ACCENT = LATIN SMALL LETTER E WITH GRAVE + (0x0065, 0x0301, 0x00E9), # e + COMBINING ACUTE ACCENT = LATIN SMALL LETTER E WITH ACUTE + (0x0065, 0x0302, 0x00EA), # e + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER E WITH CIRCUMFLEX + 
(0x0065, 0x0308, 0x00EB), # e + COMBINING DIAERESIS = LATIN SMALL LETTER E WITH DIAERESIS + (0x0069, 0x0300, 0x00EC), # i + COMBINING GRAVE ACCENT = LATIN SMALL LETTER I WITH GRAVE + (0x0069, 0x0301, 0x00ED), # i + COMBINING ACUTE ACCENT = LATIN SMALL LETTER I WITH ACUTE + (0x0069, 0x0302, 0x00EE), # i + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER I WITH CIRCUMFLEX + (0x0069, 0x0308, 0x00EF), # i + COMBINING DIAERESIS = LATIN SMALL LETTER I WITH DIAERESIS + (0x006E, 0x0303, 0x00F1), # n + COMBINING TILDE = LATIN SMALL LETTER N WITH TILDE + (0x006F, 0x0300, 0x00F2), # o + COMBINING GRAVE ACCENT = LATIN SMALL LETTER O WITH GRAVE + (0x006F, 0x0301, 0x00F3), # o + COMBINING ACUTE ACCENT = LATIN SMALL LETTER O WITH ACUTE + (0x006F, 0x0302, 0x00F4), # o + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER O WITH CIRCUMFLEX + (0x006F, 0x0303, 0x00F5), # o + COMBINING TILDE = LATIN SMALL LETTER O WITH TILDE + (0x006F, 0x0308, 0x00F6), # o + COMBINING DIAERESIS = LATIN SMALL LETTER O WITH DIAERESIS + (0x0075, 0x0300, 0x00F9), # u + COMBINING GRAVE ACCENT = LATIN SMALL LETTER U WITH GRAVE + (0x0075, 0x0301, 0x00FA), # u + COMBINING ACUTE ACCENT = LATIN SMALL LETTER U WITH ACUTE + (0x0075, 0x0302, 0x00FB), # u + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER U WITH CIRCUMFLEX + (0x0075, 0x0308, 0x00FC), # u + COMBINING DIAERESIS = LATIN SMALL LETTER U WITH DIAERESIS + (0x0079, 0x0301, 0x00FD), # y + COMBINING ACUTE ACCENT = LATIN SMALL LETTER Y WITH ACUTE + (0x0079, 0x0308, 0x00FF), # y + COMBINING DIAERESIS = LATIN SMALL LETTER Y WITH DIAERESIS + + # Common Greek characters + (0x0391, 0x0301, 0x0386), # Α + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER ALPHA WITH TONOS + (0x0395, 0x0301, 0x0388), # Ε + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER EPSILON WITH TONOS + (0x0397, 0x0301, 0x0389), # Η + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER ETA WITH TONOS + (0x0399, 0x0301, 0x038A), # Ι + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER IOTA WITH TONOS + (0x039F, 
0x0301, 0x038C), # Ο + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER OMICRON WITH TONOS + (0x03A5, 0x0301, 0x038E), # Υ + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER UPSILON WITH TONOS + (0x03A9, 0x0301, 0x038F), # Ω + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER OMEGA WITH TONOS + (0x03B1, 0x0301, 0x03AC), # α + COMBINING ACUTE ACCENT = GREEK SMALL LETTER ALPHA WITH TONOS + (0x03B5, 0x0301, 0x03AD), # ε + COMBINING ACUTE ACCENT = GREEK SMALL LETTER EPSILON WITH TONOS + (0x03B7, 0x0301, 0x03AE), # η + COMBINING ACUTE ACCENT = GREEK SMALL LETTER ETA WITH TONOS + (0x03B9, 0x0301, 0x03AF), # ι + COMBINING ACUTE ACCENT = GREEK SMALL LETTER IOTA WITH TONOS + (0x03BF, 0x0301, 0x03CC), # ο + COMBINING ACUTE ACCENT = GREEK SMALL LETTER OMICRON WITH TONOS + (0x03C5, 0x0301, 0x03CD), # υ + COMBINING ACUTE ACCENT = GREEK SMALL LETTER UPSILON WITH TONOS + (0x03C9, 0x0301, 0x03CE), # ω + COMBINING ACUTE ACCENT = GREEK SMALL LETTER OMEGA WITH TONOS + + # Common Cyrillic characters + (0x0418, 0x0306, 0x0419), # И + COMBINING BREVE = CYRILLIC CAPITAL LETTER SHORT I + (0x0438, 0x0306, 0x0439), # и + COMBINING BREVE = CYRILLIC SMALL LETTER SHORT I + (0x0423, 0x0306, 0x040E), # У + COMBINING BREVE = CYRILLIC CAPITAL LETTER SHORT U + (0x0443, 0x0306, 0x045E), # у + COMBINING BREVE = CYRILLIC SMALL LETTER SHORT U +] + +full_recompose_description = "all possible recomposition pairs from the Unicode BMP" +def collect_all_recomposition_pairs(): + """Collect all possible recomposition pairs from the Unicode data.""" + # Map to store recomposition pairs: (base, combining) -> recomposed + recompose_map = {} + + # Process all assigned Unicode code points in BMP (Basic Multilingual Plane) + # We limit to BMP (0x0000-0xFFFF) to keep our table smaller with uint16_t + for cp in range(0, 0x10000): + try: + char = chr(cp) + + # Skip unassigned or control characters + if not unicodedata.name(char, ''): + continue + + # Find decomposition + decomp = unicodedata.decomposition(char) + if not decomp 
or '<' in decomp: # Skip compatibility decompositions + continue + + # Parse the decomposition + parts = decomp.split() + if len(parts) == 2: # Simple base + combining mark + base = int(parts[0], 16) + combining = int(parts[1], 16) + + # Only store if both are in BMP + if base < 0x10000 and combining < 0x10000: + recompose_map[(base, combining)] = cp + + except (ValueError, TypeError): + continue + + # Convert to a list of tuples and sort for binary search + recompose_list = [(base, combining, recomposed) + for (base, combining), recomposed in recompose_map.items()] + recompose_list.sort() + + return recompose_list + +def validate_common_pairs(full_list): + """Validate that all common pairs are in the full list. + + Raises: + ValueError: If any common pair is missing or has a different recomposition + value than what's in the full table. + """ + full_pairs = {(base, combining): recomposed for base, combining, recomposed in full_list} + for base, combining, recomposed in COMMON_RECOMPOSITION_PAIRS: + full_recomposed = full_pairs.get((base, combining)) + if full_recomposed is None: + error_msg = f"Error: Common pair (0x{base:04X}, 0x{combining:04X}) not found in full data" + print(error_msg) + raise ValueError(error_msg) + elif full_recomposed != recomposed: + error_msg = (f"Error: Common pair (0x{base:04X}, 0x{combining:04X}) has different recomposition: " + f"0x{recomposed:04X} vs 0x{full_recomposed:04X}") + print(error_msg) + raise ValueError(error_msg) + +def generate_recomposition_table(use_full_list=False): + """Generate the recomposition C table.""" + + # Collect all recomposition pairs for validation + full_recompose_list = collect_all_recomposition_pairs() + + # Decide which list to use + if use_full_list: + print("Using full recomposition list...") + recompose_list = full_recompose_list + table_description = full_recompose_description + alt_list = COMMON_RECOMPOSITION_PAIRS + alt_description = common_recompose_description + else: + print("Using common 
recomposition list...") + # Validate that all common pairs are in the full list + validate_common_pairs(full_recompose_list) + recompose_list = sorted(COMMON_RECOMPOSITION_PAIRS) + table_description = common_recompose_description + alt_list = full_recompose_list + alt_description = full_recompose_description + generation_mode = " --full" if use_full_list else "" + alternative_mode = " --full" if not use_full_list else "" + table_description_detail = f"{table_description} ({len(recompose_list)} entries)" + alt_description_detail = f"{alt_description} ({len(alt_list)} entries)" + + # Calculate min/max values for boundary checks + min_base = min(base for base, _, _ in recompose_list) + max_base = max(base for base, _, _ in recompose_list) + min_combining = min(combining for _, combining, _ in recompose_list) + max_combining = max(combining for _, combining, _ in recompose_list) + + # Generate implementation file + with open(out_file, 'w') as f: + f.write(f"""\ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * {out_file} - Unicode character recomposition + * + * Auto-generated by {this_file}{generation_mode} + * + * Unicode Version: {unicodedata.unidata_version} + * +{textwrap.fill( + f"This file contains a table with {table_description_detail}. 
" + + f"To generate a table with {alt_description_detail} instead, run:", + width=75, initial_indent=" * ", subsequent_indent=" * ")} + * + * python3 {this_file}{alternative_mode} + */ + +/* + * Table of {table_description} + * Sorted by base character and then combining mark for binary search + */ +static const struct ucs_recomposition ucs_recomposition_table[] = {{ +""") + + for base, combining, recomposed in recompose_list: + try: + base_name = unicodedata.name(chr(base)) + combining_name = unicodedata.name(chr(combining)) + recomposed_name = unicodedata.name(chr(recomposed)) + comment = f"/* {base_name} + {combining_name} = {recomposed_name} */" + except ValueError: + comment = f"/* U+{base:04X} + U+{combining:04X} = U+{recomposed:04X} */" + f.write(f"\t{{ 0x{base:04X}, 0x{combining:04X}, 0x{recomposed:04X} }}, {comment}\n") + + f.write(f"""\ +}}; + +/* + * Boundary values for quick rejection + * These are calculated by analyzing the table during generation + */ +#define UCS_RECOMPOSE_MIN_BASE 0x{min_base:04X} +#define UCS_RECOMPOSE_MAX_BASE 0x{max_base:04X} +#define UCS_RECOMPOSE_MIN_MARK 0x{min_combining:04X} +#define UCS_RECOMPOSE_MAX_MARK 0x{max_combining:04X} +""") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Generate Unicode recomposition table") + parser.add_argument("--full", action="store_true", + help="Generate a full recomposition table (default: common pairs only)") + args = parser.parse_args() + + generate_recomposition_table(use_full_list=args.full) From 9bd73840935746dccef20dbcb509317c8e718f08 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:10 -0400 Subject: [PATCH 069/105] vt: create ucs_recompose_table.h with gen_ucs_recompose_table.py Table of base character + combining mark pairs with their precomposed equivalents. Note: scripts/checkpatch.pl complains about "... exceeds 100 columns". Please ignore. 
Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-9-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/ucs_recompose_table.h | 102 +++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 drivers/tty/vt/ucs_recompose_table.h diff --git a/drivers/tty/vt/ucs_recompose_table.h b/drivers/tty/vt/ucs_recompose_table.h new file mode 100644 index 000000000000..bd91edde5d19 --- /dev/null +++ b/drivers/tty/vt/ucs_recompose_table.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ucs_recompose_table.h - Unicode character recomposition + * + * Auto-generated by gen_ucs_recompose_table.py + * + * Unicode Version: 16.0.0 + * + * This file contains a table with most commonly used Latin, Greek, and + * Cyrillic recomposition pairs only (71 entries). To generate a table with + * all possible recomposition pairs from the Unicode BMP (1000 entries) + * instead, run: + * + * python3 gen_ucs_recompose_table.py --full + */ + +/* + * Table of most commonly used Latin, Greek, and Cyrillic recomposition pairs only + * Sorted by base character and then combining mark for binary search + */ +static const struct ucs_recomposition ucs_recomposition_table[] = { + { 0x0041, 0x0300, 0x00C0 }, /* LATIN CAPITAL LETTER A + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER A WITH GRAVE */ + { 0x0041, 0x0301, 0x00C1 }, /* LATIN CAPITAL LETTER A + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER A WITH ACUTE */ + { 0x0041, 0x0302, 0x00C2 }, /* LATIN CAPITAL LETTER A + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER A WITH CIRCUMFLEX */ + { 0x0041, 0x0303, 0x00C3 }, /* LATIN CAPITAL LETTER A + COMBINING TILDE = LATIN CAPITAL LETTER A WITH TILDE */ + { 0x0041, 0x0308, 0x00C4 }, /* LATIN CAPITAL LETTER A + COMBINING DIAERESIS = LATIN CAPITAL LETTER A WITH DIAERESIS */ + { 0x0041, 0x030A, 0x00C5 }, /* LATIN CAPITAL LETTER A + COMBINING RING ABOVE = LATIN CAPITAL LETTER A WITH RING ABOVE */ + {
0x0043, 0x0327, 0x00C7 }, /* LATIN CAPITAL LETTER C + COMBINING CEDILLA = LATIN CAPITAL LETTER C WITH CEDILLA */ + { 0x0045, 0x0300, 0x00C8 }, /* LATIN CAPITAL LETTER E + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER E WITH GRAVE */ + { 0x0045, 0x0301, 0x00C9 }, /* LATIN CAPITAL LETTER E + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER E WITH ACUTE */ + { 0x0045, 0x0302, 0x00CA }, /* LATIN CAPITAL LETTER E + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER E WITH CIRCUMFLEX */ + { 0x0045, 0x0308, 0x00CB }, /* LATIN CAPITAL LETTER E + COMBINING DIAERESIS = LATIN CAPITAL LETTER E WITH DIAERESIS */ + { 0x0049, 0x0300, 0x00CC }, /* LATIN CAPITAL LETTER I + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER I WITH GRAVE */ + { 0x0049, 0x0301, 0x00CD }, /* LATIN CAPITAL LETTER I + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER I WITH ACUTE */ + { 0x0049, 0x0302, 0x00CE }, /* LATIN CAPITAL LETTER I + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER I WITH CIRCUMFLEX */ + { 0x0049, 0x0308, 0x00CF }, /* LATIN CAPITAL LETTER I + COMBINING DIAERESIS = LATIN CAPITAL LETTER I WITH DIAERESIS */ + { 0x004E, 0x0303, 0x00D1 }, /* LATIN CAPITAL LETTER N + COMBINING TILDE = LATIN CAPITAL LETTER N WITH TILDE */ + { 0x004F, 0x0300, 0x00D2 }, /* LATIN CAPITAL LETTER O + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER O WITH GRAVE */ + { 0x004F, 0x0301, 0x00D3 }, /* LATIN CAPITAL LETTER O + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER O WITH ACUTE */ + { 0x004F, 0x0302, 0x00D4 }, /* LATIN CAPITAL LETTER O + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER O WITH CIRCUMFLEX */ + { 0x004F, 0x0303, 0x00D5 }, /* LATIN CAPITAL LETTER O + COMBINING TILDE = LATIN CAPITAL LETTER O WITH TILDE */ + { 0x004F, 0x0308, 0x00D6 }, /* LATIN CAPITAL LETTER O + COMBINING DIAERESIS = LATIN CAPITAL LETTER O WITH DIAERESIS */ + { 0x0055, 0x0300, 0x00D9 }, /* LATIN CAPITAL LETTER U + COMBINING GRAVE ACCENT = LATIN CAPITAL LETTER U WITH GRAVE */ + { 0x0055, 0x0301, 0x00DA }, /* LATIN CAPITAL LETTER U + COMBINING 
ACUTE ACCENT = LATIN CAPITAL LETTER U WITH ACUTE */ + { 0x0055, 0x0302, 0x00DB }, /* LATIN CAPITAL LETTER U + COMBINING CIRCUMFLEX ACCENT = LATIN CAPITAL LETTER U WITH CIRCUMFLEX */ + { 0x0055, 0x0308, 0x00DC }, /* LATIN CAPITAL LETTER U + COMBINING DIAERESIS = LATIN CAPITAL LETTER U WITH DIAERESIS */ + { 0x0059, 0x0301, 0x00DD }, /* LATIN CAPITAL LETTER Y + COMBINING ACUTE ACCENT = LATIN CAPITAL LETTER Y WITH ACUTE */ + { 0x0061, 0x0300, 0x00E0 }, /* LATIN SMALL LETTER A + COMBINING GRAVE ACCENT = LATIN SMALL LETTER A WITH GRAVE */ + { 0x0061, 0x0301, 0x00E1 }, /* LATIN SMALL LETTER A + COMBINING ACUTE ACCENT = LATIN SMALL LETTER A WITH ACUTE */ + { 0x0061, 0x0302, 0x00E2 }, /* LATIN SMALL LETTER A + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER A WITH CIRCUMFLEX */ + { 0x0061, 0x0303, 0x00E3 }, /* LATIN SMALL LETTER A + COMBINING TILDE = LATIN SMALL LETTER A WITH TILDE */ + { 0x0061, 0x0308, 0x00E4 }, /* LATIN SMALL LETTER A + COMBINING DIAERESIS = LATIN SMALL LETTER A WITH DIAERESIS */ + { 0x0061, 0x030A, 0x00E5 }, /* LATIN SMALL LETTER A + COMBINING RING ABOVE = LATIN SMALL LETTER A WITH RING ABOVE */ + { 0x0063, 0x0327, 0x00E7 }, /* LATIN SMALL LETTER C + COMBINING CEDILLA = LATIN SMALL LETTER C WITH CEDILLA */ + { 0x0065, 0x0300, 0x00E8 }, /* LATIN SMALL LETTER E + COMBINING GRAVE ACCENT = LATIN SMALL LETTER E WITH GRAVE */ + { 0x0065, 0x0301, 0x00E9 }, /* LATIN SMALL LETTER E + COMBINING ACUTE ACCENT = LATIN SMALL LETTER E WITH ACUTE */ + { 0x0065, 0x0302, 0x00EA }, /* LATIN SMALL LETTER E + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER E WITH CIRCUMFLEX */ + { 0x0065, 0x0308, 0x00EB }, /* LATIN SMALL LETTER E + COMBINING DIAERESIS = LATIN SMALL LETTER E WITH DIAERESIS */ + { 0x0069, 0x0300, 0x00EC }, /* LATIN SMALL LETTER I + COMBINING GRAVE ACCENT = LATIN SMALL LETTER I WITH GRAVE */ + { 0x0069, 0x0301, 0x00ED }, /* LATIN SMALL LETTER I + COMBINING ACUTE ACCENT = LATIN SMALL LETTER I WITH ACUTE */ + { 0x0069, 0x0302, 0x00EE }, /* LATIN SMALL LETTER I 
+ COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER I WITH CIRCUMFLEX */ + { 0x0069, 0x0308, 0x00EF }, /* LATIN SMALL LETTER I + COMBINING DIAERESIS = LATIN SMALL LETTER I WITH DIAERESIS */ + { 0x006E, 0x0303, 0x00F1 }, /* LATIN SMALL LETTER N + COMBINING TILDE = LATIN SMALL LETTER N WITH TILDE */ + { 0x006F, 0x0300, 0x00F2 }, /* LATIN SMALL LETTER O + COMBINING GRAVE ACCENT = LATIN SMALL LETTER O WITH GRAVE */ + { 0x006F, 0x0301, 0x00F3 }, /* LATIN SMALL LETTER O + COMBINING ACUTE ACCENT = LATIN SMALL LETTER O WITH ACUTE */ + { 0x006F, 0x0302, 0x00F4 }, /* LATIN SMALL LETTER O + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER O WITH CIRCUMFLEX */ + { 0x006F, 0x0303, 0x00F5 }, /* LATIN SMALL LETTER O + COMBINING TILDE = LATIN SMALL LETTER O WITH TILDE */ + { 0x006F, 0x0308, 0x00F6 }, /* LATIN SMALL LETTER O + COMBINING DIAERESIS = LATIN SMALL LETTER O WITH DIAERESIS */ + { 0x0075, 0x0300, 0x00F9 }, /* LATIN SMALL LETTER U + COMBINING GRAVE ACCENT = LATIN SMALL LETTER U WITH GRAVE */ + { 0x0075, 0x0301, 0x00FA }, /* LATIN SMALL LETTER U + COMBINING ACUTE ACCENT = LATIN SMALL LETTER U WITH ACUTE */ + { 0x0075, 0x0302, 0x00FB }, /* LATIN SMALL LETTER U + COMBINING CIRCUMFLEX ACCENT = LATIN SMALL LETTER U WITH CIRCUMFLEX */ + { 0x0075, 0x0308, 0x00FC }, /* LATIN SMALL LETTER U + COMBINING DIAERESIS = LATIN SMALL LETTER U WITH DIAERESIS */ + { 0x0079, 0x0301, 0x00FD }, /* LATIN SMALL LETTER Y + COMBINING ACUTE ACCENT = LATIN SMALL LETTER Y WITH ACUTE */ + { 0x0079, 0x0308, 0x00FF }, /* LATIN SMALL LETTER Y + COMBINING DIAERESIS = LATIN SMALL LETTER Y WITH DIAERESIS */ + { 0x0391, 0x0301, 0x0386 }, /* GREEK CAPITAL LETTER ALPHA + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER ALPHA WITH TONOS */ + { 0x0395, 0x0301, 0x0388 }, /* GREEK CAPITAL LETTER EPSILON + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER EPSILON WITH TONOS */ + { 0x0397, 0x0301, 0x0389 }, /* GREEK CAPITAL LETTER ETA + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER ETA WITH TONOS */ + { 0x0399, 0x0301, 
0x038A }, /* GREEK CAPITAL LETTER IOTA + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER IOTA WITH TONOS */ + { 0x039F, 0x0301, 0x038C }, /* GREEK CAPITAL LETTER OMICRON + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER OMICRON WITH TONOS */ + { 0x03A5, 0x0301, 0x038E }, /* GREEK CAPITAL LETTER UPSILON + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER UPSILON WITH TONOS */ + { 0x03A9, 0x0301, 0x038F }, /* GREEK CAPITAL LETTER OMEGA + COMBINING ACUTE ACCENT = GREEK CAPITAL LETTER OMEGA WITH TONOS */ + { 0x03B1, 0x0301, 0x03AC }, /* GREEK SMALL LETTER ALPHA + COMBINING ACUTE ACCENT = GREEK SMALL LETTER ALPHA WITH TONOS */ + { 0x03B5, 0x0301, 0x03AD }, /* GREEK SMALL LETTER EPSILON + COMBINING ACUTE ACCENT = GREEK SMALL LETTER EPSILON WITH TONOS */ + { 0x03B7, 0x0301, 0x03AE }, /* GREEK SMALL LETTER ETA + COMBINING ACUTE ACCENT = GREEK SMALL LETTER ETA WITH TONOS */ + { 0x03B9, 0x0301, 0x03AF }, /* GREEK SMALL LETTER IOTA + COMBINING ACUTE ACCENT = GREEK SMALL LETTER IOTA WITH TONOS */ + { 0x03BF, 0x0301, 0x03CC }, /* GREEK SMALL LETTER OMICRON + COMBINING ACUTE ACCENT = GREEK SMALL LETTER OMICRON WITH TONOS */ + { 0x03C5, 0x0301, 0x03CD }, /* GREEK SMALL LETTER UPSILON + COMBINING ACUTE ACCENT = GREEK SMALL LETTER UPSILON WITH TONOS */ + { 0x03C9, 0x0301, 0x03CE }, /* GREEK SMALL LETTER OMEGA + COMBINING ACUTE ACCENT = GREEK SMALL LETTER OMEGA WITH TONOS */ + { 0x0418, 0x0306, 0x0419 }, /* CYRILLIC CAPITAL LETTER I + COMBINING BREVE = CYRILLIC CAPITAL LETTER SHORT I */ + { 0x0423, 0x0306, 0x040E }, /* CYRILLIC CAPITAL LETTER U + COMBINING BREVE = CYRILLIC CAPITAL LETTER SHORT U */ + { 0x0438, 0x0306, 0x0439 }, /* CYRILLIC SMALL LETTER I + COMBINING BREVE = CYRILLIC SMALL LETTER SHORT I */ + { 0x0443, 0x0306, 0x045E }, /* CYRILLIC SMALL LETTER U + COMBINING BREVE = CYRILLIC SMALL LETTER SHORT U */ +}; + +/* + * Boundary values for quick rejection + * These are calculated by analyzing the table during generation + */ +#define UCS_RECOMPOSE_MIN_BASE 0x0041 +#define 
UCS_RECOMPOSE_MAX_BASE 0x0443 +#define UCS_RECOMPOSE_MIN_MARK 0x0300 +#define UCS_RECOMPOSE_MAX_MARK 0x0327 From b5c574995d842d241d810f3a6a3ebb03c52d57fa Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:11 -0400 Subject: [PATCH 070/105] vt: support Unicode recomposition Try replacing any decomposed Unicode sequence by the corresponding recomposed code point. Code point to glyph correspondence works best after recomposition, and this applies mostly to single-width code points therefore we can't preserve them in their decomposed form anyway. Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-10-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/ucs.c | 62 ++++++++++++++++++++++++++++++++++++++ drivers/tty/vt/vt.c | 14 +++++++-- include/linux/consolemap.h | 6 ++++ 3 files changed, 79 insertions(+), 3 deletions(-) diff --git a/drivers/tty/vt/ucs.c b/drivers/tty/vt/ucs.c index 5f9f25bd201b..bf25d63cea61 100644 --- a/drivers/tty/vt/ucs.c +++ b/drivers/tty/vt/ucs.c @@ -59,3 +59,65 @@ bool ucs_is_double_width(u32 cp) return cp_in_range(cp, ucs_double_width_ranges, ARRAY_SIZE(ucs_double_width_ranges)); } + +/* + * Structure for base with combining mark pairs and resulting recompositions. + * Using u16 to save space since all values are within BMP range. 
+ */ +struct ucs_recomposition { + u16 base; /* base character */ + u16 mark; /* combining mark */ + u16 recomposed; /* corresponding recomposed character */ +}; + +#include "ucs_recompose_table.h" + +struct compare_key { + u16 base; + u16 mark; +}; + +static int recomposition_cmp(const void *key, const void *element) +{ + const struct compare_key *search_key = key; + const struct ucs_recomposition *entry = element; + + /* Compare base character first */ + if (search_key->base < entry->base) + return -1; + if (search_key->base > entry->base) + return 1; + + /* Base characters match, now compare combining character */ + if (search_key->mark < entry->mark) + return -1; + if (search_key->mark > entry->mark) + return 1; + + /* Both match */ + return 0; +} + +/** + * ucs_recompose() - Attempt to recompose two Unicode characters into a single character. + * @base: Base Unicode code point (UCS-4) + * @mark: Combining mark Unicode code point (UCS-4) + * + * Return: Recomposed Unicode code point, or 0 if no recomposition is possible + */ +u32 ucs_recompose(u32 base, u32 mark) +{ + /* Check if characters are within the range of our table */ + if (!in_range(base, UCS_RECOMPOSE_MIN_BASE, UCS_RECOMPOSE_MAX_BASE) || + !in_range(mark, UCS_RECOMPOSE_MIN_MARK, UCS_RECOMPOSE_MAX_MARK)) + return 0; + + struct compare_key key = { base, mark }; + struct ucs_recomposition *result = + __inline_bsearch(&key, ucs_recomposition_table, + ARRAY_SIZE(ucs_recomposition_table), + sizeof(*ucs_recomposition_table), + recomposition_cmp); + + return result ? 
result->recomposed : 0; +} diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index a989feffad5e..76554c2040bf 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2925,9 +2925,9 @@ static void vc_con_rewind(struct vc_data *vc) #define UCS_VS16 0xfe0f /* Variation Selector 16 */ -static int vc_process_ucs(struct vc_data *vc, int c, int *tc) +static int vc_process_ucs(struct vc_data *vc, int *c, int *tc) { - u32 prev_c, curr_c = c; + u32 prev_c, curr_c = *c; if (ucs_is_double_width(curr_c)) return 2; @@ -2964,6 +2964,14 @@ static int vc_process_ucs(struct vc_data *vc, int c, int *tc) return 1; } + /* try recomposition */ + prev_c = ucs_recompose(prev_c, curr_c); + if (prev_c != 0) { + vc_con_rewind(vc); + *tc = *c = prev_c; + return 1; + } + /* Otherwise zero-width code points are ignored. */ return 0; } @@ -2978,7 +2986,7 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, bool inverse = false; if (vc->vc_utf && !vc->vc_disp_ctrl) { - width = vc_process_ucs(vc, c, &tc); + width = vc_process_ucs(vc, &c, &tc); if (!width) goto out; } diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index b3a911866662..8167494229db 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -30,6 +30,7 @@ int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); bool ucs_is_zero_width(uint32_t cp); +u32 ucs_recompose(u32 base, u32 mark); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -69,6 +70,11 @@ static inline bool ucs_is_zero_width(uint32_t cp) { return false; } + +static inline u32 ucs_recompose(u32 base, u32 mark) +{ + return 0; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ From 5617aeb14a4381e4ee61778c91ed90a615275f39 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:12 -0400 Subject: [PATCH 071/105] vt: pad double-width code points with a zero-width space In 
the Unicode screen buffer, we follow double-width code points with a space to maintain proper column alignment. This, however, creates semantic problems when e.g. using cut and paste. Let's use a better code point for the column padding's purpose i.e. a zero-width space rather than a full space. This way the combination retains a width of 2. Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-11-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 76554c2040bf..1bd1878094a0 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2923,6 +2923,7 @@ static void vc_con_rewind(struct vc_data *vc) vc->vc_need_wrap = 0; } +#define UCS_ZWS 0x200b /* Zero Width Space */ #define UCS_VS16 0xfe0f /* Variation Selector 16 */ static int vc_process_ucs(struct vc_data *vc, int *c, int *tc) @@ -2941,8 +2942,8 @@ static int vc_process_ucs(struct vc_data *vc, int *c, int *tc) /* * Let's merge this zero-width code point with the preceding * double-width code point by replacing the existing - * whitespace padding. To do so we rewind one column and - * pretend this has a width of 1. + * zero-width space padding. To do so we rewind one column + * and pretend this has a width of 1. * We give the legacy display the same initial space padding. */ vc_con_rewind(vc); @@ -3065,7 +3066,11 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, tc = conv_uni_to_pc(vc, ' '); if (tc < 0) tc = ' '; - next_c = ' '; + /* + * Store a zero-width space in the Unicode screen given that + * the previous code point is semantically double width. 
+ */ + next_c = UCS_ZWS; } out: From ffae2340a6af9beb580f107ad28afde7b57dea5b Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:13 -0400 Subject: [PATCH 072/105] vt: remove zero-width-space handling from conv_uni_to_pc() This is now taken care of by ucs_is_zero_width(). Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-12-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/consolemap.c | 2 -- drivers/tty/vt/vt.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/tty/vt/consolemap.c b/drivers/tty/vt/consolemap.c index 82d70083fead..bb4bb272ebec 100644 --- a/drivers/tty/vt/consolemap.c +++ b/drivers/tty/vt/consolemap.c @@ -870,8 +870,6 @@ int conv_uni_to_pc(struct vc_data *conp, long ucs) return -4; /* Not found */ else if (ucs < 0x20) return -1; /* Not a printable character */ - else if (ucs == 0xfeff || (ucs >= 0x200b && ucs <= 0x200f)) - return -2; /* Zero-width space */ /* * UNI_DIRECT_BASE indicates the start of the region in the User Zone * which always has a 1:1 mapping to the currently loaded font. The diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 1bd1878094a0..24c6cd2eed78 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2995,7 +2995,7 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, /* Now try to find out how to display it */ tc = conv_uni_to_pc(vc, tc); if (tc & ~charmask) { - if (tc == -1 || tc == -2) + if (tc == -1) return -1; /* nothing to display */ /* Glyph not found */ From ad934777f0f15c8cea042b6a81deaa7fe53b6dea Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:14 -0400 Subject: [PATCH 073/105] vt: update gen_ucs_width_table.py to make tables more space efficient Split table ranges into BMP (16-bit) and non-BMP (above 16-bit). This reduces the corresponding text size by 20-25%. 
Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-13-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/gen_ucs_width_table.py | 55 ++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/drivers/tty/vt/gen_ucs_width_table.py b/drivers/tty/vt/gen_ucs_width_table.py index 00510444a727..059ed9a8baa2 100755 --- a/drivers/tty/vt/gen_ucs_width_table.py +++ b/drivers/tty/vt/gen_ucs_width_table.py @@ -194,6 +194,27 @@ def write_tables(zero_width_ranges, double_width_ranges): double_width_ranges: List of (start, end) ranges for double-width characters """ + # Function to split ranges into BMP (16-bit) and non-BMP (above 16-bit) + def split_ranges_by_size(ranges): + bmp_ranges = [] + non_bmp_ranges = [] + + for start, end in ranges: + if end <= 0xFFFF: + bmp_ranges.append((start, end)) + elif start > 0xFFFF: + non_bmp_ranges.append((start, end)) + else: + # Split the range at 0xFFFF + bmp_ranges.append((start, 0xFFFF)) + non_bmp_ranges.append((0x10000, end)) + + return bmp_ranges, non_bmp_ranges + + # Split ranges into BMP and non-BMP + zero_width_bmp, zero_width_non_bmp = split_ranges_by_size(zero_width_ranges) + double_width_bmp, double_width_non_bmp = split_ranges_by_size(double_width_ranges) + # Function to generate code point description comments def get_code_point_comment(start, end): try: @@ -221,22 +242,44 @@ def write_tables(zero_width_ranges, double_width_ranges): * Unicode Version: {unicodedata.unidata_version} */ -/* Zero-width character ranges */ -static const struct ucs_interval ucs_zero_width_ranges[] = {{ +/* Zero-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */ +static const struct ucs_interval16 ucs_zero_width_bmp_ranges[] = {{ """) - for start, end in zero_width_ranges: + for start, end in zero_width_bmp: + comment = get_code_point_comment(start, end) + f.write(f"\t{{ 0x{start:04X}, 0x{end:04X} }}, {comment}\n") + + 
f.write("""\ +}; + +/* Zero-width character ranges (non-BMP, U+10000 and above) */ +static const struct ucs_interval32 ucs_zero_width_non_bmp_ranges[] = { +""") + + for start, end in zero_width_non_bmp: comment = get_code_point_comment(start, end) f.write(f"\t{{ 0x{start:05X}, 0x{end:05X} }}, {comment}\n") f.write("""\ }; -/* Double-width character ranges */ -static const struct ucs_interval ucs_double_width_ranges[] = { +/* Double-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */ +static const struct ucs_interval16 ucs_double_width_bmp_ranges[] = { """) - for start, end in double_width_ranges: + for start, end in double_width_bmp: + comment = get_code_point_comment(start, end) + f.write(f"\t{{ 0x{start:04X}, 0x{end:04X} }}, {comment}\n") + + f.write("""\ +}; + +/* Double-width character ranges (non-BMP, U+10000 and above) */ +static const struct ucs_interval32 ucs_double_width_non_bmp_ranges[] = { +""") + + for start, end in double_width_non_bmp: comment = get_code_point_comment(start, end) f.write(f"\t{{ 0x{start:05X}, 0x{end:05X} }}, {comment}\n") From d8f81c82b13fcbf93fae688cee1995260cfa59de Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:15 -0400 Subject: [PATCH 074/105] vt: refresh ucs_width_table.h and adjust code in ucs.c accordingly Width tables are now split into BMP (16-bit) and non-BMP (above 16-bit). This reduces the corresponding text size by 20-25%. Note: scripts/checkpatch.pl complains about "... exceeds 100 columns". Please ignore. 
Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-14-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/ucs.c | 56 +++- drivers/tty/vt/ucs_width_table.h | 540 ++++++++++++++++--------------- 2 files changed, 320 insertions(+), 276 deletions(-) diff --git a/drivers/tty/vt/ucs.c b/drivers/tty/vt/ucs.c index bf25d63cea61..0b58cb7344a3 100644 --- a/drivers/tty/vt/ucs.c +++ b/drivers/tty/vt/ucs.c @@ -8,17 +8,22 @@ #include #include -struct ucs_interval { +struct ucs_interval16 { + u16 first; + u16 last; +}; + +struct ucs_interval32 { u32 first; u32 last; }; #include "ucs_width_table.h" -static int interval_cmp(const void *key, const void *element) +static int interval16_cmp(const void *key, const void *element) { - u32 cp = *(u32 *)key; - const struct ucs_interval *entry = element; + u16 cp = *(u16 *)key; + const struct ucs_interval16 *entry = element; if (cp < entry->first) return -1; @@ -27,15 +32,38 @@ static int interval_cmp(const void *key, const void *element) return 0; } -static bool cp_in_range(u32 cp, const struct ucs_interval *ranges, size_t size) +static int interval32_cmp(const void *key, const void *element) +{ + u32 cp = *(u32 *)key; + const struct ucs_interval32 *entry = element; + + if (cp < entry->first) + return -1; + if (cp > entry->last) + return 1; + return 0; +} + +static bool cp_in_range16(u16 cp, const struct ucs_interval16 *ranges, size_t size) { if (!in_range(cp, ranges[0].first, ranges[size - 1].last)) return false; return __inline_bsearch(&cp, ranges, size, sizeof(*ranges), - interval_cmp) != NULL; + interval16_cmp) != NULL; } +static bool cp_in_range32(u32 cp, const struct ucs_interval32 *ranges, size_t size) +{ + if (!in_range(cp, ranges[0].first, ranges[size - 1].last)) + return false; + + return __inline_bsearch(&cp, ranges, size, sizeof(*ranges), + interval32_cmp) != NULL; +} + +#define UCS_IS_BMP(cp) ((cp) <= 0xffff) + /** * ucs_is_zero_width() - Determine if a 
Unicode code point is zero-width. * @cp: Unicode code point (UCS-4) @@ -44,8 +72,12 @@ static bool cp_in_range(u32 cp, const struct ucs_interval *ranges, size_t size) */ bool ucs_is_zero_width(u32 cp) { - return cp_in_range(cp, ucs_zero_width_ranges, - ARRAY_SIZE(ucs_zero_width_ranges)); + if (UCS_IS_BMP(cp)) + return cp_in_range16(cp, ucs_zero_width_bmp_ranges, + ARRAY_SIZE(ucs_zero_width_bmp_ranges)); + else + return cp_in_range32(cp, ucs_zero_width_non_bmp_ranges, + ARRAY_SIZE(ucs_zero_width_non_bmp_ranges)); } /** @@ -56,8 +88,12 @@ bool ucs_is_zero_width(u32 cp) */ bool ucs_is_double_width(u32 cp) { - return cp_in_range(cp, ucs_double_width_ranges, - ARRAY_SIZE(ucs_double_width_ranges)); + if (UCS_IS_BMP(cp)) + return cp_in_range16(cp, ucs_double_width_bmp_ranges, + ARRAY_SIZE(ucs_double_width_bmp_ranges)); + else + return cp_in_range32(cp, ucs_double_width_non_bmp_ranges, + ARRAY_SIZE(ucs_double_width_non_bmp_ranges)); } /* diff --git a/drivers/tty/vt/ucs_width_table.h b/drivers/tty/vt/ucs_width_table.h index 9cc86b5cdf92..6fcb8f1d577d 100644 --- a/drivers/tty/vt/ucs_width_table.h +++ b/drivers/tty/vt/ucs_width_table.h @@ -7,210 +7,214 @@ * Unicode Version: 16.0.0 */ -/* Zero-width character ranges */ -static const struct ucs_interval ucs_zero_width_ranges[] = { - { 0x000AD, 0x000AD }, /* SOFT HYPHEN */ - { 0x00300, 0x0036F }, /* COMBINING GRAVE ACCENT - COMBINING LATIN SMALL LETTER X */ - { 0x00483, 0x00489 }, /* COMBINING CYRILLIC TITLO - COMBINING CYRILLIC MILLIONS SIGN */ - { 0x00591, 0x005BD }, /* HEBREW ACCENT ETNAHTA - HEBREW POINT METEG */ - { 0x005BF, 0x005BF }, /* HEBREW POINT RAFE */ - { 0x005C1, 0x005C2 }, /* HEBREW POINT SHIN DOT - HEBREW POINT SIN DOT */ - { 0x005C4, 0x005C5 }, /* HEBREW MARK UPPER DOT - HEBREW MARK LOWER DOT */ - { 0x005C7, 0x005C7 }, /* HEBREW POINT QAMATS QATAN */ - { 0x00600, 0x00605 }, /* ARABIC NUMBER SIGN - ARABIC NUMBER MARK ABOVE */ - { 0x00610, 0x0061A }, /* ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM - ARABIC SMALL KASRA 
*/ - { 0x0061C, 0x0061C }, /* ARABIC LETTER MARK */ - { 0x0064B, 0x0065F }, /* ARABIC FATHATAN - ARABIC WAVY HAMZA BELOW */ - { 0x00670, 0x00670 }, /* ARABIC LETTER SUPERSCRIPT ALEF */ - { 0x006D6, 0x006DD }, /* ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA - ARABIC END OF AYAH */ - { 0x006DF, 0x006E4 }, /* ARABIC SMALL HIGH ROUNDED ZERO - ARABIC SMALL HIGH MADDA */ - { 0x006E7, 0x006E8 }, /* ARABIC SMALL HIGH YEH - ARABIC SMALL HIGH NOON */ - { 0x006EA, 0x006ED }, /* ARABIC EMPTY CENTRE LOW STOP - ARABIC SMALL LOW MEEM */ - { 0x0070F, 0x0070F }, /* SYRIAC ABBREVIATION MARK */ - { 0x00711, 0x00711 }, /* SYRIAC LETTER SUPERSCRIPT ALAPH */ - { 0x00730, 0x0074A }, /* SYRIAC PTHAHA ABOVE - SYRIAC BARREKH */ - { 0x007A6, 0x007B0 }, /* THAANA ABAFILI - THAANA SUKUN */ - { 0x007EB, 0x007F3 }, /* NKO COMBINING SHORT HIGH TONE - NKO COMBINING DOUBLE DOT ABOVE */ - { 0x007FD, 0x007FD }, /* NKO DANTAYALAN */ - { 0x00816, 0x00819 }, /* SAMARITAN MARK IN - SAMARITAN MARK DAGESH */ - { 0x0081B, 0x00823 }, /* SAMARITAN MARK EPENTHETIC YUT - SAMARITAN VOWEL SIGN A */ - { 0x00825, 0x00827 }, /* SAMARITAN VOWEL SIGN SHORT A - SAMARITAN VOWEL SIGN U */ - { 0x00829, 0x0082D }, /* SAMARITAN VOWEL SIGN LONG I - SAMARITAN MARK NEQUDAA */ - { 0x00859, 0x0085B }, /* MANDAIC AFFRICATION MARK - MANDAIC GEMINATION MARK */ - { 0x00890, 0x00891 }, /* ARABIC POUND MARK ABOVE - ARABIC PIASTRE MARK ABOVE */ - { 0x00897, 0x0089F }, /* ARABIC PEPET - ARABIC HALF MADDA OVER MADDA */ - { 0x008CA, 0x00903 }, /* ARABIC SMALL HIGH FARSI YEH - DEVANAGARI SIGN VISARGA */ - { 0x0093A, 0x0093C }, /* DEVANAGARI VOWEL SIGN OE - DEVANAGARI SIGN NUKTA */ - { 0x0093E, 0x0094F }, /* DEVANAGARI VOWEL SIGN AA - DEVANAGARI VOWEL SIGN AW */ - { 0x00951, 0x00957 }, /* DEVANAGARI STRESS SIGN UDATTA - DEVANAGARI VOWEL SIGN UUE */ - { 0x00962, 0x00963 }, /* DEVANAGARI VOWEL SIGN VOCALIC L - DEVANAGARI VOWEL SIGN VOCALIC LL */ - { 0x00981, 0x00983 }, /* BENGALI SIGN CANDRABINDU - BENGALI SIGN VISARGA */ - { 
0x009BC, 0x009BC }, /* BENGALI SIGN NUKTA */ - { 0x009BE, 0x009C4 }, /* BENGALI VOWEL SIGN AA - BENGALI VOWEL SIGN VOCALIC RR */ - { 0x009C7, 0x009C8 }, /* BENGALI VOWEL SIGN E - BENGALI VOWEL SIGN AI */ - { 0x009CB, 0x009CD }, /* BENGALI VOWEL SIGN O - BENGALI SIGN VIRAMA */ - { 0x009D7, 0x009D7 }, /* BENGALI AU LENGTH MARK */ - { 0x009E2, 0x009E3 }, /* BENGALI VOWEL SIGN VOCALIC L - BENGALI VOWEL SIGN VOCALIC LL */ - { 0x009FE, 0x009FE }, /* BENGALI SANDHI MARK */ - { 0x00A01, 0x00A03 }, /* GURMUKHI SIGN ADAK BINDI - GURMUKHI SIGN VISARGA */ - { 0x00A3C, 0x00A3C }, /* GURMUKHI SIGN NUKTA */ - { 0x00A3E, 0x00A42 }, /* GURMUKHI VOWEL SIGN AA - GURMUKHI VOWEL SIGN UU */ - { 0x00A47, 0x00A48 }, /* GURMUKHI VOWEL SIGN EE - GURMUKHI VOWEL SIGN AI */ - { 0x00A4B, 0x00A4D }, /* GURMUKHI VOWEL SIGN OO - GURMUKHI SIGN VIRAMA */ - { 0x00A51, 0x00A51 }, /* GURMUKHI SIGN UDAAT */ - { 0x00A70, 0x00A71 }, /* GURMUKHI TIPPI - GURMUKHI ADDAK */ - { 0x00A75, 0x00A75 }, /* GURMUKHI SIGN YAKASH */ - { 0x00A81, 0x00A83 }, /* GUJARATI SIGN CANDRABINDU - GUJARATI SIGN VISARGA */ - { 0x00ABC, 0x00ABC }, /* GUJARATI SIGN NUKTA */ - { 0x00ABE, 0x00AC5 }, /* GUJARATI VOWEL SIGN AA - GUJARATI VOWEL SIGN CANDRA E */ - { 0x00AC7, 0x00AC9 }, /* GUJARATI VOWEL SIGN E - GUJARATI VOWEL SIGN CANDRA O */ - { 0x00ACB, 0x00ACD }, /* GUJARATI VOWEL SIGN O - GUJARATI SIGN VIRAMA */ - { 0x00AE2, 0x00AE3 }, /* GUJARATI VOWEL SIGN VOCALIC L - GUJARATI VOWEL SIGN VOCALIC LL */ - { 0x00AFA, 0x00AFF }, /* GUJARATI SIGN SUKUN - GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE */ - { 0x00B01, 0x00B03 }, /* ORIYA SIGN CANDRABINDU - ORIYA SIGN VISARGA */ - { 0x00B3C, 0x00B3C }, /* ORIYA SIGN NUKTA */ - { 0x00B3E, 0x00B44 }, /* ORIYA VOWEL SIGN AA - ORIYA VOWEL SIGN VOCALIC RR */ - { 0x00B47, 0x00B48 }, /* ORIYA VOWEL SIGN E - ORIYA VOWEL SIGN AI */ - { 0x00B4B, 0x00B4D }, /* ORIYA VOWEL SIGN O - ORIYA SIGN VIRAMA */ - { 0x00B55, 0x00B57 }, /* ORIYA SIGN OVERLINE - ORIYA AU LENGTH MARK */ - { 0x00B62, 0x00B63 }, /* ORIYA 
VOWEL SIGN VOCALIC L - ORIYA VOWEL SIGN VOCALIC LL */ - { 0x00B82, 0x00B82 }, /* TAMIL SIGN ANUSVARA */ - { 0x00BBE, 0x00BC2 }, /* TAMIL VOWEL SIGN AA - TAMIL VOWEL SIGN UU */ - { 0x00BC6, 0x00BC8 }, /* TAMIL VOWEL SIGN E - TAMIL VOWEL SIGN AI */ - { 0x00BCA, 0x00BCD }, /* TAMIL VOWEL SIGN O - TAMIL SIGN VIRAMA */ - { 0x00BD7, 0x00BD7 }, /* TAMIL AU LENGTH MARK */ - { 0x00C00, 0x00C04 }, /* TELUGU SIGN COMBINING CANDRABINDU ABOVE - TELUGU SIGN COMBINING ANUSVARA ABOVE */ - { 0x00C3C, 0x00C3C }, /* TELUGU SIGN NUKTA */ - { 0x00C3E, 0x00C44 }, /* TELUGU VOWEL SIGN AA - TELUGU VOWEL SIGN VOCALIC RR */ - { 0x00C46, 0x00C48 }, /* TELUGU VOWEL SIGN E - TELUGU VOWEL SIGN AI */ - { 0x00C4A, 0x00C4D }, /* TELUGU VOWEL SIGN O - TELUGU SIGN VIRAMA */ - { 0x00C55, 0x00C56 }, /* TELUGU LENGTH MARK - TELUGU AI LENGTH MARK */ - { 0x00C62, 0x00C63 }, /* TELUGU VOWEL SIGN VOCALIC L - TELUGU VOWEL SIGN VOCALIC LL */ - { 0x00C81, 0x00C83 }, /* KANNADA SIGN CANDRABINDU - KANNADA SIGN VISARGA */ - { 0x00CBC, 0x00CBC }, /* KANNADA SIGN NUKTA */ - { 0x00CBE, 0x00CC4 }, /* KANNADA VOWEL SIGN AA - KANNADA VOWEL SIGN VOCALIC RR */ - { 0x00CC6, 0x00CC8 }, /* KANNADA VOWEL SIGN E - KANNADA VOWEL SIGN AI */ - { 0x00CCA, 0x00CCD }, /* KANNADA VOWEL SIGN O - KANNADA SIGN VIRAMA */ - { 0x00CD5, 0x00CD6 }, /* KANNADA LENGTH MARK - KANNADA AI LENGTH MARK */ - { 0x00CE2, 0x00CE3 }, /* KANNADA VOWEL SIGN VOCALIC L - KANNADA VOWEL SIGN VOCALIC LL */ - { 0x00CF3, 0x00CF3 }, /* KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT */ - { 0x00D00, 0x00D03 }, /* MALAYALAM SIGN COMBINING ANUSVARA ABOVE - MALAYALAM SIGN VISARGA */ - { 0x00D3B, 0x00D3C }, /* MALAYALAM SIGN VERTICAL BAR VIRAMA - MALAYALAM SIGN CIRCULAR VIRAMA */ - { 0x00D3E, 0x00D44 }, /* MALAYALAM VOWEL SIGN AA - MALAYALAM VOWEL SIGN VOCALIC RR */ - { 0x00D46, 0x00D48 }, /* MALAYALAM VOWEL SIGN E - MALAYALAM VOWEL SIGN AI */ - { 0x00D4A, 0x00D4D }, /* MALAYALAM VOWEL SIGN O - MALAYALAM SIGN VIRAMA */ - { 0x00D57, 0x00D57 }, /* MALAYALAM AU LENGTH MARK 
*/ - { 0x00D62, 0x00D63 }, /* MALAYALAM VOWEL SIGN VOCALIC L - MALAYALAM VOWEL SIGN VOCALIC LL */ - { 0x00D81, 0x00D83 }, /* SINHALA SIGN CANDRABINDU - SINHALA SIGN VISARGAYA */ - { 0x00DCA, 0x00DCA }, /* SINHALA SIGN AL-LAKUNA */ - { 0x00DCF, 0x00DD4 }, /* SINHALA VOWEL SIGN AELA-PILLA - SINHALA VOWEL SIGN KETTI PAA-PILLA */ - { 0x00DD6, 0x00DD6 }, /* SINHALA VOWEL SIGN DIGA PAA-PILLA */ - { 0x00DD8, 0x00DDF }, /* SINHALA VOWEL SIGN GAETTA-PILLA - SINHALA VOWEL SIGN GAYANUKITTA */ - { 0x00DF2, 0x00DF3 }, /* SINHALA VOWEL SIGN DIGA GAETTA-PILLA - SINHALA VOWEL SIGN DIGA GAYANUKITTA */ - { 0x00E31, 0x00E31 }, /* THAI CHARACTER MAI HAN-AKAT */ - { 0x00E34, 0x00E3A }, /* THAI CHARACTER SARA I - THAI CHARACTER PHINTHU */ - { 0x00E47, 0x00E4E }, /* THAI CHARACTER MAITAIKHU - THAI CHARACTER YAMAKKAN */ - { 0x00EB1, 0x00EB1 }, /* LAO VOWEL SIGN MAI KAN */ - { 0x00EB4, 0x00EBC }, /* LAO VOWEL SIGN I - LAO SEMIVOWEL SIGN LO */ - { 0x00EC8, 0x00ECE }, /* LAO TONE MAI EK - LAO YAMAKKAN */ - { 0x00F18, 0x00F19 }, /* TIBETAN ASTROLOGICAL SIGN -KHYUD PA - TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS */ - { 0x00F35, 0x00F35 }, /* TIBETAN MARK NGAS BZUNG NYI ZLA */ - { 0x00F37, 0x00F37 }, /* TIBETAN MARK NGAS BZUNG SGOR RTAGS */ - { 0x00F39, 0x00F39 }, /* TIBETAN MARK TSA -PHRU */ - { 0x00F3E, 0x00F3F }, /* TIBETAN SIGN YAR TSHES - TIBETAN SIGN MAR TSHES */ - { 0x00F71, 0x00F84 }, /* TIBETAN VOWEL SIGN AA - TIBETAN MARK HALANTA */ - { 0x00F86, 0x00F87 }, /* TIBETAN SIGN LCI RTAGS - TIBETAN SIGN YANG RTAGS */ - { 0x00F8D, 0x00F97 }, /* TIBETAN SUBJOINED SIGN LCE TSA CAN - TIBETAN SUBJOINED LETTER JA */ - { 0x00F99, 0x00FBC }, /* TIBETAN SUBJOINED LETTER NYA - TIBETAN SUBJOINED LETTER FIXED-FORM RA */ - { 0x00FC6, 0x00FC6 }, /* TIBETAN SYMBOL PADMA GDAN */ - { 0x0102B, 0x0103E }, /* MYANMAR VOWEL SIGN TALL AA - MYANMAR CONSONANT SIGN MEDIAL HA */ - { 0x01056, 0x01059 }, /* MYANMAR VOWEL SIGN VOCALIC R - MYANMAR VOWEL SIGN VOCALIC LL */ - { 0x0105E, 0x01060 }, /* MYANMAR CONSONANT SIGN MON 
MEDIAL NA - MYANMAR CONSONANT SIGN MON MEDIAL LA */ - { 0x01062, 0x01064 }, /* MYANMAR VOWEL SIGN SGAW KAREN EU - MYANMAR TONE MARK SGAW KAREN KE PHO */ - { 0x01067, 0x0106D }, /* MYANMAR VOWEL SIGN WESTERN PWO KAREN EU - MYANMAR SIGN WESTERN PWO KAREN TONE-5 */ - { 0x01071, 0x01074 }, /* MYANMAR VOWEL SIGN GEBA KAREN I - MYANMAR VOWEL SIGN KAYAH EE */ - { 0x01082, 0x0108D }, /* MYANMAR CONSONANT SIGN SHAN MEDIAL WA - MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE */ - { 0x0108F, 0x0108F }, /* MYANMAR SIGN RUMAI PALAUNG TONE-5 */ - { 0x0109A, 0x0109D }, /* MYANMAR SIGN KHAMTI TONE-1 - MYANMAR VOWEL SIGN AITON AI */ - { 0x0135D, 0x0135F }, /* ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK - ETHIOPIC COMBINING GEMINATION MARK */ - { 0x01712, 0x01715 }, /* TAGALOG VOWEL SIGN I - TAGALOG SIGN PAMUDPOD */ - { 0x01732, 0x01734 }, /* HANUNOO VOWEL SIGN I - HANUNOO SIGN PAMUDPOD */ - { 0x01752, 0x01753 }, /* BUHID VOWEL SIGN I - BUHID VOWEL SIGN U */ - { 0x01772, 0x01773 }, /* TAGBANWA VOWEL SIGN I - TAGBANWA VOWEL SIGN U */ - { 0x017B4, 0x017D3 }, /* KHMER VOWEL INHERENT AQ - KHMER SIGN BATHAMASAT */ - { 0x017DD, 0x017DD }, /* KHMER SIGN ATTHACAN */ - { 0x0180B, 0x0180F }, /* MONGOLIAN FREE VARIATION SELECTOR ONE - MONGOLIAN FREE VARIATION SELECTOR FOUR */ - { 0x01885, 0x01886 }, /* MONGOLIAN LETTER ALI GALI BALUDA - MONGOLIAN LETTER ALI GALI THREE BALUDA */ - { 0x018A9, 0x018A9 }, /* MONGOLIAN LETTER ALI GALI DAGALGA */ - { 0x01920, 0x0192B }, /* LIMBU VOWEL SIGN A - LIMBU SUBJOINED LETTER WA */ - { 0x01930, 0x0193B }, /* LIMBU SMALL LETTER KA - LIMBU SIGN SA-I */ - { 0x01A17, 0x01A1B }, /* BUGINESE VOWEL SIGN I - BUGINESE VOWEL SIGN AE */ - { 0x01A55, 0x01A5E }, /* TAI THAM CONSONANT SIGN MEDIAL RA - TAI THAM CONSONANT SIGN SA */ - { 0x01A60, 0x01A7C }, /* TAI THAM SIGN SAKOT - TAI THAM SIGN KHUEN-LUE KARAN */ - { 0x01A7F, 0x01A7F }, /* TAI THAM COMBINING CRYPTOGRAMMIC DOT */ - { 0x01AB0, 0x01ACE }, /* COMBINING DOUBLED CIRCUMFLEX ACCENT - COMBINING LATIN SMALL LETTER 
INSULAR T */ - { 0x01B00, 0x01B04 }, /* BALINESE SIGN ULU RICEM - BALINESE SIGN BISAH */ - { 0x01B34, 0x01B44 }, /* BALINESE SIGN REREKAN - BALINESE ADEG ADEG */ - { 0x01B6B, 0x01B73 }, /* BALINESE MUSICAL SYMBOL COMBINING TEGEH - BALINESE MUSICAL SYMBOL COMBINING GONG */ - { 0x01B80, 0x01B82 }, /* SUNDANESE SIGN PANYECEK - SUNDANESE SIGN PANGWISAD */ - { 0x01BA1, 0x01BAD }, /* SUNDANESE CONSONANT SIGN PAMINGKAL - SUNDANESE CONSONANT SIGN PASANGAN WA */ - { 0x01BE6, 0x01BF3 }, /* BATAK SIGN TOMPI - BATAK PANONGONAN */ - { 0x01C24, 0x01C37 }, /* LEPCHA SUBJOINED LETTER YA - LEPCHA SIGN NUKTA */ - { 0x01CD0, 0x01CD2 }, /* VEDIC TONE KARSHANA - VEDIC TONE PRENKHA */ - { 0x01CD4, 0x01CE8 }, /* VEDIC SIGN YAJURVEDIC MIDLINE SVARITA - VEDIC SIGN VISARGA ANUDATTA WITH TAIL */ - { 0x01CED, 0x01CED }, /* VEDIC SIGN TIRYAK */ - { 0x01CF4, 0x01CF4 }, /* VEDIC TONE CANDRA ABOVE */ - { 0x01CF7, 0x01CF9 }, /* VEDIC SIGN ATIKRAMA - VEDIC TONE DOUBLE RING ABOVE */ - { 0x01DC0, 0x01DFF }, /* COMBINING DOTTED GRAVE ACCENT - COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW */ - { 0x0200B, 0x0200F }, /* ZERO WIDTH SPACE - RIGHT-TO-LEFT MARK */ - { 0x0202A, 0x0202E }, /* LEFT-TO-RIGHT EMBEDDING - RIGHT-TO-LEFT OVERRIDE */ - { 0x02060, 0x02064 }, /* WORD JOINER - INVISIBLE PLUS */ - { 0x02066, 0x0206F }, /* LEFT-TO-RIGHT ISOLATE - NOMINAL DIGIT SHAPES */ - { 0x020D0, 0x020F0 }, /* COMBINING LEFT HARPOON ABOVE - COMBINING ASTERISK ABOVE */ - { 0x02640, 0x02640 }, /* FEMALE SIGN */ - { 0x02642, 0x02642 }, /* MALE SIGN */ - { 0x026A7, 0x026A7 }, /* MALE WITH STROKE AND MALE AND FEMALE SIGN */ - { 0x02CEF, 0x02CF1 }, /* COPTIC COMBINING NI ABOVE - COPTIC COMBINING SPIRITUS LENIS */ - { 0x02D7F, 0x02D7F }, /* TIFINAGH CONSONANT JOINER */ - { 0x02DE0, 0x02DFF }, /* COMBINING CYRILLIC LETTER BE - COMBINING CYRILLIC LETTER IOTIFIED BIG YUS */ - { 0x0302A, 0x0302F }, /* IDEOGRAPHIC LEVEL TONE MARK - HANGUL DOUBLE DOT TONE MARK */ - { 0x03099, 0x0309A }, /* COMBINING KATAKANA-HIRAGANA VOICED 
SOUND MARK - COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */ - { 0x0A66F, 0x0A672 }, /* COMBINING CYRILLIC VZMET - COMBINING CYRILLIC THOUSAND MILLIONS SIGN */ - { 0x0A674, 0x0A67D }, /* COMBINING CYRILLIC LETTER UKRAINIAN IE - COMBINING CYRILLIC PAYEROK */ - { 0x0A69E, 0x0A69F }, /* COMBINING CYRILLIC LETTER EF - COMBINING CYRILLIC LETTER IOTIFIED E */ - { 0x0A6F0, 0x0A6F1 }, /* BAMUM COMBINING MARK KOQNDON - BAMUM COMBINING MARK TUKWENTIS */ - { 0x0A802, 0x0A802 }, /* SYLOTI NAGRI SIGN DVISVARA */ - { 0x0A806, 0x0A806 }, /* SYLOTI NAGRI SIGN HASANTA */ - { 0x0A80B, 0x0A80B }, /* SYLOTI NAGRI SIGN ANUSVARA */ - { 0x0A823, 0x0A827 }, /* SYLOTI NAGRI VOWEL SIGN A - SYLOTI NAGRI VOWEL SIGN OO */ - { 0x0A82C, 0x0A82C }, /* SYLOTI NAGRI SIGN ALTERNATE HASANTA */ - { 0x0A880, 0x0A881 }, /* SAURASHTRA SIGN ANUSVARA - SAURASHTRA SIGN VISARGA */ - { 0x0A8B4, 0x0A8C5 }, /* SAURASHTRA CONSONANT SIGN HAARU - SAURASHTRA SIGN CANDRABINDU */ - { 0x0A8E0, 0x0A8F1 }, /* COMBINING DEVANAGARI DIGIT ZERO - COMBINING DEVANAGARI SIGN AVAGRAHA */ - { 0x0A8FF, 0x0A8FF }, /* DEVANAGARI VOWEL SIGN AY */ - { 0x0A926, 0x0A92D }, /* KAYAH LI VOWEL UE - KAYAH LI TONE CALYA PLOPHU */ - { 0x0A947, 0x0A953 }, /* REJANG VOWEL SIGN I - REJANG VIRAMA */ - { 0x0A980, 0x0A983 }, /* JAVANESE SIGN PANYANGGA - JAVANESE SIGN WIGNYAN */ - { 0x0A9B3, 0x0A9C0 }, /* JAVANESE SIGN CECAK TELU - JAVANESE PANGKON */ - { 0x0A9E5, 0x0A9E5 }, /* MYANMAR SIGN SHAN SAW */ - { 0x0AA29, 0x0AA36 }, /* CHAM VOWEL SIGN AA - CHAM CONSONANT SIGN WA */ - { 0x0AA43, 0x0AA43 }, /* CHAM CONSONANT SIGN FINAL NG */ - { 0x0AA4C, 0x0AA4D }, /* CHAM CONSONANT SIGN FINAL M - CHAM CONSONANT SIGN FINAL H */ - { 0x0AA7B, 0x0AA7D }, /* MYANMAR SIGN PAO KAREN TONE - MYANMAR SIGN TAI LAING TONE-5 */ - { 0x0AAB0, 0x0AAB0 }, /* TAI VIET MAI KANG */ - { 0x0AAB2, 0x0AAB4 }, /* TAI VIET VOWEL I - TAI VIET VOWEL U */ - { 0x0AAB7, 0x0AAB8 }, /* TAI VIET MAI KHIT - TAI VIET VOWEL IA */ - { 0x0AABE, 0x0AABF }, /* TAI VIET VOWEL AM - TAI VIET 
TONE MAI EK */ - { 0x0AAC1, 0x0AAC1 }, /* TAI VIET TONE MAI THO */ - { 0x0AAEB, 0x0AAEF }, /* MEETEI MAYEK VOWEL SIGN II - MEETEI MAYEK VOWEL SIGN AAU */ - { 0x0AAF5, 0x0AAF6 }, /* MEETEI MAYEK VOWEL SIGN VISARGA - MEETEI MAYEK VIRAMA */ - { 0x0ABE3, 0x0ABEA }, /* MEETEI MAYEK VOWEL SIGN ONAP - MEETEI MAYEK VOWEL SIGN NUNG */ - { 0x0ABEC, 0x0ABED }, /* MEETEI MAYEK LUM IYEK - MEETEI MAYEK APUN IYEK */ - { 0x0FB1E, 0x0FB1E }, /* HEBREW POINT JUDEO-SPANISH VARIKA */ - { 0x0FE00, 0x0FE0F }, /* VARIATION SELECTOR-1 - VARIATION SELECTOR-16 */ - { 0x0FE20, 0x0FE2F }, /* COMBINING LIGATURE LEFT HALF - COMBINING CYRILLIC TITLO RIGHT HALF */ - { 0x0FEFF, 0x0FEFF }, /* ZERO WIDTH NO-BREAK SPACE */ - { 0x0FFF9, 0x0FFFB }, /* INTERLINEAR ANNOTATION ANCHOR - INTERLINEAR ANNOTATION TERMINATOR */ +/* Zero-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */ +static const struct ucs_interval16 ucs_zero_width_bmp_ranges[] = { + { 0x00AD, 0x00AD }, /* SOFT HYPHEN */ + { 0x0300, 0x036F }, /* COMBINING GRAVE ACCENT - COMBINING LATIN SMALL LETTER X */ + { 0x0483, 0x0489 }, /* COMBINING CYRILLIC TITLO - COMBINING CYRILLIC MILLIONS SIGN */ + { 0x0591, 0x05BD }, /* HEBREW ACCENT ETNAHTA - HEBREW POINT METEG */ + { 0x05BF, 0x05BF }, /* HEBREW POINT RAFE */ + { 0x05C1, 0x05C2 }, /* HEBREW POINT SHIN DOT - HEBREW POINT SIN DOT */ + { 0x05C4, 0x05C5 }, /* HEBREW MARK UPPER DOT - HEBREW MARK LOWER DOT */ + { 0x05C7, 0x05C7 }, /* HEBREW POINT QAMATS QATAN */ + { 0x0600, 0x0605 }, /* ARABIC NUMBER SIGN - ARABIC NUMBER MARK ABOVE */ + { 0x0610, 0x061A }, /* ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM - ARABIC SMALL KASRA */ + { 0x061C, 0x061C }, /* ARABIC LETTER MARK */ + { 0x064B, 0x065F }, /* ARABIC FATHATAN - ARABIC WAVY HAMZA BELOW */ + { 0x0670, 0x0670 }, /* ARABIC LETTER SUPERSCRIPT ALEF */ + { 0x06D6, 0x06DD }, /* ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA - ARABIC END OF AYAH */ + { 0x06DF, 0x06E4 }, /* ARABIC SMALL HIGH ROUNDED ZERO - ARABIC SMALL HIGH 
MADDA */ + { 0x06E7, 0x06E8 }, /* ARABIC SMALL HIGH YEH - ARABIC SMALL HIGH NOON */ + { 0x06EA, 0x06ED }, /* ARABIC EMPTY CENTRE LOW STOP - ARABIC SMALL LOW MEEM */ + { 0x070F, 0x070F }, /* SYRIAC ABBREVIATION MARK */ + { 0x0711, 0x0711 }, /* SYRIAC LETTER SUPERSCRIPT ALAPH */ + { 0x0730, 0x074A }, /* SYRIAC PTHAHA ABOVE - SYRIAC BARREKH */ + { 0x07A6, 0x07B0 }, /* THAANA ABAFILI - THAANA SUKUN */ + { 0x07EB, 0x07F3 }, /* NKO COMBINING SHORT HIGH TONE - NKO COMBINING DOUBLE DOT ABOVE */ + { 0x07FD, 0x07FD }, /* NKO DANTAYALAN */ + { 0x0816, 0x0819 }, /* SAMARITAN MARK IN - SAMARITAN MARK DAGESH */ + { 0x081B, 0x0823 }, /* SAMARITAN MARK EPENTHETIC YUT - SAMARITAN VOWEL SIGN A */ + { 0x0825, 0x0827 }, /* SAMARITAN VOWEL SIGN SHORT A - SAMARITAN VOWEL SIGN U */ + { 0x0829, 0x082D }, /* SAMARITAN VOWEL SIGN LONG I - SAMARITAN MARK NEQUDAA */ + { 0x0859, 0x085B }, /* MANDAIC AFFRICATION MARK - MANDAIC GEMINATION MARK */ + { 0x0890, 0x0891 }, /* ARABIC POUND MARK ABOVE - ARABIC PIASTRE MARK ABOVE */ + { 0x0897, 0x089F }, /* ARABIC PEPET - ARABIC HALF MADDA OVER MADDA */ + { 0x08CA, 0x0903 }, /* ARABIC SMALL HIGH FARSI YEH - DEVANAGARI SIGN VISARGA */ + { 0x093A, 0x093C }, /* DEVANAGARI VOWEL SIGN OE - DEVANAGARI SIGN NUKTA */ + { 0x093E, 0x094F }, /* DEVANAGARI VOWEL SIGN AA - DEVANAGARI VOWEL SIGN AW */ + { 0x0951, 0x0957 }, /* DEVANAGARI STRESS SIGN UDATTA - DEVANAGARI VOWEL SIGN UUE */ + { 0x0962, 0x0963 }, /* DEVANAGARI VOWEL SIGN VOCALIC L - DEVANAGARI VOWEL SIGN VOCALIC LL */ + { 0x0981, 0x0983 }, /* BENGALI SIGN CANDRABINDU - BENGALI SIGN VISARGA */ + { 0x09BC, 0x09BC }, /* BENGALI SIGN NUKTA */ + { 0x09BE, 0x09C4 }, /* BENGALI VOWEL SIGN AA - BENGALI VOWEL SIGN VOCALIC RR */ + { 0x09C7, 0x09C8 }, /* BENGALI VOWEL SIGN E - BENGALI VOWEL SIGN AI */ + { 0x09CB, 0x09CD }, /* BENGALI VOWEL SIGN O - BENGALI SIGN VIRAMA */ + { 0x09D7, 0x09D7 }, /* BENGALI AU LENGTH MARK */ + { 0x09E2, 0x09E3 }, /* BENGALI VOWEL SIGN VOCALIC L - BENGALI VOWEL SIGN VOCALIC LL */ + { 
0x09FE, 0x09FE }, /* BENGALI SANDHI MARK */ + { 0x0A01, 0x0A03 }, /* GURMUKHI SIGN ADAK BINDI - GURMUKHI SIGN VISARGA */ + { 0x0A3C, 0x0A3C }, /* GURMUKHI SIGN NUKTA */ + { 0x0A3E, 0x0A42 }, /* GURMUKHI VOWEL SIGN AA - GURMUKHI VOWEL SIGN UU */ + { 0x0A47, 0x0A48 }, /* GURMUKHI VOWEL SIGN EE - GURMUKHI VOWEL SIGN AI */ + { 0x0A4B, 0x0A4D }, /* GURMUKHI VOWEL SIGN OO - GURMUKHI SIGN VIRAMA */ + { 0x0A51, 0x0A51 }, /* GURMUKHI SIGN UDAAT */ + { 0x0A70, 0x0A71 }, /* GURMUKHI TIPPI - GURMUKHI ADDAK */ + { 0x0A75, 0x0A75 }, /* GURMUKHI SIGN YAKASH */ + { 0x0A81, 0x0A83 }, /* GUJARATI SIGN CANDRABINDU - GUJARATI SIGN VISARGA */ + { 0x0ABC, 0x0ABC }, /* GUJARATI SIGN NUKTA */ + { 0x0ABE, 0x0AC5 }, /* GUJARATI VOWEL SIGN AA - GUJARATI VOWEL SIGN CANDRA E */ + { 0x0AC7, 0x0AC9 }, /* GUJARATI VOWEL SIGN E - GUJARATI VOWEL SIGN CANDRA O */ + { 0x0ACB, 0x0ACD }, /* GUJARATI VOWEL SIGN O - GUJARATI SIGN VIRAMA */ + { 0x0AE2, 0x0AE3 }, /* GUJARATI VOWEL SIGN VOCALIC L - GUJARATI VOWEL SIGN VOCALIC LL */ + { 0x0AFA, 0x0AFF }, /* GUJARATI SIGN SUKUN - GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE */ + { 0x0B01, 0x0B03 }, /* ORIYA SIGN CANDRABINDU - ORIYA SIGN VISARGA */ + { 0x0B3C, 0x0B3C }, /* ORIYA SIGN NUKTA */ + { 0x0B3E, 0x0B44 }, /* ORIYA VOWEL SIGN AA - ORIYA VOWEL SIGN VOCALIC RR */ + { 0x0B47, 0x0B48 }, /* ORIYA VOWEL SIGN E - ORIYA VOWEL SIGN AI */ + { 0x0B4B, 0x0B4D }, /* ORIYA VOWEL SIGN O - ORIYA SIGN VIRAMA */ + { 0x0B55, 0x0B57 }, /* ORIYA SIGN OVERLINE - ORIYA AU LENGTH MARK */ + { 0x0B62, 0x0B63 }, /* ORIYA VOWEL SIGN VOCALIC L - ORIYA VOWEL SIGN VOCALIC LL */ + { 0x0B82, 0x0B82 }, /* TAMIL SIGN ANUSVARA */ + { 0x0BBE, 0x0BC2 }, /* TAMIL VOWEL SIGN AA - TAMIL VOWEL SIGN UU */ + { 0x0BC6, 0x0BC8 }, /* TAMIL VOWEL SIGN E - TAMIL VOWEL SIGN AI */ + { 0x0BCA, 0x0BCD }, /* TAMIL VOWEL SIGN O - TAMIL SIGN VIRAMA */ + { 0x0BD7, 0x0BD7 }, /* TAMIL AU LENGTH MARK */ + { 0x0C00, 0x0C04 }, /* TELUGU SIGN COMBINING CANDRABINDU ABOVE - TELUGU SIGN COMBINING ANUSVARA ABOVE */ + { 
0x0C3C, 0x0C3C }, /* TELUGU SIGN NUKTA */ + { 0x0C3E, 0x0C44 }, /* TELUGU VOWEL SIGN AA - TELUGU VOWEL SIGN VOCALIC RR */ + { 0x0C46, 0x0C48 }, /* TELUGU VOWEL SIGN E - TELUGU VOWEL SIGN AI */ + { 0x0C4A, 0x0C4D }, /* TELUGU VOWEL SIGN O - TELUGU SIGN VIRAMA */ + { 0x0C55, 0x0C56 }, /* TELUGU LENGTH MARK - TELUGU AI LENGTH MARK */ + { 0x0C62, 0x0C63 }, /* TELUGU VOWEL SIGN VOCALIC L - TELUGU VOWEL SIGN VOCALIC LL */ + { 0x0C81, 0x0C83 }, /* KANNADA SIGN CANDRABINDU - KANNADA SIGN VISARGA */ + { 0x0CBC, 0x0CBC }, /* KANNADA SIGN NUKTA */ + { 0x0CBE, 0x0CC4 }, /* KANNADA VOWEL SIGN AA - KANNADA VOWEL SIGN VOCALIC RR */ + { 0x0CC6, 0x0CC8 }, /* KANNADA VOWEL SIGN E - KANNADA VOWEL SIGN AI */ + { 0x0CCA, 0x0CCD }, /* KANNADA VOWEL SIGN O - KANNADA SIGN VIRAMA */ + { 0x0CD5, 0x0CD6 }, /* KANNADA LENGTH MARK - KANNADA AI LENGTH MARK */ + { 0x0CE2, 0x0CE3 }, /* KANNADA VOWEL SIGN VOCALIC L - KANNADA VOWEL SIGN VOCALIC LL */ + { 0x0CF3, 0x0CF3 }, /* KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT */ + { 0x0D00, 0x0D03 }, /* MALAYALAM SIGN COMBINING ANUSVARA ABOVE - MALAYALAM SIGN VISARGA */ + { 0x0D3B, 0x0D3C }, /* MALAYALAM SIGN VERTICAL BAR VIRAMA - MALAYALAM SIGN CIRCULAR VIRAMA */ + { 0x0D3E, 0x0D44 }, /* MALAYALAM VOWEL SIGN AA - MALAYALAM VOWEL SIGN VOCALIC RR */ + { 0x0D46, 0x0D48 }, /* MALAYALAM VOWEL SIGN E - MALAYALAM VOWEL SIGN AI */ + { 0x0D4A, 0x0D4D }, /* MALAYALAM VOWEL SIGN O - MALAYALAM SIGN VIRAMA */ + { 0x0D57, 0x0D57 }, /* MALAYALAM AU LENGTH MARK */ + { 0x0D62, 0x0D63 }, /* MALAYALAM VOWEL SIGN VOCALIC L - MALAYALAM VOWEL SIGN VOCALIC LL */ + { 0x0D81, 0x0D83 }, /* SINHALA SIGN CANDRABINDU - SINHALA SIGN VISARGAYA */ + { 0x0DCA, 0x0DCA }, /* SINHALA SIGN AL-LAKUNA */ + { 0x0DCF, 0x0DD4 }, /* SINHALA VOWEL SIGN AELA-PILLA - SINHALA VOWEL SIGN KETTI PAA-PILLA */ + { 0x0DD6, 0x0DD6 }, /* SINHALA VOWEL SIGN DIGA PAA-PILLA */ + { 0x0DD8, 0x0DDF }, /* SINHALA VOWEL SIGN GAETTA-PILLA - SINHALA VOWEL SIGN GAYANUKITTA */ + { 0x0DF2, 0x0DF3 }, /* SINHALA VOWEL SIGN 
DIGA GAETTA-PILLA - SINHALA VOWEL SIGN DIGA GAYANUKITTA */ + { 0x0E31, 0x0E31 }, /* THAI CHARACTER MAI HAN-AKAT */ + { 0x0E34, 0x0E3A }, /* THAI CHARACTER SARA I - THAI CHARACTER PHINTHU */ + { 0x0E47, 0x0E4E }, /* THAI CHARACTER MAITAIKHU - THAI CHARACTER YAMAKKAN */ + { 0x0EB1, 0x0EB1 }, /* LAO VOWEL SIGN MAI KAN */ + { 0x0EB4, 0x0EBC }, /* LAO VOWEL SIGN I - LAO SEMIVOWEL SIGN LO */ + { 0x0EC8, 0x0ECE }, /* LAO TONE MAI EK - LAO YAMAKKAN */ + { 0x0F18, 0x0F19 }, /* TIBETAN ASTROLOGICAL SIGN -KHYUD PA - TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS */ + { 0x0F35, 0x0F35 }, /* TIBETAN MARK NGAS BZUNG NYI ZLA */ + { 0x0F37, 0x0F37 }, /* TIBETAN MARK NGAS BZUNG SGOR RTAGS */ + { 0x0F39, 0x0F39 }, /* TIBETAN MARK TSA -PHRU */ + { 0x0F3E, 0x0F3F }, /* TIBETAN SIGN YAR TSHES - TIBETAN SIGN MAR TSHES */ + { 0x0F71, 0x0F84 }, /* TIBETAN VOWEL SIGN AA - TIBETAN MARK HALANTA */ + { 0x0F86, 0x0F87 }, /* TIBETAN SIGN LCI RTAGS - TIBETAN SIGN YANG RTAGS */ + { 0x0F8D, 0x0F97 }, /* TIBETAN SUBJOINED SIGN LCE TSA CAN - TIBETAN SUBJOINED LETTER JA */ + { 0x0F99, 0x0FBC }, /* TIBETAN SUBJOINED LETTER NYA - TIBETAN SUBJOINED LETTER FIXED-FORM RA */ + { 0x0FC6, 0x0FC6 }, /* TIBETAN SYMBOL PADMA GDAN */ + { 0x102B, 0x103E }, /* MYANMAR VOWEL SIGN TALL AA - MYANMAR CONSONANT SIGN MEDIAL HA */ + { 0x1056, 0x1059 }, /* MYANMAR VOWEL SIGN VOCALIC R - MYANMAR VOWEL SIGN VOCALIC LL */ + { 0x105E, 0x1060 }, /* MYANMAR CONSONANT SIGN MON MEDIAL NA - MYANMAR CONSONANT SIGN MON MEDIAL LA */ + { 0x1062, 0x1064 }, /* MYANMAR VOWEL SIGN SGAW KAREN EU - MYANMAR TONE MARK SGAW KAREN KE PHO */ + { 0x1067, 0x106D }, /* MYANMAR VOWEL SIGN WESTERN PWO KAREN EU - MYANMAR SIGN WESTERN PWO KAREN TONE-5 */ + { 0x1071, 0x1074 }, /* MYANMAR VOWEL SIGN GEBA KAREN I - MYANMAR VOWEL SIGN KAYAH EE */ + { 0x1082, 0x108D }, /* MYANMAR CONSONANT SIGN SHAN MEDIAL WA - MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE */ + { 0x108F, 0x108F }, /* MYANMAR SIGN RUMAI PALAUNG TONE-5 */ + { 0x109A, 0x109D }, /* MYANMAR SIGN KHAMTI TONE-1 
- MYANMAR VOWEL SIGN AITON AI */ + { 0x135D, 0x135F }, /* ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK - ETHIOPIC COMBINING GEMINATION MARK */ + { 0x1712, 0x1715 }, /* TAGALOG VOWEL SIGN I - TAGALOG SIGN PAMUDPOD */ + { 0x1732, 0x1734 }, /* HANUNOO VOWEL SIGN I - HANUNOO SIGN PAMUDPOD */ + { 0x1752, 0x1753 }, /* BUHID VOWEL SIGN I - BUHID VOWEL SIGN U */ + { 0x1772, 0x1773 }, /* TAGBANWA VOWEL SIGN I - TAGBANWA VOWEL SIGN U */ + { 0x17B4, 0x17D3 }, /* KHMER VOWEL INHERENT AQ - KHMER SIGN BATHAMASAT */ + { 0x17DD, 0x17DD }, /* KHMER SIGN ATTHACAN */ + { 0x180B, 0x180F }, /* MONGOLIAN FREE VARIATION SELECTOR ONE - MONGOLIAN FREE VARIATION SELECTOR FOUR */ + { 0x1885, 0x1886 }, /* MONGOLIAN LETTER ALI GALI BALUDA - MONGOLIAN LETTER ALI GALI THREE BALUDA */ + { 0x18A9, 0x18A9 }, /* MONGOLIAN LETTER ALI GALI DAGALGA */ + { 0x1920, 0x192B }, /* LIMBU VOWEL SIGN A - LIMBU SUBJOINED LETTER WA */ + { 0x1930, 0x193B }, /* LIMBU SMALL LETTER KA - LIMBU SIGN SA-I */ + { 0x1A17, 0x1A1B }, /* BUGINESE VOWEL SIGN I - BUGINESE VOWEL SIGN AE */ + { 0x1A55, 0x1A5E }, /* TAI THAM CONSONANT SIGN MEDIAL RA - TAI THAM CONSONANT SIGN SA */ + { 0x1A60, 0x1A7C }, /* TAI THAM SIGN SAKOT - TAI THAM SIGN KHUEN-LUE KARAN */ + { 0x1A7F, 0x1A7F }, /* TAI THAM COMBINING CRYPTOGRAMMIC DOT */ + { 0x1AB0, 0x1ACE }, /* COMBINING DOUBLED CIRCUMFLEX ACCENT - COMBINING LATIN SMALL LETTER INSULAR T */ + { 0x1B00, 0x1B04 }, /* BALINESE SIGN ULU RICEM - BALINESE SIGN BISAH */ + { 0x1B34, 0x1B44 }, /* BALINESE SIGN REREKAN - BALINESE ADEG ADEG */ + { 0x1B6B, 0x1B73 }, /* BALINESE MUSICAL SYMBOL COMBINING TEGEH - BALINESE MUSICAL SYMBOL COMBINING GONG */ + { 0x1B80, 0x1B82 }, /* SUNDANESE SIGN PANYECEK - SUNDANESE SIGN PANGWISAD */ + { 0x1BA1, 0x1BAD }, /* SUNDANESE CONSONANT SIGN PAMINGKAL - SUNDANESE CONSONANT SIGN PASANGAN WA */ + { 0x1BE6, 0x1BF3 }, /* BATAK SIGN TOMPI - BATAK PANONGONAN */ + { 0x1C24, 0x1C37 }, /* LEPCHA SUBJOINED LETTER YA - LEPCHA SIGN NUKTA */ + { 0x1CD0, 0x1CD2 }, /* VEDIC 
TONE KARSHANA - VEDIC TONE PRENKHA */ + { 0x1CD4, 0x1CE8 }, /* VEDIC SIGN YAJURVEDIC MIDLINE SVARITA - VEDIC SIGN VISARGA ANUDATTA WITH TAIL */ + { 0x1CED, 0x1CED }, /* VEDIC SIGN TIRYAK */ + { 0x1CF4, 0x1CF4 }, /* VEDIC TONE CANDRA ABOVE */ + { 0x1CF7, 0x1CF9 }, /* VEDIC SIGN ATIKRAMA - VEDIC TONE DOUBLE RING ABOVE */ + { 0x1DC0, 0x1DFF }, /* COMBINING DOTTED GRAVE ACCENT - COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW */ + { 0x200B, 0x200F }, /* ZERO WIDTH SPACE - RIGHT-TO-LEFT MARK */ + { 0x202A, 0x202E }, /* LEFT-TO-RIGHT EMBEDDING - RIGHT-TO-LEFT OVERRIDE */ + { 0x2060, 0x2064 }, /* WORD JOINER - INVISIBLE PLUS */ + { 0x2066, 0x206F }, /* LEFT-TO-RIGHT ISOLATE - NOMINAL DIGIT SHAPES */ + { 0x20D0, 0x20F0 }, /* COMBINING LEFT HARPOON ABOVE - COMBINING ASTERISK ABOVE */ + { 0x2640, 0x2640 }, /* FEMALE SIGN */ + { 0x2642, 0x2642 }, /* MALE SIGN */ + { 0x26A7, 0x26A7 }, /* MALE WITH STROKE AND MALE AND FEMALE SIGN */ + { 0x2CEF, 0x2CF1 }, /* COPTIC COMBINING NI ABOVE - COPTIC COMBINING SPIRITUS LENIS */ + { 0x2D7F, 0x2D7F }, /* TIFINAGH CONSONANT JOINER */ + { 0x2DE0, 0x2DFF }, /* COMBINING CYRILLIC LETTER BE - COMBINING CYRILLIC LETTER IOTIFIED BIG YUS */ + { 0x302A, 0x302F }, /* IDEOGRAPHIC LEVEL TONE MARK - HANGUL DOUBLE DOT TONE MARK */ + { 0x3099, 0x309A }, /* COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK - COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */ + { 0xA66F, 0xA672 }, /* COMBINING CYRILLIC VZMET - COMBINING CYRILLIC THOUSAND MILLIONS SIGN */ + { 0xA674, 0xA67D }, /* COMBINING CYRILLIC LETTER UKRAINIAN IE - COMBINING CYRILLIC PAYEROK */ + { 0xA69E, 0xA69F }, /* COMBINING CYRILLIC LETTER EF - COMBINING CYRILLIC LETTER IOTIFIED E */ + { 0xA6F0, 0xA6F1 }, /* BAMUM COMBINING MARK KOQNDON - BAMUM COMBINING MARK TUKWENTIS */ + { 0xA802, 0xA802 }, /* SYLOTI NAGRI SIGN DVISVARA */ + { 0xA806, 0xA806 }, /* SYLOTI NAGRI SIGN HASANTA */ + { 0xA80B, 0xA80B }, /* SYLOTI NAGRI SIGN ANUSVARA */ + { 0xA823, 0xA827 }, /* SYLOTI NAGRI VOWEL SIGN A - SYLOTI 
NAGRI VOWEL SIGN OO */ + { 0xA82C, 0xA82C }, /* SYLOTI NAGRI SIGN ALTERNATE HASANTA */ + { 0xA880, 0xA881 }, /* SAURASHTRA SIGN ANUSVARA - SAURASHTRA SIGN VISARGA */ + { 0xA8B4, 0xA8C5 }, /* SAURASHTRA CONSONANT SIGN HAARU - SAURASHTRA SIGN CANDRABINDU */ + { 0xA8E0, 0xA8F1 }, /* COMBINING DEVANAGARI DIGIT ZERO - COMBINING DEVANAGARI SIGN AVAGRAHA */ + { 0xA8FF, 0xA8FF }, /* DEVANAGARI VOWEL SIGN AY */ + { 0xA926, 0xA92D }, /* KAYAH LI VOWEL UE - KAYAH LI TONE CALYA PLOPHU */ + { 0xA947, 0xA953 }, /* REJANG VOWEL SIGN I - REJANG VIRAMA */ + { 0xA980, 0xA983 }, /* JAVANESE SIGN PANYANGGA - JAVANESE SIGN WIGNYAN */ + { 0xA9B3, 0xA9C0 }, /* JAVANESE SIGN CECAK TELU - JAVANESE PANGKON */ + { 0xA9E5, 0xA9E5 }, /* MYANMAR SIGN SHAN SAW */ + { 0xAA29, 0xAA36 }, /* CHAM VOWEL SIGN AA - CHAM CONSONANT SIGN WA */ + { 0xAA43, 0xAA43 }, /* CHAM CONSONANT SIGN FINAL NG */ + { 0xAA4C, 0xAA4D }, /* CHAM CONSONANT SIGN FINAL M - CHAM CONSONANT SIGN FINAL H */ + { 0xAA7B, 0xAA7D }, /* MYANMAR SIGN PAO KAREN TONE - MYANMAR SIGN TAI LAING TONE-5 */ + { 0xAAB0, 0xAAB0 }, /* TAI VIET MAI KANG */ + { 0xAAB2, 0xAAB4 }, /* TAI VIET VOWEL I - TAI VIET VOWEL U */ + { 0xAAB7, 0xAAB8 }, /* TAI VIET MAI KHIT - TAI VIET VOWEL IA */ + { 0xAABE, 0xAABF }, /* TAI VIET VOWEL AM - TAI VIET TONE MAI EK */ + { 0xAAC1, 0xAAC1 }, /* TAI VIET TONE MAI THO */ + { 0xAAEB, 0xAAEF }, /* MEETEI MAYEK VOWEL SIGN II - MEETEI MAYEK VOWEL SIGN AAU */ + { 0xAAF5, 0xAAF6 }, /* MEETEI MAYEK VOWEL SIGN VISARGA - MEETEI MAYEK VIRAMA */ + { 0xABE3, 0xABEA }, /* MEETEI MAYEK VOWEL SIGN ONAP - MEETEI MAYEK VOWEL SIGN NUNG */ + { 0xABEC, 0xABED }, /* MEETEI MAYEK LUM IYEK - MEETEI MAYEK APUN IYEK */ + { 0xFB1E, 0xFB1E }, /* HEBREW POINT JUDEO-SPANISH VARIKA */ + { 0xFE00, 0xFE0F }, /* VARIATION SELECTOR-1 - VARIATION SELECTOR-16 */ + { 0xFE20, 0xFE2F }, /* COMBINING LIGATURE LEFT HALF - COMBINING CYRILLIC TITLO RIGHT HALF */ + { 0xFEFF, 0xFEFF }, /* ZERO WIDTH NO-BREAK SPACE */ + { 0xFFF9, 0xFFFB }, /* INTERLINEAR 
ANNOTATION ANCHOR - INTERLINEAR ANNOTATION TERMINATOR */ +}; + +/* Zero-width character ranges (non-BMP, U+10000 and above) */ +static const struct ucs_interval32 ucs_zero_width_non_bmp_ranges[] = { { 0x101FD, 0x101FD }, /* PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE */ { 0x102E0, 0x102E0 }, /* COPTIC EPACT THOUSANDS MARK */ { 0x10376, 0x1037A }, /* COMBINING OLD PERMIC LETTER AN - COMBINING OLD PERMIC LETTER SII */ @@ -350,68 +354,72 @@ static const struct ucs_interval ucs_zero_width_ranges[] = { { 0xE0100, 0xE01EF }, /* VARIATION SELECTOR-17 - VARIATION SELECTOR-256 */ }; -/* Double-width character ranges */ -static const struct ucs_interval ucs_double_width_ranges[] = { - { 0x01100, 0x0115F }, /* HANGUL CHOSEONG KIYEOK - HANGUL CHOSEONG FILLER */ - { 0x0231A, 0x0231B }, /* WATCH - HOURGLASS */ - { 0x02329, 0x0232A }, /* LEFT-POINTING ANGLE BRACKET - RIGHT-POINTING ANGLE BRACKET */ - { 0x023E9, 0x023EC }, /* BLACK RIGHT-POINTING DOUBLE TRIANGLE - BLACK DOWN-POINTING DOUBLE TRIANGLE */ - { 0x023F0, 0x023F0 }, /* ALARM CLOCK */ - { 0x023F3, 0x023F3 }, /* HOURGLASS WITH FLOWING SAND */ - { 0x025FD, 0x025FE }, /* WHITE MEDIUM SMALL SQUARE - BLACK MEDIUM SMALL SQUARE */ - { 0x02614, 0x02615 }, /* UMBRELLA WITH RAIN DROPS - HOT BEVERAGE */ - { 0x02630, 0x02637 }, /* TRIGRAM FOR HEAVEN - TRIGRAM FOR EARTH */ - { 0x02648, 0x02653 }, /* ARIES - PISCES */ - { 0x0267F, 0x0267F }, /* WHEELCHAIR SYMBOL */ - { 0x0268A, 0x0268F }, /* MONOGRAM FOR YANG - DIGRAM FOR GREATER YIN */ - { 0x02693, 0x02693 }, /* ANCHOR */ - { 0x026A1, 0x026A1 }, /* HIGH VOLTAGE SIGN */ - { 0x026AA, 0x026AB }, /* MEDIUM WHITE CIRCLE - MEDIUM BLACK CIRCLE */ - { 0x026BD, 0x026BE }, /* SOCCER BALL - BASEBALL */ - { 0x026C4, 0x026C5 }, /* SNOWMAN WITHOUT SNOW - SUN BEHIND CLOUD */ - { 0x026CE, 0x026CE }, /* OPHIUCHUS */ - { 0x026D4, 0x026D4 }, /* NO ENTRY */ - { 0x026EA, 0x026EA }, /* CHURCH */ - { 0x026F2, 0x026F3 }, /* FOUNTAIN - FLAG IN HOLE */ - { 0x026F5, 0x026F5 }, /* SAILBOAT */ - { 0x026FA, 
0x026FA }, /* TENT */ - { 0x026FD, 0x026FD }, /* FUEL PUMP */ - { 0x02705, 0x02705 }, /* WHITE HEAVY CHECK MARK */ - { 0x0270A, 0x0270B }, /* RAISED FIST - RAISED HAND */ - { 0x02728, 0x02728 }, /* SPARKLES */ - { 0x0274C, 0x0274C }, /* CROSS MARK */ - { 0x0274E, 0x0274E }, /* NEGATIVE SQUARED CROSS MARK */ - { 0x02753, 0x02755 }, /* BLACK QUESTION MARK ORNAMENT - WHITE EXCLAMATION MARK ORNAMENT */ - { 0x02757, 0x02757 }, /* HEAVY EXCLAMATION MARK SYMBOL */ - { 0x02795, 0x02797 }, /* HEAVY PLUS SIGN - HEAVY DIVISION SIGN */ - { 0x027B0, 0x027B0 }, /* CURLY LOOP */ - { 0x027BF, 0x027BF }, /* DOUBLE CURLY LOOP */ - { 0x02B1B, 0x02B1C }, /* BLACK LARGE SQUARE - WHITE LARGE SQUARE */ - { 0x02B50, 0x02B50 }, /* WHITE MEDIUM STAR */ - { 0x02B55, 0x02B55 }, /* HEAVY LARGE CIRCLE */ - { 0x02E80, 0x02E99 }, /* CJK RADICAL REPEAT - CJK RADICAL RAP */ - { 0x02E9B, 0x02EF3 }, /* CJK RADICAL CHOKE - CJK RADICAL C-SIMPLIFIED TURTLE */ - { 0x02F00, 0x02FD5 }, /* KANGXI RADICAL ONE - KANGXI RADICAL FLUTE */ - { 0x02FF0, 0x03029 }, /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT - HANGZHOU NUMERAL NINE */ - { 0x03030, 0x0303E }, /* WAVY DASH - IDEOGRAPHIC VARIATION INDICATOR */ - { 0x03041, 0x03096 }, /* HIRAGANA LETTER SMALL A - HIRAGANA LETTER SMALL KE */ - { 0x0309B, 0x030FF }, /* KATAKANA-HIRAGANA VOICED SOUND MARK - KATAKANA DIGRAPH KOTO */ - { 0x03105, 0x0312F }, /* BOPOMOFO LETTER B - BOPOMOFO LETTER NN */ - { 0x03131, 0x0318E }, /* HANGUL LETTER KIYEOK - HANGUL LETTER ARAEAE */ - { 0x03190, 0x031E5 }, /* IDEOGRAPHIC ANNOTATION LINKING MARK - CJK STROKE SZP */ - { 0x031EF, 0x0321E }, /* IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION - PARENTHESIZED KOREAN CHARACTER O HU */ - { 0x03220, 0x03247 }, /* PARENTHESIZED IDEOGRAPH ONE - CIRCLED IDEOGRAPH KOTO */ - { 0x03250, 0x0A48C }, /* PARTNERSHIP SIGN - YI SYLLABLE YYR */ - { 0x0A490, 0x0A4C6 }, /* YI RADICAL QOT - YI RADICAL KE */ - { 0x0A960, 0x0A97C }, /* HANGUL CHOSEONG TIKEUT-MIEUM - HANGUL CHOSEONG SSANGYEORINHIEUH */ - 
{ 0x0AC00, 0x0D7A3 }, /* HANGUL SYLLABLE GA - HANGUL SYLLABLE HIH */ - { 0x0F900, 0x0FAFF }, /* U+F900 - U+FAFF */ - { 0x0FE10, 0x0FE19 }, /* PRESENTATION FORM FOR VERTICAL COMMA - PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS */ - { 0x0FE30, 0x0FE52 }, /* PRESENTATION FORM FOR VERTICAL TWO DOT LEADER - SMALL FULL STOP */ - { 0x0FE54, 0x0FE66 }, /* SMALL SEMICOLON - SMALL EQUALS SIGN */ - { 0x0FE68, 0x0FE6B }, /* SMALL REVERSE SOLIDUS - SMALL COMMERCIAL AT */ - { 0x0FF01, 0x0FF60 }, /* FULLWIDTH EXCLAMATION MARK - FULLWIDTH RIGHT WHITE PARENTHESIS */ - { 0x0FFE0, 0x0FFE6 }, /* FULLWIDTH CENT SIGN - FULLWIDTH WON SIGN */ +/* Double-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */ +static const struct ucs_interval16 ucs_double_width_bmp_ranges[] = { + { 0x1100, 0x115F }, /* HANGUL CHOSEONG KIYEOK - HANGUL CHOSEONG FILLER */ + { 0x231A, 0x231B }, /* WATCH - HOURGLASS */ + { 0x2329, 0x232A }, /* LEFT-POINTING ANGLE BRACKET - RIGHT-POINTING ANGLE BRACKET */ + { 0x23E9, 0x23EC }, /* BLACK RIGHT-POINTING DOUBLE TRIANGLE - BLACK DOWN-POINTING DOUBLE TRIANGLE */ + { 0x23F0, 0x23F0 }, /* ALARM CLOCK */ + { 0x23F3, 0x23F3 }, /* HOURGLASS WITH FLOWING SAND */ + { 0x25FD, 0x25FE }, /* WHITE MEDIUM SMALL SQUARE - BLACK MEDIUM SMALL SQUARE */ + { 0x2614, 0x2615 }, /* UMBRELLA WITH RAIN DROPS - HOT BEVERAGE */ + { 0x2630, 0x2637 }, /* TRIGRAM FOR HEAVEN - TRIGRAM FOR EARTH */ + { 0x2648, 0x2653 }, /* ARIES - PISCES */ + { 0x267F, 0x267F }, /* WHEELCHAIR SYMBOL */ + { 0x268A, 0x268F }, /* MONOGRAM FOR YANG - DIGRAM FOR GREATER YIN */ + { 0x2693, 0x2693 }, /* ANCHOR */ + { 0x26A1, 0x26A1 }, /* HIGH VOLTAGE SIGN */ + { 0x26AA, 0x26AB }, /* MEDIUM WHITE CIRCLE - MEDIUM BLACK CIRCLE */ + { 0x26BD, 0x26BE }, /* SOCCER BALL - BASEBALL */ + { 0x26C4, 0x26C5 }, /* SNOWMAN WITHOUT SNOW - SUN BEHIND CLOUD */ + { 0x26CE, 0x26CE }, /* OPHIUCHUS */ + { 0x26D4, 0x26D4 }, /* NO ENTRY */ + { 0x26EA, 0x26EA }, /* CHURCH */ + { 0x26F2, 0x26F3 }, /* FOUNTAIN - FLAG IN 
HOLE */ + { 0x26F5, 0x26F5 }, /* SAILBOAT */ + { 0x26FA, 0x26FA }, /* TENT */ + { 0x26FD, 0x26FD }, /* FUEL PUMP */ + { 0x2705, 0x2705 }, /* WHITE HEAVY CHECK MARK */ + { 0x270A, 0x270B }, /* RAISED FIST - RAISED HAND */ + { 0x2728, 0x2728 }, /* SPARKLES */ + { 0x274C, 0x274C }, /* CROSS MARK */ + { 0x274E, 0x274E }, /* NEGATIVE SQUARED CROSS MARK */ + { 0x2753, 0x2755 }, /* BLACK QUESTION MARK ORNAMENT - WHITE EXCLAMATION MARK ORNAMENT */ + { 0x2757, 0x2757 }, /* HEAVY EXCLAMATION MARK SYMBOL */ + { 0x2795, 0x2797 }, /* HEAVY PLUS SIGN - HEAVY DIVISION SIGN */ + { 0x27B0, 0x27B0 }, /* CURLY LOOP */ + { 0x27BF, 0x27BF }, /* DOUBLE CURLY LOOP */ + { 0x2B1B, 0x2B1C }, /* BLACK LARGE SQUARE - WHITE LARGE SQUARE */ + { 0x2B50, 0x2B50 }, /* WHITE MEDIUM STAR */ + { 0x2B55, 0x2B55 }, /* HEAVY LARGE CIRCLE */ + { 0x2E80, 0x2E99 }, /* CJK RADICAL REPEAT - CJK RADICAL RAP */ + { 0x2E9B, 0x2EF3 }, /* CJK RADICAL CHOKE - CJK RADICAL C-SIMPLIFIED TURTLE */ + { 0x2F00, 0x2FD5 }, /* KANGXI RADICAL ONE - KANGXI RADICAL FLUTE */ + { 0x2FF0, 0x3029 }, /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT - HANGZHOU NUMERAL NINE */ + { 0x3030, 0x303E }, /* WAVY DASH - IDEOGRAPHIC VARIATION INDICATOR */ + { 0x3041, 0x3096 }, /* HIRAGANA LETTER SMALL A - HIRAGANA LETTER SMALL KE */ + { 0x309B, 0x30FF }, /* KATAKANA-HIRAGANA VOICED SOUND MARK - KATAKANA DIGRAPH KOTO */ + { 0x3105, 0x312F }, /* BOPOMOFO LETTER B - BOPOMOFO LETTER NN */ + { 0x3131, 0x318E }, /* HANGUL LETTER KIYEOK - HANGUL LETTER ARAEAE */ + { 0x3190, 0x31E5 }, /* IDEOGRAPHIC ANNOTATION LINKING MARK - CJK STROKE SZP */ + { 0x31EF, 0x321E }, /* IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION - PARENTHESIZED KOREAN CHARACTER O HU */ + { 0x3220, 0x3247 }, /* PARENTHESIZED IDEOGRAPH ONE - CIRCLED IDEOGRAPH KOTO */ + { 0x3250, 0xA48C }, /* PARTNERSHIP SIGN - YI SYLLABLE YYR */ + { 0xA490, 0xA4C6 }, /* YI RADICAL QOT - YI RADICAL KE */ + { 0xA960, 0xA97C }, /* HANGUL CHOSEONG TIKEUT-MIEUM - HANGUL CHOSEONG SSANGYEORINHIEUH */ + { 
0xAC00, 0xD7A3 }, /* HANGUL SYLLABLE GA - HANGUL SYLLABLE HIH */ + { 0xF900, 0xFAFF }, /* U+F900 - U+FAFF */ + { 0xFE10, 0xFE19 }, /* PRESENTATION FORM FOR VERTICAL COMMA - PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS */ + { 0xFE30, 0xFE52 }, /* PRESENTATION FORM FOR VERTICAL TWO DOT LEADER - SMALL FULL STOP */ + { 0xFE54, 0xFE66 }, /* SMALL SEMICOLON - SMALL EQUALS SIGN */ + { 0xFE68, 0xFE6B }, /* SMALL REVERSE SOLIDUS - SMALL COMMERCIAL AT */ + { 0xFF01, 0xFF60 }, /* FULLWIDTH EXCLAMATION MARK - FULLWIDTH RIGHT WHITE PARENTHESIS */ + { 0xFFE0, 0xFFE6 }, /* FULLWIDTH CENT SIGN - FULLWIDTH WON SIGN */ +}; + +/* Double-width character ranges (non-BMP, U+10000 and above) */ +static const struct ucs_interval32 ucs_double_width_non_bmp_ranges[] = { { 0x16FE0, 0x16FE3 }, /* TANGUT ITERATION MARK - OLD CHINESE ITERATION MARK */ { 0x17000, 0x187F7 }, /* U+17000 - U+187F7 */ { 0x18800, 0x18CD5 }, /* TANGUT COMPONENT-001 - KHITAN SMALL SCRIPT CHARACTER-18CD5 */ From c2d2c5c0d631f7de9697870e4eec89289177d445 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 17 Apr 2025 14:45:16 -0400 Subject: [PATCH 075/105] vt: move UCS tables to the "shipped" form Use the "shipped" mechanism to copy pre-generated tables to the build tree by default. If GENERATE_UCS_TABLES=1 then they are generated at build time instead. If GENERATE_UCS_TABLES=2 then gen_ucs_recompose_table.py is invoked with --full. 
Signed-off-by: Nicolas Pitre Suggested-by: Jiri Slaby Link: https://lore.kernel.org/r/20250417184849.475581-15-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/Makefile | 27 ++++++++++++++++++- drivers/tty/vt/gen_ucs_recompose_table.py | 10 ++++--- drivers/tty/vt/gen_ucs_width_table.py | 18 +++++++++---- ..._table.h => ucs_recompose_table.h_shipped} | 0 ...idth_table.h => ucs_width_table.h_shipped} | 0 5 files changed, 45 insertions(+), 10 deletions(-) rename drivers/tty/vt/{ucs_recompose_table.h => ucs_recompose_table.h_shipped} (100%) rename drivers/tty/vt/{ucs_width_table.h => ucs_width_table.h_shipped} (100%) diff --git a/drivers/tty/vt/Makefile b/drivers/tty/vt/Makefile index e24c8546ac12..8ba33cc942c7 100644 --- a/drivers/tty/vt/Makefile +++ b/drivers/tty/vt/Makefile @@ -11,7 +11,8 @@ obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o \ ucs.o # Files generated that shall be removed upon make clean -clean-files := consolemap_deftbl.c defkeymap.c +clean-files := consolemap_deftbl.c defkeymap.c \ + ucs_width_table.h ucs_recompose_table.h hostprogs += conmakehash @@ -34,3 +35,27 @@ $(obj)/defkeymap.c: $(obj)/%.c: $(src)/%.map loadkeys --mktable --unicode $< > $@ endif + +$(obj)/ucs.o: $(src)/ucs.c $(obj)/ucs_width_table.h $(obj)/ucs_recompose_table.h + +# You may uncomment one of those to have the UCS tables be regenerated +# during the build process. By default the _shipped versions are used. 
+# +#GENERATE_UCS_TABLES := 1 +#GENERATE_UCS_TABLES := 2 # invokes gen_ucs_recompose_table.py with --full + +ifdef GENERATE_UCS_TABLES + +$(obj)/ucs_width_table.h: $(src)/gen_ucs_width_table.py + $(PYTHON3) $< -o $@ + +ifeq ($(GENERATE_UCS_TABLES),2) +gen_recomp_arg := --full +else +gen_recomp_arg := +endif + +$(obj)/ucs_recompose_table.h: $(src)/gen_ucs_recompose_table.py + $(PYTHON3) $< -o $@ $(gen_recomp_arg) + +endif diff --git a/drivers/tty/vt/gen_ucs_recompose_table.py b/drivers/tty/vt/gen_ucs_recompose_table.py index d30f8f5242d2..4434a436ac9e 100755 --- a/drivers/tty/vt/gen_ucs_recompose_table.py +++ b/drivers/tty/vt/gen_ucs_recompose_table.py @@ -19,8 +19,8 @@ import textwrap from pathlib import Path this_file = Path(__file__).name -# Output file name -out_file = "ucs_recompose_table.h" +# Default output file name +DEFAULT_OUT_FILE = "ucs_recompose_table.h" common_recompose_description = "most commonly used Latin, Greek, and Cyrillic recomposition pairs only" COMMON_RECOMPOSITION_PAIRS = [ @@ -165,7 +165,7 @@ def validate_common_pairs(full_list): print(error_msg) raise ValueError(error_msg) -def generate_recomposition_table(use_full_list=False): +def generate_recomposition_table(use_full_list=False, out_file=DEFAULT_OUT_FILE): """Generate the recomposition C table.""" # Collect all recomposition pairs for validation @@ -250,6 +250,8 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(description="Generate Unicode recomposition table") parser.add_argument("--full", action="store_true", help="Generate a full recomposition table (default: common pairs only)") + parser.add_argument("-o", "--output", dest="output_file", default=DEFAULT_OUT_FILE, + help=f"Output file name (default: {DEFAULT_OUT_FILE})") args = parser.parse_args() - generate_recomposition_table(use_full_list=args.full) + generate_recomposition_table(use_full_list=args.full, out_file=args.output_file) diff --git a/drivers/tty/vt/gen_ucs_width_table.py 
b/drivers/tty/vt/gen_ucs_width_table.py index 059ed9a8baa2..76e80ebeff13 100755 --- a/drivers/tty/vt/gen_ucs_width_table.py +++ b/drivers/tty/vt/gen_ucs_width_table.py @@ -5,13 +5,14 @@ import unicodedata import sys +import argparse # This script's file name from pathlib import Path this_file = Path(__file__).name -# Output file name -out_file = "ucs_width_table.h" +# Default output file name +DEFAULT_OUT_FILE = "ucs_width_table.h" # --- Global Constants for Width Assignments --- @@ -185,13 +186,14 @@ def create_width_tables(): return zero_width_ranges, double_width_ranges -def write_tables(zero_width_ranges, double_width_ranges): +def write_tables(zero_width_ranges, double_width_ranges, out_file=DEFAULT_OUT_FILE): """ Write the generated tables to C header file. Args: zero_width_ranges: List of (start, end) ranges for zero-width characters double_width_ranges: List of (start, end) ranges for double-width characters + out_file: Output file name (default: DEFAULT_OUT_FILE) """ # Function to split ranges into BMP (16-bit) and non-BMP (above 16-bit) @@ -286,14 +288,20 @@ static const struct ucs_interval32 ucs_double_width_non_bmp_ranges[] = { f.write("};\n") if __name__ == "__main__": + # Parse command line arguments + parser = argparse.ArgumentParser(description="Generate Unicode width tables") + parser.add_argument("-o", "--output", dest="output_file", default=DEFAULT_OUT_FILE, + help=f"Output file name (default: {DEFAULT_OUT_FILE})") + args = parser.parse_args() + # Write tables to header file zero_width_ranges, double_width_ranges = create_width_tables() - write_tables(zero_width_ranges, double_width_ranges) + write_tables(zero_width_ranges, double_width_ranges, out_file=args.output_file) # Print summary zero_width_count = sum(end - start + 1 for start, end in zero_width_ranges) double_width_count = sum(end - start + 1 for start, end in double_width_ranges) - print(f"Generated {out_file} with:") + print(f"Generated {args.output_file} with:") print(f"- 
{len(zero_width_ranges)} zero-width ranges covering ~{zero_width_count} code points") print(f"- {len(double_width_ranges)} double-width ranges covering ~{double_width_count} code points") print(f"- Unicode Version: {unicodedata.unidata_version}") diff --git a/drivers/tty/vt/ucs_recompose_table.h b/drivers/tty/vt/ucs_recompose_table.h_shipped similarity index 100% rename from drivers/tty/vt/ucs_recompose_table.h rename to drivers/tty/vt/ucs_recompose_table.h_shipped diff --git a/drivers/tty/vt/ucs_width_table.h b/drivers/tty/vt/ucs_width_table.h_shipped similarity index 100% rename from drivers/tty/vt/ucs_width_table.h rename to drivers/tty/vt/ucs_width_table.h_shipped From f51159c05137dda05b425be70cd6c05f54a997c0 Mon Sep 17 00:00:00 2001 From: Faraz Ata Date: Tue, 29 Apr 2025 15:59:41 +0530 Subject: [PATCH 076/105] tty: serial: samsung_tty: support 18 uart ports ExynosAutov920 SoC supports 18 UART ports, update the value of UART_NR to accommodate the same. Signed-off-by: Faraz Ata Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250429102941.4138463-1-faraz.ata@samsung.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung_tty.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c index 73e2866febc1..2fb58c626daf 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -52,7 +52,7 @@ #define S3C24XX_SERIAL_MINOR 64 #ifdef CONFIG_ARM64 -#define UART_NR 12 +#define UART_NR 18 #else #define UART_NR CONFIG_SERIAL_SAMSUNG_UARTS #endif From c40b91e38eb8d4489def095d62ab476d45871323 Mon Sep 17 00:00:00 2001 From: Rengarajan S Date: Fri, 25 Apr 2025 20:25:00 +0530 Subject: [PATCH 077/105] 8250: microchip: pci1xxxx: Add PCIe Hot reset disable support for Rev C0 and later devices Systems that issue PCIe hot reset requests during a suspend/resume cycle cause PCI1XXXX device revisions prior to C0 to get its UART configuration registers 
reset to hardware default values. This results in device inaccessibility and data transfer failures. Starting with Revision C0, support was added in the device hardware (via the Hot Reset Disable Bit) to allow resetting only the PCIe interface and its associated logic, but preserving the UART configuration during a hot reset. This patch enables the hot reset disable feature during suspend/ resume for C0 and later revisions of the device. Signed-off-by: Rengarajan S Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250425145500.29036-1-rengarajan.s@microchip.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci1xxxx.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci1xxxx.c b/drivers/tty/serial/8250/8250_pci1xxxx.c index e9c51d4e447d..4c149db84692 100644 --- a/drivers/tty/serial/8250/8250_pci1xxxx.c +++ b/drivers/tty/serial/8250/8250_pci1xxxx.c @@ -115,6 +115,7 @@ #define UART_RESET_REG 0x94 #define UART_RESET_D3_RESET_DISABLE BIT(16) +#define UART_RESET_HOT_RESET_DISABLE BIT(17) #define UART_BURST_STATUS_REG 0x9C #define UART_TX_BURST_FIFO 0xA0 @@ -620,6 +621,10 @@ static int pci1xxxx_suspend(struct device *dev) } data = readl(p + UART_RESET_REG); + + if (priv->dev_rev >= 0xC0) + data |= UART_RESET_HOT_RESET_DISABLE; + writel(data | UART_RESET_D3_RESET_DISABLE, p + UART_RESET_REG); if (wakeup) @@ -647,7 +652,12 @@ static int pci1xxxx_resume(struct device *dev) } data = readl(p + UART_RESET_REG); + + if (priv->dev_rev >= 0xC0) + data &= ~UART_RESET_HOT_RESET_DISABLE; + writel(data & ~UART_RESET_D3_RESET_DISABLE, p + UART_RESET_REG); + iounmap(p); for (i = 0; i < priv->nr; i++) { From a883620602758832f81fe042be778e57174add3a Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Fri, 25 Apr 2025 20:48:10 +0800 Subject: [PATCH 078/105] serdev: Refine several error or debug messages Refine several dev_err() and dev_dbg() messages to solve: // hardcoded device name dev_dbg(dev, "...dev_name_str...") // 
repeated device name since dev_dbg() also prints it as prefix dev_err(dev, "...%s...", dev_name(dev)) // not concise as dev_err(dev, "...%d...", err) dev_err(dev, "...%pe...", ERR_PTR(err)) Signed-off-by: Zijun Hu Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250425-fix_serdev-v3-1-2e4ea8261640@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serdev/core.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index eb2a2e58fe78..0213381fa358 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -118,12 +118,11 @@ int serdev_device_add(struct serdev_device *serdev) err = device_add(&serdev->dev); if (err < 0) { - dev_err(&serdev->dev, "Can't add %s, status %pe\n", - dev_name(&serdev->dev), ERR_PTR(err)); + dev_err(&serdev->dev, "Failed to add serdev: %d\n", err); goto err_clear_serdev; } - dev_dbg(&serdev->dev, "device %s registered\n", dev_name(&serdev->dev)); + dev_dbg(&serdev->dev, "serdev registered successfully\n"); return 0; @@ -783,8 +782,7 @@ int serdev_controller_add(struct serdev_controller *ctrl) goto err_rpm_disable; } - dev_dbg(&ctrl->dev, "serdev%d registered: dev:%p\n", - ctrl->nr, &ctrl->dev); + dev_dbg(&ctrl->dev, "serdev controller registered: dev:%p\n", &ctrl->dev); return 0; err_rpm_disable: From 5ee558c5d9e9c464bcecb68b3c1d1f9690747a64 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 30 Apr 2025 14:29:17 +0200 Subject: [PATCH 079/105] vt: add new dynamically generated files to .gitignore Add new dynamically generated headers to the local .gitignore. 
Fixes: c2d2c5c0d631 ("vt: move UCS tables to the "shipped" form") Signed-off-by: Bartosz Golaszewski Reviewed-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250430122917.72105-1-brgl@bgdev.pl Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/vt/.gitignore b/drivers/tty/vt/.gitignore index 0221709b177d..49ce44edad65 100644 --- a/drivers/tty/vt/.gitignore +++ b/drivers/tty/vt/.gitignore @@ -2,3 +2,5 @@ /conmakehash /consolemap_deftbl.c /defkeymap.c +/ucs_recompose_table.h +/ucs_width_table.h From dab6bbc807a37d7922d5310552ddf8eec8383076 Mon Sep 17 00:00:00 2001 From: Dharma Balasubiramani Date: Fri, 2 May 2025 09:10:00 -0700 Subject: [PATCH 080/105] dt-bindings: serial: atmel,at91-usart: add microchip,sama7d65-usart Add SAMA7D65 USART compatible to DT bindings documentation. Signed-off-by: Dharma Balasubiramani Signed-off-by: Ryan Wanner Acked-by: "Rob Herring (Arm)" Link: https://lore.kernel.org/r/ba19dff5c20bd022cf5391ac909a85ab5e1797b4.1746201835.git.Ryan.Wanner@microchip.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml b/Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml index f466c38518c4..087a8926f8b4 100644 --- a/Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml +++ b/Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml @@ -26,6 +26,7 @@ properties: - enum: - microchip,sam9x60-usart - microchip,sam9x7-usart + - microchip,sama7d65-usart - const: atmel,at91sam9260-usart - items: - const: microchip,sam9x60-dbgu From e3975aa899c0a3bbc10d035e699b142cd1373a71 Mon Sep 17 00:00:00 2001 From: Dustin Lundquist Date: Tue, 6 May 2025 11:18:45 -0700 Subject: [PATCH 081/105] serial: jsm: fix NPE during jsm_uart_port_init No device was set which caused serial_base_ctrl_add to crash. 
BUG: kernel NULL pointer dereference, address: 0000000000000050 Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 16 UID: 0 PID: 368 Comm: (udev-worker) Not tainted 6.12.25-amd64 #1 Debian 6.12.25-1 RIP: 0010:serial_base_ctrl_add+0x96/0x120 Call Trace: serial_core_register_port+0x1a0/0x580 ? __setup_irq+0x39c/0x660 ? __kmalloc_cache_noprof+0x111/0x310 jsm_uart_port_init+0xe8/0x180 [jsm] jsm_probe_one+0x1f4/0x410 [jsm] local_pci_probe+0x42/0x90 pci_device_probe+0x22f/0x270 really_probe+0xdb/0x340 ? pm_runtime_barrier+0x54/0x90 ? __pfx___driver_attach+0x10/0x10 __driver_probe_device+0x78/0x110 driver_probe_device+0x1f/0xa0 __driver_attach+0xba/0x1c0 bus_for_each_dev+0x8c/0xe0 bus_add_driver+0x112/0x1f0 driver_register+0x72/0xd0 jsm_init_module+0x36/0xff0 [jsm] ? __pfx_jsm_init_module+0x10/0x10 [jsm] do_one_initcall+0x58/0x310 do_init_module+0x60/0x230 Tested with Digi Neo PCIe 8 port card. Fixes: 84a9582fd203 ("serial: core: Start managing serial controllers to enable runtime PM") Cc: stable Signed-off-by: Dustin Lundquist Link: https://lore.kernel.org/r/3f31d4f75863614655c4673027a208be78d022ec.camel@null-ptr.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/jsm/jsm_tty.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/jsm/jsm_tty.c b/drivers/tty/serial/jsm/jsm_tty.c index ce0fef7e2c66..be2f130696b3 100644 --- a/drivers/tty/serial/jsm/jsm_tty.c +++ b/drivers/tty/serial/jsm/jsm_tty.c @@ -451,6 +451,7 @@ int jsm_uart_port_init(struct jsm_board *brd) if (!brd->channels[i]) continue; + brd->channels[i]->uart_port.dev = &brd->pci_dev->dev; brd->channels[i]->uart_port.irq = brd->irq; brd->channels[i]->uart_port.uartclk = 14745600; brd->channels[i]->uart_port.type = PORT_JSM; From 2ff5d5f6fe983d04f85a8ae46f99b561508f0a46 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Tue, 6 May 2025 17:00:24 -0500 Subject: [PATCH 082/105] dt-bindings: serial: Convert cnxt,cx92755-usart to DT schema Convert the Conexant Digicolor USART binding to DT 
schema. It is a straight-forward conversion. Signed-off-by: "Rob Herring (Arm)" Acked-by: Baruch Siach Link: https://lore.kernel.org/r/20250506220025.2545995-1-robh@kernel.org Signed-off-by: Greg Kroah-Hartman --- .../bindings/serial/cnxt,cx92755-usart.yaml | 48 +++++++++++++++++++ .../bindings/serial/digicolor-usart.txt | 27 ----------- 2 files changed, 48 insertions(+), 27 deletions(-) create mode 100644 Documentation/devicetree/bindings/serial/cnxt,cx92755-usart.yaml delete mode 100644 Documentation/devicetree/bindings/serial/digicolor-usart.txt diff --git a/Documentation/devicetree/bindings/serial/cnxt,cx92755-usart.yaml b/Documentation/devicetree/bindings/serial/cnxt,cx92755-usart.yaml new file mode 100644 index 000000000000..720229455330 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/cnxt,cx92755-usart.yaml @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/cnxt,cx92755-usart.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Conexant Digicolor USART + +maintainers: + - Baruch Siach + +description: > + Note: this binding is only applicable for using the USART peripheral as UART. + USART also supports synchronous serial protocols like SPI and I2S. + Use the binding that matches the wiring of your system.
+ +allOf: + - $ref: /schemas/serial/serial.yaml# + +properties: + compatible: + const: cnxt,cx92755-usart + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - interrupts + +unevaluatedProperties: false + +examples: + - | + serial@f0000740 { + compatible = "cnxt,cx92755-usart"; + reg = <0xf0000740 0x20>; + clocks = <&main_clk>; + interrupts = <44>; + }; diff --git a/Documentation/devicetree/bindings/serial/digicolor-usart.txt b/Documentation/devicetree/bindings/serial/digicolor-usart.txt deleted file mode 100644 index 2d3ede66889d..000000000000 --- a/Documentation/devicetree/bindings/serial/digicolor-usart.txt +++ /dev/null @@ -1,27 +0,0 @@ -Binding for Conexant Digicolor USART - -Note: this binding is only applicable for using the USART peripheral as -UART. USART also support synchronous serial protocols like SPI and I2S. Use -the binding that matches the wiring of your system. - -Required properties: -- compatible : should be "cnxt,cx92755-usart". -- reg: Should contain USART controller registers location and length. -- interrupts: Should contain a single USART controller interrupt. -- clocks: Must contain phandles to the USART clock - See ../clocks/clock-bindings.txt for details. - -Note: Each UART port should have an alias correctly numbered -in "aliases" node. - -Example: - aliases { - serial0 = &uart0; - }; - - uart0: uart@f0000740 { - compatible = "cnxt,cx92755-usart"; - reg = <0xf0000740 0x20>; - clocks = <&main_clk>; - interrupts = <44>; - }; From 30b3aecad9fcf519824f40fc7c5e139c91b48591 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Tue, 6 May 2025 17:00:47 -0500 Subject: [PATCH 083/105] dt-bindings: serial: Convert nxp,lpc3220-hsuart to DT schema Convert the NXP LPC3220 HS UART binding to DT schema. It is a straight-forward conversion. 
Signed-off-by: "Rob Herring (Arm)" Acked-by: Vladimir Zapolskiy Link: https://lore.kernel.org/r/20250506220048.2546915-1-robh@kernel.org Signed-off-by: Greg Kroah-Hartman --- .../bindings/serial/nxp,lpc3220-hsuart.yaml | 39 +++++++++++++++++++ .../bindings/serial/nxp-lpc32xx-hsuart.txt | 14 ------- 2 files changed, 39 insertions(+), 14 deletions(-) create mode 100644 Documentation/devicetree/bindings/serial/nxp,lpc3220-hsuart.yaml delete mode 100644 Documentation/devicetree/bindings/serial/nxp-lpc32xx-hsuart.txt diff --git a/Documentation/devicetree/bindings/serial/nxp,lpc3220-hsuart.yaml b/Documentation/devicetree/bindings/serial/nxp,lpc3220-hsuart.yaml new file mode 100644 index 000000000000..ffa2ea59f256 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/nxp,lpc3220-hsuart.yaml @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/nxp,lpc3220-hsuart.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP LPC32xx SoC High Speed UART + +maintainers: + - Vladimir Zapolskiy + - Piotr Wojtaszczyk + +allOf: + - $ref: /schemas/serial/serial.yaml# + +properties: + compatible: + const: nxp,lpc3220-hsuart + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + +unevaluatedProperties: false + +examples: + - | + serial@40014000 { + compatible = "nxp,lpc3220-hsuart"; + reg = <0x40014000 0x1000>; + interrupts = <26 0>; + }; diff --git a/Documentation/devicetree/bindings/serial/nxp-lpc32xx-hsuart.txt b/Documentation/devicetree/bindings/serial/nxp-lpc32xx-hsuart.txt deleted file mode 100644 index 0d439dfc1aa5..000000000000 --- a/Documentation/devicetree/bindings/serial/nxp-lpc32xx-hsuart.txt +++ /dev/null @@ -1,14 +0,0 @@ -* NXP LPC32xx SoC High Speed UART - -Required properties: -- compatible: Should be "nxp,lpc3220-hsuart" -- reg: Should contain registers location and length -- interrupts: Should contain interrupt - 
-Example: - - uart1: serial@40014000 { - compatible = "nxp,lpc3220-hsuart"; - reg = <0x40014000 0x1000>; - interrupts = <26 0>; - }; From 1dd624430d531abf43a5bba4f9a10821bdf4d9d9 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Tue, 6 May 2025 17:00:11 -0500 Subject: [PATCH 084/105] dt-bindings: serial: Convert arm,mps2-uart to DT schema Convert the Arm MPS2 UART binding to DT schema. It is a straight-forward conversion. Signed-off-by: "Rob Herring (Arm)" Acked-by: Sudeep Holla Link: https://lore.kernel.org/r/20250506220012.2545470-1-robh@kernel.org Signed-off-by: Greg Kroah-Hartman --- .../bindings/serial/arm,mps2-uart.txt | 19 -------- .../bindings/serial/arm,mps2-uart.yaml | 46 +++++++++++++++++++ 2 files changed, 46 insertions(+), 19 deletions(-) delete mode 100644 Documentation/devicetree/bindings/serial/arm,mps2-uart.txt create mode 100644 Documentation/devicetree/bindings/serial/arm,mps2-uart.yaml diff --git a/Documentation/devicetree/bindings/serial/arm,mps2-uart.txt b/Documentation/devicetree/bindings/serial/arm,mps2-uart.txt deleted file mode 100644 index 128cc6aed001..000000000000 --- a/Documentation/devicetree/bindings/serial/arm,mps2-uart.txt +++ /dev/null @@ -1,19 +0,0 @@ -ARM MPS2 UART - -Required properties: -- compatible : Should be "arm,mps2-uart" -- reg : Address and length of the register set -- interrupts : Reference to the UART RX, TX and overrun interrupts - -Required clocking property: -- clocks : The input clock of the UART - - -Examples: - -uart0: serial@40004000 { - compatible = "arm,mps2-uart"; - reg = <0x40004000 0x1000>; - interrupts = <0 1 12>; - clocks = <&sysclk>; -}; diff --git a/Documentation/devicetree/bindings/serial/arm,mps2-uart.yaml b/Documentation/devicetree/bindings/serial/arm,mps2-uart.yaml new file mode 100644 index 000000000000..4a8df078e6f3 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/arm,mps2-uart.yaml @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- 
+$id: http://devicetree.org/schemas/serial/arm,mps2-uart.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Arm MPS2 UART + +maintainers: + - Vladimir Murzin + +allOf: + - $ref: /schemas/serial/serial.yaml# + +properties: + compatible: + const: arm,mps2-uart + + reg: + maxItems: 1 + + interrupts: + items: + - description: RX interrupt + - description: TX interrupt + - description: Overrun interrupt + + clocks: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - clocks + +unevaluatedProperties: false + +examples: + - | + serial@40004000 { + compatible = "arm,mps2-uart"; + reg = <0x40004000 0x1000>; + interrupts = <0>, <1>, <12>; + clocks = <&sysclk>; + }; From a34fc8836f680fb56a61d7b864dfb7a5432e5bdc Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Tue, 6 May 2025 17:00:19 -0500 Subject: [PATCH 085/105] dt-bindings: serial: Convert cirrus,ep7209-uart to DT schema Convert the Cirrus EP7209 UART binding to DT schema. There is no user of "cirrus,ep7312-uart" other than the example, so drop it. Drop the "aliases" node part as it is not relevant to the schema. The modem control GPIOs are covered by the serial.yaml schema and don't have to be listed in the schema. 
Signed-off-by: "Rob Herring (Arm)" Reviewed-by: Thierry Reding Link: https://lore.kernel.org/r/20250506220021.2545820-1-robh@kernel.org Signed-off-by: Greg Kroah-Hartman --- .../bindings/serial/cirrus,clps711x-uart.txt | 31 ---------- .../bindings/serial/cirrus,ep7209-uart.yaml | 56 +++++++++++++++++++ 2 files changed, 56 insertions(+), 31 deletions(-) delete mode 100644 Documentation/devicetree/bindings/serial/cirrus,clps711x-uart.txt create mode 100644 Documentation/devicetree/bindings/serial/cirrus,ep7209-uart.yaml diff --git a/Documentation/devicetree/bindings/serial/cirrus,clps711x-uart.txt b/Documentation/devicetree/bindings/serial/cirrus,clps711x-uart.txt deleted file mode 100644 index 07013fa60a48..000000000000 --- a/Documentation/devicetree/bindings/serial/cirrus,clps711x-uart.txt +++ /dev/null @@ -1,31 +0,0 @@ -* Cirrus Logic CLPS711X Universal Asynchronous Receiver/Transmitter (UART) - -Required properties: -- compatible: Should be "cirrus,ep7209-uart". -- reg: Address and length of the register set for the device. -- interrupts: Should contain UART TX and RX interrupt. -- clocks: Should contain UART core clock number. -- syscon: Phandle to SYSCON node, which contain UART control bits. - -Optional properties: -- {rts,cts,dtr,dsr,rng,dcd}-gpios: specify a GPIO for RTS/CTS/DTR/DSR/RI/DCD - line respectively. - -Note: Each UART port should have an alias correctly numbered -in "aliases" node. 
- -Example: - aliases { - serial0 = &uart1; - }; - - uart1: uart@80000480 { - compatible = "cirrus,ep7312-uart","cirrus,ep7209-uart"; - reg = <0x80000480 0x80>; - interrupts = <12 13>; - clocks = <&clks 11>; - syscon = <&syscon1>; - cts-gpios = <&sysgpio 0 GPIO_ACTIVE_LOW>; - dsr-gpios = <&sysgpio 1 GPIO_ACTIVE_LOW>; - dcd-gpios = <&sysgpio 2 GPIO_ACTIVE_LOW>; - }; diff --git a/Documentation/devicetree/bindings/serial/cirrus,ep7209-uart.yaml b/Documentation/devicetree/bindings/serial/cirrus,ep7209-uart.yaml new file mode 100644 index 000000000000..c9976e86872b --- /dev/null +++ b/Documentation/devicetree/bindings/serial/cirrus,ep7209-uart.yaml @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/cirrus,ep7209-uart.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Cirrus Logic CLPS711X Universal Asynchronous Receiver/Transmitter (UART) + +maintainers: + - Alexander Shiyan + +allOf: + - $ref: /schemas/serial/serial.yaml# + +properties: + compatible: + const: cirrus,ep7209-uart + + reg: + maxItems: 1 + + interrupts: + items: + - description: UART TX interrupt + - description: UART RX interrupt + + clocks: + maxItems: 1 + + syscon: + description: Phandle to SYSCON node, which contains UART control bits. 
+ $ref: /schemas/types.yaml#/definitions/phandle + +required: + - compatible + - reg + - interrupts + - clocks + - syscon + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/gpio/gpio.h> + + serial@80000480 { + compatible = "cirrus,ep7209-uart"; + reg = <0x80000480 0x80>; + interrupts = <12>, <13>; + clocks = <&clks 11>; + syscon = <&syscon1>; + cts-gpios = <&sysgpio 0 GPIO_ACTIVE_LOW>; + dsr-gpios = <&sysgpio 1 GPIO_ACTIVE_LOW>; + dcd-gpios = <&sysgpio 2 GPIO_ACTIVE_LOW>; + }; From 6f5ff13bbc20b99a623f37ba85730929a36a5100 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Tue, 6 May 2025 17:00:28 -0500 Subject: [PATCH 086/105] dt-bindings: serial: Convert lantiq,asc to DT schema Convert the Lantiq SoC ASC UART binding to DT schema. There are no such clock identifier defines nor a user with clocks, so drop the example with clocks. Signed-off-by: "Rob Herring (Arm)" Reviewed-by: Thierry Reding Link: https://lore.kernel.org/r/20250506220029.2546179-1-robh@kernel.org Signed-off-by: Greg Kroah-Hartman --- .../bindings/serial/lantiq,asc.yaml | 56 +++++++++++++++++++ .../devicetree/bindings/serial/lantiq_asc.txt | 31 ---------- 2 files changed, 56 insertions(+), 31 deletions(-) create mode 100644 Documentation/devicetree/bindings/serial/lantiq,asc.yaml delete mode 100644 Documentation/devicetree/bindings/serial/lantiq_asc.txt diff --git a/Documentation/devicetree/bindings/serial/lantiq,asc.yaml b/Documentation/devicetree/bindings/serial/lantiq,asc.yaml new file mode 100644 index 000000000000..96e8c79cb047 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/lantiq,asc.yaml @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/lantiq,asc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Lantiq SoC ASC serial controller + +maintainers: + - John Crispin + - Songjun Wu + +allOf: + - $ref: /schemas/serial/serial.yaml# + +properties: + compatible: + const: 
lantiq,asc + + reg: + maxItems: 1 + + interrupts: + items: + - description: TX interrupt + - description: RX interrupt + - description: Error interrupt + + clocks: + items: + - description: Frequency clock + - description: Gate clock + + clock-names: + items: + - const: freq + - const: asc + +required: + - compatible + - reg + - interrupts + +unevaluatedProperties: false + +examples: + - | + #include + + serial@16600000 { + compatible = "lantiq,asc"; + reg = <0x16600000 0x100000>; + interrupts = , + , + ; + }; diff --git a/Documentation/devicetree/bindings/serial/lantiq_asc.txt b/Documentation/devicetree/bindings/serial/lantiq_asc.txt deleted file mode 100644 index 40e81a5818f6..000000000000 --- a/Documentation/devicetree/bindings/serial/lantiq_asc.txt +++ /dev/null @@ -1,31 +0,0 @@ -Lantiq SoC ASC serial controller - -Required properties: -- compatible : Should be "lantiq,asc" -- reg : Address and length of the register set for the device -- interrupts: the 3 (tx rx err) interrupt numbers. The interrupt specifier - depends on the interrupt-parent interrupt controller. - -Optional properties: -- clocks: Should contain frequency clock and gate clock -- clock-names: Should be "freq" and "asc" - -Example: - -asc0: serial@16600000 { - compatible = "lantiq,asc"; - reg = <0x16600000 0x100000>; - interrupt-parent = <&gic>; - interrupts = , - , - ; - clocks = <&cgu CLK_SSX4>, <&cgu GCLK_UART>; - clock-names = "freq", "asc"; -}; - -asc1: serial@e100c00 { - compatible = "lantiq,asc"; - reg = <0xE100C00 0x400>; - interrupt-parent = <&icu0>; - interrupts = <112 113 114>; -}; From 2446bd692e33788835c2daf65eec385c53486404 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 7 May 2025 10:44:02 -0500 Subject: [PATCH 087/105] dt-bindings: serial: Convert marvell,armada-3700-uart to DT schema Convert the Marvell Armada-3700 UART binding to DT schema. It is a straight-forward conversion. Drop the long deprecated single interrupt support. 
Signed-off-by: "Rob Herring (Arm)" Link: https://lore.kernel.org/r/20250507154408.1595932-1-robh@kernel.org Signed-off-by: Greg Kroah-Hartman --- .../serial/marvell,armada-3700-uart.yaml | 102 ++++++++++++++++++ .../devicetree/bindings/serial/mvebu-uart.txt | 56 ---------- MAINTAINERS | 2 +- 3 files changed, 103 insertions(+), 57 deletions(-) create mode 100644 Documentation/devicetree/bindings/serial/marvell,armada-3700-uart.yaml delete mode 100644 Documentation/devicetree/bindings/serial/mvebu-uart.txt diff --git a/Documentation/devicetree/bindings/serial/marvell,armada-3700-uart.yaml b/Documentation/devicetree/bindings/serial/marvell,armada-3700-uart.yaml new file mode 100644 index 000000000000..6c7fa3d19369 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/marvell,armada-3700-uart.yaml @@ -0,0 +1,102 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/marvell,armada-3700-uart.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell Armada-3700 UART + +maintainers: + - Pali Rohár + +description: + Marvell UART is a non standard UART used in some of Marvell EBU SoCs (e.g. + Armada-3700). + +properties: + compatible: + enum: + - marvell,armada-3700-uart + - marvell,armada-3700-uart-ext + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + description: + UART reference clock used to derive the baud rate. If absent, only fixed + baud rate from the bootloader is supported. 
+ + interrupts: + minItems: 2 + items: + - description: UART sum interrupt + - description: UART TX interrupt + - description: UART RX interrupt + + interrupt-names: + minItems: 2 + maxItems: 3 + +required: + - compatible + - reg + - interrupts + - interrupt-names + +unevaluatedProperties: false + +allOf: + - $ref: /schemas/serial/serial.yaml# + - if: + properties: + compatible: + const: marvell,armada-3700-uart-ext + then: + properties: + interrupts: + maxItems: 2 + + interrupt-names: + items: + - const: uart-tx + - const: uart-rx + else: + properties: + interrupts: + minItems: 3 + + interrupt-names: + items: + - const: uart-sum + - const: uart-tx + - const: uart-rx + +examples: + - | + #include + + serial@12000 { + compatible = "marvell,armada-3700-uart"; + reg = <0x12000 0x18>; + clocks = <&uartclk 0>; + interrupts = + , + , + ; + interrupt-names = "uart-sum", "uart-tx", "uart-rx"; + }; + + - | + #include + + serial@12200 { + compatible = "marvell,armada-3700-uart-ext"; + reg = <0x12200 0x30>; + clocks = <&uartclk 1>; + interrupts = + , + ; + interrupt-names = "uart-tx", "uart-rx"; + }; diff --git a/Documentation/devicetree/bindings/serial/mvebu-uart.txt b/Documentation/devicetree/bindings/serial/mvebu-uart.txt deleted file mode 100644 index a062bbca532c..000000000000 --- a/Documentation/devicetree/bindings/serial/mvebu-uart.txt +++ /dev/null @@ -1,56 +0,0 @@ -* Marvell UART : Non standard UART used in some of Marvell EBU SoCs - e.g., Armada-3700. - -Required properties: -- compatible: - - "marvell,armada-3700-uart" for the standard variant of the UART - (32 bytes FIFO, no DMA, level interrupts, 8-bit access to the - FIFO), called also UART1. - - "marvell,armada-3700-uart-ext" for the extended variant of the - UART (128 bytes FIFO, DMA, front interrupts, 8-bit or 32-bit - accesses to the FIFO), called also UART2. -- reg: offset and length of the register set for the device. -- clocks: UART reference clock used to derive the baudrate. 
If no clock - is provided (possible only with the "marvell,armada-3700-uart" - compatible string for backward compatibility), it will only work - if the baudrate was initialized by the bootloader and no baudrate - change will then be possible. When provided it should be UART1-clk - for standard variant of UART and UART2-clk for extended variant - of UART. TBG clock (with UART TBG divisors d1=d2=1) or xtal clock - should not be used and are supported only for backward compatibility. -- interrupts: - - Must contain three elements for the standard variant of the IP - (marvell,armada-3700-uart): "uart-sum", "uart-tx" and "uart-rx", - respectively the UART sum interrupt, the UART TX interrupt and - UART RX interrupt. A corresponding interrupt-names property must - be defined. - - Must contain two elements for the extended variant of the IP - (marvell,armada-3700-uart-ext): "uart-tx" and "uart-rx", - respectively the UART TX interrupt and the UART RX interrupt. A - corresponding interrupt-names property must be defined. - - For backward compatibility reasons, a single element interrupts - property is also supported for the standard variant of the IP, - containing only the UART sum interrupt. This form is deprecated - and should no longer be used. 
- -Example: - uart0: serial@12000 { - compatible = "marvell,armada-3700-uart"; - reg = <0x12000 0x18>; - clocks = <&uartclk 0>; - interrupts = - , - , - ; - interrupt-names = "uart-sum", "uart-tx", "uart-rx"; - }; - - uart1: serial@12200 { - compatible = "marvell,armada-3700-uart-ext"; - reg = <0x12200 0x30>; - clocks = <&uartclk 1>; - interrupts = - , - ; - interrupt-names = "uart-tx", "uart-rx"; - }; diff --git a/MAINTAINERS b/MAINTAINERS index 3cbf9ac0d83f..d86c1d309708 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14275,7 +14275,7 @@ MARVELL ARMADA 3700 SERIAL DRIVER M: Pali Rohár S: Maintained F: Documentation/devicetree/bindings/clock/marvell,armada-3700-uart-clock.yaml -F: Documentation/devicetree/bindings/serial/mvebu-uart.txt +F: Documentation/devicetree/bindings/serial/marvell,armada-3700-uart.yaml F: drivers/tty/serial/mvebu-uart.c MARVELL ARMADA DRM SUPPORT From 6259530ccd7af47522c8414f8546fe59de9ddc18 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 7 May 2025 10:49:08 -0500 Subject: [PATCH 088/105] dt-bindings: serial: Convert snps,arc-uart to DT schema Convert the Synopsys ARC UART binding to DT schema. Drop the "aliases" portion which is not relevant to this schema. 
Reviewed-by: Thierry Reding Signed-off-by: "Rob Herring (Arm)" Link: https://lore.kernel.org/r/20250507154909.1602497-1-robh@kernel.org Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/serial/arc-uart.txt | 25 --------- .../bindings/serial/snps,arc-uart.yaml | 51 +++++++++++++++++++ 2 files changed, 51 insertions(+), 25 deletions(-) delete mode 100644 Documentation/devicetree/bindings/serial/arc-uart.txt create mode 100644 Documentation/devicetree/bindings/serial/snps,arc-uart.yaml diff --git a/Documentation/devicetree/bindings/serial/arc-uart.txt b/Documentation/devicetree/bindings/serial/arc-uart.txt deleted file mode 100644 index 256cc150ca7e..000000000000 --- a/Documentation/devicetree/bindings/serial/arc-uart.txt +++ /dev/null @@ -1,25 +0,0 @@ -* Synopsys ARC UART : Non standard UART used in some of the ARC FPGA boards - -Required properties: -- compatible : "snps,arc-uart" -- reg : offset and length of the register set for the device. -- interrupts : device interrupt -- clock-frequency : the input clock frequency for the UART -- current-speed : baud rate for UART - -e.g. - -arcuart0: serial@c0fc1000 { - compatible = "snps,arc-uart"; - reg = <0xc0fc1000 0x100>; - interrupts = <5>; - clock-frequency = <80000000>; - current-speed = <115200>; -}; - -Note: Each port should have an alias correctly numbered in "aliases" node. - -e.g. 
-aliases { - serial0 = &arcuart0; -}; diff --git a/Documentation/devicetree/bindings/serial/snps,arc-uart.yaml b/Documentation/devicetree/bindings/serial/snps,arc-uart.yaml new file mode 100644 index 000000000000..dd3096fbfb6a --- /dev/null +++ b/Documentation/devicetree/bindings/serial/snps,arc-uart.yaml @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/snps,arc-uart.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Synopsys ARC UART + +maintainers: + - Vineet Gupta + +description: + Synopsys ARC UART is a non-standard UART used in some of the ARC FPGA boards. + +allOf: + - $ref: /schemas/serial/serial.yaml# + +properties: + compatible: + const: snps,arc-uart + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clock-frequency: + description: the input clock frequency for the UART + + current-speed: + description: baud rate for UART + +required: + - compatible + - reg + - interrupts + - clock-frequency + - current-speed + +unevaluatedProperties: false + +examples: + - | + serial@c0fc1000 { + compatible = "snps,arc-uart"; + reg = <0xc0fc1000 0x100>; + interrupts = <5>; + clock-frequency = <80000000>; + current-speed = <115200>; + }; From 7282b8add2988b93f35a155d927d08e14558c7b8 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Tue, 6 May 2025 17:00:15 -0500 Subject: [PATCH 089/105] dt-bindings: serial: Convert arm,sbsa-uart to DT schema Convert the Arm SBSA UART binding to DT schema. It is a straight-forward conversion. 
Signed-off-by: "Rob Herring (Arm)" Reviewed-by: Thierry Reding Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20250506220016.2545637-1-robh@kernel.org Signed-off-by: Greg Kroah-Hartman --- .../bindings/serial/arm,sbsa-uart.yaml | 38 +++++++++++++++++++ .../bindings/serial/arm_sbsa_uart.txt | 10 ----- 2 files changed, 38 insertions(+), 10 deletions(-) create mode 100644 Documentation/devicetree/bindings/serial/arm,sbsa-uart.yaml delete mode 100644 Documentation/devicetree/bindings/serial/arm_sbsa_uart.txt diff --git a/Documentation/devicetree/bindings/serial/arm,sbsa-uart.yaml b/Documentation/devicetree/bindings/serial/arm,sbsa-uart.yaml new file mode 100644 index 000000000000..68e3fd64b1d8 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/arm,sbsa-uart.yaml @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +--- +$id: http://devicetree.org/schemas/serial/arm,sbsa-uart.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ARM SBSA UART + +maintainers: + - Andre Przywara + +description: + This UART uses a subset of the PL011 registers and consequently lives in the + PL011 driver. Its baudrate and other communication parameters cannot be + adjusted at runtime, so it lacks a clock specifier here.
+ +allOf: + - $ref: /schemas/serial/serial.yaml# + +properties: + compatible: + const: arm,sbsa-uart + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + current-speed: + description: fixed baud rate set by the firmware + +required: + - compatible + - reg + - interrupts + - current-speed + +unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/serial/arm_sbsa_uart.txt b/Documentation/devicetree/bindings/serial/arm_sbsa_uart.txt deleted file mode 100644 index 4163e7eb7763..000000000000 --- a/Documentation/devicetree/bindings/serial/arm_sbsa_uart.txt +++ /dev/null @@ -1,10 +0,0 @@ -* ARM SBSA defined generic UART -This UART uses a subset of the PL011 registers and consequently lives -in the PL011 driver. It's baudrate and other communication parameters -cannot be adjusted at runtime, so it lacks a clock specifier here. - -Required properties: -- compatible: must be "arm,sbsa-uart" -- reg: exactly one register range -- interrupts: exactly one interrupt specifier -- current-speed: the (fixed) baud rate set by the firmware From 76619c4fce711acbfd732909644d40c1caf27041 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 7 May 2025 10:49:36 -0500 Subject: [PATCH 090/105] dt-bindings: serial: Convert microchip,pic32mzda-uart to DT schema Convert the Microchip PIC32 UART binding to DT schema. The binding was unclear that there are 3 interrupts. The functions were determined from the driver. The 'cts-gpios' property is covered by serial.yaml schema.
Signed-off-by: "Rob Herring (Arm)" Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20250507154937.1603190-1-robh@kernel.org Signed-off-by: Greg Kroah-Hartman --- .../bindings/serial/microchip,pic32-uart.txt | 29 ---------- .../serial/microchip,pic32mzda-uart.yaml | 53 +++++++++++++++++++ 2 files changed, 53 insertions(+), 29 deletions(-) delete mode 100644 Documentation/devicetree/bindings/serial/microchip,pic32-uart.txt create mode 100644 Documentation/devicetree/bindings/serial/microchip,pic32mzda-uart.yaml diff --git a/Documentation/devicetree/bindings/serial/microchip,pic32-uart.txt b/Documentation/devicetree/bindings/serial/microchip,pic32-uart.txt deleted file mode 100644 index c8dd440e9747..000000000000 --- a/Documentation/devicetree/bindings/serial/microchip,pic32-uart.txt +++ /dev/null @@ -1,29 +0,0 @@ -* Microchip Universal Asynchronous Receiver Transmitter (UART) - -Required properties: -- compatible: Should be "microchip,pic32mzda-uart" -- reg: Should contain registers location and length -- interrupts: Should contain interrupt -- clocks: Phandle to the clock. - See: Documentation/devicetree/bindings/clock/clock-bindings.txt -- pinctrl-names: A pinctrl state names "default" must be defined. -- pinctrl-0: Phandle referencing pin configuration of the UART peripheral. 
- See: Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt - -Optional properties: -- cts-gpios: CTS pin for UART - -Example: - uart1: serial@1f822000 { - compatible = "microchip,pic32mzda-uart"; - reg = <0x1f822000 0x50>; - interrupts = <112 IRQ_TYPE_LEVEL_HIGH>, - <113 IRQ_TYPE_LEVEL_HIGH>, - <114 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&rootclk PB2CLK>; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_uart1 - &pinctrl_uart1_cts - &pinctrl_uart1_rts>; - cts-gpios = <&gpio1 15 0>; - }; diff --git a/Documentation/devicetree/bindings/serial/microchip,pic32mzda-uart.yaml b/Documentation/devicetree/bindings/serial/microchip,pic32mzda-uart.yaml new file mode 100644 index 000000000000..b176fd5b580e --- /dev/null +++ b/Documentation/devicetree/bindings/serial/microchip,pic32mzda-uart.yaml @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/microchip,pic32mzda-uart.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip PIC32 UART + +maintainers: + - Andrei Pistirica + - Purna Chandra Mandal + +allOf: + - $ref: /schemas/serial/serial.yaml# + +properties: + compatible: + const: microchip,pic32mzda-uart + + reg: + maxItems: 1 + + interrupts: + items: + - description: Fault + - description: RX + - description: TX + + clocks: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - clocks + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/irq.h> + #include <dt-bindings/clock/microchip,pic32-clock.h> + + serial@1f822000 { + compatible = "microchip,pic32mzda-uart"; + reg = <0x1f822000 0x50>; + interrupts = <112 IRQ_TYPE_LEVEL_HIGH>, + <113 IRQ_TYPE_LEVEL_HIGH>, + <114 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&rootclk PB2CLK>; + cts-gpios = <&gpio1 15 0>; + }; From 669bd383024ee3c6175b90f17e7e15703a78fb8f Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 7 May 2025 10:49:22 -0500 Subject: [PATCH 091/105] dt-bindings: serial: Convert socionext,milbeaut-usio-uart to DT schema Convert the
Socionext Milbeaut UART binding to DT schema. It is a straight-forward conversion. Reviewed-by: Thierry Reding Signed-off-by: "Rob Herring (Arm)" Link: https://lore.kernel.org/r/20250507154924.1602842-1-robh@kernel.org Signed-off-by: Greg Kroah-Hartman --- .../bindings/serial/milbeaut-uart.txt | 21 ------- .../serial/socionext,milbeaut-usio-uart.yaml | 56 +++++++++++++++++++ 2 files changed, 56 insertions(+), 21 deletions(-) delete mode 100644 Documentation/devicetree/bindings/serial/milbeaut-uart.txt create mode 100644 Documentation/devicetree/bindings/serial/socionext,milbeaut-usio-uart.yaml diff --git a/Documentation/devicetree/bindings/serial/milbeaut-uart.txt b/Documentation/devicetree/bindings/serial/milbeaut-uart.txt deleted file mode 100644 index 3d2fb1a7ba94..000000000000 --- a/Documentation/devicetree/bindings/serial/milbeaut-uart.txt +++ /dev/null @@ -1,21 +0,0 @@ -Socionext Milbeaut UART controller - -Required properties: -- compatible: should be "socionext,milbeaut-usio-uart". -- reg: offset and length of the register set for the device. -- interrupts: two interrupts specifier. -- interrupt-names: should be "rx", "tx". -- clocks: phandle to the input clock. - -Optional properties: -- auto-flow-control: flow control enable. 
- -Example: - usio1: usio_uart@1e700010 { - compatible = "socionext,milbeaut-usio-uart"; - reg = <0x1e700010 0x10>; - interrupts = <0 141 0x4>, <0 149 0x4>; - interrupt-names = "rx", "tx"; - clocks = <&clk 2>; - auto-flow-control; - }; diff --git a/Documentation/devicetree/bindings/serial/socionext,milbeaut-usio-uart.yaml b/Documentation/devicetree/bindings/serial/socionext,milbeaut-usio-uart.yaml new file mode 100644 index 000000000000..34a997ca2e11 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/socionext,milbeaut-usio-uart.yaml @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/socionext,milbeaut-usio-uart.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Socionext Milbeaut UART controller + +maintainers: + - Sugaya Taichi + +allOf: + - $ref: /schemas/serial/serial.yaml# + +properties: + compatible: + const: socionext,milbeaut-usio-uart + + reg: + maxItems: 1 + + interrupts: + items: + - description: RX interrupt specifier + - description: TX interrupt specifier + + interrupt-names: + items: + - const: rx + - const: tx + + clocks: + maxItems: 1 + + auto-flow-control: + description: Enable automatic flow control. + type: boolean + +required: + - compatible + - reg + - interrupts + - interrupt-names + +unevaluatedProperties: false + +examples: + - | + serial@1e700010 { + compatible = "socionext,milbeaut-usio-uart"; + reg = <0x1e700010 0x10>; + interrupts = <0 141 0x4>, <0 149 0x4>; + interrupt-names = "rx", "tx"; + clocks = <&clk 2>; + auto-flow-control; + }; From 857eec4678805d24391c17c9f3c4d9d21b63371e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 14 May 2025 14:51:28 +0200 Subject: [PATCH 092/105] dt-bindings: serial: 8250_omap: Drop redundant properties The binding references in-kernel serial.yaml, so there is no need to explicitly list its properties. 
Note that rts-gpio is also redundant because DTS should be simply converted to -gpios variants. Signed-off-by: Krzysztof Kozlowski Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250514125127.56149-2-krzysztof.kozlowski@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/8250_omap.yaml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Documentation/devicetree/bindings/serial/8250_omap.yaml b/Documentation/devicetree/bindings/serial/8250_omap.yaml index 4b78de6b46a2..1859f71297ff 100644 --- a/Documentation/devicetree/bindings/serial/8250_omap.yaml +++ b/Documentation/devicetree/bindings/serial/8250_omap.yaml @@ -64,14 +64,7 @@ properties: clock-names: const: fclk - rts-gpios: true - cts-gpios: true - dtr-gpios: true - dsr-gpios: true - rng-gpios: true - dcd-gpios: true rs485-rts-active-high: true - rts-gpio: true power-domains: true clock-frequency: true current-speed: true From 063a896456c339fc181cbd04a08ce409b7866f83 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Thu, 15 May 2025 16:13:11 +0800 Subject: [PATCH 093/105] serial: max3100: Replace open-coded parity calculation with parity8() Refactor parity calculations to use the standard parity8() helper. This change eliminates redundant implementations. 
Co-developed-by: Yu-Chun Lin Signed-off-by: Yu-Chun Lin Signed-off-by: Kuan-Wei Chiu Link: https://lore.kernel.org/r/20250515081311.775559-1-visitorckw@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max3100.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c index f2dd83692b2c..d28a2ebfa29f 100644 --- a/drivers/tty/serial/max3100.c +++ b/drivers/tty/serial/max3100.c @@ -16,6 +16,7 @@ /* 4 MAX3100s should be enough for everyone */ #define MAX_MAX3100 4 +#include #include #include #include @@ -133,7 +134,7 @@ static int max3100_do_parity(struct max3100_port *s, u16 c) else c &= 0xff; - parity = parity ^ (hweight8(c) & 1); + parity = parity ^ parity8(c); return parity; } From a16014c0db3aed66379bfd7b042e251478b02868 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 7 May 2025 10:13:16 -0400 Subject: [PATCH 094/105] vt: ucs.c: fix misappropriate in_range() usage The in_range() helper accepts a start and a length, not a start and an end. 
Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250507141535.40655-2-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/ucs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/tty/vt/ucs.c b/drivers/tty/vt/ucs.c index 0b58cb7344a3..b0b23830170d 100644 --- a/drivers/tty/vt/ucs.c +++ b/drivers/tty/vt/ucs.c @@ -46,7 +46,7 @@ static int interval32_cmp(const void *key, const void *element) static bool cp_in_range16(u16 cp, const struct ucs_interval16 *ranges, size_t size) { - if (!in_range(cp, ranges[0].first, ranges[size - 1].last)) + if (cp < ranges[0].first || cp > ranges[size - 1].last) return false; return __inline_bsearch(&cp, ranges, size, sizeof(*ranges), @@ -55,7 +55,7 @@ static bool cp_in_range16(u16 cp, const struct ucs_interval16 *ranges, size_t si static bool cp_in_range32(u32 cp, const struct ucs_interval32 *ranges, size_t size) { - if (!in_range(cp, ranges[0].first, ranges[size - 1].last)) + if (cp < ranges[0].first || cp > ranges[size - 1].last) return false; return __inline_bsearch(&cp, ranges, size, sizeof(*ranges), @@ -144,8 +144,8 @@ static int recomposition_cmp(const void *key, const void *element) u32 ucs_recompose(u32 base, u32 mark) { /* Check if characters are within the range of our table */ - if (!in_range(base, UCS_RECOMPOSE_MIN_BASE, UCS_RECOMPOSE_MAX_BASE) || - !in_range(mark, UCS_RECOMPOSE_MIN_MARK, UCS_RECOMPOSE_MAX_MARK)) + if (base < UCS_RECOMPOSE_MIN_BASE || base > UCS_RECOMPOSE_MAX_BASE || + mark < UCS_RECOMPOSE_MIN_MARK || mark > UCS_RECOMPOSE_MAX_MARK) return 0; struct compare_key key = { base, mark }; From 68e7a421ab4f0ca97dcfff638bc5c784bf28eebd Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 7 May 2025 10:13:17 -0400 Subject: [PATCH 095/105] vt: make sure displayed double-width characters are remembered as such And to do so we ensure the Unicode screen buffer is initialized when double-width characters are encountered. 
Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250507141535.40655-3-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 24c6cd2eed78..58fa1b285f22 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2930,8 +2930,15 @@ static int vc_process_ucs(struct vc_data *vc, int *c, int *tc) { u32 prev_c, curr_c = *c; - if (ucs_is_double_width(curr_c)) + if (ucs_is_double_width(curr_c)) { + /* + * The Unicode screen memory is allocated only when + * required. This is one such case as we need to remember + * which displayed characters are double-width. + */ + vc_uniscr_check(vc); return 2; + } if (!ucs_is_zero_width(curr_c)) return 1; From bb9a1516765252619ef0e36e9ecf3aedbe7b5710 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 7 May 2025 10:13:18 -0400 Subject: [PATCH 096/105] vt: move glyph determination to a separate function No logical changes. Make it easier for enhancements to come. Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250507141535.40655-4-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 71 ++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 58fa1b285f22..556af82a9231 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2925,6 +2925,7 @@ static void vc_con_rewind(struct vc_data *vc) #define UCS_ZWS 0x200b /* Zero Width Space */ #define UCS_VS16 0xfe0f /* Variation Selector 16 */ +#define UCS_REPLACEMENT 0xfffd /* Replacement Character */ static int vc_process_ucs(struct vc_data *vc, int *c, int *tc) { @@ -2984,12 +2985,38 @@ static int vc_process_ucs(struct vc_data *vc, int *c, int *tc) return 0; } +static int vc_get_glyph(struct vc_data *vc, int tc) +{ + int glyph = conv_uni_to_pc(vc, tc); + u16 charmask = vc->vc_hi_font_mask ? 
0x1ff : 0xff; + + if (!(glyph & ~charmask)) + return glyph; + + if (glyph == -1) + return -1; /* nothing to display */ + + /* Glyph not found */ + if ((!vc->vc_utf || vc->vc_disp_ctrl || tc < 128) && !(tc & ~charmask)) { + /* + * In legacy mode use the glyph we get by a 1:1 mapping. + * This would make absolutely no sense with Unicode in mind, but do this for + * ASCII characters since a font may lack Unicode mapping info and we don't + * want to end up with having question marks only. + */ + return tc; + } + + /* Display U+FFFD (Unicode Replacement Character). */ + return conv_uni_to_pc(vc, UCS_REPLACEMENT); +} + static int vc_con_write_normal(struct vc_data *vc, int tc, int c, struct vc_draw_region *draw) { int next_c; unsigned char vc_attr = vc->vc_attr; - u16 himask = vc->vc_hi_font_mask, charmask = himask ? 0x1ff : 0xff; + u16 himask = vc->vc_hi_font_mask; u8 width = 1; bool inverse = false; @@ -3000,39 +3027,17 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, } /* Now try to find out how to display it */ - tc = conv_uni_to_pc(vc, tc); - if (tc & ~charmask) { - if (tc == -1) - return -1; /* nothing to display */ + tc = vc_get_glyph(vc, tc); + if (tc == -1) + return -1; /* nothing to display */ + if (tc < 0) { + inverse = true; + tc = conv_uni_to_pc(vc, '?'); + if (tc < 0) + tc = '?'; - /* Glyph not found */ - if ((!vc->vc_utf || vc->vc_disp_ctrl || c < 128) && - !(c & ~charmask)) { - /* - * In legacy mode use the glyph we get by a 1:1 - * mapping. - * This would make absolutely no sense with Unicode in - * mind, but do this for ASCII characters since a font - * may lack Unicode mapping info and we don't want to - * end up with having question marks only. - */ - tc = c; - } else { - /* - * Display U+FFFD. If it's not found, display an inverse - * question mark. 
- */ - tc = conv_uni_to_pc(vc, 0xfffd); - if (tc < 0) { - inverse = true; - tc = conv_uni_to_pc(vc, '?'); - if (tc < 0) - tc = '?'; - - vc_attr = vc_invert_attr(vc); - con_flush(vc, draw); - } - } + vc_attr = vc_invert_attr(vc); + con_flush(vc, draw); } next_c = c; From 5071ddc18e17797248151ca2bea1b8d4e67d996f Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 7 May 2025 10:13:19 -0400 Subject: [PATCH 097/105] vt: introduce gen_ucs_fallback_table.py to create ucs_fallback_table.h The generated table maps complex characters to their simpler fallback forms for a terminal display when corresponding glyphs are unavailable. This includes diacritics, symbols as well as many drawing characters. Fallback characters aren't perfect replacements, obviously. But they are still far more useful than a bunch of squared question marks. Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250507141535.40655-5-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/gen_ucs_fallback_table.py | 352 +++++++++++++++++++++++ 1 file changed, 352 insertions(+) create mode 100755 drivers/tty/vt/gen_ucs_fallback_table.py diff --git a/drivers/tty/vt/gen_ucs_fallback_table.py b/drivers/tty/vt/gen_ucs_fallback_table.py new file mode 100755 index 000000000000..80257c6df440 --- /dev/null +++ b/drivers/tty/vt/gen_ucs_fallback_table.py @@ -0,0 +1,352 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Leverage Python's unidecode module to generate ucs_fallback_table.h +# +# The generated table maps complex characters to their simpler fallback forms +# for a terminal display when corresponding glyphs are unavailable. 
+# +# Usage: +# python3 gen_ucs_fallback_table.py # Generate fallback tables +# python3 gen_ucs_fallback_table.py -o FILE # Specify output file + +import unicodedata +from unidecode import unidecode +import sys +import argparse +from collections import defaultdict + +# Try to get unidecode version +try: + from importlib.metadata import version + unidecode_version = version('unidecode') +except: + unidecode_version = 'unknown' + +# This script's file name +from pathlib import Path +this_file = Path(__file__).name + +# Default output file name +DEFAULT_OUT_FILE = "ucs_fallback_table.h" + +# Define the range marker value +RANGE_MARKER = 0x00 + +def generate_fallback_map(): + """Generate a fallback map using unidecode for all relevant Unicode points.""" + fallback_map = {} + + # Process BMP characters (0x0000 - 0xFFFF) to keep table size manageable + for cp in range(0x0080, 0x10000): # Skip ASCII range (0x00-0x7F) + char = chr(cp) + + # Skip unassigned/control characters + try: + if not unicodedata.name(char, ''): + continue + except ValueError: + continue + + # Get the unidecode transliteration + ascii_version = unidecode(char) + + # Only store if it results in a single character mapping + if len(ascii_version) == 1: + fallback_map[cp] = ord(ascii_version) + + # Apply manual overrides for special cases + fallback_map.update(get_special_overrides()) + + return fallback_map + +def get_special_overrides(): + """Get special case overrides that need different handling than unidecode + provides... 
or doesn't provide at all.""" + + overrides = {} + + # Multi-character unidecode output + # These map to single chars instead of unidecode's multiple-char mappings + # In a terminal fallback context, we need a single character rather than multiple + overrides[0x00C6] = ord('E') # Æ LATIN CAPITAL LETTER AE -> E (unidecode: "AE") + overrides[0x00E6] = ord('e') # æ LATIN SMALL LETTER AE -> e (unidecode: "ae") + overrides[0x0152] = ord('E') # Œ LATIN CAPITAL LIGATURE OE -> E (unidecode: "OE") + overrides[0x0153] = ord('e') # œ LATIN SMALL LETTER LIGATURE OE -> e (unidecode: "oe") + overrides[0x00DF] = ord('s') # ß LATIN SMALL LETTER SHARP S -> s (unidecode: "ss") + + # Comparison operators that unidecode renders as multiple characters + overrides[0x2264] = ord('<') # ≤ LESS-THAN OR EQUAL TO -> < (unidecode: "<=") + overrides[0x2265] = ord('>') # ≥ GREATER-THAN OR EQUAL TO -> > (unidecode: ">=") + + # Unidecode returns an empty string for these + overrides[0x2260] = ord('#') # ≠ NOT EQUAL TO -> # (unidecode: empty string) + + # Quadrant block characters that unidecode doesn't map + for cp in range(0x2596, 0x259F+1): + overrides[cp] = ord('#') # ▖ ▗ ▘ ▙ etc. 
- map to # (unidecode: empty string) + + # Directional arrows + # These provide better semantic meaning than unidecode's mappings + overrides[0x2192] = ord('>') # → RIGHTWARDS ARROW -> > (unidecode: "-") + overrides[0x2190] = ord('<') # ← LEFTWARDS ARROW -> < (unidecode: "-") + overrides[0x2191] = ord('^') # ↑ UPWARDS ARROW -> ^ (unidecode: "|") + overrides[0x2193] = ord('v') # ↓ DOWNWARDS ARROW -> v (unidecode: "|") + + # Double arrows with their directional semantic mappings + overrides[0x21D0] = ord('<') # ⇐ LEFTWARDS DOUBLE ARROW -> < + overrides[0x21D1] = ord('^') # ⇑ UPWARDS DOUBLE ARROW -> ^ + overrides[0x21D2] = ord('>') # ⇒ RIGHTWARDS DOUBLE ARROW -> > + overrides[0x21D3] = ord('v') # ⇓ DOWNWARDS DOUBLE ARROW -> v + + # Halfwidth arrows + # These need the same treatment as their normal-width counterparts + overrides[0xFFE9] = ord('<') # ← HALFWIDTH LEFTWARDS ARROW -> < (unidecode: "-") + overrides[0xFFEA] = ord('^') # ↑ HALFWIDTH UPWARDS ARROW -> ^ (unidecode: "|") + overrides[0xFFEB] = ord('>') # → HALFWIDTH RIGHTWARDS ARROW -> > (unidecode: "-") + overrides[0xFFEC] = ord('v') # ↓ HALFWIDTH DOWNWARDS ARROW -> v (unidecode: "|") + + # Currency symbols - each mapped to a representative letter + overrides[0x00A2] = ord('c') # ¢ CENT SIGN -> c + overrides[0x00A3] = ord('L') # £ POUND SIGN -> L + overrides[0x00A5] = ord('Y') # ¥ YEN SIGN -> Y + overrides[0x20AC] = ord('E') # € EURO SIGN -> E + + # Symbols mapped to letters + overrides[0x00A7] = ord('S') # § SECTION SIGN -> S + overrides[0x00A9] = ord('C') # © COPYRIGHT SIGN -> C + overrides[0x00AE] = ord('R') # ® REGISTERED SIGN -> R + overrides[0x2122] = ord('T') # ™ TRADE MARK SIGN -> T + + # Degree-related symbols + overrides[0x00B0] = ord('o') # ° DEGREE SIGN -> o + overrides[0x2103] = ord('C') # ℃ DEGREE CELSIUS -> C + overrides[0x2109] = ord('F') # ℉ DEGREE FAHRENHEIT -> F + + # Angle quotation marks + overrides[0x00AB] = ord('<') # « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -> < + overrides[0x00BB] = 
ord('>') # » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -> > + + # Operators with circular shape + overrides[0x2218] = ord('o') # ∘ RING OPERATOR -> o + overrides[0x2219] = ord('.') # ∙ BULLET OPERATOR -> . + + # Negated mathematical symbols (preserving the negation semantics) + # Negated symbols mapped to exclamation mark (semantically "not") + for cp in (0x2204, 0x2209, 0x220C, 0x2224, 0x2226, 0x226E, 0x226F, 0x2280, 0x2281, 0x2284, 0x2285): + overrides[cp] = ord('!') # Negated math symbols -> ! (not) + + # Negated symbols mapped to hash sign (semantically "not equal") + for cp in (0x2241, 0x2244, 0x2249, 0x2262, 0x2268, 0x2269, 0x226D, 0x228A, 0x228B): + overrides[cp] = ord('#') # Negated equality symbols -> # (not equal) + + # Negated arrows - all mapped to exclamation mark + for cp in (0x219A, 0x219B, 0x21AE, 0x21CD, 0x21CE, 0x21CF): + overrides[cp] = ord('!') # Negated arrows -> ! (not) + + # Dashes and hyphens + for cp in (0x2010, 0x2011, 0x2012, 0x2013, 0x2014, 0x2015, 0x2043, 0x2052): + overrides[cp] = ord('-') # Dashes and hyphens -> - + + # Question mark punctuation + for cp in (0x203D, 0x2047, 0x2048): + overrides[cp] = ord('?') # Question marks -> ? + + # Exclamation mark punctuation + for cp in (0x203C, 0x2049): + overrides[cp] = ord('!') # Exclamation marks -> ! 
+ + # Asterisk-like symbols + for cp in (0x2042, 0x2051, 0x2055): + overrides[cp] = ord('*') + + # Other specific punctuation with unique mappings + overrides[0x201E] = ord('"') # „ DOUBLE LOW-9 QUOTATION MARK + overrides[0x2023] = ord('>') # ‣ TRIANGULAR BULLET + overrides[0x2026] = ord('.') # … HORIZONTAL ELLIPSIS + overrides[0x2033] = ord('"') # ″ DOUBLE PRIME + overrides[0x204B] = ord('P') # ⁋ REVERSED PILCROW SIGN + overrides[0x204C] = ord('<') # ⁌ BLACK LEFTWARDS BULLET + overrides[0x204D] = ord('>') # ⁍ BLACK RIGHTWARDS BULLET + overrides[0x204F] = ord(';') # ⁏ REVERSED SEMICOLON + overrides[0x205B] = ord(':') # ⁛ FOUR DOT MARK + + # Check marks + overrides[0x2713] = ord('v') # ✓ CHECK MARK + overrides[0x2714] = ord('V') # ✔ HEAVY CHECK MARK + + # X marks - lowercase for regular, uppercase for heavy + for cp in (0x2715, 0x2717): + overrides[cp] = ord('x') # Regular X marks -> x + for cp in (0x2716, 0x2718): + overrides[cp] = ord('X') # Heavy X marks -> X + + # Stars and asterisk-like symbols mapped to '*' + for cp in (0x2605, 0x2606, 0x262A, 0x269D, 0x2698): + overrides[cp] = ord('*') # All star and asterisk symbols -> * + for cp in range(0x2721, 0x2746+1): + overrides[cp] = ord('*') # All star and asterisk symbols -> * + for cp in range(0x2749, 0x274B+1): + overrides[cp] = ord('*') # Last set of asterisk symbols -> * + for cp in (0x229B, 0x22C6, 0x235F, 0x2363): + overrides[cp] = ord('*') # Star operators -> * + + # Special exclusions with fallback value of 0 + # These will be filtered out in organize_by_pages() + + # Exclude U+2028 (LINE SEPARATOR) + overrides[0x2028] = 0 # LINE SEPARATOR (unidecode: '\n') + + return overrides + +def organize_by_pages(fallback_map): + """Organize the fallback mappings by their high byte (page).""" + # Group by high byte (page) + page_groups = defaultdict(list) + for code, fallback in fallback_map.items(): + # Skip characters with fallback value of 0 (excluded characters) + if fallback == 0: + continue + + page = code >> 8 
# Get the high byte (page) + offset = code & 0xFF # Get the low byte (offset within page) + page_groups[page].append((offset, fallback)) + + # Sort each page's entries by offset + for page in page_groups: + page_groups[page].sort() + + return page_groups + +def compress_ranges(page_groups): + """Compress consecutive entries with the same fallback character into ranges. + A range is only compressed if it contains 3 or more consecutive entries.""" + + compressed_pages = {} + + for page, entries in page_groups.items(): + compressed_entries = [] + i = 0 + while i < len(entries): + start_offset, fallback = entries[i] + + # Look ahead to find consecutive entries with the same fallback + j = i + 1 + while (j < len(entries) and + entries[j][0] == entries[j-1][0] + 1 and # consecutive offsets + entries[j][1] == fallback): # same fallback + j += 1 + + # Calculate the range end + end_offset = entries[j-1][0] + + # If we found a range with 3 or more entries (worth compressing) + if j - i >= 3: + # Add a range entry + compressed_entries.append((start_offset, RANGE_MARKER)) + compressed_entries.append((end_offset, fallback)) + else: + # Add the individual entries as is + for k in range(i, j): + compressed_entries.append(entries[k]) + + i = j + + compressed_pages[page] = compressed_entries + + return compressed_pages + +def cp_name(cp): + """Get the Unicode character name for a code point.""" + try: + return unicodedata.name(chr(cp)) + except: + return f"U+{cp:04X}" + +def generate_fallback_tables(out_file=DEFAULT_OUT_FILE): + """Generate the fallback character tables.""" + # Generate fallback map using unidecode + fallback_map = generate_fallback_map() + print(f"Generated {len(fallback_map)} total fallback mappings") + + # Organize by pages + page_groups = organize_by_pages(fallback_map) + print(f"Organized into {len(page_groups)} pages") + + # Compress ranges + compressed_pages = compress_ranges(page_groups) + total_compressed_entries = sum(len(entries) for entries in 
compressed_pages.values()) + print(f"Total compressed entries: {total_compressed_entries}") + + # Create output file + with open(out_file, 'w') as f: + f.write(f"""\ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * {out_file} - Unicode character fallback table + * + * Auto-generated by {this_file} + * + * Unicode Version: {unicodedata.unidata_version} + * Unidecode Version: {unidecode_version} + * + * This file contains optimized tables that map complex Unicode characters + * to simpler fallback characters for terminal display when corresponding + * glyphs are unavailable. + */ + +static const struct ucs_page_desc ucs_fallback_pages[] = {{ +""") + + # Convert compressed_pages to a sorted list of (page, entries) tuples + sorted_pages = sorted(compressed_pages.items()) + + # Track the start index for each page + start_index = 0 + + # Write page descriptors + for page, entries in sorted_pages: + count = len(entries) + f.write(f"\t{{ 0x{page:02X}, {count}, {start_index} }},\n") + start_index += count + + # Write entries array + f.write("""\ +}; + +/* Page entries array (referenced by page descriptors) */ +static const struct ucs_page_entry ucs_fallback_entries[] = { +""") + + # Write all entries + for page, entries in sorted_pages: + page_hex = f"0x{page:02X}" + f.write(f"\t/* Entries for page {page_hex} */\n") + + for i, (offset, fallback) in enumerate(entries): + # Convert to hex for better readability + offset_hex = f"0x{offset:02X}" + fallback_hex = f"0x{fallback:02X}" + + # Handle comments + codepoint = (page << 8) | offset + + if fallback == RANGE_MARKER: + comment = f"{cp_name(codepoint)} -> ..." 
+ else: + comment = f"{cp_name(codepoint)} -> '{chr(fallback)}'" + f.write(f"\t{{ 0x{offset:02X}, 0x{fallback:02X} }}, /* {comment} */\n") + + f.write(f"""\ +}}; + +#define UCS_PAGE_ENTRY_RANGE_MARKER {RANGE_MARKER} +""") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Generate Unicode fallback character tables") + parser.add_argument("-o", "--output", dest="output_file", default=DEFAULT_OUT_FILE, + help=f"Output file name (default: {DEFAULT_OUT_FILE})") + args = parser.parse_args() + + generate_fallback_tables(out_file=args.output_file) From de45d93f00e2a161c11199fa9a4f515ce2930f01 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 7 May 2025 10:13:20 -0400 Subject: [PATCH 098/105] vt: create ucs_fallback_table.h_shipped with gen_ucs_fallback_table.py The generated table maps complex characters to their simpler fallback forms for a terminal display when corresponding glyphs are unavailable. A page-based approach is used to reduce compiled binary footprint. 
Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250507141535.40655-6-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/.gitignore | 1 + drivers/tty/vt/Makefile | 5 +- drivers/tty/vt/ucs_fallback_table.h_shipped | 3440 +++++++++++++++++++ 3 files changed, 3445 insertions(+), 1 deletion(-) create mode 100644 drivers/tty/vt/ucs_fallback_table.h_shipped diff --git a/drivers/tty/vt/.gitignore b/drivers/tty/vt/.gitignore index 49ce44edad65..a74859bab862 100644 --- a/drivers/tty/vt/.gitignore +++ b/drivers/tty/vt/.gitignore @@ -2,5 +2,6 @@ /conmakehash /consolemap_deftbl.c /defkeymap.c +/ucs_fallback_table.h /ucs_recompose_table.h /ucs_width_table.h diff --git a/drivers/tty/vt/Makefile b/drivers/tty/vt/Makefile index 8ba33cc942c7..509362a3e11e 100644 --- a/drivers/tty/vt/Makefile +++ b/drivers/tty/vt/Makefile @@ -12,7 +12,7 @@ obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o \ # Files generated that shall be removed upon make clean clean-files := consolemap_deftbl.c defkeymap.c \ - ucs_width_table.h ucs_recompose_table.h + ucs_width_table.h ucs_recompose_table.h ucs_fallback_table.h hostprogs += conmakehash @@ -58,4 +58,7 @@ endif $(obj)/ucs_recompose_table.h: $(src)/gen_ucs_recompose_table.py $(PYTHON3) $< -o $@ $(gen_recomp_arg) +$(obj)/ucs_fallback_table.h: $(src)/gen_ucs_fallback_table.py + $(PYTHON3) $< -o $@ + endif diff --git a/drivers/tty/vt/ucs_fallback_table.h_shipped b/drivers/tty/vt/ucs_fallback_table.h_shipped new file mode 100644 index 000000000000..7fa803511eb5 --- /dev/null +++ b/drivers/tty/vt/ucs_fallback_table.h_shipped @@ -0,0 +1,3440 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ucs_fallback_table.h - Unicode character fallback table + * + * Auto-generated by gen_ucs_fallback_table.py + * + * Unicode Version: 16.0.0 + * Unidecode Version: 1.3.8 + * + * This file contains optimized tables that map complex Unicode characters + * to simpler fallback characters for terminal display when 
corresponding + * glyphs are unavailable. + */ + +static const struct ucs_page_desc ucs_fallback_pages[] = { + { 0x00, 62, 0 }, + { 0x01, 218, 62 }, + { 0x02, 196, 280 }, + { 0x03, 96, 476 }, + { 0x04, 113, 572 }, + { 0x05, 100, 685 }, + { 0x06, 119, 785 }, + { 0x07, 91, 904 }, + { 0x09, 99, 995 }, + { 0x0A, 78, 1094 }, + { 0x0B, 79, 1172 }, + { 0x0C, 85, 1251 }, + { 0x0D, 73, 1336 }, + { 0x0E, 83, 1409 }, + { 0x0F, 69, 1492 }, + { 0x10, 93, 1561 }, + { 0x11, 51, 1654 }, + { 0x13, 22, 1705 }, + { 0x14, 30, 1727 }, + { 0x15, 17, 1757 }, + { 0x16, 81, 1774 }, + { 0x17, 47, 1855 }, + { 0x18, 96, 1902 }, + { 0x1D, 105, 1998 }, + { 0x1E, 246, 2103 }, + { 0x1F, 94, 2349 }, + { 0x20, 107, 2443 }, + { 0x21, 136, 2550 }, + { 0x22, 34, 2686 }, + { 0x23, 4, 2720 }, + { 0x24, 72, 2724 }, + { 0x25, 60, 2796 }, + { 0x26, 6, 2856 }, + { 0x27, 18, 2862 }, + { 0x28, 64, 2880 }, + { 0x29, 1, 2944 }, + { 0x2C, 15, 2945 }, + { 0x2E, 29, 2960 }, + { 0x30, 53, 2989 }, + { 0x31, 50, 3042 }, + { 0x32, 5, 3092 }, + { 0xA0, 4, 3097 }, + { 0xC5, 2, 3101 }, + { 0xC6, 2, 3103 }, + { 0xC7, 1, 3105 }, + { 0xFB, 35, 3106 }, + { 0xFE, 37, 3141 }, + { 0xFF, 144, 3178 }, +}; + +/* Page entries array (referenced by page descriptors) */ +static const struct ucs_page_entry ucs_fallback_entries[] = { + /* Entries for page 0x00 */ + { 0xA0, 0x20 }, /* NO-BREAK SPACE -> ' ' */ + { 0xA1, 0x21 }, /* INVERTED EXCLAMATION MARK -> '!' */ + { 0xA2, 0x63 }, /* CENT SIGN -> 'c' */ + { 0xA3, 0x4C }, /* POUND SIGN -> 'L' */ + { 0xA5, 0x59 }, /* YEN SIGN -> 'Y' */ + { 0xA6, 0x7C }, /* BROKEN BAR -> '|' */ + { 0xA7, 0x53 }, /* SECTION SIGN -> 'S' */ + { 0xA8, 0x22 }, /* DIAERESIS -> '"' */ + { 0xA9, 0x43 }, /* COPYRIGHT SIGN -> 'C' */ + { 0xAA, 0x61 }, /* FEMININE ORDINAL INDICATOR -> 'a' */ + { 0xAB, 0x3C }, /* LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -> '<' */ + { 0xAC, 0x21 }, /* NOT SIGN -> '!' 
*/ + { 0xAE, 0x52 }, /* REGISTERED SIGN -> 'R' */ + { 0xAF, 0x2D }, /* MACRON -> '-' */ + { 0xB0, 0x6F }, /* DEGREE SIGN -> 'o' */ + { 0xB2, 0x32 }, /* SUPERSCRIPT TWO -> '2' */ + { 0xB3, 0x33 }, /* SUPERSCRIPT THREE -> '3' */ + { 0xB4, 0x27 }, /* ACUTE ACCENT -> ''' */ + { 0xB5, 0x75 }, /* MICRO SIGN -> 'u' */ + { 0xB6, 0x50 }, /* PILCROW SIGN -> 'P' */ + { 0xB7, 0x2A }, /* MIDDLE DOT -> '*' */ + { 0xB8, 0x2C }, /* CEDILLA -> ',' */ + { 0xB9, 0x31 }, /* SUPERSCRIPT ONE -> '1' */ + { 0xBA, 0x6F }, /* MASCULINE ORDINAL INDICATOR -> 'o' */ + { 0xBB, 0x3E }, /* RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -> '>' */ + { 0xBF, 0x3F }, /* INVERTED QUESTION MARK -> '?' */ + { 0xC0, 0x00 }, /* LATIN CAPITAL LETTER A WITH GRAVE -> ... */ + { 0xC5, 0x41 }, /* LATIN CAPITAL LETTER A WITH RING ABOVE -> 'A' */ + { 0xC6, 0x45 }, /* LATIN CAPITAL LETTER AE -> 'E' */ + { 0xC7, 0x43 }, /* LATIN CAPITAL LETTER C WITH CEDILLA -> 'C' */ + { 0xC8, 0x00 }, /* LATIN CAPITAL LETTER E WITH GRAVE -> ... */ + { 0xCB, 0x45 }, /* LATIN CAPITAL LETTER E WITH DIAERESIS -> 'E' */ + { 0xCC, 0x00 }, /* LATIN CAPITAL LETTER I WITH GRAVE -> ... */ + { 0xCF, 0x49 }, /* LATIN CAPITAL LETTER I WITH DIAERESIS -> 'I' */ + { 0xD0, 0x44 }, /* LATIN CAPITAL LETTER ETH -> 'D' */ + { 0xD1, 0x4E }, /* LATIN CAPITAL LETTER N WITH TILDE -> 'N' */ + { 0xD2, 0x00 }, /* LATIN CAPITAL LETTER O WITH GRAVE -> ... */ + { 0xD6, 0x4F }, /* LATIN CAPITAL LETTER O WITH DIAERESIS -> 'O' */ + { 0xD7, 0x78 }, /* MULTIPLICATION SIGN -> 'x' */ + { 0xD8, 0x4F }, /* LATIN CAPITAL LETTER O WITH STROKE -> 'O' */ + { 0xD9, 0x00 }, /* LATIN CAPITAL LETTER U WITH GRAVE -> ... */ + { 0xDC, 0x55 }, /* LATIN CAPITAL LETTER U WITH DIAERESIS -> 'U' */ + { 0xDD, 0x59 }, /* LATIN CAPITAL LETTER Y WITH ACUTE -> 'Y' */ + { 0xDF, 0x73 }, /* LATIN SMALL LETTER SHARP S -> 's' */ + { 0xE0, 0x00 }, /* LATIN SMALL LETTER A WITH GRAVE -> ... 
*/ + { 0xE5, 0x61 }, /* LATIN SMALL LETTER A WITH RING ABOVE -> 'a' */ + { 0xE6, 0x65 }, /* LATIN SMALL LETTER AE -> 'e' */ + { 0xE7, 0x63 }, /* LATIN SMALL LETTER C WITH CEDILLA -> 'c' */ + { 0xE8, 0x00 }, /* LATIN SMALL LETTER E WITH GRAVE -> ... */ + { 0xEB, 0x65 }, /* LATIN SMALL LETTER E WITH DIAERESIS -> 'e' */ + { 0xEC, 0x00 }, /* LATIN SMALL LETTER I WITH GRAVE -> ... */ + { 0xEF, 0x69 }, /* LATIN SMALL LETTER I WITH DIAERESIS -> 'i' */ + { 0xF0, 0x64 }, /* LATIN SMALL LETTER ETH -> 'd' */ + { 0xF1, 0x6E }, /* LATIN SMALL LETTER N WITH TILDE -> 'n' */ + { 0xF2, 0x00 }, /* LATIN SMALL LETTER O WITH GRAVE -> ... */ + { 0xF6, 0x6F }, /* LATIN SMALL LETTER O WITH DIAERESIS -> 'o' */ + { 0xF7, 0x2F }, /* DIVISION SIGN -> '/' */ + { 0xF8, 0x6F }, /* LATIN SMALL LETTER O WITH STROKE -> 'o' */ + { 0xF9, 0x00 }, /* LATIN SMALL LETTER U WITH GRAVE -> ... */ + { 0xFC, 0x75 }, /* LATIN SMALL LETTER U WITH DIAERESIS -> 'u' */ + { 0xFD, 0x79 }, /* LATIN SMALL LETTER Y WITH ACUTE -> 'y' */ + { 0xFF, 0x79 }, /* LATIN SMALL LETTER Y WITH DIAERESIS -> 'y' */ + /* Entries for page 0x01 */ + { 0x00, 0x41 }, /* LATIN CAPITAL LETTER A WITH MACRON -> 'A' */ + { 0x01, 0x61 }, /* LATIN SMALL LETTER A WITH MACRON -> 'a' */ + { 0x02, 0x41 }, /* LATIN CAPITAL LETTER A WITH BREVE -> 'A' */ + { 0x03, 0x61 }, /* LATIN SMALL LETTER A WITH BREVE -> 'a' */ + { 0x04, 0x41 }, /* LATIN CAPITAL LETTER A WITH OGONEK -> 'A' */ + { 0x05, 0x61 }, /* LATIN SMALL LETTER A WITH OGONEK -> 'a' */ + { 0x06, 0x43 }, /* LATIN CAPITAL LETTER C WITH ACUTE -> 'C' */ + { 0x07, 0x63 }, /* LATIN SMALL LETTER C WITH ACUTE -> 'c' */ + { 0x08, 0x43 }, /* LATIN CAPITAL LETTER C WITH CIRCUMFLEX -> 'C' */ + { 0x09, 0x63 }, /* LATIN SMALL LETTER C WITH CIRCUMFLEX -> 'c' */ + { 0x0A, 0x43 }, /* LATIN CAPITAL LETTER C WITH DOT ABOVE -> 'C' */ + { 0x0B, 0x63 }, /* LATIN SMALL LETTER C WITH DOT ABOVE -> 'c' */ + { 0x0C, 0x43 }, /* LATIN CAPITAL LETTER C WITH CARON -> 'C' */ + { 0x0D, 0x63 }, /* LATIN SMALL LETTER C WITH 
CARON -> 'c' */ + { 0x0E, 0x44 }, /* LATIN CAPITAL LETTER D WITH CARON -> 'D' */ + { 0x0F, 0x64 }, /* LATIN SMALL LETTER D WITH CARON -> 'd' */ + { 0x10, 0x44 }, /* LATIN CAPITAL LETTER D WITH STROKE -> 'D' */ + { 0x11, 0x64 }, /* LATIN SMALL LETTER D WITH STROKE -> 'd' */ + { 0x12, 0x45 }, /* LATIN CAPITAL LETTER E WITH MACRON -> 'E' */ + { 0x13, 0x65 }, /* LATIN SMALL LETTER E WITH MACRON -> 'e' */ + { 0x14, 0x45 }, /* LATIN CAPITAL LETTER E WITH BREVE -> 'E' */ + { 0x15, 0x65 }, /* LATIN SMALL LETTER E WITH BREVE -> 'e' */ + { 0x16, 0x45 }, /* LATIN CAPITAL LETTER E WITH DOT ABOVE -> 'E' */ + { 0x17, 0x65 }, /* LATIN SMALL LETTER E WITH DOT ABOVE -> 'e' */ + { 0x18, 0x45 }, /* LATIN CAPITAL LETTER E WITH OGONEK -> 'E' */ + { 0x19, 0x65 }, /* LATIN SMALL LETTER E WITH OGONEK -> 'e' */ + { 0x1A, 0x45 }, /* LATIN CAPITAL LETTER E WITH CARON -> 'E' */ + { 0x1B, 0x65 }, /* LATIN SMALL LETTER E WITH CARON -> 'e' */ + { 0x1C, 0x47 }, /* LATIN CAPITAL LETTER G WITH CIRCUMFLEX -> 'G' */ + { 0x1D, 0x67 }, /* LATIN SMALL LETTER G WITH CIRCUMFLEX -> 'g' */ + { 0x1E, 0x47 }, /* LATIN CAPITAL LETTER G WITH BREVE -> 'G' */ + { 0x1F, 0x67 }, /* LATIN SMALL LETTER G WITH BREVE -> 'g' */ + { 0x20, 0x47 }, /* LATIN CAPITAL LETTER G WITH DOT ABOVE -> 'G' */ + { 0x21, 0x67 }, /* LATIN SMALL LETTER G WITH DOT ABOVE -> 'g' */ + { 0x22, 0x47 }, /* LATIN CAPITAL LETTER G WITH CEDILLA -> 'G' */ + { 0x23, 0x67 }, /* LATIN SMALL LETTER G WITH CEDILLA -> 'g' */ + { 0x24, 0x48 }, /* LATIN CAPITAL LETTER H WITH CIRCUMFLEX -> 'H' */ + { 0x25, 0x68 }, /* LATIN SMALL LETTER H WITH CIRCUMFLEX -> 'h' */ + { 0x26, 0x48 }, /* LATIN CAPITAL LETTER H WITH STROKE -> 'H' */ + { 0x27, 0x68 }, /* LATIN SMALL LETTER H WITH STROKE -> 'h' */ + { 0x28, 0x49 }, /* LATIN CAPITAL LETTER I WITH TILDE -> 'I' */ + { 0x29, 0x69 }, /* LATIN SMALL LETTER I WITH TILDE -> 'i' */ + { 0x2A, 0x49 }, /* LATIN CAPITAL LETTER I WITH MACRON -> 'I' */ + { 0x2B, 0x69 }, /* LATIN SMALL LETTER I WITH MACRON -> 'i' */ + { 0x2C, 
0x49 }, /* LATIN CAPITAL LETTER I WITH BREVE -> 'I' */ + { 0x2D, 0x69 }, /* LATIN SMALL LETTER I WITH BREVE -> 'i' */ + { 0x2E, 0x49 }, /* LATIN CAPITAL LETTER I WITH OGONEK -> 'I' */ + { 0x2F, 0x69 }, /* LATIN SMALL LETTER I WITH OGONEK -> 'i' */ + { 0x30, 0x49 }, /* LATIN CAPITAL LETTER I WITH DOT ABOVE -> 'I' */ + { 0x31, 0x69 }, /* LATIN SMALL LETTER DOTLESS I -> 'i' */ + { 0x34, 0x4A }, /* LATIN CAPITAL LETTER J WITH CIRCUMFLEX -> 'J' */ + { 0x35, 0x6A }, /* LATIN SMALL LETTER J WITH CIRCUMFLEX -> 'j' */ + { 0x36, 0x4B }, /* LATIN CAPITAL LETTER K WITH CEDILLA -> 'K' */ + { 0x37, 0x6B }, /* LATIN SMALL LETTER K WITH CEDILLA -> 'k' */ + { 0x38, 0x6B }, /* LATIN SMALL LETTER KRA -> 'k' */ + { 0x39, 0x4C }, /* LATIN CAPITAL LETTER L WITH ACUTE -> 'L' */ + { 0x3A, 0x6C }, /* LATIN SMALL LETTER L WITH ACUTE -> 'l' */ + { 0x3B, 0x4C }, /* LATIN CAPITAL LETTER L WITH CEDILLA -> 'L' */ + { 0x3C, 0x6C }, /* LATIN SMALL LETTER L WITH CEDILLA -> 'l' */ + { 0x3D, 0x4C }, /* LATIN CAPITAL LETTER L WITH CARON -> 'L' */ + { 0x3E, 0x6C }, /* LATIN SMALL LETTER L WITH CARON -> 'l' */ + { 0x3F, 0x4C }, /* LATIN CAPITAL LETTER L WITH MIDDLE DOT -> 'L' */ + { 0x40, 0x6C }, /* LATIN SMALL LETTER L WITH MIDDLE DOT -> 'l' */ + { 0x41, 0x4C }, /* LATIN CAPITAL LETTER L WITH STROKE -> 'L' */ + { 0x42, 0x6C }, /* LATIN SMALL LETTER L WITH STROKE -> 'l' */ + { 0x43, 0x4E }, /* LATIN CAPITAL LETTER N WITH ACUTE -> 'N' */ + { 0x44, 0x6E }, /* LATIN SMALL LETTER N WITH ACUTE -> 'n' */ + { 0x45, 0x4E }, /* LATIN CAPITAL LETTER N WITH CEDILLA -> 'N' */ + { 0x46, 0x6E }, /* LATIN SMALL LETTER N WITH CEDILLA -> 'n' */ + { 0x47, 0x4E }, /* LATIN CAPITAL LETTER N WITH CARON -> 'N' */ + { 0x48, 0x6E }, /* LATIN SMALL LETTER N WITH CARON -> 'n' */ + { 0x4C, 0x4F }, /* LATIN CAPITAL LETTER O WITH MACRON -> 'O' */ + { 0x4D, 0x6F }, /* LATIN SMALL LETTER O WITH MACRON -> 'o' */ + { 0x4E, 0x4F }, /* LATIN CAPITAL LETTER O WITH BREVE -> 'O' */ + { 0x4F, 0x6F }, /* LATIN SMALL LETTER O WITH BREVE -> 'o' 
*/ + { 0x50, 0x4F }, /* LATIN CAPITAL LETTER O WITH DOUBLE ACUTE -> 'O' */ + { 0x51, 0x6F }, /* LATIN SMALL LETTER O WITH DOUBLE ACUTE -> 'o' */ + { 0x52, 0x45 }, /* LATIN CAPITAL LIGATURE OE -> 'E' */ + { 0x53, 0x65 }, /* LATIN SMALL LIGATURE OE -> 'e' */ + { 0x54, 0x52 }, /* LATIN CAPITAL LETTER R WITH ACUTE -> 'R' */ + { 0x55, 0x72 }, /* LATIN SMALL LETTER R WITH ACUTE -> 'r' */ + { 0x56, 0x52 }, /* LATIN CAPITAL LETTER R WITH CEDILLA -> 'R' */ + { 0x57, 0x72 }, /* LATIN SMALL LETTER R WITH CEDILLA -> 'r' */ + { 0x58, 0x52 }, /* LATIN CAPITAL LETTER R WITH CARON -> 'R' */ + { 0x59, 0x72 }, /* LATIN SMALL LETTER R WITH CARON -> 'r' */ + { 0x5A, 0x53 }, /* LATIN CAPITAL LETTER S WITH ACUTE -> 'S' */ + { 0x5B, 0x73 }, /* LATIN SMALL LETTER S WITH ACUTE -> 's' */ + { 0x5C, 0x53 }, /* LATIN CAPITAL LETTER S WITH CIRCUMFLEX -> 'S' */ + { 0x5D, 0x73 }, /* LATIN SMALL LETTER S WITH CIRCUMFLEX -> 's' */ + { 0x5E, 0x53 }, /* LATIN CAPITAL LETTER S WITH CEDILLA -> 'S' */ + { 0x5F, 0x73 }, /* LATIN SMALL LETTER S WITH CEDILLA -> 's' */ + { 0x60, 0x53 }, /* LATIN CAPITAL LETTER S WITH CARON -> 'S' */ + { 0x61, 0x73 }, /* LATIN SMALL LETTER S WITH CARON -> 's' */ + { 0x62, 0x54 }, /* LATIN CAPITAL LETTER T WITH CEDILLA -> 'T' */ + { 0x63, 0x74 }, /* LATIN SMALL LETTER T WITH CEDILLA -> 't' */ + { 0x64, 0x54 }, /* LATIN CAPITAL LETTER T WITH CARON -> 'T' */ + { 0x65, 0x74 }, /* LATIN SMALL LETTER T WITH CARON -> 't' */ + { 0x66, 0x54 }, /* LATIN CAPITAL LETTER T WITH STROKE -> 'T' */ + { 0x67, 0x74 }, /* LATIN SMALL LETTER T WITH STROKE -> 't' */ + { 0x68, 0x55 }, /* LATIN CAPITAL LETTER U WITH TILDE -> 'U' */ + { 0x69, 0x75 }, /* LATIN SMALL LETTER U WITH TILDE -> 'u' */ + { 0x6A, 0x55 }, /* LATIN CAPITAL LETTER U WITH MACRON -> 'U' */ + { 0x6B, 0x75 }, /* LATIN SMALL LETTER U WITH MACRON -> 'u' */ + { 0x6C, 0x55 }, /* LATIN CAPITAL LETTER U WITH BREVE -> 'U' */ + { 0x6D, 0x75 }, /* LATIN SMALL LETTER U WITH BREVE -> 'u' */ + { 0x6E, 0x55 }, /* LATIN CAPITAL LETTER U WITH 
RING ABOVE -> 'U' */ + { 0x6F, 0x75 }, /* LATIN SMALL LETTER U WITH RING ABOVE -> 'u' */ + { 0x70, 0x55 }, /* LATIN CAPITAL LETTER U WITH DOUBLE ACUTE -> 'U' */ + { 0x71, 0x75 }, /* LATIN SMALL LETTER U WITH DOUBLE ACUTE -> 'u' */ + { 0x72, 0x55 }, /* LATIN CAPITAL LETTER U WITH OGONEK -> 'U' */ + { 0x73, 0x75 }, /* LATIN SMALL LETTER U WITH OGONEK -> 'u' */ + { 0x74, 0x57 }, /* LATIN CAPITAL LETTER W WITH CIRCUMFLEX -> 'W' */ + { 0x75, 0x77 }, /* LATIN SMALL LETTER W WITH CIRCUMFLEX -> 'w' */ + { 0x76, 0x59 }, /* LATIN CAPITAL LETTER Y WITH CIRCUMFLEX -> 'Y' */ + { 0x77, 0x79 }, /* LATIN SMALL LETTER Y WITH CIRCUMFLEX -> 'y' */ + { 0x78, 0x59 }, /* LATIN CAPITAL LETTER Y WITH DIAERESIS -> 'Y' */ + { 0x79, 0x5A }, /* LATIN CAPITAL LETTER Z WITH ACUTE -> 'Z' */ + { 0x7A, 0x7A }, /* LATIN SMALL LETTER Z WITH ACUTE -> 'z' */ + { 0x7B, 0x5A }, /* LATIN CAPITAL LETTER Z WITH DOT ABOVE -> 'Z' */ + { 0x7C, 0x7A }, /* LATIN SMALL LETTER Z WITH DOT ABOVE -> 'z' */ + { 0x7D, 0x5A }, /* LATIN CAPITAL LETTER Z WITH CARON -> 'Z' */ + { 0x7E, 0x7A }, /* LATIN SMALL LETTER Z WITH CARON -> 'z' */ + { 0x7F, 0x73 }, /* LATIN SMALL LETTER LONG S -> 's' */ + { 0x80, 0x62 }, /* LATIN SMALL LETTER B WITH STROKE -> 'b' */ + { 0x81, 0x42 }, /* LATIN CAPITAL LETTER B WITH HOOK -> 'B' */ + { 0x82, 0x42 }, /* LATIN CAPITAL LETTER B WITH TOPBAR -> 'B' */ + { 0x83, 0x62 }, /* LATIN SMALL LETTER B WITH TOPBAR -> 'b' */ + { 0x84, 0x36 }, /* LATIN CAPITAL LETTER TONE SIX -> '6' */ + { 0x85, 0x36 }, /* LATIN SMALL LETTER TONE SIX -> '6' */ + { 0x86, 0x4F }, /* LATIN CAPITAL LETTER OPEN O -> 'O' */ + { 0x87, 0x43 }, /* LATIN CAPITAL LETTER C WITH HOOK -> 'C' */ + { 0x88, 0x63 }, /* LATIN SMALL LETTER C WITH HOOK -> 'c' */ + { 0x89, 0x00 }, /* LATIN CAPITAL LETTER AFRICAN D -> ... 
*/ + { 0x8B, 0x44 }, /* LATIN CAPITAL LETTER D WITH TOPBAR -> 'D' */ + { 0x8C, 0x64 }, /* LATIN SMALL LETTER D WITH TOPBAR -> 'd' */ + { 0x8D, 0x64 }, /* LATIN SMALL LETTER TURNED DELTA -> 'd' */ + { 0x8E, 0x33 }, /* LATIN CAPITAL LETTER REVERSED E -> '3' */ + { 0x8F, 0x40 }, /* LATIN CAPITAL LETTER SCHWA -> '@' */ + { 0x90, 0x45 }, /* LATIN CAPITAL LETTER OPEN E -> 'E' */ + { 0x91, 0x46 }, /* LATIN CAPITAL LETTER F WITH HOOK -> 'F' */ + { 0x92, 0x66 }, /* LATIN SMALL LETTER F WITH HOOK -> 'f' */ + { 0x93, 0x47 }, /* LATIN CAPITAL LETTER G WITH HOOK -> 'G' */ + { 0x94, 0x47 }, /* LATIN CAPITAL LETTER GAMMA -> 'G' */ + { 0x96, 0x49 }, /* LATIN CAPITAL LETTER IOTA -> 'I' */ + { 0x97, 0x49 }, /* LATIN CAPITAL LETTER I WITH STROKE -> 'I' */ + { 0x98, 0x4B }, /* LATIN CAPITAL LETTER K WITH HOOK -> 'K' */ + { 0x99, 0x6B }, /* LATIN SMALL LETTER K WITH HOOK -> 'k' */ + { 0x9A, 0x6C }, /* LATIN SMALL LETTER L WITH BAR -> 'l' */ + { 0x9B, 0x6C }, /* LATIN SMALL LETTER LAMBDA WITH STROKE -> 'l' */ + { 0x9C, 0x57 }, /* LATIN CAPITAL LETTER TURNED M -> 'W' */ + { 0x9D, 0x4E }, /* LATIN CAPITAL LETTER N WITH LEFT HOOK -> 'N' */ + { 0x9E, 0x6E }, /* LATIN SMALL LETTER N WITH LONG RIGHT LEG -> 'n' */ + { 0x9F, 0x4F }, /* LATIN CAPITAL LETTER O WITH MIDDLE TILDE -> 'O' */ + { 0xA0, 0x4F }, /* LATIN CAPITAL LETTER O WITH HORN -> 'O' */ + { 0xA1, 0x6F }, /* LATIN SMALL LETTER O WITH HORN -> 'o' */ + { 0xA4, 0x50 }, /* LATIN CAPITAL LETTER P WITH HOOK -> 'P' */ + { 0xA5, 0x70 }, /* LATIN SMALL LETTER P WITH HOOK -> 'p' */ + { 0xA7, 0x32 }, /* LATIN CAPITAL LETTER TONE TWO -> '2' */ + { 0xA8, 0x32 }, /* LATIN SMALL LETTER TONE TWO -> '2' */ + { 0xAB, 0x74 }, /* LATIN SMALL LETTER T WITH PALATAL HOOK -> 't' */ + { 0xAC, 0x54 }, /* LATIN CAPITAL LETTER T WITH HOOK -> 'T' */ + { 0xAD, 0x74 }, /* LATIN SMALL LETTER T WITH HOOK -> 't' */ + { 0xAE, 0x54 }, /* LATIN CAPITAL LETTER T WITH RETROFLEX HOOK -> 'T' */ + { 0xAF, 0x55 }, /* LATIN CAPITAL LETTER U WITH HORN -> 'U' */ + { 0xB0, 0x75 
}, /* LATIN SMALL LETTER U WITH HORN -> 'u' */ + { 0xB1, 0x59 }, /* LATIN CAPITAL LETTER UPSILON -> 'Y' */ + { 0xB2, 0x56 }, /* LATIN CAPITAL LETTER V WITH HOOK -> 'V' */ + { 0xB3, 0x59 }, /* LATIN CAPITAL LETTER Y WITH HOOK -> 'Y' */ + { 0xB4, 0x79 }, /* LATIN SMALL LETTER Y WITH HOOK -> 'y' */ + { 0xB5, 0x5A }, /* LATIN CAPITAL LETTER Z WITH STROKE -> 'Z' */ + { 0xB6, 0x7A }, /* LATIN SMALL LETTER Z WITH STROKE -> 'z' */ + { 0xBB, 0x32 }, /* LATIN LETTER TWO WITH STROKE -> '2' */ + { 0xBC, 0x35 }, /* LATIN CAPITAL LETTER TONE FIVE -> '5' */ + { 0xBD, 0x35 }, /* LATIN SMALL LETTER TONE FIVE -> '5' */ + { 0xBF, 0x77 }, /* LATIN LETTER WYNN -> 'w' */ + { 0xC0, 0x7C }, /* LATIN LETTER DENTAL CLICK -> '|' */ + { 0xC3, 0x21 }, /* LATIN LETTER RETROFLEX CLICK -> '!' */ + { 0xCD, 0x41 }, /* LATIN CAPITAL LETTER A WITH CARON -> 'A' */ + { 0xCE, 0x61 }, /* LATIN SMALL LETTER A WITH CARON -> 'a' */ + { 0xCF, 0x49 }, /* LATIN CAPITAL LETTER I WITH CARON -> 'I' */ + { 0xD0, 0x69 }, /* LATIN SMALL LETTER I WITH CARON -> 'i' */ + { 0xD1, 0x4F }, /* LATIN CAPITAL LETTER O WITH CARON -> 'O' */ + { 0xD2, 0x6F }, /* LATIN SMALL LETTER O WITH CARON -> 'o' */ + { 0xD3, 0x55 }, /* LATIN CAPITAL LETTER U WITH CARON -> 'U' */ + { 0xD4, 0x75 }, /* LATIN SMALL LETTER U WITH CARON -> 'u' */ + { 0xD5, 0x55 }, /* LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON -> 'U' */ + { 0xD6, 0x75 }, /* LATIN SMALL LETTER U WITH DIAERESIS AND MACRON -> 'u' */ + { 0xD7, 0x55 }, /* LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE -> 'U' */ + { 0xD8, 0x75 }, /* LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE -> 'u' */ + { 0xD9, 0x55 }, /* LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON -> 'U' */ + { 0xDA, 0x75 }, /* LATIN SMALL LETTER U WITH DIAERESIS AND CARON -> 'u' */ + { 0xDB, 0x55 }, /* LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE -> 'U' */ + { 0xDC, 0x75 }, /* LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE -> 'u' */ + { 0xDD, 0x40 }, /* LATIN SMALL LETTER TURNED E -> '@' */ + { 0xDE, 0x41 }, /* LATIN 
CAPITAL LETTER A WITH DIAERESIS AND MACRON -> 'A' */ + { 0xDF, 0x61 }, /* LATIN SMALL LETTER A WITH DIAERESIS AND MACRON -> 'a' */ + { 0xE0, 0x41 }, /* LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON -> 'A' */ + { 0xE1, 0x61 }, /* LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON -> 'a' */ + { 0xE4, 0x47 }, /* LATIN CAPITAL LETTER G WITH STROKE -> 'G' */ + { 0xE5, 0x67 }, /* LATIN SMALL LETTER G WITH STROKE -> 'g' */ + { 0xE6, 0x47 }, /* LATIN CAPITAL LETTER G WITH CARON -> 'G' */ + { 0xE7, 0x67 }, /* LATIN SMALL LETTER G WITH CARON -> 'g' */ + { 0xE8, 0x4B }, /* LATIN CAPITAL LETTER K WITH CARON -> 'K' */ + { 0xE9, 0x6B }, /* LATIN SMALL LETTER K WITH CARON -> 'k' */ + { 0xEA, 0x4F }, /* LATIN CAPITAL LETTER O WITH OGONEK -> 'O' */ + { 0xEB, 0x6F }, /* LATIN SMALL LETTER O WITH OGONEK -> 'o' */ + { 0xEC, 0x4F }, /* LATIN CAPITAL LETTER O WITH OGONEK AND MACRON -> 'O' */ + { 0xED, 0x6F }, /* LATIN SMALL LETTER O WITH OGONEK AND MACRON -> 'o' */ + { 0xF0, 0x6A }, /* LATIN SMALL LETTER J WITH CARON -> 'j' */ + { 0xF4, 0x47 }, /* LATIN CAPITAL LETTER G WITH ACUTE -> 'G' */ + { 0xF5, 0x67 }, /* LATIN SMALL LETTER G WITH ACUTE -> 'g' */ + { 0xF7, 0x57 }, /* LATIN CAPITAL LETTER WYNN -> 'W' */ + { 0xF8, 0x4E }, /* LATIN CAPITAL LETTER N WITH GRAVE -> 'N' */ + { 0xF9, 0x6E }, /* LATIN SMALL LETTER N WITH GRAVE -> 'n' */ + { 0xFA, 0x41 }, /* LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE -> 'A' */ + { 0xFB, 0x61 }, /* LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE -> 'a' */ + { 0xFE, 0x4F }, /* LATIN CAPITAL LETTER O WITH STROKE AND ACUTE -> 'O' */ + { 0xFF, 0x6F }, /* LATIN SMALL LETTER O WITH STROKE AND ACUTE -> 'o' */ + /* Entries for page 0x02 */ + { 0x00, 0x41 }, /* LATIN CAPITAL LETTER A WITH DOUBLE GRAVE -> 'A' */ + { 0x01, 0x61 }, /* LATIN SMALL LETTER A WITH DOUBLE GRAVE -> 'a' */ + { 0x02, 0x41 }, /* LATIN CAPITAL LETTER A WITH INVERTED BREVE -> 'A' */ + { 0x03, 0x61 }, /* LATIN SMALL LETTER A WITH INVERTED BREVE -> 'a' */ + { 0x04, 0x45 }, /* LATIN CAPITAL 
LETTER E WITH DOUBLE GRAVE -> 'E' */ + { 0x05, 0x65 }, /* LATIN SMALL LETTER E WITH DOUBLE GRAVE -> 'e' */ + { 0x06, 0x45 }, /* LATIN CAPITAL LETTER E WITH INVERTED BREVE -> 'E' */ + { 0x07, 0x65 }, /* LATIN SMALL LETTER E WITH INVERTED BREVE -> 'e' */ + { 0x08, 0x49 }, /* LATIN CAPITAL LETTER I WITH DOUBLE GRAVE -> 'I' */ + { 0x09, 0x69 }, /* LATIN SMALL LETTER I WITH DOUBLE GRAVE -> 'i' */ + { 0x0A, 0x49 }, /* LATIN CAPITAL LETTER I WITH INVERTED BREVE -> 'I' */ + { 0x0B, 0x69 }, /* LATIN SMALL LETTER I WITH INVERTED BREVE -> 'i' */ + { 0x0C, 0x4F }, /* LATIN CAPITAL LETTER O WITH DOUBLE GRAVE -> 'O' */ + { 0x0D, 0x6F }, /* LATIN SMALL LETTER O WITH DOUBLE GRAVE -> 'o' */ + { 0x0E, 0x4F }, /* LATIN CAPITAL LETTER O WITH INVERTED BREVE -> 'O' */ + { 0x0F, 0x6F }, /* LATIN SMALL LETTER O WITH INVERTED BREVE -> 'o' */ + { 0x10, 0x52 }, /* LATIN CAPITAL LETTER R WITH DOUBLE GRAVE -> 'R' */ + { 0x11, 0x72 }, /* LATIN SMALL LETTER R WITH DOUBLE GRAVE -> 'r' */ + { 0x12, 0x52 }, /* LATIN CAPITAL LETTER R WITH INVERTED BREVE -> 'R' */ + { 0x13, 0x72 }, /* LATIN SMALL LETTER R WITH INVERTED BREVE -> 'r' */ + { 0x14, 0x55 }, /* LATIN CAPITAL LETTER U WITH DOUBLE GRAVE -> 'U' */ + { 0x15, 0x75 }, /* LATIN SMALL LETTER U WITH DOUBLE GRAVE -> 'u' */ + { 0x16, 0x55 }, /* LATIN CAPITAL LETTER U WITH INVERTED BREVE -> 'U' */ + { 0x17, 0x75 }, /* LATIN SMALL LETTER U WITH INVERTED BREVE -> 'u' */ + { 0x18, 0x53 }, /* LATIN CAPITAL LETTER S WITH COMMA BELOW -> 'S' */ + { 0x19, 0x73 }, /* LATIN SMALL LETTER S WITH COMMA BELOW -> 's' */ + { 0x1A, 0x54 }, /* LATIN CAPITAL LETTER T WITH COMMA BELOW -> 'T' */ + { 0x1B, 0x74 }, /* LATIN SMALL LETTER T WITH COMMA BELOW -> 't' */ + { 0x1C, 0x59 }, /* LATIN CAPITAL LETTER YOGH -> 'Y' */ + { 0x1D, 0x79 }, /* LATIN SMALL LETTER YOGH -> 'y' */ + { 0x1E, 0x48 }, /* LATIN CAPITAL LETTER H WITH CARON -> 'H' */ + { 0x1F, 0x68 }, /* LATIN SMALL LETTER H WITH CARON -> 'h' */ + { 0x20, 0x4E }, /* LATIN CAPITAL LETTER N WITH LONG RIGHT LEG -> 'N' */ 
+ { 0x21, 0x64 }, /* LATIN SMALL LETTER D WITH CURL -> 'd' */ + { 0x24, 0x5A }, /* LATIN CAPITAL LETTER Z WITH HOOK -> 'Z' */ + { 0x25, 0x7A }, /* LATIN SMALL LETTER Z WITH HOOK -> 'z' */ + { 0x26, 0x41 }, /* LATIN CAPITAL LETTER A WITH DOT ABOVE -> 'A' */ + { 0x27, 0x61 }, /* LATIN SMALL LETTER A WITH DOT ABOVE -> 'a' */ + { 0x28, 0x45 }, /* LATIN CAPITAL LETTER E WITH CEDILLA -> 'E' */ + { 0x29, 0x65 }, /* LATIN SMALL LETTER E WITH CEDILLA -> 'e' */ + { 0x2A, 0x4F }, /* LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON -> 'O' */ + { 0x2B, 0x6F }, /* LATIN SMALL LETTER O WITH DIAERESIS AND MACRON -> 'o' */ + { 0x2C, 0x4F }, /* LATIN CAPITAL LETTER O WITH TILDE AND MACRON -> 'O' */ + { 0x2D, 0x6F }, /* LATIN SMALL LETTER O WITH TILDE AND MACRON -> 'o' */ + { 0x2E, 0x4F }, /* LATIN CAPITAL LETTER O WITH DOT ABOVE -> 'O' */ + { 0x2F, 0x6F }, /* LATIN SMALL LETTER O WITH DOT ABOVE -> 'o' */ + { 0x30, 0x4F }, /* LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON -> 'O' */ + { 0x31, 0x6F }, /* LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON -> 'o' */ + { 0x32, 0x59 }, /* LATIN CAPITAL LETTER Y WITH MACRON -> 'Y' */ + { 0x33, 0x79 }, /* LATIN SMALL LETTER Y WITH MACRON -> 'y' */ + { 0x34, 0x6C }, /* LATIN SMALL LETTER L WITH CURL -> 'l' */ + { 0x35, 0x6E }, /* LATIN SMALL LETTER N WITH CURL -> 'n' */ + { 0x36, 0x74 }, /* LATIN SMALL LETTER T WITH CURL -> 't' */ + { 0x37, 0x6A }, /* LATIN SMALL LETTER DOTLESS J -> 'j' */ + { 0x3A, 0x41 }, /* LATIN CAPITAL LETTER A WITH STROKE -> 'A' */ + { 0x3B, 0x43 }, /* LATIN CAPITAL LETTER C WITH STROKE -> 'C' */ + { 0x3C, 0x63 }, /* LATIN SMALL LETTER C WITH STROKE -> 'c' */ + { 0x3D, 0x4C }, /* LATIN CAPITAL LETTER L WITH BAR -> 'L' */ + { 0x3E, 0x54 }, /* LATIN CAPITAL LETTER T WITH DIAGONAL STROKE -> 'T' */ + { 0x3F, 0x73 }, /* LATIN SMALL LETTER S WITH SWASH TAIL -> 's' */ + { 0x40, 0x7A }, /* LATIN SMALL LETTER Z WITH SWASH TAIL -> 'z' */ + { 0x43, 0x42 }, /* LATIN CAPITAL LETTER B WITH STROKE -> 'B' */ + { 0x44, 0x55 }, /* LATIN 
CAPITAL LETTER U BAR -> 'U' */ + { 0x45, 0x5E }, /* LATIN CAPITAL LETTER TURNED V -> '^' */ + { 0x46, 0x45 }, /* LATIN CAPITAL LETTER E WITH STROKE -> 'E' */ + { 0x47, 0x65 }, /* LATIN SMALL LETTER E WITH STROKE -> 'e' */ + { 0x48, 0x4A }, /* LATIN CAPITAL LETTER J WITH STROKE -> 'J' */ + { 0x49, 0x6A }, /* LATIN SMALL LETTER J WITH STROKE -> 'j' */ + { 0x4A, 0x71 }, /* LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL -> 'q' */ + { 0x4B, 0x71 }, /* LATIN SMALL LETTER Q WITH HOOK TAIL -> 'q' */ + { 0x4C, 0x52 }, /* LATIN CAPITAL LETTER R WITH STROKE -> 'R' */ + { 0x4D, 0x72 }, /* LATIN SMALL LETTER R WITH STROKE -> 'r' */ + { 0x4E, 0x59 }, /* LATIN CAPITAL LETTER Y WITH STROKE -> 'Y' */ + { 0x4F, 0x79 }, /* LATIN SMALL LETTER Y WITH STROKE -> 'y' */ + { 0x50, 0x00 }, /* LATIN SMALL LETTER TURNED A -> ... */ + { 0x52, 0x61 }, /* LATIN SMALL LETTER TURNED ALPHA -> 'a' */ + { 0x53, 0x62 }, /* LATIN SMALL LETTER B WITH HOOK -> 'b' */ + { 0x54, 0x6F }, /* LATIN SMALL LETTER OPEN O -> 'o' */ + { 0x55, 0x63 }, /* LATIN SMALL LETTER C WITH CURL -> 'c' */ + { 0x56, 0x64 }, /* LATIN SMALL LETTER D WITH TAIL -> 'd' */ + { 0x57, 0x64 }, /* LATIN SMALL LETTER D WITH HOOK -> 'd' */ + { 0x58, 0x65 }, /* LATIN SMALL LETTER REVERSED E -> 'e' */ + { 0x59, 0x40 }, /* LATIN SMALL LETTER SCHWA -> '@' */ + { 0x5A, 0x40 }, /* LATIN SMALL LETTER SCHWA WITH HOOK -> '@' */ + { 0x5B, 0x00 }, /* LATIN SMALL LETTER OPEN E -> ... */ + { 0x5E, 0x65 }, /* LATIN SMALL LETTER CLOSED REVERSED OPEN E -> 'e' */ + { 0x5F, 0x6A }, /* LATIN SMALL LETTER DOTLESS J WITH STROKE -> 'j' */ + { 0x60, 0x00 }, /* LATIN SMALL LETTER G WITH HOOK -> ... 
*/ + { 0x63, 0x67 }, /* LATIN SMALL LETTER GAMMA -> 'g' */ + { 0x64, 0x75 }, /* LATIN SMALL LETTER RAMS HORN -> 'u' */ + { 0x65, 0x59 }, /* LATIN SMALL LETTER TURNED H -> 'Y' */ + { 0x66, 0x68 }, /* LATIN SMALL LETTER H WITH HOOK -> 'h' */ + { 0x67, 0x68 }, /* LATIN SMALL LETTER HENG WITH HOOK -> 'h' */ + { 0x68, 0x69 }, /* LATIN SMALL LETTER I WITH STROKE -> 'i' */ + { 0x69, 0x69 }, /* LATIN SMALL LETTER IOTA -> 'i' */ + { 0x6A, 0x49 }, /* LATIN LETTER SMALL CAPITAL I -> 'I' */ + { 0x6B, 0x00 }, /* LATIN SMALL LETTER L WITH MIDDLE TILDE -> ... */ + { 0x6D, 0x6C }, /* LATIN SMALL LETTER L WITH RETROFLEX HOOK -> 'l' */ + { 0x6F, 0x57 }, /* LATIN SMALL LETTER TURNED M -> 'W' */ + { 0x70, 0x57 }, /* LATIN SMALL LETTER TURNED M WITH LONG LEG -> 'W' */ + { 0x71, 0x6D }, /* LATIN SMALL LETTER M WITH HOOK -> 'm' */ + { 0x72, 0x00 }, /* LATIN SMALL LETTER N WITH LEFT HOOK -> ... */ + { 0x74, 0x6E }, /* LATIN LETTER SMALL CAPITAL N -> 'n' */ + { 0x75, 0x6F }, /* LATIN SMALL LETTER BARRED O -> 'o' */ + { 0x77, 0x4F }, /* LATIN SMALL LETTER CLOSED OMEGA -> 'O' */ + { 0x78, 0x46 }, /* LATIN SMALL LETTER PHI -> 'F' */ + { 0x79, 0x00 }, /* LATIN SMALL LETTER TURNED R -> ... 
*/ + { 0x7F, 0x72 }, /* LATIN SMALL LETTER REVERSED R WITH FISHHOOK -> 'r' */ + { 0x80, 0x52 }, /* LATIN LETTER SMALL CAPITAL R -> 'R' */ + { 0x81, 0x52 }, /* LATIN LETTER SMALL CAPITAL INVERTED R -> 'R' */ + { 0x82, 0x73 }, /* LATIN SMALL LETTER S WITH HOOK -> 's' */ + { 0x83, 0x53 }, /* LATIN SMALL LETTER ESH -> 'S' */ + { 0x84, 0x6A }, /* LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK -> 'j' */ + { 0x85, 0x53 }, /* LATIN SMALL LETTER SQUAT REVERSED ESH -> 'S' */ + { 0x86, 0x53 }, /* LATIN SMALL LETTER ESH WITH CURL -> 'S' */ + { 0x87, 0x74 }, /* LATIN SMALL LETTER TURNED T -> 't' */ + { 0x88, 0x74 }, /* LATIN SMALL LETTER T WITH RETROFLEX HOOK -> 't' */ + { 0x89, 0x75 }, /* LATIN SMALL LETTER U BAR -> 'u' */ + { 0x8A, 0x55 }, /* LATIN SMALL LETTER UPSILON -> 'U' */ + { 0x8B, 0x76 }, /* LATIN SMALL LETTER V WITH HOOK -> 'v' */ + { 0x8C, 0x5E }, /* LATIN SMALL LETTER TURNED V -> '^' */ + { 0x8D, 0x77 }, /* LATIN SMALL LETTER TURNED W -> 'w' */ + { 0x8E, 0x79 }, /* LATIN SMALL LETTER TURNED Y -> 'y' */ + { 0x8F, 0x59 }, /* LATIN LETTER SMALL CAPITAL Y -> 'Y' */ + { 0x90, 0x7A }, /* LATIN SMALL LETTER Z WITH RETROFLEX HOOK -> 'z' */ + { 0x91, 0x7A }, /* LATIN SMALL LETTER Z WITH CURL -> 'z' */ + { 0x92, 0x5A }, /* LATIN SMALL LETTER EZH -> 'Z' */ + { 0x93, 0x5A }, /* LATIN SMALL LETTER EZH WITH CURL -> 'Z' */ + { 0x94, 0x00 }, /* LATIN LETTER GLOTTAL STOP -> ... */ + { 0x96, 0x3F }, /* LATIN LETTER INVERTED GLOTTAL STOP -> '?' 
*/ + { 0x97, 0x43 }, /* LATIN LETTER STRETCHED C -> 'C' */ + { 0x98, 0x40 }, /* LATIN LETTER BILABIAL CLICK -> '@' */ + { 0x99, 0x42 }, /* LATIN LETTER SMALL CAPITAL B -> 'B' */ + { 0x9A, 0x45 }, /* LATIN SMALL LETTER CLOSED OPEN E -> 'E' */ + { 0x9B, 0x47 }, /* LATIN LETTER SMALL CAPITAL G WITH HOOK -> 'G' */ + { 0x9C, 0x48 }, /* LATIN LETTER SMALL CAPITAL H -> 'H' */ + { 0x9D, 0x6A }, /* LATIN SMALL LETTER J WITH CROSSED-TAIL -> 'j' */ + { 0x9E, 0x6B }, /* LATIN SMALL LETTER TURNED K -> 'k' */ + { 0x9F, 0x4C }, /* LATIN LETTER SMALL CAPITAL L -> 'L' */ + { 0xA0, 0x71 }, /* LATIN SMALL LETTER Q WITH HOOK -> 'q' */ + { 0xA1, 0x3F }, /* LATIN LETTER GLOTTAL STOP WITH STROKE -> '?' */ + { 0xA2, 0x3F }, /* LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE -> '?' */ + { 0xAE, 0x00 }, /* LATIN SMALL LETTER TURNED H WITH FISHHOOK -> ... */ + { 0xB1, 0x68 }, /* MODIFIER LETTER SMALL H WITH HOOK -> 'h' */ + { 0xB2, 0x6A }, /* MODIFIER LETTER SMALL J -> 'j' */ + { 0xB3, 0x00 }, /* MODIFIER LETTER SMALL R -> ... */ + { 0xB6, 0x72 }, /* MODIFIER LETTER SMALL CAPITAL INVERTED R -> 'r' */ + { 0xB7, 0x77 }, /* MODIFIER LETTER SMALL W -> 'w' */ + { 0xB8, 0x79 }, /* MODIFIER LETTER SMALL Y -> 'y' */ + { 0xB9, 0x27 }, /* MODIFIER LETTER PRIME -> ''' */ + { 0xBA, 0x22 }, /* MODIFIER LETTER DOUBLE PRIME -> '"' */ + { 0xBB, 0x60 }, /* MODIFIER LETTER TURNED COMMA -> '`' */ + { 0xBC, 0x27 }, /* MODIFIER LETTER APOSTROPHE -> ''' */ + { 0xBD, 0x60 }, /* MODIFIER LETTER REVERSED COMMA -> '`' */ + { 0xBE, 0x60 }, /* MODIFIER LETTER RIGHT HALF RING -> '`' */ + { 0xBF, 0x27 }, /* MODIFIER LETTER LEFT HALF RING -> ''' */ + { 0xC0, 0x3F }, /* MODIFIER LETTER GLOTTAL STOP -> '?' */ + { 0xC1, 0x3F }, /* MODIFIER LETTER REVERSED GLOTTAL STOP -> '?' 
*/ + { 0xC2, 0x3C }, /* MODIFIER LETTER LEFT ARROWHEAD -> '<' */ + { 0xC3, 0x3E }, /* MODIFIER LETTER RIGHT ARROWHEAD -> '>' */ + { 0xC4, 0x5E }, /* MODIFIER LETTER UP ARROWHEAD -> '^' */ + { 0xC5, 0x56 }, /* MODIFIER LETTER DOWN ARROWHEAD -> 'V' */ + { 0xC6, 0x5E }, /* MODIFIER LETTER CIRCUMFLEX ACCENT -> '^' */ + { 0xC7, 0x56 }, /* CARON -> 'V' */ + { 0xC8, 0x27 }, /* MODIFIER LETTER VERTICAL LINE -> ''' */ + { 0xC9, 0x2D }, /* MODIFIER LETTER MACRON -> '-' */ + { 0xCA, 0x2F }, /* MODIFIER LETTER ACUTE ACCENT -> '/' */ + { 0xCB, 0x5C }, /* MODIFIER LETTER GRAVE ACCENT -> '\' */ + { 0xCC, 0x2C }, /* MODIFIER LETTER LOW VERTICAL LINE -> ',' */ + { 0xCD, 0x5F }, /* MODIFIER LETTER LOW MACRON -> '_' */ + { 0xCE, 0x5C }, /* MODIFIER LETTER LOW GRAVE ACCENT -> '\' */ + { 0xCF, 0x2F }, /* MODIFIER LETTER LOW ACUTE ACCENT -> '/' */ + { 0xD0, 0x3A }, /* MODIFIER LETTER TRIANGULAR COLON -> ':' */ + { 0xD1, 0x2E }, /* MODIFIER LETTER HALF TRIANGULAR COLON -> '.' */ + { 0xD2, 0x60 }, /* MODIFIER LETTER CENTRED RIGHT HALF RING -> '`' */ + { 0xD3, 0x27 }, /* MODIFIER LETTER CENTRED LEFT HALF RING -> ''' */ + { 0xD4, 0x5E }, /* MODIFIER LETTER UP TACK -> '^' */ + { 0xD5, 0x56 }, /* MODIFIER LETTER DOWN TACK -> 'V' */ + { 0xD6, 0x2B }, /* MODIFIER LETTER PLUS SIGN -> '+' */ + { 0xD7, 0x2D }, /* MODIFIER LETTER MINUS SIGN -> '-' */ + { 0xD8, 0x56 }, /* BREVE -> 'V' */ + { 0xD9, 0x2E }, /* DOT ABOVE -> '.' 
*/ + { 0xDA, 0x40 }, /* RING ABOVE -> '@' */ + { 0xDB, 0x2C }, /* OGONEK -> ',' */ + { 0xDC, 0x7E }, /* SMALL TILDE -> '~' */ + { 0xDD, 0x22 }, /* DOUBLE ACUTE ACCENT -> '"' */ + { 0xDE, 0x52 }, /* MODIFIER LETTER RHOTIC HOOK -> 'R' */ + { 0xDF, 0x58 }, /* MODIFIER LETTER CROSS ACCENT -> 'X' */ + { 0xE0, 0x47 }, /* MODIFIER LETTER SMALL GAMMA -> 'G' */ + { 0xE1, 0x6C }, /* MODIFIER LETTER SMALL L -> 'l' */ + { 0xE2, 0x73 }, /* MODIFIER LETTER SMALL S -> 's' */ + { 0xE3, 0x78 }, /* MODIFIER LETTER SMALL X -> 'x' */ + { 0xE4, 0x3F }, /* MODIFIER LETTER SMALL REVERSED GLOTTAL STOP -> '?' */ + { 0xEC, 0x56 }, /* MODIFIER LETTER VOICING -> 'V' */ + { 0xED, 0x3D }, /* MODIFIER LETTER UNASPIRATED -> '=' */ + { 0xEE, 0x22 }, /* MODIFIER LETTER DOUBLE APOSTROPHE -> '"' */ + /* Entries for page 0x03 */ + { 0x63, 0x61 }, /* COMBINING LATIN SMALL LETTER A -> 'a' */ + { 0x64, 0x65 }, /* COMBINING LATIN SMALL LETTER E -> 'e' */ + { 0x65, 0x69 }, /* COMBINING LATIN SMALL LETTER I -> 'i' */ + { 0x66, 0x6F }, /* COMBINING LATIN SMALL LETTER O -> 'o' */ + { 0x67, 0x75 }, /* COMBINING LATIN SMALL LETTER U -> 'u' */ + { 0x68, 0x63 }, /* COMBINING LATIN SMALL LETTER C -> 'c' */ + { 0x69, 0x64 }, /* COMBINING LATIN SMALL LETTER D -> 'd' */ + { 0x6A, 0x68 }, /* COMBINING LATIN SMALL LETTER H -> 'h' */ + { 0x6B, 0x6D }, /* COMBINING LATIN SMALL LETTER M -> 'm' */ + { 0x6C, 0x72 }, /* COMBINING LATIN SMALL LETTER R -> 'r' */ + { 0x6D, 0x74 }, /* COMBINING LATIN SMALL LETTER T -> 't' */ + { 0x6E, 0x76 }, /* COMBINING LATIN SMALL LETTER V -> 'v' */ + { 0x6F, 0x78 }, /* COMBINING LATIN SMALL LETTER X -> 'x' */ + { 0x74, 0x27 }, /* GREEK NUMERAL SIGN -> ''' */ + { 0x75, 0x2C }, /* GREEK LOWER NUMERAL SIGN -> ',' */ + { 0x7E, 0x3F }, /* GREEK QUESTION MARK -> '?' 
*/ + { 0x86, 0x41 }, /* GREEK CAPITAL LETTER ALPHA WITH TONOS -> 'A' */ + { 0x87, 0x3B }, /* GREEK ANO TELEIA -> ';' */ + { 0x88, 0x45 }, /* GREEK CAPITAL LETTER EPSILON WITH TONOS -> 'E' */ + { 0x89, 0x45 }, /* GREEK CAPITAL LETTER ETA WITH TONOS -> 'E' */ + { 0x8A, 0x49 }, /* GREEK CAPITAL LETTER IOTA WITH TONOS -> 'I' */ + { 0x8C, 0x4F }, /* GREEK CAPITAL LETTER OMICRON WITH TONOS -> 'O' */ + { 0x8E, 0x55 }, /* GREEK CAPITAL LETTER UPSILON WITH TONOS -> 'U' */ + { 0x8F, 0x4F }, /* GREEK CAPITAL LETTER OMEGA WITH TONOS -> 'O' */ + { 0x90, 0x49 }, /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS -> 'I' */ + { 0x91, 0x41 }, /* GREEK CAPITAL LETTER ALPHA -> 'A' */ + { 0x92, 0x42 }, /* GREEK CAPITAL LETTER BETA -> 'B' */ + { 0x93, 0x47 }, /* GREEK CAPITAL LETTER GAMMA -> 'G' */ + { 0x94, 0x44 }, /* GREEK CAPITAL LETTER DELTA -> 'D' */ + { 0x95, 0x45 }, /* GREEK CAPITAL LETTER EPSILON -> 'E' */ + { 0x96, 0x5A }, /* GREEK CAPITAL LETTER ZETA -> 'Z' */ + { 0x97, 0x45 }, /* GREEK CAPITAL LETTER ETA -> 'E' */ + { 0x99, 0x49 }, /* GREEK CAPITAL LETTER IOTA -> 'I' */ + { 0x9A, 0x4B }, /* GREEK CAPITAL LETTER KAPPA -> 'K' */ + { 0x9B, 0x4C }, /* GREEK CAPITAL LETTER LAMDA -> 'L' */ + { 0x9C, 0x4D }, /* GREEK CAPITAL LETTER MU -> 'M' */ + { 0x9D, 0x4E }, /* GREEK CAPITAL LETTER NU -> 'N' */ + { 0x9F, 0x4F }, /* GREEK CAPITAL LETTER OMICRON -> 'O' */ + { 0xA0, 0x50 }, /* GREEK CAPITAL LETTER PI -> 'P' */ + { 0xA1, 0x52 }, /* GREEK CAPITAL LETTER RHO -> 'R' */ + { 0xA3, 0x53 }, /* GREEK CAPITAL LETTER SIGMA -> 'S' */ + { 0xA4, 0x54 }, /* GREEK CAPITAL LETTER TAU -> 'T' */ + { 0xA5, 0x55 }, /* GREEK CAPITAL LETTER UPSILON -> 'U' */ + { 0xA9, 0x4F }, /* GREEK CAPITAL LETTER OMEGA -> 'O' */ + { 0xAA, 0x49 }, /* GREEK CAPITAL LETTER IOTA WITH DIALYTIKA -> 'I' */ + { 0xAB, 0x55 }, /* GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA -> 'U' */ + { 0xAC, 0x61 }, /* GREEK SMALL LETTER ALPHA WITH TONOS -> 'a' */ + { 0xAD, 0x65 }, /* GREEK SMALL LETTER EPSILON WITH TONOS -> 'e' */ + { 0xAE, 
0x65 }, /* GREEK SMALL LETTER ETA WITH TONOS -> 'e' */ + { 0xAF, 0x69 }, /* GREEK SMALL LETTER IOTA WITH TONOS -> 'i' */ + { 0xB0, 0x75 }, /* GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS -> 'u' */ + { 0xB1, 0x61 }, /* GREEK SMALL LETTER ALPHA -> 'a' */ + { 0xB2, 0x62 }, /* GREEK SMALL LETTER BETA -> 'b' */ + { 0xB3, 0x67 }, /* GREEK SMALL LETTER GAMMA -> 'g' */ + { 0xB4, 0x64 }, /* GREEK SMALL LETTER DELTA -> 'd' */ + { 0xB5, 0x65 }, /* GREEK SMALL LETTER EPSILON -> 'e' */ + { 0xB6, 0x7A }, /* GREEK SMALL LETTER ZETA -> 'z' */ + { 0xB7, 0x65 }, /* GREEK SMALL LETTER ETA -> 'e' */ + { 0xB9, 0x69 }, /* GREEK SMALL LETTER IOTA -> 'i' */ + { 0xBA, 0x6B }, /* GREEK SMALL LETTER KAPPA -> 'k' */ + { 0xBB, 0x6C }, /* GREEK SMALL LETTER LAMDA -> 'l' */ + { 0xBC, 0x6D }, /* GREEK SMALL LETTER MU -> 'm' */ + { 0xBD, 0x6E }, /* GREEK SMALL LETTER NU -> 'n' */ + { 0xBE, 0x78 }, /* GREEK SMALL LETTER XI -> 'x' */ + { 0xBF, 0x6F }, /* GREEK SMALL LETTER OMICRON -> 'o' */ + { 0xC0, 0x70 }, /* GREEK SMALL LETTER PI -> 'p' */ + { 0xC1, 0x72 }, /* GREEK SMALL LETTER RHO -> 'r' */ + { 0xC2, 0x73 }, /* GREEK SMALL LETTER FINAL SIGMA -> 's' */ + { 0xC3, 0x73 }, /* GREEK SMALL LETTER SIGMA -> 's' */ + { 0xC4, 0x74 }, /* GREEK SMALL LETTER TAU -> 't' */ + { 0xC5, 0x75 }, /* GREEK SMALL LETTER UPSILON -> 'u' */ + { 0xC9, 0x6F }, /* GREEK SMALL LETTER OMEGA -> 'o' */ + { 0xCA, 0x69 }, /* GREEK SMALL LETTER IOTA WITH DIALYTIKA -> 'i' */ + { 0xCB, 0x75 }, /* GREEK SMALL LETTER UPSILON WITH DIALYTIKA -> 'u' */ + { 0xCC, 0x6F }, /* GREEK SMALL LETTER OMICRON WITH TONOS -> 'o' */ + { 0xCD, 0x75 }, /* GREEK SMALL LETTER UPSILON WITH TONOS -> 'u' */ + { 0xCE, 0x6F }, /* GREEK SMALL LETTER OMEGA WITH TONOS -> 'o' */ + { 0xD0, 0x62 }, /* GREEK BETA SYMBOL -> 'b' */ + { 0xD2, 0x00 }, /* GREEK UPSILON WITH HOOK SYMBOL -> ... 
*/ + { 0xD4, 0x55 }, /* GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL -> 'U' */ + { 0xD6, 0x70 }, /* GREEK PI SYMBOL -> 'p' */ + { 0xD7, 0x26 }, /* GREEK KAI SYMBOL -> '&' */ + { 0xDC, 0x57 }, /* GREEK LETTER DIGAMMA -> 'W' */ + { 0xDD, 0x77 }, /* GREEK SMALL LETTER DIGAMMA -> 'w' */ + { 0xDE, 0x51 }, /* GREEK LETTER KOPPA -> 'Q' */ + { 0xDF, 0x71 }, /* GREEK SMALL LETTER KOPPA -> 'q' */ + { 0xE4, 0x46 }, /* COPTIC CAPITAL LETTER FEI -> 'F' */ + { 0xE5, 0x66 }, /* COPTIC SMALL LETTER FEI -> 'f' */ + { 0xE8, 0x48 }, /* COPTIC CAPITAL LETTER HORI -> 'H' */ + { 0xE9, 0x68 }, /* COPTIC SMALL LETTER HORI -> 'h' */ + { 0xEA, 0x47 }, /* COPTIC CAPITAL LETTER GANGIA -> 'G' */ + { 0xEB, 0x67 }, /* COPTIC SMALL LETTER GANGIA -> 'g' */ + { 0xF0, 0x6B }, /* GREEK KAPPA SYMBOL -> 'k' */ + { 0xF1, 0x72 }, /* GREEK RHO SYMBOL -> 'r' */ + { 0xF2, 0x63 }, /* GREEK LUNATE SIGMA SYMBOL -> 'c' */ + { 0xF3, 0x6A }, /* GREEK LETTER YOT -> 'j' */ + /* Entries for page 0x04 */ + { 0x06, 0x49 }, /* CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I -> 'I' */ + { 0x08, 0x4A }, /* CYRILLIC CAPITAL LETTER JE -> 'J' */ + { 0x0D, 0x49 }, /* CYRILLIC CAPITAL LETTER I WITH GRAVE -> 'I' */ + { 0x0E, 0x55 }, /* CYRILLIC CAPITAL LETTER SHORT U -> 'U' */ + { 0x10, 0x41 }, /* CYRILLIC CAPITAL LETTER A -> 'A' */ + { 0x11, 0x42 }, /* CYRILLIC CAPITAL LETTER BE -> 'B' */ + { 0x12, 0x56 }, /* CYRILLIC CAPITAL LETTER VE -> 'V' */ + { 0x13, 0x47 }, /* CYRILLIC CAPITAL LETTER GHE -> 'G' */ + { 0x14, 0x44 }, /* CYRILLIC CAPITAL LETTER DE -> 'D' */ + { 0x15, 0x45 }, /* CYRILLIC CAPITAL LETTER IE -> 'E' */ + { 0x17, 0x5A }, /* CYRILLIC CAPITAL LETTER ZE -> 'Z' */ + { 0x18, 0x49 }, /* CYRILLIC CAPITAL LETTER I -> 'I' */ + { 0x19, 0x49 }, /* CYRILLIC CAPITAL LETTER SHORT I -> 'I' */ + { 0x1A, 0x4B }, /* CYRILLIC CAPITAL LETTER KA -> 'K' */ + { 0x1B, 0x4C }, /* CYRILLIC CAPITAL LETTER EL -> 'L' */ + { 0x1C, 0x4D }, /* CYRILLIC CAPITAL LETTER EM -> 'M' */ + { 0x1D, 0x4E }, /* CYRILLIC CAPITAL LETTER EN -> 'N' */ + { 
0x1E, 0x4F }, /* CYRILLIC CAPITAL LETTER O -> 'O' */ + { 0x1F, 0x50 }, /* CYRILLIC CAPITAL LETTER PE -> 'P' */ + { 0x20, 0x52 }, /* CYRILLIC CAPITAL LETTER ER -> 'R' */ + { 0x21, 0x53 }, /* CYRILLIC CAPITAL LETTER ES -> 'S' */ + { 0x22, 0x54 }, /* CYRILLIC CAPITAL LETTER TE -> 'T' */ + { 0x23, 0x55 }, /* CYRILLIC CAPITAL LETTER U -> 'U' */ + { 0x24, 0x46 }, /* CYRILLIC CAPITAL LETTER EF -> 'F' */ + { 0x2A, 0x27 }, /* CYRILLIC CAPITAL LETTER HARD SIGN -> ''' */ + { 0x2B, 0x59 }, /* CYRILLIC CAPITAL LETTER YERU -> 'Y' */ + { 0x2C, 0x27 }, /* CYRILLIC CAPITAL LETTER SOFT SIGN -> ''' */ + { 0x2D, 0x45 }, /* CYRILLIC CAPITAL LETTER E -> 'E' */ + { 0x30, 0x61 }, /* CYRILLIC SMALL LETTER A -> 'a' */ + { 0x31, 0x62 }, /* CYRILLIC SMALL LETTER BE -> 'b' */ + { 0x32, 0x76 }, /* CYRILLIC SMALL LETTER VE -> 'v' */ + { 0x33, 0x67 }, /* CYRILLIC SMALL LETTER GHE -> 'g' */ + { 0x34, 0x64 }, /* CYRILLIC SMALL LETTER DE -> 'd' */ + { 0x35, 0x65 }, /* CYRILLIC SMALL LETTER IE -> 'e' */ + { 0x37, 0x7A }, /* CYRILLIC SMALL LETTER ZE -> 'z' */ + { 0x38, 0x69 }, /* CYRILLIC SMALL LETTER I -> 'i' */ + { 0x39, 0x69 }, /* CYRILLIC SMALL LETTER SHORT I -> 'i' */ + { 0x3A, 0x6B }, /* CYRILLIC SMALL LETTER KA -> 'k' */ + { 0x3B, 0x6C }, /* CYRILLIC SMALL LETTER EL -> 'l' */ + { 0x3C, 0x6D }, /* CYRILLIC SMALL LETTER EM -> 'm' */ + { 0x3D, 0x6E }, /* CYRILLIC SMALL LETTER EN -> 'n' */ + { 0x3E, 0x6F }, /* CYRILLIC SMALL LETTER O -> 'o' */ + { 0x3F, 0x70 }, /* CYRILLIC SMALL LETTER PE -> 'p' */ + { 0x40, 0x72 }, /* CYRILLIC SMALL LETTER ER -> 'r' */ + { 0x41, 0x73 }, /* CYRILLIC SMALL LETTER ES -> 's' */ + { 0x42, 0x74 }, /* CYRILLIC SMALL LETTER TE -> 't' */ + { 0x43, 0x75 }, /* CYRILLIC SMALL LETTER U -> 'u' */ + { 0x44, 0x66 }, /* CYRILLIC SMALL LETTER EF -> 'f' */ + { 0x4A, 0x27 }, /* CYRILLIC SMALL LETTER HARD SIGN -> ''' */ + { 0x4B, 0x79 }, /* CYRILLIC SMALL LETTER YERU -> 'y' */ + { 0x4C, 0x27 }, /* CYRILLIC SMALL LETTER SOFT SIGN -> ''' */ + { 0x4D, 0x65 }, /* CYRILLIC SMALL LETTER E 
-> 'e' */ + { 0x56, 0x69 }, /* CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I -> 'i' */ + { 0x58, 0x6A }, /* CYRILLIC SMALL LETTER JE -> 'j' */ + { 0x5D, 0x69 }, /* CYRILLIC SMALL LETTER I WITH GRAVE -> 'i' */ + { 0x5E, 0x75 }, /* CYRILLIC SMALL LETTER SHORT U -> 'u' */ + { 0x60, 0x4F }, /* CYRILLIC CAPITAL LETTER OMEGA -> 'O' */ + { 0x61, 0x6F }, /* CYRILLIC SMALL LETTER OMEGA -> 'o' */ + { 0x62, 0x45 }, /* CYRILLIC CAPITAL LETTER YAT -> 'E' */ + { 0x63, 0x65 }, /* CYRILLIC SMALL LETTER YAT -> 'e' */ + { 0x66, 0x45 }, /* CYRILLIC CAPITAL LETTER LITTLE YUS -> 'E' */ + { 0x67, 0x65 }, /* CYRILLIC SMALL LETTER LITTLE YUS -> 'e' */ + { 0x6A, 0x4F }, /* CYRILLIC CAPITAL LETTER BIG YUS -> 'O' */ + { 0x6B, 0x6F }, /* CYRILLIC SMALL LETTER BIG YUS -> 'o' */ + { 0x72, 0x46 }, /* CYRILLIC CAPITAL LETTER FITA -> 'F' */ + { 0x73, 0x66 }, /* CYRILLIC SMALL LETTER FITA -> 'f' */ + { 0x74, 0x59 }, /* CYRILLIC CAPITAL LETTER IZHITSA -> 'Y' */ + { 0x75, 0x79 }, /* CYRILLIC SMALL LETTER IZHITSA -> 'y' */ + { 0x76, 0x59 }, /* CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT -> 'Y' */ + { 0x77, 0x79 }, /* CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT -> 'y' */ + { 0x78, 0x75 }, /* CYRILLIC CAPITAL LETTER UK -> 'u' */ + { 0x79, 0x75 }, /* CYRILLIC SMALL LETTER UK -> 'u' */ + { 0x7A, 0x4F }, /* CYRILLIC CAPITAL LETTER ROUND OMEGA -> 'O' */ + { 0x7B, 0x6F }, /* CYRILLIC SMALL LETTER ROUND OMEGA -> 'o' */ + { 0x7C, 0x4F }, /* CYRILLIC CAPITAL LETTER OMEGA WITH TITLO -> 'O' */ + { 0x7D, 0x6F }, /* CYRILLIC SMALL LETTER OMEGA WITH TITLO -> 'o' */ + { 0x80, 0x51 }, /* CYRILLIC CAPITAL LETTER KOPPA -> 'Q' */ + { 0x81, 0x71 }, /* CYRILLIC SMALL LETTER KOPPA -> 'q' */ + { 0x8C, 0x22 }, /* CYRILLIC CAPITAL LETTER SEMISOFT SIGN -> '"' */ + { 0x8D, 0x22 }, /* CYRILLIC SMALL LETTER SEMISOFT SIGN -> '"' */ + { 0xAE, 0x55 }, /* CYRILLIC CAPITAL LETTER STRAIGHT U -> 'U' */ + { 0xAF, 0x75 }, /* CYRILLIC SMALL LETTER STRAIGHT U -> 'u' */ + { 0xBA, 0x48 }, /* CYRILLIC CAPITAL LETTER 
SHHA -> 'H' */ + { 0xBB, 0x68 }, /* CYRILLIC SMALL LETTER SHHA -> 'h' */ + { 0xC0, 0x60 }, /* CYRILLIC LETTER PALOCHKA -> '`' */ + { 0xD0, 0x61 }, /* CYRILLIC CAPITAL LETTER A WITH BREVE -> 'a' */ + { 0xD1, 0x61 }, /* CYRILLIC SMALL LETTER A WITH BREVE -> 'a' */ + { 0xD2, 0x41 }, /* CYRILLIC CAPITAL LETTER A WITH DIAERESIS -> 'A' */ + { 0xD3, 0x61 }, /* CYRILLIC SMALL LETTER A WITH DIAERESIS -> 'a' */ + { 0xD8, 0x00 }, /* CYRILLIC CAPITAL LETTER SCHWA -> ... */ + { 0xDB, 0x40 }, /* CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS -> '@' */ + { 0xDE, 0x5A }, /* CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS -> 'Z' */ + { 0xDF, 0x7A }, /* CYRILLIC SMALL LETTER ZE WITH DIAERESIS -> 'z' */ + { 0xE2, 0x49 }, /* CYRILLIC CAPITAL LETTER I WITH MACRON -> 'I' */ + { 0xE3, 0x69 }, /* CYRILLIC SMALL LETTER I WITH MACRON -> 'i' */ + { 0xE4, 0x49 }, /* CYRILLIC CAPITAL LETTER I WITH DIAERESIS -> 'I' */ + { 0xE5, 0x69 }, /* CYRILLIC SMALL LETTER I WITH DIAERESIS -> 'i' */ + { 0xE6, 0x4F }, /* CYRILLIC CAPITAL LETTER O WITH DIAERESIS -> 'O' */ + { 0xE7, 0x6F }, /* CYRILLIC SMALL LETTER O WITH DIAERESIS -> 'o' */ + { 0xE8, 0x4F }, /* CYRILLIC CAPITAL LETTER BARRED O -> 'O' */ + { 0xE9, 0x6F }, /* CYRILLIC SMALL LETTER BARRED O -> 'o' */ + { 0xEA, 0x4F }, /* CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS -> 'O' */ + { 0xEB, 0x6F }, /* CYRILLIC SMALL LETTER BARRED O WITH DIAERESIS -> 'o' */ + { 0xEC, 0x45 }, /* CYRILLIC CAPITAL LETTER E WITH DIAERESIS -> 'E' */ + { 0xED, 0x65 }, /* CYRILLIC SMALL LETTER E WITH DIAERESIS -> 'e' */ + { 0xEE, 0x55 }, /* CYRILLIC CAPITAL LETTER U WITH MACRON -> 'U' */ + { 0xEF, 0x75 }, /* CYRILLIC SMALL LETTER U WITH MACRON -> 'u' */ + { 0xF0, 0x55 }, /* CYRILLIC CAPITAL LETTER U WITH DIAERESIS -> 'U' */ + { 0xF1, 0x75 }, /* CYRILLIC SMALL LETTER U WITH DIAERESIS -> 'u' */ + { 0xF2, 0x55 }, /* CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE -> 'U' */ + { 0xF3, 0x75 }, /* CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE -> 'u' */ + { 0xF8, 0x59 }, /* CYRILLIC CAPITAL LETTER 
YERU WITH DIAERESIS -> 'Y' */ + { 0xF9, 0x79 }, /* CYRILLIC SMALL LETTER YERU WITH DIAERESIS -> 'y' */ + /* Entries for page 0x05 */ + { 0x31, 0x41 }, /* ARMENIAN CAPITAL LETTER AYB -> 'A' */ + { 0x32, 0x42 }, /* ARMENIAN CAPITAL LETTER BEN -> 'B' */ + { 0x33, 0x47 }, /* ARMENIAN CAPITAL LETTER GIM -> 'G' */ + { 0x34, 0x44 }, /* ARMENIAN CAPITAL LETTER DA -> 'D' */ + { 0x35, 0x45 }, /* ARMENIAN CAPITAL LETTER ECH -> 'E' */ + { 0x36, 0x5A }, /* ARMENIAN CAPITAL LETTER ZA -> 'Z' */ + { 0x37, 0x45 }, /* ARMENIAN CAPITAL LETTER EH -> 'E' */ + { 0x38, 0x45 }, /* ARMENIAN CAPITAL LETTER ET -> 'E' */ + { 0x3B, 0x49 }, /* ARMENIAN CAPITAL LETTER INI -> 'I' */ + { 0x3C, 0x4C }, /* ARMENIAN CAPITAL LETTER LIWN -> 'L' */ + { 0x3F, 0x4B }, /* ARMENIAN CAPITAL LETTER KEN -> 'K' */ + { 0x40, 0x48 }, /* ARMENIAN CAPITAL LETTER HO -> 'H' */ + { 0x44, 0x4D }, /* ARMENIAN CAPITAL LETTER MEN -> 'M' */ + { 0x45, 0x59 }, /* ARMENIAN CAPITAL LETTER YI -> 'Y' */ + { 0x46, 0x4E }, /* ARMENIAN CAPITAL LETTER NOW -> 'N' */ + { 0x48, 0x4F }, /* ARMENIAN CAPITAL LETTER VO -> 'O' */ + { 0x4A, 0x50 }, /* ARMENIAN CAPITAL LETTER PEH -> 'P' */ + { 0x4B, 0x4A }, /* ARMENIAN CAPITAL LETTER JHEH -> 'J' */ + { 0x4D, 0x53 }, /* ARMENIAN CAPITAL LETTER SEH -> 'S' */ + { 0x4E, 0x56 }, /* ARMENIAN CAPITAL LETTER VEW -> 'V' */ + { 0x4F, 0x54 }, /* ARMENIAN CAPITAL LETTER TIWN -> 'T' */ + { 0x50, 0x52 }, /* ARMENIAN CAPITAL LETTER REH -> 'R' */ + { 0x52, 0x57 }, /* ARMENIAN CAPITAL LETTER YIWN -> 'W' */ + { 0x55, 0x4F }, /* ARMENIAN CAPITAL LETTER OH -> 'O' */ + { 0x56, 0x46 }, /* ARMENIAN CAPITAL LETTER FEH -> 'F' */ + { 0x59, 0x3C }, /* ARMENIAN MODIFIER LETTER LEFT HALF RING -> '<' */ + { 0x5A, 0x27 }, /* ARMENIAN APOSTROPHE -> ''' */ + { 0x5B, 0x2F }, /* ARMENIAN EMPHASIS MARK -> '/' */ + { 0x5C, 0x21 }, /* ARMENIAN EXCLAMATION MARK -> '!' */ + { 0x5D, 0x2C }, /* ARMENIAN COMMA -> ',' */ + { 0x5E, 0x3F }, /* ARMENIAN QUESTION MARK -> '?' */ + { 0x5F, 0x2E }, /* ARMENIAN ABBREVIATION MARK -> '.' 
*/ + { 0x61, 0x61 }, /* ARMENIAN SMALL LETTER AYB -> 'a' */ + { 0x62, 0x62 }, /* ARMENIAN SMALL LETTER BEN -> 'b' */ + { 0x63, 0x67 }, /* ARMENIAN SMALL LETTER GIM -> 'g' */ + { 0x64, 0x64 }, /* ARMENIAN SMALL LETTER DA -> 'd' */ + { 0x65, 0x65 }, /* ARMENIAN SMALL LETTER ECH -> 'e' */ + { 0x66, 0x7A }, /* ARMENIAN SMALL LETTER ZA -> 'z' */ + { 0x67, 0x65 }, /* ARMENIAN SMALL LETTER EH -> 'e' */ + { 0x68, 0x65 }, /* ARMENIAN SMALL LETTER ET -> 'e' */ + { 0x6B, 0x69 }, /* ARMENIAN SMALL LETTER INI -> 'i' */ + { 0x6C, 0x6C }, /* ARMENIAN SMALL LETTER LIWN -> 'l' */ + { 0x6F, 0x6B }, /* ARMENIAN SMALL LETTER KEN -> 'k' */ + { 0x70, 0x68 }, /* ARMENIAN SMALL LETTER HO -> 'h' */ + { 0x74, 0x6D }, /* ARMENIAN SMALL LETTER MEN -> 'm' */ + { 0x75, 0x79 }, /* ARMENIAN SMALL LETTER YI -> 'y' */ + { 0x76, 0x6E }, /* ARMENIAN SMALL LETTER NOW -> 'n' */ + { 0x78, 0x6F }, /* ARMENIAN SMALL LETTER VO -> 'o' */ + { 0x7A, 0x70 }, /* ARMENIAN SMALL LETTER PEH -> 'p' */ + { 0x7B, 0x6A }, /* ARMENIAN SMALL LETTER JHEH -> 'j' */ + { 0x7D, 0x73 }, /* ARMENIAN SMALL LETTER SEH -> 's' */ + { 0x7E, 0x76 }, /* ARMENIAN SMALL LETTER VEW -> 'v' */ + { 0x7F, 0x74 }, /* ARMENIAN SMALL LETTER TIWN -> 't' */ + { 0x80, 0x72 }, /* ARMENIAN SMALL LETTER REH -> 'r' */ + { 0x82, 0x77 }, /* ARMENIAN SMALL LETTER YIWN -> 'w' */ + { 0x85, 0x6F }, /* ARMENIAN SMALL LETTER OH -> 'o' */ + { 0x86, 0x66 }, /* ARMENIAN SMALL LETTER FEH -> 'f' */ + { 0x89, 0x3A }, /* ARMENIAN FULL STOP -> ':' */ + { 0x8A, 0x2D }, /* ARMENIAN HYPHEN -> '-' */ + { 0xB1, 0x65 }, /* HEBREW POINT HATAF SEGOL -> 'e' */ + { 0xB2, 0x61 }, /* HEBREW POINT HATAF PATAH -> 'a' */ + { 0xB3, 0x6F }, /* HEBREW POINT HATAF QAMATS -> 'o' */ + { 0xB4, 0x69 }, /* HEBREW POINT HIRIQ -> 'i' */ + { 0xB5, 0x65 }, /* HEBREW POINT TSERE -> 'e' */ + { 0xB6, 0x65 }, /* HEBREW POINT SEGOL -> 'e' */ + { 0xB7, 0x61 }, /* HEBREW POINT PATAH -> 'a' */ + { 0xB8, 0x61 }, /* HEBREW POINT QAMATS -> 'a' */ + { 0xB9, 0x6F }, /* HEBREW POINT HOLAM -> 'o' */ + { 
0xBA, 0x6F }, /* HEBREW POINT HOLAM HASER FOR VAV -> 'o' */ + { 0xBB, 0x75 }, /* HEBREW POINT QUBUTS -> 'u' */ + { 0xBE, 0x2D }, /* HEBREW PUNCTUATION MAQAF -> '-' */ + { 0xC0, 0x7C }, /* HEBREW PUNCTUATION PASEQ -> '|' */ + { 0xC3, 0x2E }, /* HEBREW PUNCTUATION SOF PASUQ -> '.' */ + { 0xC6, 0x6E }, /* HEBREW PUNCTUATION NUN HAFUKHA -> 'n' */ + { 0xC7, 0x6F }, /* HEBREW POINT QAMATS QATAN -> 'o' */ + { 0xD0, 0x41 }, /* HEBREW LETTER ALEF -> 'A' */ + { 0xD1, 0x62 }, /* HEBREW LETTER BET -> 'b' */ + { 0xD2, 0x67 }, /* HEBREW LETTER GIMEL -> 'g' */ + { 0xD3, 0x64 }, /* HEBREW LETTER DALET -> 'd' */ + { 0xD4, 0x68 }, /* HEBREW LETTER HE -> 'h' */ + { 0xD5, 0x76 }, /* HEBREW LETTER VAV -> 'v' */ + { 0xD6, 0x7A }, /* HEBREW LETTER ZAYIN -> 'z' */ + { 0xD7, 0x48 }, /* HEBREW LETTER HET -> 'H' */ + { 0xD8, 0x54 }, /* HEBREW LETTER TET -> 'T' */ + { 0xD9, 0x79 }, /* HEBREW LETTER YOD -> 'y' */ + { 0xDC, 0x6C }, /* HEBREW LETTER LAMED -> 'l' */ + { 0xDD, 0x6D }, /* HEBREW LETTER FINAL MEM -> 'm' */ + { 0xDE, 0x6D }, /* HEBREW LETTER MEM -> 'm' */ + { 0xDF, 0x6E }, /* HEBREW LETTER FINAL NUN -> 'n' */ + { 0xE0, 0x6E }, /* HEBREW LETTER NUN -> 'n' */ + { 0xE1, 0x73 }, /* HEBREW LETTER SAMEKH -> 's' */ + { 0xE2, 0x60 }, /* HEBREW LETTER AYIN -> '`' */ + { 0xE3, 0x70 }, /* HEBREW LETTER FINAL PE -> 'p' */ + { 0xE4, 0x70 }, /* HEBREW LETTER PE -> 'p' */ + { 0xE7, 0x6B }, /* HEBREW LETTER QOF -> 'k' */ + { 0xE8, 0x72 }, /* HEBREW LETTER RESH -> 'r' */ + { 0xEA, 0x74 }, /* HEBREW LETTER TAV -> 't' */ + { 0xF0, 0x56 }, /* HEBREW LIGATURE YIDDISH DOUBLE VAV -> 'V' */ + { 0xF3, 0x27 }, /* HEBREW PUNCTUATION GERESH -> ''' */ + { 0xF4, 0x22 }, /* HEBREW PUNCTUATION GERSHAYIM -> '"' */ + /* Entries for page 0x06 */ + { 0x0C, 0x2C }, /* ARABIC COMMA -> ',' */ + { 0x1B, 0x3B }, /* ARABIC SEMICOLON -> ';' */ + { 0x1F, 0x3F }, /* ARABIC QUESTION MARK -> '?' 
*/ + { 0x22, 0x61 }, /* ARABIC LETTER ALEF WITH MADDA ABOVE -> 'a' */ + { 0x23, 0x27 }, /* ARABIC LETTER ALEF WITH HAMZA ABOVE -> ''' */ + { 0x28, 0x62 }, /* ARABIC LETTER BEH -> 'b' */ + { 0x29, 0x40 }, /* ARABIC LETTER TEH MARBUTA -> '@' */ + { 0x2A, 0x74 }, /* ARABIC LETTER TEH -> 't' */ + { 0x2C, 0x6A }, /* ARABIC LETTER JEEM -> 'j' */ + { 0x2D, 0x48 }, /* ARABIC LETTER HAH -> 'H' */ + { 0x2F, 0x64 }, /* ARABIC LETTER DAL -> 'd' */ + { 0x31, 0x72 }, /* ARABIC LETTER REH -> 'r' */ + { 0x32, 0x7A }, /* ARABIC LETTER ZAIN -> 'z' */ + { 0x33, 0x73 }, /* ARABIC LETTER SEEN -> 's' */ + { 0x35, 0x53 }, /* ARABIC LETTER SAD -> 'S' */ + { 0x36, 0x44 }, /* ARABIC LETTER DAD -> 'D' */ + { 0x37, 0x54 }, /* ARABIC LETTER TAH -> 'T' */ + { 0x38, 0x5A }, /* ARABIC LETTER ZAH -> 'Z' */ + { 0x39, 0x60 }, /* ARABIC LETTER AIN -> '`' */ + { 0x3A, 0x47 }, /* ARABIC LETTER GHAIN -> 'G' */ + { 0x41, 0x66 }, /* ARABIC LETTER FEH -> 'f' */ + { 0x42, 0x71 }, /* ARABIC LETTER QAF -> 'q' */ + { 0x43, 0x6B }, /* ARABIC LETTER KAF -> 'k' */ + { 0x44, 0x6C }, /* ARABIC LETTER LAM -> 'l' */ + { 0x45, 0x6D }, /* ARABIC LETTER MEEM -> 'm' */ + { 0x46, 0x6E }, /* ARABIC LETTER NOON -> 'n' */ + { 0x47, 0x68 }, /* ARABIC LETTER HEH -> 'h' */ + { 0x48, 0x77 }, /* ARABIC LETTER WAW -> 'w' */ + { 0x49, 0x7E }, /* ARABIC LETTER ALEF MAKSURA -> '~' */ + { 0x4A, 0x79 }, /* ARABIC LETTER YEH -> 'y' */ + { 0x4E, 0x61 }, /* ARABIC FATHA -> 'a' */ + { 0x4F, 0x75 }, /* ARABIC DAMMA -> 'u' */ + { 0x50, 0x69 }, /* ARABIC KASRA -> 'i' */ + { 0x51, 0x57 }, /* ARABIC SHADDA -> 'W' */ + { 0x54, 0x27 }, /* ARABIC HAMZA ABOVE -> ''' */ + { 0x55, 0x27 }, /* ARABIC HAMZA BELOW -> ''' */ + { 0x60, 0x30 }, /* ARABIC-INDIC DIGIT ZERO -> '0' */ + { 0x61, 0x31 }, /* ARABIC-INDIC DIGIT ONE -> '1' */ + { 0x62, 0x32 }, /* ARABIC-INDIC DIGIT TWO -> '2' */ + { 0x63, 0x33 }, /* ARABIC-INDIC DIGIT THREE -> '3' */ + { 0x64, 0x34 }, /* ARABIC-INDIC DIGIT FOUR -> '4' */ + { 0x65, 0x35 }, /* ARABIC-INDIC DIGIT FIVE -> '5' */ + { 
0x66, 0x36 }, /* ARABIC-INDIC DIGIT SIX -> '6' */ + { 0x67, 0x37 }, /* ARABIC-INDIC DIGIT SEVEN -> '7' */ + { 0x68, 0x38 }, /* ARABIC-INDIC DIGIT EIGHT -> '8' */ + { 0x69, 0x39 }, /* ARABIC-INDIC DIGIT NINE -> '9' */ + { 0x6A, 0x25 }, /* ARABIC PERCENT SIGN -> '%' */ + { 0x6B, 0x2E }, /* ARABIC DECIMAL SEPARATOR -> '.' */ + { 0x6C, 0x2C }, /* ARABIC THOUSANDS SEPARATOR -> ',' */ + { 0x6D, 0x2A }, /* ARABIC FIVE POINTED STAR -> '*' */ + { 0x71, 0x00 }, /* ARABIC LETTER ALEF WASLA -> ... */ + { 0x73, 0x27 }, /* ARABIC LETTER ALEF WITH WAVY HAMZA BELOW -> ''' */ + { 0x75, 0x27 }, /* ARABIC LETTER HIGH HAMZA ALEF -> ''' */ + { 0x7B, 0x62 }, /* ARABIC LETTER BEEH -> 'b' */ + { 0x7C, 0x74 }, /* ARABIC LETTER TEH WITH RING -> 't' */ + { 0x7D, 0x54 }, /* ARABIC LETTER TEH WITH THREE DOTS ABOVE DOWNWARDS -> 'T' */ + { 0x7E, 0x70 }, /* ARABIC LETTER PEH -> 'p' */ + { 0x82, 0x48 }, /* ARABIC LETTER HAH WITH TWO DOTS VERTICAL ABOVE -> 'H' */ + { 0x85, 0x48 }, /* ARABIC LETTER HAH WITH THREE DOTS ABOVE -> 'H' */ + { 0x89, 0x44 }, /* ARABIC LETTER DAL WITH RING -> 'D' */ + { 0x8A, 0x44 }, /* ARABIC LETTER DAL WITH DOT BELOW -> 'D' */ + { 0x8E, 0x64 }, /* ARABIC LETTER DUL -> 'd' */ + { 0x8F, 0x44 }, /* ARABIC LETTER DAL WITH THREE DOTS ABOVE DOWNWARDS -> 'D' */ + { 0x90, 0x44 }, /* ARABIC LETTER DAL WITH FOUR DOTS ABOVE -> 'D' */ + { 0x92, 0x00 }, /* ARABIC LETTER REH WITH SMALL V -> ... */ + { 0x97, 0x52 }, /* ARABIC LETTER REH WITH TWO DOTS ABOVE -> 'R' */ + { 0x98, 0x6A }, /* ARABIC LETTER JEH -> 'j' */ + { 0x99, 0x52 }, /* ARABIC LETTER REH WITH FOUR DOTS ABOVE -> 'R' */ + { 0x9A, 0x00 }, /* ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE -> ... */ + { 0x9E, 0x53 }, /* ARABIC LETTER SAD WITH THREE DOTS ABOVE -> 'S' */ + { 0x9F, 0x54 }, /* ARABIC LETTER TAH WITH THREE DOTS ABOVE -> 'T' */ + { 0xA1, 0x00 }, /* ARABIC LETTER DOTLESS FEH -> ... 
*/ + { 0xA3, 0x46 }, /* ARABIC LETTER FEH WITH DOT BELOW -> 'F' */ + { 0xA4, 0x76 }, /* ARABIC LETTER VEH -> 'v' */ + { 0xA5, 0x66 }, /* ARABIC LETTER FEH WITH THREE DOTS BELOW -> 'f' */ + { 0xA7, 0x51 }, /* ARABIC LETTER QAF WITH DOT ABOVE -> 'Q' */ + { 0xA8, 0x51 }, /* ARABIC LETTER QAF WITH THREE DOTS ABOVE -> 'Q' */ + { 0xAA, 0x6B }, /* ARABIC LETTER SWASH KAF -> 'k' */ + { 0xAB, 0x4B }, /* ARABIC LETTER KAF WITH RING -> 'K' */ + { 0xAC, 0x4B }, /* ARABIC LETTER KAF WITH DOT ABOVE -> 'K' */ + { 0xAE, 0x4B }, /* ARABIC LETTER KAF WITH THREE DOTS BELOW -> 'K' */ + { 0xAF, 0x67 }, /* ARABIC LETTER GAF -> 'g' */ + { 0xB0, 0x47 }, /* ARABIC LETTER GAF WITH RING -> 'G' */ + { 0xB1, 0x4E }, /* ARABIC LETTER NGOEH -> 'N' */ + { 0xB2, 0x00 }, /* ARABIC LETTER GAF WITH TWO DOTS BELOW -> ... */ + { 0xB4, 0x47 }, /* ARABIC LETTER GAF WITH THREE DOTS ABOVE -> 'G' */ + { 0xB5, 0x00 }, /* ARABIC LETTER LAM WITH SMALL V -> ... */ + { 0xB8, 0x4C }, /* ARABIC LETTER LAM WITH THREE DOTS BELOW -> 'L' */ + { 0xB9, 0x00 }, /* ARABIC LETTER NOON WITH DOT BELOW -> ... */ + { 0xBD, 0x4E }, /* ARABIC LETTER NOON WITH THREE DOTS ABOVE -> 'N' */ + { 0xBE, 0x68 }, /* ARABIC LETTER HEH DOACHASHMEE -> 'h' */ + { 0xC1, 0x68 }, /* ARABIC LETTER HEH GOAL -> 'h' */ + { 0xC2, 0x48 }, /* ARABIC LETTER HEH GOAL WITH HAMZA ABOVE -> 'H' */ + { 0xC3, 0x40 }, /* ARABIC LETTER TEH MARBUTA GOAL -> '@' */ + { 0xC4, 0x57 }, /* ARABIC LETTER WAW WITH RING -> 'W' */ + { 0xC7, 0x75 }, /* ARABIC LETTER U -> 'u' */ + { 0xCA, 0x57 }, /* ARABIC LETTER WAW WITH TWO DOTS ABOVE -> 'W' */ + { 0xCB, 0x76 }, /* ARABIC LETTER VE -> 'v' */ + { 0xCC, 0x79 }, /* ARABIC LETTER FARSI YEH -> 'y' */ + { 0xCD, 0x59 }, /* ARABIC LETTER YEH WITH TAIL -> 'Y' */ + { 0xCE, 0x59 }, /* ARABIC LETTER YEH WITH SMALL V -> 'Y' */ + { 0xCF, 0x57 }, /* ARABIC LETTER WAW WITH DOT ABOVE -> 'W' */ + { 0xD2, 0x79 }, /* ARABIC LETTER YEH BARREE -> 'y' */ + { 0xD4, 0x2E }, /* ARABIC FULL STOP -> '.' 
*/ + { 0xDD, 0x40 }, /* ARABIC END OF AYAH -> '@' */ + { 0xDE, 0x23 }, /* ARABIC START OF RUB EL HIZB -> '#' */ + { 0xE9, 0x5E }, /* ARABIC PLACE OF SAJDAH -> '^' */ + { 0xF0, 0x30 }, /* EXTENDED ARABIC-INDIC DIGIT ZERO -> '0' */ + { 0xF1, 0x31 }, /* EXTENDED ARABIC-INDIC DIGIT ONE -> '1' */ + { 0xF2, 0x32 }, /* EXTENDED ARABIC-INDIC DIGIT TWO -> '2' */ + { 0xF3, 0x33 }, /* EXTENDED ARABIC-INDIC DIGIT THREE -> '3' */ + { 0xF4, 0x34 }, /* EXTENDED ARABIC-INDIC DIGIT FOUR -> '4' */ + { 0xF5, 0x35 }, /* EXTENDED ARABIC-INDIC DIGIT FIVE -> '5' */ + { 0xF6, 0x36 }, /* EXTENDED ARABIC-INDIC DIGIT SIX -> '6' */ + { 0xF7, 0x37 }, /* EXTENDED ARABIC-INDIC DIGIT SEVEN -> '7' */ + { 0xF8, 0x38 }, /* EXTENDED ARABIC-INDIC DIGIT EIGHT -> '8' */ + { 0xF9, 0x39 }, /* EXTENDED ARABIC-INDIC DIGIT NINE -> '9' */ + { 0xFB, 0x44 }, /* ARABIC LETTER DAD WITH DOT BELOW -> 'D' */ + { 0xFD, 0x26 }, /* ARABIC SIGN SINDHI AMPERSAND -> '&' */ + /* Entries for page 0x07 */ + { 0x01, 0x2F }, /* SYRIAC SUPRALINEAR FULL STOP -> '/' */ + { 0x02, 0x2C }, /* SYRIAC SUBLINEAR FULL STOP -> ',' */ + { 0x03, 0x21 }, /* SYRIAC SUPRALINEAR COLON -> '!' */ + { 0x04, 0x21 }, /* SYRIAC SUBLINEAR COLON -> '!' */ + { 0x05, 0x2D }, /* SYRIAC HORIZONTAL COLON -> '-' */ + { 0x06, 0x2C }, /* SYRIAC COLON SKEWED LEFT -> ',' */ + { 0x07, 0x2C }, /* SYRIAC COLON SKEWED RIGHT -> ',' */ + { 0x08, 0x3B }, /* SYRIAC SUPRALINEAR COLON SKEWED LEFT -> ';' */ + { 0x09, 0x3F }, /* SYRIAC SUBLINEAR COLON SKEWED RIGHT -> '?' 
*/ + { 0x0A, 0x7E }, /* SYRIAC CONTRACTION -> '~' */ + { 0x0B, 0x7B }, /* SYRIAC HARKLEAN OBELUS -> '{' */ + { 0x0C, 0x7D }, /* SYRIAC HARKLEAN METOBELUS -> '}' */ + { 0x0D, 0x2A }, /* SYRIAC HARKLEAN ASTERISCUS -> '*' */ + { 0x10, 0x27 }, /* SYRIAC LETTER ALAPH -> ''' */ + { 0x12, 0x62 }, /* SYRIAC LETTER BETH -> 'b' */ + { 0x13, 0x67 }, /* SYRIAC LETTER GAMAL -> 'g' */ + { 0x14, 0x67 }, /* SYRIAC LETTER GAMAL GARSHUNI -> 'g' */ + { 0x15, 0x64 }, /* SYRIAC LETTER DALATH -> 'd' */ + { 0x16, 0x64 }, /* SYRIAC LETTER DOTLESS DALATH RISH -> 'd' */ + { 0x17, 0x68 }, /* SYRIAC LETTER HE -> 'h' */ + { 0x18, 0x77 }, /* SYRIAC LETTER WAW -> 'w' */ + { 0x19, 0x7A }, /* SYRIAC LETTER ZAIN -> 'z' */ + { 0x1A, 0x48 }, /* SYRIAC LETTER HETH -> 'H' */ + { 0x1B, 0x74 }, /* SYRIAC LETTER TETH -> 't' */ + { 0x1C, 0x74 }, /* SYRIAC LETTER TETH GARSHUNI -> 't' */ + { 0x1D, 0x79 }, /* SYRIAC LETTER YUDH -> 'y' */ + { 0x1F, 0x6B }, /* SYRIAC LETTER KAPH -> 'k' */ + { 0x20, 0x6C }, /* SYRIAC LETTER LAMADH -> 'l' */ + { 0x21, 0x6D }, /* SYRIAC LETTER MIM -> 'm' */ + { 0x22, 0x6E }, /* SYRIAC LETTER NUN -> 'n' */ + { 0x23, 0x73 }, /* SYRIAC LETTER SEMKATH -> 's' */ + { 0x24, 0x73 }, /* SYRIAC LETTER FINAL SEMKATH -> 's' */ + { 0x25, 0x60 }, /* SYRIAC LETTER E -> '`' */ + { 0x26, 0x70 }, /* SYRIAC LETTER PE -> 'p' */ + { 0x27, 0x70 }, /* SYRIAC LETTER REVERSED PE -> 'p' */ + { 0x28, 0x53 }, /* SYRIAC LETTER SADHE -> 'S' */ + { 0x29, 0x71 }, /* SYRIAC LETTER QAPH -> 'q' */ + { 0x2A, 0x72 }, /* SYRIAC LETTER RISH -> 'r' */ + { 0x2C, 0x74 }, /* SYRIAC LETTER TAW -> 't' */ + { 0x30, 0x00 }, /* SYRIAC PTHAHA ABOVE -> ... */ + { 0x32, 0x61 }, /* SYRIAC PTHAHA DOTTED -> 'a' */ + { 0x33, 0x00 }, /* SYRIAC ZQAPHA ABOVE -> ... */ + { 0x35, 0x41 }, /* SYRIAC ZQAPHA DOTTED -> 'A' */ + { 0x36, 0x00 }, /* SYRIAC RBASA ABOVE -> ... 
*/ + { 0x38, 0x65 }, /* SYRIAC DOTTED ZLAMA HORIZONTAL -> 'e' */ + { 0x39, 0x45 }, /* SYRIAC DOTTED ZLAMA ANGULAR -> 'E' */ + { 0x3A, 0x69 }, /* SYRIAC HBASA ABOVE -> 'i' */ + { 0x3B, 0x69 }, /* SYRIAC HBASA BELOW -> 'i' */ + { 0x3C, 0x00 }, /* SYRIAC HBASA-ESASA DOTTED -> ... */ + { 0x3E, 0x75 }, /* SYRIAC ESASA BELOW -> 'u' */ + { 0x3F, 0x6F }, /* SYRIAC RWAHA -> 'o' */ + { 0x41, 0x60 }, /* SYRIAC QUSHSHAYA -> '`' */ + { 0x42, 0x27 }, /* SYRIAC RUKKAKHA -> ''' */ + { 0x45, 0x58 }, /* SYRIAC THREE DOTS ABOVE -> 'X' */ + { 0x46, 0x51 }, /* SYRIAC THREE DOTS BELOW -> 'Q' */ + { 0x47, 0x40 }, /* SYRIAC OBLIQUE LINE ABOVE -> '@' */ + { 0x48, 0x40 }, /* SYRIAC OBLIQUE LINE BELOW -> '@' */ + { 0x49, 0x7C }, /* SYRIAC MUSIC -> '|' */ + { 0x4A, 0x2B }, /* SYRIAC BARREKH -> '+' */ + { 0x80, 0x68 }, /* THAANA LETTER HAA -> 'h' */ + { 0x82, 0x6E }, /* THAANA LETTER NOONU -> 'n' */ + { 0x83, 0x72 }, /* THAANA LETTER RAA -> 'r' */ + { 0x84, 0x62 }, /* THAANA LETTER BAA -> 'b' */ + { 0x85, 0x4C }, /* THAANA LETTER LHAVIYANI -> 'L' */ + { 0x86, 0x6B }, /* THAANA LETTER KAAFU -> 'k' */ + { 0x87, 0x27 }, /* THAANA LETTER ALIFU -> ''' */ + { 0x88, 0x76 }, /* THAANA LETTER VAAVU -> 'v' */ + { 0x89, 0x6D }, /* THAANA LETTER MEEMU -> 'm' */ + { 0x8A, 0x66 }, /* THAANA LETTER FAAFU -> 'f' */ + { 0x8D, 0x6C }, /* THAANA LETTER LAAMU -> 'l' */ + { 0x8E, 0x67 }, /* THAANA LETTER GAAFU -> 'g' */ + { 0x90, 0x73 }, /* THAANA LETTER SEENU -> 's' */ + { 0x91, 0x64 }, /* THAANA LETTER DAVIYANI -> 'd' */ + { 0x92, 0x7A }, /* THAANA LETTER ZAVIYANI -> 'z' */ + { 0x93, 0x74 }, /* THAANA LETTER TAVIYANI -> 't' */ + { 0x94, 0x79 }, /* THAANA LETTER YAA -> 'y' */ + { 0x95, 0x70 }, /* THAANA LETTER PAVIYANI -> 'p' */ + { 0x96, 0x6A }, /* THAANA LETTER JAVIYANI -> 'j' */ + { 0x9C, 0x7A }, /* THAANA LETTER ZAA -> 'z' */ + { 0x9E, 0x73 }, /* THAANA LETTER SAADHU -> 's' */ + { 0x9F, 0x64 }, /* THAANA LETTER DAADHU -> 'd' */ + { 0xA0, 0x74 }, /* THAANA LETTER TO -> 't' */ + { 0xA1, 0x7A }, /* THAANA LETTER 
ZO -> 'z' */ + { 0xA2, 0x60 }, /* THAANA LETTER AINU -> '`' */ + { 0xA4, 0x71 }, /* THAANA LETTER QAAFU -> 'q' */ + { 0xA5, 0x77 }, /* THAANA LETTER WAAVU -> 'w' */ + { 0xA6, 0x61 }, /* THAANA ABAFILI -> 'a' */ + { 0xA8, 0x69 }, /* THAANA IBIFILI -> 'i' */ + { 0xAA, 0x75 }, /* THAANA UBUFILI -> 'u' */ + { 0xAC, 0x65 }, /* THAANA EBEFILI -> 'e' */ + { 0xAE, 0x6F }, /* THAANA OBOFILI -> 'o' */ + /* Entries for page 0x09 */ + { 0x01, 0x4E }, /* DEVANAGARI SIGN CANDRABINDU -> 'N' */ + { 0x02, 0x4E }, /* DEVANAGARI SIGN ANUSVARA -> 'N' */ + { 0x03, 0x48 }, /* DEVANAGARI SIGN VISARGA -> 'H' */ + { 0x05, 0x61 }, /* DEVANAGARI LETTER A -> 'a' */ + { 0x07, 0x69 }, /* DEVANAGARI LETTER I -> 'i' */ + { 0x09, 0x75 }, /* DEVANAGARI LETTER U -> 'u' */ + { 0x0B, 0x52 }, /* DEVANAGARI LETTER VOCALIC R -> 'R' */ + { 0x0C, 0x4C }, /* DEVANAGARI LETTER VOCALIC L -> 'L' */ + { 0x0E, 0x65 }, /* DEVANAGARI LETTER SHORT E -> 'e' */ + { 0x0F, 0x65 }, /* DEVANAGARI LETTER E -> 'e' */ + { 0x12, 0x6F }, /* DEVANAGARI LETTER SHORT O -> 'o' */ + { 0x13, 0x6F }, /* DEVANAGARI LETTER O -> 'o' */ + { 0x15, 0x6B }, /* DEVANAGARI LETTER KA -> 'k' */ + { 0x17, 0x67 }, /* DEVANAGARI LETTER GA -> 'g' */ + { 0x1A, 0x63 }, /* DEVANAGARI LETTER CA -> 'c' */ + { 0x1C, 0x6A }, /* DEVANAGARI LETTER JA -> 'j' */ + { 0x24, 0x74 }, /* DEVANAGARI LETTER TA -> 't' */ + { 0x26, 0x64 }, /* DEVANAGARI LETTER DA -> 'd' */ + { 0x28, 0x6E }, /* DEVANAGARI LETTER NA -> 'n' */ + { 0x2A, 0x70 }, /* DEVANAGARI LETTER PA -> 'p' */ + { 0x2C, 0x62 }, /* DEVANAGARI LETTER BA -> 'b' */ + { 0x2E, 0x6D }, /* DEVANAGARI LETTER MA -> 'm' */ + { 0x2F, 0x79 }, /* DEVANAGARI LETTER YA -> 'y' */ + { 0x30, 0x72 }, /* DEVANAGARI LETTER RA -> 'r' */ + { 0x32, 0x6C }, /* DEVANAGARI LETTER LA -> 'l' */ + { 0x33, 0x6C }, /* DEVANAGARI LETTER LLA -> 'l' */ + { 0x35, 0x76 }, /* DEVANAGARI LETTER VA -> 'v' */ + { 0x38, 0x73 }, /* DEVANAGARI LETTER SA -> 's' */ + { 0x39, 0x68 }, /* DEVANAGARI LETTER HA -> 'h' */ + { 0x3C, 0x27 }, /* DEVANAGARI 
SIGN NUKTA -> ''' */ + { 0x3D, 0x27 }, /* DEVANAGARI SIGN AVAGRAHA -> ''' */ + { 0x3F, 0x69 }, /* DEVANAGARI VOWEL SIGN I -> 'i' */ + { 0x41, 0x75 }, /* DEVANAGARI VOWEL SIGN U -> 'u' */ + { 0x43, 0x52 }, /* DEVANAGARI VOWEL SIGN VOCALIC R -> 'R' */ + { 0x46, 0x65 }, /* DEVANAGARI VOWEL SIGN SHORT E -> 'e' */ + { 0x47, 0x65 }, /* DEVANAGARI VOWEL SIGN E -> 'e' */ + { 0x4A, 0x6F }, /* DEVANAGARI VOWEL SIGN SHORT O -> 'o' */ + { 0x4B, 0x6F }, /* DEVANAGARI VOWEL SIGN O -> 'o' */ + { 0x51, 0x27 }, /* DEVANAGARI STRESS SIGN UDATTA -> ''' */ + { 0x52, 0x27 }, /* DEVANAGARI STRESS SIGN ANUDATTA -> ''' */ + { 0x53, 0x60 }, /* DEVANAGARI GRAVE ACCENT -> '`' */ + { 0x54, 0x27 }, /* DEVANAGARI ACUTE ACCENT -> ''' */ + { 0x58, 0x71 }, /* DEVANAGARI LETTER QA -> 'q' */ + { 0x5B, 0x7A }, /* DEVANAGARI LETTER ZA -> 'z' */ + { 0x5E, 0x66 }, /* DEVANAGARI LETTER FA -> 'f' */ + { 0x62, 0x4C }, /* DEVANAGARI VOWEL SIGN VOCALIC L -> 'L' */ + { 0x66, 0x30 }, /* DEVANAGARI DIGIT ZERO -> '0' */ + { 0x67, 0x31 }, /* DEVANAGARI DIGIT ONE -> '1' */ + { 0x68, 0x32 }, /* DEVANAGARI DIGIT TWO -> '2' */ + { 0x69, 0x33 }, /* DEVANAGARI DIGIT THREE -> '3' */ + { 0x6A, 0x34 }, /* DEVANAGARI DIGIT FOUR -> '4' */ + { 0x6B, 0x35 }, /* DEVANAGARI DIGIT FIVE -> '5' */ + { 0x6C, 0x36 }, /* DEVANAGARI DIGIT SIX -> '6' */ + { 0x6D, 0x37 }, /* DEVANAGARI DIGIT SEVEN -> '7' */ + { 0x6E, 0x38 }, /* DEVANAGARI DIGIT EIGHT -> '8' */ + { 0x6F, 0x39 }, /* DEVANAGARI DIGIT NINE -> '9' */ + { 0x70, 0x2E }, /* DEVANAGARI ABBREVIATION SIGN -> '.' 
*/ + { 0x81, 0x4E }, /* BENGALI SIGN CANDRABINDU -> 'N' */ + { 0x82, 0x4E }, /* BENGALI SIGN ANUSVARA -> 'N' */ + { 0x83, 0x48 }, /* BENGALI SIGN VISARGA -> 'H' */ + { 0x85, 0x61 }, /* BENGALI LETTER A -> 'a' */ + { 0x87, 0x69 }, /* BENGALI LETTER I -> 'i' */ + { 0x89, 0x75 }, /* BENGALI LETTER U -> 'u' */ + { 0x8B, 0x52 }, /* BENGALI LETTER VOCALIC R -> 'R' */ + { 0x8F, 0x65 }, /* BENGALI LETTER E -> 'e' */ + { 0x93, 0x6F }, /* BENGALI LETTER O -> 'o' */ + { 0x95, 0x6B }, /* BENGALI LETTER KA -> 'k' */ + { 0x97, 0x67 }, /* BENGALI LETTER GA -> 'g' */ + { 0x9A, 0x63 }, /* BENGALI LETTER CA -> 'c' */ + { 0x9C, 0x6A }, /* BENGALI LETTER JA -> 'j' */ + { 0xA4, 0x74 }, /* BENGALI LETTER TA -> 't' */ + { 0xA6, 0x64 }, /* BENGALI LETTER DA -> 'd' */ + { 0xA8, 0x6E }, /* BENGALI LETTER NA -> 'n' */ + { 0xAA, 0x70 }, /* BENGALI LETTER PA -> 'p' */ + { 0xAC, 0x62 }, /* BENGALI LETTER BA -> 'b' */ + { 0xAE, 0x6D }, /* BENGALI LETTER MA -> 'm' */ + { 0xAF, 0x79 }, /* BENGALI LETTER YA -> 'y' */ + { 0xB0, 0x72 }, /* BENGALI LETTER RA -> 'r' */ + { 0xB2, 0x6C }, /* BENGALI LETTER LA -> 'l' */ + { 0xB8, 0x73 }, /* BENGALI LETTER SA -> 's' */ + { 0xB9, 0x68 }, /* BENGALI LETTER HA -> 'h' */ + { 0xBC, 0x27 }, /* BENGALI SIGN NUKTA -> ''' */ + { 0xBF, 0x69 }, /* BENGALI VOWEL SIGN I -> 'i' */ + { 0xC1, 0x75 }, /* BENGALI VOWEL SIGN U -> 'u' */ + { 0xC3, 0x52 }, /* BENGALI VOWEL SIGN VOCALIC R -> 'R' */ + { 0xC7, 0x65 }, /* BENGALI VOWEL SIGN E -> 'e' */ + { 0xCB, 0x6F }, /* BENGALI VOWEL SIGN O -> 'o' */ + { 0xD7, 0x2B }, /* BENGALI AU LENGTH MARK -> '+' */ + { 0xE2, 0x4C }, /* BENGALI VOWEL SIGN VOCALIC L -> 'L' */ + { 0xE6, 0x30 }, /* BENGALI DIGIT ZERO -> '0' */ + { 0xE7, 0x31 }, /* BENGALI DIGIT ONE -> '1' */ + { 0xE8, 0x32 }, /* BENGALI DIGIT TWO -> '2' */ + { 0xE9, 0x33 }, /* BENGALI DIGIT THREE -> '3' */ + { 0xEA, 0x34 }, /* BENGALI DIGIT FOUR -> '4' */ + { 0xEB, 0x35 }, /* BENGALI DIGIT FIVE -> '5' */ + { 0xEC, 0x36 }, /* BENGALI DIGIT SIX -> '6' */ + { 0xED, 0x37 }, /* 
BENGALI DIGIT SEVEN -> '7' */ + { 0xEE, 0x38 }, /* BENGALI DIGIT EIGHT -> '8' */ + { 0xEF, 0x39 }, /* BENGALI DIGIT NINE -> '9' */ + /* Entries for page 0x0A */ + { 0x02, 0x4E }, /* GURMUKHI SIGN BINDI -> 'N' */ + { 0x05, 0x61 }, /* GURMUKHI LETTER A -> 'a' */ + { 0x07, 0x69 }, /* GURMUKHI LETTER I -> 'i' */ + { 0x09, 0x75 }, /* GURMUKHI LETTER U -> 'u' */ + { 0x15, 0x6B }, /* GURMUKHI LETTER KA -> 'k' */ + { 0x17, 0x67 }, /* GURMUKHI LETTER GA -> 'g' */ + { 0x1A, 0x63 }, /* GURMUKHI LETTER CA -> 'c' */ + { 0x1C, 0x6A }, /* GURMUKHI LETTER JA -> 'j' */ + { 0x24, 0x74 }, /* GURMUKHI LETTER TA -> 't' */ + { 0x26, 0x64 }, /* GURMUKHI LETTER DA -> 'd' */ + { 0x28, 0x6E }, /* GURMUKHI LETTER NA -> 'n' */ + { 0x2A, 0x70 }, /* GURMUKHI LETTER PA -> 'p' */ + { 0x2C, 0x62 }, /* GURMUKHI LETTER BA -> 'b' */ + { 0x2E, 0x6D }, /* GURMUKHI LETTER MA -> 'm' */ + { 0x2F, 0x79 }, /* GURMUKHI LETTER YA -> 'y' */ + { 0x30, 0x72 }, /* GURMUKHI LETTER RA -> 'r' */ + { 0x32, 0x6C }, /* GURMUKHI LETTER LA -> 'l' */ + { 0x35, 0x76 }, /* GURMUKHI LETTER VA -> 'v' */ + { 0x38, 0x73 }, /* GURMUKHI LETTER SA -> 's' */ + { 0x39, 0x68 }, /* GURMUKHI LETTER HA -> 'h' */ + { 0x3C, 0x27 }, /* GURMUKHI SIGN NUKTA -> ''' */ + { 0x3F, 0x69 }, /* GURMUKHI VOWEL SIGN I -> 'i' */ + { 0x41, 0x75 }, /* GURMUKHI VOWEL SIGN U -> 'u' */ + { 0x5B, 0x7A }, /* GURMUKHI LETTER ZA -> 'z' */ + { 0x5E, 0x66 }, /* GURMUKHI LETTER FA -> 'f' */ + { 0x66, 0x30 }, /* GURMUKHI DIGIT ZERO -> '0' */ + { 0x67, 0x31 }, /* GURMUKHI DIGIT ONE -> '1' */ + { 0x68, 0x32 }, /* GURMUKHI DIGIT TWO -> '2' */ + { 0x69, 0x33 }, /* GURMUKHI DIGIT THREE -> '3' */ + { 0x6A, 0x34 }, /* GURMUKHI DIGIT FOUR -> '4' */ + { 0x6B, 0x35 }, /* GURMUKHI DIGIT FIVE -> '5' */ + { 0x6C, 0x36 }, /* GURMUKHI DIGIT SIX -> '6' */ + { 0x6D, 0x37 }, /* GURMUKHI DIGIT SEVEN -> '7' */ + { 0x6E, 0x38 }, /* GURMUKHI DIGIT EIGHT -> '8' */ + { 0x6F, 0x39 }, /* GURMUKHI DIGIT NINE -> '9' */ + { 0x70, 0x4E }, /* GURMUKHI TIPPI -> 'N' */ + { 0x71, 0x48 }, /* 
GURMUKHI ADDAK -> 'H' */ + { 0x81, 0x4E }, /* GUJARATI SIGN CANDRABINDU -> 'N' */ + { 0x82, 0x4E }, /* GUJARATI SIGN ANUSVARA -> 'N' */ + { 0x83, 0x48 }, /* GUJARATI SIGN VISARGA -> 'H' */ + { 0x85, 0x61 }, /* GUJARATI LETTER A -> 'a' */ + { 0x87, 0x69 }, /* GUJARATI LETTER I -> 'i' */ + { 0x89, 0x75 }, /* GUJARATI LETTER U -> 'u' */ + { 0x8B, 0x52 }, /* GUJARATI LETTER VOCALIC R -> 'R' */ + { 0x8F, 0x65 }, /* GUJARATI LETTER E -> 'e' */ + { 0x93, 0x6F }, /* GUJARATI LETTER O -> 'o' */ + { 0x95, 0x6B }, /* GUJARATI LETTER KA -> 'k' */ + { 0x97, 0x67 }, /* GUJARATI LETTER GA -> 'g' */ + { 0x9A, 0x63 }, /* GUJARATI LETTER CA -> 'c' */ + { 0x9C, 0x6A }, /* GUJARATI LETTER JA -> 'j' */ + { 0xA4, 0x74 }, /* GUJARATI LETTER TA -> 't' */ + { 0xA6, 0x64 }, /* GUJARATI LETTER DA -> 'd' */ + { 0xA8, 0x6E }, /* GUJARATI LETTER NA -> 'n' */ + { 0xAA, 0x70 }, /* GUJARATI LETTER PA -> 'p' */ + { 0xAC, 0x62 }, /* GUJARATI LETTER BA -> 'b' */ + { 0xAE, 0x6D }, /* GUJARATI LETTER MA -> 'm' */ + { 0xB0, 0x72 }, /* GUJARATI LETTER RA -> 'r' */ + { 0xB2, 0x6C }, /* GUJARATI LETTER LA -> 'l' */ + { 0xB5, 0x76 }, /* GUJARATI LETTER VA -> 'v' */ + { 0xB8, 0x73 }, /* GUJARATI LETTER SA -> 's' */ + { 0xB9, 0x68 }, /* GUJARATI LETTER HA -> 'h' */ + { 0xBC, 0x27 }, /* GUJARATI SIGN NUKTA -> ''' */ + { 0xBD, 0x27 }, /* GUJARATI SIGN AVAGRAHA -> ''' */ + { 0xBF, 0x69 }, /* GUJARATI VOWEL SIGN I -> 'i' */ + { 0xC1, 0x75 }, /* GUJARATI VOWEL SIGN U -> 'u' */ + { 0xC3, 0x52 }, /* GUJARATI VOWEL SIGN VOCALIC R -> 'R' */ + { 0xC7, 0x65 }, /* GUJARATI VOWEL SIGN E -> 'e' */ + { 0xCB, 0x6F }, /* GUJARATI VOWEL SIGN O -> 'o' */ + { 0xE6, 0x30 }, /* GUJARATI DIGIT ZERO -> '0' */ + { 0xE7, 0x31 }, /* GUJARATI DIGIT ONE -> '1' */ + { 0xE8, 0x32 }, /* GUJARATI DIGIT TWO -> '2' */ + { 0xE9, 0x33 }, /* GUJARATI DIGIT THREE -> '3' */ + { 0xEA, 0x34 }, /* GUJARATI DIGIT FOUR -> '4' */ + { 0xEB, 0x35 }, /* GUJARATI DIGIT FIVE -> '5' */ + { 0xEC, 0x36 }, /* GUJARATI DIGIT SIX -> '6' */ + { 0xED, 0x37 }, /* 
GUJARATI DIGIT SEVEN -> '7' */ + { 0xEE, 0x38 }, /* GUJARATI DIGIT EIGHT -> '8' */ + { 0xEF, 0x39 }, /* GUJARATI DIGIT NINE -> '9' */ + /* Entries for page 0x0B */ + { 0x01, 0x4E }, /* ORIYA SIGN CANDRABINDU -> 'N' */ + { 0x02, 0x4E }, /* ORIYA SIGN ANUSVARA -> 'N' */ + { 0x03, 0x48 }, /* ORIYA SIGN VISARGA -> 'H' */ + { 0x05, 0x61 }, /* ORIYA LETTER A -> 'a' */ + { 0x07, 0x69 }, /* ORIYA LETTER I -> 'i' */ + { 0x09, 0x75 }, /* ORIYA LETTER U -> 'u' */ + { 0x0B, 0x52 }, /* ORIYA LETTER VOCALIC R -> 'R' */ + { 0x0C, 0x4C }, /* ORIYA LETTER VOCALIC L -> 'L' */ + { 0x0F, 0x65 }, /* ORIYA LETTER E -> 'e' */ + { 0x13, 0x6F }, /* ORIYA LETTER O -> 'o' */ + { 0x15, 0x6B }, /* ORIYA LETTER KA -> 'k' */ + { 0x17, 0x67 }, /* ORIYA LETTER GA -> 'g' */ + { 0x1A, 0x63 }, /* ORIYA LETTER CA -> 'c' */ + { 0x1C, 0x6A }, /* ORIYA LETTER JA -> 'j' */ + { 0x24, 0x74 }, /* ORIYA LETTER TA -> 't' */ + { 0x26, 0x64 }, /* ORIYA LETTER DA -> 'd' */ + { 0x28, 0x6E }, /* ORIYA LETTER NA -> 'n' */ + { 0x2A, 0x70 }, /* ORIYA LETTER PA -> 'p' */ + { 0x2C, 0x62 }, /* ORIYA LETTER BA -> 'b' */ + { 0x2E, 0x6D }, /* ORIYA LETTER MA -> 'm' */ + { 0x2F, 0x79 }, /* ORIYA LETTER YA -> 'y' */ + { 0x30, 0x72 }, /* ORIYA LETTER RA -> 'r' */ + { 0x32, 0x6C }, /* ORIYA LETTER LA -> 'l' */ + { 0x38, 0x73 }, /* ORIYA LETTER SA -> 's' */ + { 0x39, 0x68 }, /* ORIYA LETTER HA -> 'h' */ + { 0x3C, 0x27 }, /* ORIYA SIGN NUKTA -> ''' */ + { 0x3D, 0x27 }, /* ORIYA SIGN AVAGRAHA -> ''' */ + { 0x3F, 0x69 }, /* ORIYA VOWEL SIGN I -> 'i' */ + { 0x41, 0x75 }, /* ORIYA VOWEL SIGN U -> 'u' */ + { 0x43, 0x52 }, /* ORIYA VOWEL SIGN VOCALIC R -> 'R' */ + { 0x47, 0x65 }, /* ORIYA VOWEL SIGN E -> 'e' */ + { 0x4B, 0x6F }, /* ORIYA VOWEL SIGN O -> 'o' */ + { 0x56, 0x2B }, /* ORIYA AI LENGTH MARK -> '+' */ + { 0x57, 0x2B }, /* ORIYA AU LENGTH MARK -> '+' */ + { 0x66, 0x30 }, /* ORIYA DIGIT ZERO -> '0' */ + { 0x67, 0x31 }, /* ORIYA DIGIT ONE -> '1' */ + { 0x68, 0x32 }, /* ORIYA DIGIT TWO -> '2' */ + { 0x69, 0x33 }, /* ORIYA DIGIT 
THREE -> '3' */ + { 0x6A, 0x34 }, /* ORIYA DIGIT FOUR -> '4' */ + { 0x6B, 0x35 }, /* ORIYA DIGIT FIVE -> '5' */ + { 0x6C, 0x36 }, /* ORIYA DIGIT SIX -> '6' */ + { 0x6D, 0x37 }, /* ORIYA DIGIT SEVEN -> '7' */ + { 0x6E, 0x38 }, /* ORIYA DIGIT EIGHT -> '8' */ + { 0x6F, 0x39 }, /* ORIYA DIGIT NINE -> '9' */ + { 0x82, 0x4E }, /* TAMIL SIGN ANUSVARA -> 'N' */ + { 0x83, 0x48 }, /* TAMIL SIGN VISARGA -> 'H' */ + { 0x85, 0x61 }, /* TAMIL LETTER A -> 'a' */ + { 0x87, 0x69 }, /* TAMIL LETTER I -> 'i' */ + { 0x89, 0x75 }, /* TAMIL LETTER U -> 'u' */ + { 0x8E, 0x65 }, /* TAMIL LETTER E -> 'e' */ + { 0x92, 0x6F }, /* TAMIL LETTER O -> 'o' */ + { 0x95, 0x6B }, /* TAMIL LETTER KA -> 'k' */ + { 0x9A, 0x63 }, /* TAMIL LETTER CA -> 'c' */ + { 0x9C, 0x6A }, /* TAMIL LETTER JA -> 'j' */ + { 0xA4, 0x74 }, /* TAMIL LETTER TA -> 't' */ + { 0xA8, 0x6E }, /* TAMIL LETTER NA -> 'n' */ + { 0xAA, 0x70 }, /* TAMIL LETTER PA -> 'p' */ + { 0xAE, 0x6D }, /* TAMIL LETTER MA -> 'm' */ + { 0xAF, 0x79 }, /* TAMIL LETTER YA -> 'y' */ + { 0xB0, 0x72 }, /* TAMIL LETTER RA -> 'r' */ + { 0xB2, 0x6C }, /* TAMIL LETTER LA -> 'l' */ + { 0xB5, 0x76 }, /* TAMIL LETTER VA -> 'v' */ + { 0xB8, 0x73 }, /* TAMIL LETTER SA -> 's' */ + { 0xB9, 0x68 }, /* TAMIL LETTER HA -> 'h' */ + { 0xBF, 0x69 }, /* TAMIL VOWEL SIGN I -> 'i' */ + { 0xC1, 0x75 }, /* TAMIL VOWEL SIGN U -> 'u' */ + { 0xC6, 0x65 }, /* TAMIL VOWEL SIGN E -> 'e' */ + { 0xCA, 0x6F }, /* TAMIL VOWEL SIGN O -> 'o' */ + { 0xD7, 0x2B }, /* TAMIL AU LENGTH MARK -> '+' */ + { 0xE6, 0x30 }, /* TAMIL DIGIT ZERO -> '0' */ + { 0xE7, 0x31 }, /* TAMIL DIGIT ONE -> '1' */ + { 0xE8, 0x32 }, /* TAMIL DIGIT TWO -> '2' */ + { 0xE9, 0x33 }, /* TAMIL DIGIT THREE -> '3' */ + { 0xEA, 0x34 }, /* TAMIL DIGIT FOUR -> '4' */ + { 0xEB, 0x35 }, /* TAMIL DIGIT FIVE -> '5' */ + { 0xEC, 0x36 }, /* TAMIL DIGIT SIX -> '6' */ + { 0xED, 0x37 }, /* TAMIL DIGIT SEVEN -> '7' */ + { 0xEE, 0x38 }, /* TAMIL DIGIT EIGHT -> '8' */ + { 0xEF, 0x39 }, /* TAMIL DIGIT NINE -> '9' */ + /* Entries for 
page 0x0C */ + { 0x01, 0x4E }, /* TELUGU SIGN CANDRABINDU -> 'N' */ + { 0x02, 0x4E }, /* TELUGU SIGN ANUSVARA -> 'N' */ + { 0x03, 0x48 }, /* TELUGU SIGN VISARGA -> 'H' */ + { 0x05, 0x61 }, /* TELUGU LETTER A -> 'a' */ + { 0x07, 0x69 }, /* TELUGU LETTER I -> 'i' */ + { 0x09, 0x75 }, /* TELUGU LETTER U -> 'u' */ + { 0x0B, 0x52 }, /* TELUGU LETTER VOCALIC R -> 'R' */ + { 0x0C, 0x4C }, /* TELUGU LETTER VOCALIC L -> 'L' */ + { 0x0E, 0x65 }, /* TELUGU LETTER E -> 'e' */ + { 0x12, 0x6F }, /* TELUGU LETTER O -> 'o' */ + { 0x15, 0x6B }, /* TELUGU LETTER KA -> 'k' */ + { 0x17, 0x67 }, /* TELUGU LETTER GA -> 'g' */ + { 0x1A, 0x63 }, /* TELUGU LETTER CA -> 'c' */ + { 0x1C, 0x6A }, /* TELUGU LETTER JA -> 'j' */ + { 0x24, 0x74 }, /* TELUGU LETTER TA -> 't' */ + { 0x26, 0x64 }, /* TELUGU LETTER DA -> 'd' */ + { 0x28, 0x6E }, /* TELUGU LETTER NA -> 'n' */ + { 0x2A, 0x70 }, /* TELUGU LETTER PA -> 'p' */ + { 0x2C, 0x62 }, /* TELUGU LETTER BA -> 'b' */ + { 0x2E, 0x6D }, /* TELUGU LETTER MA -> 'm' */ + { 0x2F, 0x79 }, /* TELUGU LETTER YA -> 'y' */ + { 0x30, 0x72 }, /* TELUGU LETTER RA -> 'r' */ + { 0x32, 0x6C }, /* TELUGU LETTER LA -> 'l' */ + { 0x35, 0x76 }, /* TELUGU LETTER VA -> 'v' */ + { 0x38, 0x73 }, /* TELUGU LETTER SA -> 's' */ + { 0x39, 0x68 }, /* TELUGU LETTER HA -> 'h' */ + { 0x3F, 0x69 }, /* TELUGU VOWEL SIGN I -> 'i' */ + { 0x41, 0x75 }, /* TELUGU VOWEL SIGN U -> 'u' */ + { 0x43, 0x52 }, /* TELUGU VOWEL SIGN VOCALIC R -> 'R' */ + { 0x46, 0x65 }, /* TELUGU VOWEL SIGN E -> 'e' */ + { 0x4A, 0x6F }, /* TELUGU VOWEL SIGN O -> 'o' */ + { 0x55, 0x2B }, /* TELUGU LENGTH MARK -> '+' */ + { 0x56, 0x2B }, /* TELUGU AI LENGTH MARK -> '+' */ + { 0x66, 0x30 }, /* TELUGU DIGIT ZERO -> '0' */ + { 0x67, 0x31 }, /* TELUGU DIGIT ONE -> '1' */ + { 0x68, 0x32 }, /* TELUGU DIGIT TWO -> '2' */ + { 0x69, 0x33 }, /* TELUGU DIGIT THREE -> '3' */ + { 0x6A, 0x34 }, /* TELUGU DIGIT FOUR -> '4' */ + { 0x6B, 0x35 }, /* TELUGU DIGIT FIVE -> '5' */ + { 0x6C, 0x36 }, /* TELUGU DIGIT SIX -> '6' */ + { 
0x6D, 0x37 }, /* TELUGU DIGIT SEVEN -> '7' */ + { 0x6E, 0x38 }, /* TELUGU DIGIT EIGHT -> '8' */ + { 0x6F, 0x39 }, /* TELUGU DIGIT NINE -> '9' */ + { 0x82, 0x4E }, /* KANNADA SIGN ANUSVARA -> 'N' */ + { 0x83, 0x48 }, /* KANNADA SIGN VISARGA -> 'H' */ + { 0x85, 0x61 }, /* KANNADA LETTER A -> 'a' */ + { 0x87, 0x69 }, /* KANNADA LETTER I -> 'i' */ + { 0x89, 0x75 }, /* KANNADA LETTER U -> 'u' */ + { 0x8B, 0x52 }, /* KANNADA LETTER VOCALIC R -> 'R' */ + { 0x8C, 0x4C }, /* KANNADA LETTER VOCALIC L -> 'L' */ + { 0x8E, 0x65 }, /* KANNADA LETTER E -> 'e' */ + { 0x92, 0x6F }, /* KANNADA LETTER O -> 'o' */ + { 0x95, 0x6B }, /* KANNADA LETTER KA -> 'k' */ + { 0x97, 0x67 }, /* KANNADA LETTER GA -> 'g' */ + { 0x9A, 0x63 }, /* KANNADA LETTER CA -> 'c' */ + { 0x9C, 0x6A }, /* KANNADA LETTER JA -> 'j' */ + { 0xA4, 0x74 }, /* KANNADA LETTER TA -> 't' */ + { 0xA6, 0x64 }, /* KANNADA LETTER DA -> 'd' */ + { 0xA8, 0x6E }, /* KANNADA LETTER NA -> 'n' */ + { 0xAA, 0x70 }, /* KANNADA LETTER PA -> 'p' */ + { 0xAC, 0x62 }, /* KANNADA LETTER BA -> 'b' */ + { 0xAE, 0x6D }, /* KANNADA LETTER MA -> 'm' */ + { 0xAF, 0x79 }, /* KANNADA LETTER YA -> 'y' */ + { 0xB0, 0x72 }, /* KANNADA LETTER RA -> 'r' */ + { 0xB2, 0x6C }, /* KANNADA LETTER LA -> 'l' */ + { 0xB5, 0x76 }, /* KANNADA LETTER VA -> 'v' */ + { 0xB8, 0x73 }, /* KANNADA LETTER SA -> 's' */ + { 0xB9, 0x68 }, /* KANNADA LETTER HA -> 'h' */ + { 0xBF, 0x69 }, /* KANNADA VOWEL SIGN I -> 'i' */ + { 0xC1, 0x75 }, /* KANNADA VOWEL SIGN U -> 'u' */ + { 0xC3, 0x52 }, /* KANNADA VOWEL SIGN VOCALIC R -> 'R' */ + { 0xC6, 0x65 }, /* KANNADA VOWEL SIGN E -> 'e' */ + { 0xCA, 0x6F }, /* KANNADA VOWEL SIGN O -> 'o' */ + { 0xD5, 0x2B }, /* KANNADA LENGTH MARK -> '+' */ + { 0xD6, 0x2B }, /* KANNADA AI LENGTH MARK -> '+' */ + { 0xE6, 0x30 }, /* KANNADA DIGIT ZERO -> '0' */ + { 0xE7, 0x31 }, /* KANNADA DIGIT ONE -> '1' */ + { 0xE8, 0x32 }, /* KANNADA DIGIT TWO -> '2' */ + { 0xE9, 0x33 }, /* KANNADA DIGIT THREE -> '3' */ + { 0xEA, 0x34 }, /* KANNADA DIGIT FOUR 
-> '4' */ + { 0xEB, 0x35 }, /* KANNADA DIGIT FIVE -> '5' */ + { 0xEC, 0x36 }, /* KANNADA DIGIT SIX -> '6' */ + { 0xED, 0x37 }, /* KANNADA DIGIT SEVEN -> '7' */ + { 0xEE, 0x38 }, /* KANNADA DIGIT EIGHT -> '8' */ + { 0xEF, 0x39 }, /* KANNADA DIGIT NINE -> '9' */ + /* Entries for page 0x0D */ + { 0x02, 0x4E }, /* MALAYALAM SIGN ANUSVARA -> 'N' */ + { 0x03, 0x48 }, /* MALAYALAM SIGN VISARGA -> 'H' */ + { 0x05, 0x61 }, /* MALAYALAM LETTER A -> 'a' */ + { 0x07, 0x69 }, /* MALAYALAM LETTER I -> 'i' */ + { 0x09, 0x75 }, /* MALAYALAM LETTER U -> 'u' */ + { 0x0B, 0x52 }, /* MALAYALAM LETTER VOCALIC R -> 'R' */ + { 0x0C, 0x4C }, /* MALAYALAM LETTER VOCALIC L -> 'L' */ + { 0x0E, 0x65 }, /* MALAYALAM LETTER E -> 'e' */ + { 0x12, 0x6F }, /* MALAYALAM LETTER O -> 'o' */ + { 0x15, 0x6B }, /* MALAYALAM LETTER KA -> 'k' */ + { 0x17, 0x67 }, /* MALAYALAM LETTER GA -> 'g' */ + { 0x1A, 0x63 }, /* MALAYALAM LETTER CA -> 'c' */ + { 0x1C, 0x6A }, /* MALAYALAM LETTER JA -> 'j' */ + { 0x24, 0x74 }, /* MALAYALAM LETTER TA -> 't' */ + { 0x26, 0x64 }, /* MALAYALAM LETTER DA -> 'd' */ + { 0x28, 0x6E }, /* MALAYALAM LETTER NA -> 'n' */ + { 0x2A, 0x70 }, /* MALAYALAM LETTER PA -> 'p' */ + { 0x2C, 0x62 }, /* MALAYALAM LETTER BA -> 'b' */ + { 0x2E, 0x6D }, /* MALAYALAM LETTER MA -> 'm' */ + { 0x2F, 0x79 }, /* MALAYALAM LETTER YA -> 'y' */ + { 0x30, 0x72 }, /* MALAYALAM LETTER RA -> 'r' */ + { 0x32, 0x6C }, /* MALAYALAM LETTER LA -> 'l' */ + { 0x35, 0x76 }, /* MALAYALAM LETTER VA -> 'v' */ + { 0x38, 0x73 }, /* MALAYALAM LETTER SA -> 's' */ + { 0x39, 0x68 }, /* MALAYALAM LETTER HA -> 'h' */ + { 0x3F, 0x69 }, /* MALAYALAM VOWEL SIGN I -> 'i' */ + { 0x41, 0x75 }, /* MALAYALAM VOWEL SIGN U -> 'u' */ + { 0x43, 0x52 }, /* MALAYALAM VOWEL SIGN VOCALIC R -> 'R' */ + { 0x46, 0x65 }, /* MALAYALAM VOWEL SIGN E -> 'e' */ + { 0x4A, 0x6F }, /* MALAYALAM VOWEL SIGN O -> 'o' */ + { 0x57, 0x2B }, /* MALAYALAM AU LENGTH MARK -> '+' */ + { 0x66, 0x30 }, /* MALAYALAM DIGIT ZERO -> '0' */ + { 0x67, 0x31 }, /* MALAYALAM 
DIGIT ONE -> '1' */ + { 0x68, 0x32 }, /* MALAYALAM DIGIT TWO -> '2' */ + { 0x69, 0x33 }, /* MALAYALAM DIGIT THREE -> '3' */ + { 0x6A, 0x34 }, /* MALAYALAM DIGIT FOUR -> '4' */ + { 0x6B, 0x35 }, /* MALAYALAM DIGIT FIVE -> '5' */ + { 0x6C, 0x36 }, /* MALAYALAM DIGIT SIX -> '6' */ + { 0x6D, 0x37 }, /* MALAYALAM DIGIT SEVEN -> '7' */ + { 0x6E, 0x38 }, /* MALAYALAM DIGIT EIGHT -> '8' */ + { 0x6F, 0x39 }, /* MALAYALAM DIGIT NINE -> '9' */ + { 0x82, 0x4E }, /* SINHALA SIGN ANUSVARAYA -> 'N' */ + { 0x83, 0x48 }, /* SINHALA SIGN VISARGAYA -> 'H' */ + { 0x85, 0x61 }, /* SINHALA LETTER AYANNA -> 'a' */ + { 0x89, 0x69 }, /* SINHALA LETTER IYANNA -> 'i' */ + { 0x8B, 0x75 }, /* SINHALA LETTER UYANNA -> 'u' */ + { 0x8D, 0x52 }, /* SINHALA LETTER IRUYANNA -> 'R' */ + { 0x8F, 0x4C }, /* SINHALA LETTER ILUYANNA -> 'L' */ + { 0x91, 0x65 }, /* SINHALA LETTER EYANNA -> 'e' */ + { 0x94, 0x6F }, /* SINHALA LETTER OYANNA -> 'o' */ + { 0x9A, 0x6B }, /* SINHALA LETTER ALPAPRAANA KAYANNA -> 'k' */ + { 0x9C, 0x67 }, /* SINHALA LETTER ALPAPRAANA GAYANNA -> 'g' */ + { 0xA0, 0x63 }, /* SINHALA LETTER ALPAPRAANA CAYANNA -> 'c' */ + { 0xA2, 0x6A }, /* SINHALA LETTER ALPAPRAANA JAYANNA -> 'j' */ + { 0xAD, 0x74 }, /* SINHALA LETTER ALPAPRAANA TAYANNA -> 't' */ + { 0xAF, 0x64 }, /* SINHALA LETTER ALPAPRAANA DAYANNA -> 'd' */ + { 0xB1, 0x6E }, /* SINHALA LETTER DANTAJA NAYANNA -> 'n' */ + { 0xB4, 0x70 }, /* SINHALA LETTER ALPAPRAANA PAYANNA -> 'p' */ + { 0xB6, 0x62 }, /* SINHALA LETTER ALPAPRAANA BAYANNA -> 'b' */ + { 0xB8, 0x6D }, /* SINHALA LETTER MAYANNA -> 'm' */ + { 0xBA, 0x79 }, /* SINHALA LETTER YAYANNA -> 'y' */ + { 0xBB, 0x72 }, /* SINHALA LETTER RAYANNA -> 'r' */ + { 0xBD, 0x6C }, /* SINHALA LETTER DANTAJA LAYANNA -> 'l' */ + { 0xC0, 0x76 }, /* SINHALA LETTER VAYANNA -> 'v' */ + { 0xC3, 0x73 }, /* SINHALA LETTER DANTAJA SAYANNA -> 's' */ + { 0xC4, 0x68 }, /* SINHALA LETTER HAYANNA -> 'h' */ + { 0xC6, 0x66 }, /* SINHALA LETTER FAYANNA -> 'f' */ + { 0xD2, 0x69 }, /* SINHALA VOWEL SIGN KETTI 
IS-PILLA -> 'i' */ + { 0xD4, 0x75 }, /* SINHALA VOWEL SIGN KETTI PAA-PILLA -> 'u' */ + { 0xD8, 0x52 }, /* SINHALA VOWEL SIGN GAETTA-PILLA -> 'R' */ + { 0xD9, 0x65 }, /* SINHALA VOWEL SIGN KOMBUVA -> 'e' */ + { 0xDC, 0x6F }, /* SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA -> 'o' */ + { 0xDF, 0x4C }, /* SINHALA VOWEL SIGN GAYANUKITTA -> 'L' */ + /* Entries for page 0x0E */ + { 0x01, 0x6B }, /* THAI CHARACTER KO KAI -> 'k' */ + { 0x0D, 0x79 }, /* THAI CHARACTER YO YING -> 'y' */ + { 0x0E, 0x64 }, /* THAI CHARACTER DO CHADA -> 'd' */ + { 0x0F, 0x74 }, /* THAI CHARACTER TO PATAK -> 't' */ + { 0x13, 0x6E }, /* THAI CHARACTER NO NEN -> 'n' */ + { 0x14, 0x64 }, /* THAI CHARACTER DO DEK -> 'd' */ + { 0x15, 0x74 }, /* THAI CHARACTER TO TAO -> 't' */ + { 0x19, 0x6E }, /* THAI CHARACTER NO NU -> 'n' */ + { 0x1A, 0x62 }, /* THAI CHARACTER BO BAIMAI -> 'b' */ + { 0x1B, 0x70 }, /* THAI CHARACTER PO PLA -> 'p' */ + { 0x1D, 0x66 }, /* THAI CHARACTER FO FA -> 'f' */ + { 0x1F, 0x66 }, /* THAI CHARACTER FO FAN -> 'f' */ + { 0x21, 0x6D }, /* THAI CHARACTER MO MA -> 'm' */ + { 0x22, 0x79 }, /* THAI CHARACTER YO YAK -> 'y' */ + { 0x23, 0x72 }, /* THAI CHARACTER RO RUA -> 'r' */ + { 0x24, 0x52 }, /* THAI CHARACTER RU -> 'R' */ + { 0x25, 0x6C }, /* THAI CHARACTER LO LING -> 'l' */ + { 0x26, 0x4C }, /* THAI CHARACTER LU -> 'L' */ + { 0x27, 0x77 }, /* THAI CHARACTER WO WAEN -> 'w' */ + { 0x28, 0x00 }, /* THAI CHARACTER SO SALA -> ... 
*/ + { 0x2A, 0x73 }, /* THAI CHARACTER SO SUA -> 's' */ + { 0x2B, 0x68 }, /* THAI CHARACTER HO HIP -> 'h' */ + { 0x2C, 0x6C }, /* THAI CHARACTER LO CHULA -> 'l' */ + { 0x2D, 0x60 }, /* THAI CHARACTER O ANG -> '`' */ + { 0x2E, 0x68 }, /* THAI CHARACTER HO NOKHUK -> 'h' */ + { 0x2F, 0x7E }, /* THAI CHARACTER PAIYANNOI -> '~' */ + { 0x30, 0x61 }, /* THAI CHARACTER SARA A -> 'a' */ + { 0x31, 0x61 }, /* THAI CHARACTER MAI HAN-AKAT -> 'a' */ + { 0x34, 0x69 }, /* THAI CHARACTER SARA I -> 'i' */ + { 0x38, 0x75 }, /* THAI CHARACTER SARA U -> 'u' */ + { 0x3A, 0x27 }, /* THAI CHARACTER PHINTHU -> ''' */ + { 0x40, 0x65 }, /* THAI CHARACTER SARA E -> 'e' */ + { 0x42, 0x6F }, /* THAI CHARACTER SARA O -> 'o' */ + { 0x46, 0x2B }, /* THAI CHARACTER MAIYAMOK -> '+' */ + { 0x4D, 0x4D }, /* THAI CHARACTER NIKHAHIT -> 'M' */ + { 0x50, 0x30 }, /* THAI DIGIT ZERO -> '0' */ + { 0x51, 0x31 }, /* THAI DIGIT ONE -> '1' */ + { 0x52, 0x32 }, /* THAI DIGIT TWO -> '2' */ + { 0x53, 0x33 }, /* THAI DIGIT THREE -> '3' */ + { 0x54, 0x34 }, /* THAI DIGIT FOUR -> '4' */ + { 0x55, 0x35 }, /* THAI DIGIT FIVE -> '5' */ + { 0x56, 0x36 }, /* THAI DIGIT SIX -> '6' */ + { 0x57, 0x37 }, /* THAI DIGIT SEVEN -> '7' */ + { 0x58, 0x38 }, /* THAI DIGIT EIGHT -> '8' */ + { 0x59, 0x39 }, /* THAI DIGIT NINE -> '9' */ + { 0x81, 0x6B }, /* LAO LETTER KO -> 'k' */ + { 0x8A, 0x73 }, /* LAO LETTER SO TAM -> 's' */ + { 0x94, 0x64 }, /* LAO LETTER DO -> 'd' */ + { 0x95, 0x68 }, /* LAO LETTER TO -> 'h' */ + { 0x99, 0x6E }, /* LAO LETTER NO -> 'n' */ + { 0x9A, 0x62 }, /* LAO LETTER BO -> 'b' */ + { 0x9B, 0x70 }, /* LAO LETTER PO -> 'p' */ + { 0x9D, 0x66 }, /* LAO LETTER FO TAM -> 'f' */ + { 0x9F, 0x66 }, /* LAO LETTER FO SUNG -> 'f' */ + { 0xA1, 0x6D }, /* LAO LETTER MO -> 'm' */ + { 0xA2, 0x79 }, /* LAO LETTER YO -> 'y' */ + { 0xA3, 0x72 }, /* LAO LETTER LO LING -> 'r' */ + { 0xA5, 0x6C }, /* LAO LETTER LO LOOT -> 'l' */ + { 0xA7, 0x77 }, /* LAO LETTER WO -> 'w' */ + { 0xAA, 0x73 }, /* LAO LETTER SO SUNG -> 's' */ + { 0xAB, 
0x68 }, /* LAO LETTER HO SUNG -> 'h' */ + { 0xAD, 0x60 }, /* LAO LETTER O -> '`' */ + { 0xAF, 0x7E }, /* LAO ELLIPSIS -> '~' */ + { 0xB0, 0x61 }, /* LAO VOWEL SIGN A -> 'a' */ + { 0xB4, 0x69 }, /* LAO VOWEL SIGN I -> 'i' */ + { 0xB6, 0x79 }, /* LAO VOWEL SIGN Y -> 'y' */ + { 0xB8, 0x75 }, /* LAO VOWEL SIGN U -> 'u' */ + { 0xBB, 0x6F }, /* LAO VOWEL SIGN MAI KON -> 'o' */ + { 0xBC, 0x6C }, /* LAO SEMIVOWEL SIGN LO -> 'l' */ + { 0xC0, 0x65 }, /* LAO VOWEL SIGN E -> 'e' */ + { 0xC2, 0x6F }, /* LAO VOWEL SIGN O -> 'o' */ + { 0xC6, 0x2B }, /* LAO KO LA -> '+' */ + { 0xCD, 0x4D }, /* LAO NIGGAHITA -> 'M' */ + { 0xD0, 0x30 }, /* LAO DIGIT ZERO -> '0' */ + { 0xD1, 0x31 }, /* LAO DIGIT ONE -> '1' */ + { 0xD2, 0x32 }, /* LAO DIGIT TWO -> '2' */ + { 0xD3, 0x33 }, /* LAO DIGIT THREE -> '3' */ + { 0xD4, 0x34 }, /* LAO DIGIT FOUR -> '4' */ + { 0xD5, 0x35 }, /* LAO DIGIT FIVE -> '5' */ + { 0xD6, 0x36 }, /* LAO DIGIT SIX -> '6' */ + { 0xD7, 0x37 }, /* LAO DIGIT SEVEN -> '7' */ + { 0xD8, 0x38 }, /* LAO DIGIT EIGHT -> '8' */ + { 0xD9, 0x39 }, /* LAO DIGIT NINE -> '9' */ + /* Entries for page 0x0F */ + { 0x0B, 0x2D }, /* TIBETAN MARK INTERSYLLABIC TSHEG -> '-' */ + { 0x20, 0x30 }, /* TIBETAN DIGIT ZERO -> '0' */ + { 0x21, 0x31 }, /* TIBETAN DIGIT ONE -> '1' */ + { 0x22, 0x32 }, /* TIBETAN DIGIT TWO -> '2' */ + { 0x23, 0x33 }, /* TIBETAN DIGIT THREE -> '3' */ + { 0x24, 0x34 }, /* TIBETAN DIGIT FOUR -> '4' */ + { 0x25, 0x35 }, /* TIBETAN DIGIT FIVE -> '5' */ + { 0x26, 0x36 }, /* TIBETAN DIGIT SIX -> '6' */ + { 0x27, 0x37 }, /* TIBETAN DIGIT SEVEN -> '7' */ + { 0x28, 0x38 }, /* TIBETAN DIGIT EIGHT -> '8' */ + { 0x29, 0x39 }, /* TIBETAN DIGIT NINE -> '9' */ + { 0x34, 0x2B }, /* TIBETAN MARK BSDUS RTAGS -> '+' */ + { 0x35, 0x2A }, /* TIBETAN MARK NGAS BZUNG NYI ZLA -> '*' */ + { 0x36, 0x5E }, /* TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN -> '^' */ + { 0x37, 0x5F }, /* TIBETAN MARK NGAS BZUNG SGOR RTAGS -> '_' */ + { 0x39, 0x7E }, /* TIBETAN MARK TSA -PHRU -> '~' */ + { 0x3B, 0x5D }, /* 
TIBETAN MARK GUG RTAGS GYAS -> ']' */ + { 0x40, 0x6B }, /* TIBETAN LETTER KA -> 'k' */ + { 0x42, 0x67 }, /* TIBETAN LETTER GA -> 'g' */ + { 0x45, 0x63 }, /* TIBETAN LETTER CA -> 'c' */ + { 0x47, 0x6A }, /* TIBETAN LETTER JA -> 'j' */ + { 0x4F, 0x74 }, /* TIBETAN LETTER TA -> 't' */ + { 0x51, 0x64 }, /* TIBETAN LETTER DA -> 'd' */ + { 0x53, 0x6E }, /* TIBETAN LETTER NA -> 'n' */ + { 0x54, 0x70 }, /* TIBETAN LETTER PA -> 'p' */ + { 0x56, 0x62 }, /* TIBETAN LETTER BA -> 'b' */ + { 0x58, 0x6D }, /* TIBETAN LETTER MA -> 'm' */ + { 0x5D, 0x77 }, /* TIBETAN LETTER WA -> 'w' */ + { 0x5F, 0x7A }, /* TIBETAN LETTER ZA -> 'z' */ + { 0x60, 0x27 }, /* TIBETAN LETTER -A -> ''' */ + { 0x61, 0x79 }, /* TIBETAN LETTER YA -> 'y' */ + { 0x62, 0x72 }, /* TIBETAN LETTER RA -> 'r' */ + { 0x63, 0x6C }, /* TIBETAN LETTER LA -> 'l' */ + { 0x66, 0x73 }, /* TIBETAN LETTER SA -> 's' */ + { 0x67, 0x68 }, /* TIBETAN LETTER HA -> 'h' */ + { 0x68, 0x61 }, /* TIBETAN LETTER A -> 'a' */ + { 0x6A, 0x72 }, /* TIBETAN LETTER FIXED-FORM RA -> 'r' */ + { 0x72, 0x69 }, /* TIBETAN VOWEL SIGN I -> 'i' */ + { 0x74, 0x75 }, /* TIBETAN VOWEL SIGN U -> 'u' */ + { 0x76, 0x52 }, /* TIBETAN VOWEL SIGN VOCALIC R -> 'R' */ + { 0x78, 0x4C }, /* TIBETAN VOWEL SIGN VOCALIC L -> 'L' */ + { 0x7A, 0x65 }, /* TIBETAN VOWEL SIGN E -> 'e' */ + { 0x7C, 0x6F }, /* TIBETAN VOWEL SIGN O -> 'o' */ + { 0x7E, 0x4D }, /* TIBETAN SIGN RJES SU NGA RO -> 'M' */ + { 0x7F, 0x48 }, /* TIBETAN SIGN RNAM BCAD -> 'H' */ + { 0x80, 0x69 }, /* TIBETAN VOWEL SIGN REVERSED I -> 'i' */ + { 0x90, 0x6B }, /* TIBETAN SUBJOINED LETTER KA -> 'k' */ + { 0x92, 0x67 }, /* TIBETAN SUBJOINED LETTER GA -> 'g' */ + { 0x95, 0x63 }, /* TIBETAN SUBJOINED LETTER CA -> 'c' */ + { 0x97, 0x6A }, /* TIBETAN SUBJOINED LETTER JA -> 'j' */ + { 0x9F, 0x74 }, /* TIBETAN SUBJOINED LETTER TA -> 't' */ + { 0xA1, 0x64 }, /* TIBETAN SUBJOINED LETTER DA -> 'd' */ + { 0xA3, 0x6E }, /* TIBETAN SUBJOINED LETTER NA -> 'n' */ + { 0xA4, 0x70 }, /* TIBETAN SUBJOINED LETTER PA -> 'p' 
*/ + { 0xA6, 0x62 }, /* TIBETAN SUBJOINED LETTER BA -> 'b' */ + { 0xA8, 0x6D }, /* TIBETAN SUBJOINED LETTER MA -> 'm' */ + { 0xAD, 0x77 }, /* TIBETAN SUBJOINED LETTER WA -> 'w' */ + { 0xAF, 0x7A }, /* TIBETAN SUBJOINED LETTER ZA -> 'z' */ + { 0xB0, 0x27 }, /* TIBETAN SUBJOINED LETTER -A -> ''' */ + { 0xB1, 0x79 }, /* TIBETAN SUBJOINED LETTER YA -> 'y' */ + { 0xB2, 0x72 }, /* TIBETAN SUBJOINED LETTER RA -> 'r' */ + { 0xB3, 0x6C }, /* TIBETAN SUBJOINED LETTER LA -> 'l' */ + { 0xB6, 0x73 }, /* TIBETAN SUBJOINED LETTER SA -> 's' */ + { 0xB7, 0x68 }, /* TIBETAN SUBJOINED LETTER HA -> 'h' */ + { 0xB8, 0x61 }, /* TIBETAN SUBJOINED LETTER A -> 'a' */ + { 0xBA, 0x77 }, /* TIBETAN SUBJOINED LETTER FIXED-FORM WA -> 'w' */ + { 0xBB, 0x79 }, /* TIBETAN SUBJOINED LETTER FIXED-FORM YA -> 'y' */ + { 0xBC, 0x72 }, /* TIBETAN SUBJOINED LETTER FIXED-FORM RA -> 'r' */ + { 0xBE, 0x58 }, /* TIBETAN KU RU KHA -> 'X' */ + /* Entries for page 0x10 */ + { 0x00, 0x6B }, /* MYANMAR LETTER KA -> 'k' */ + { 0x02, 0x67 }, /* MYANMAR LETTER GA -> 'g' */ + { 0x05, 0x63 }, /* MYANMAR LETTER CA -> 'c' */ + { 0x07, 0x6A }, /* MYANMAR LETTER JA -> 'j' */ + { 0x12, 0x64 }, /* MYANMAR LETTER DA -> 'd' */ + { 0x14, 0x6E }, /* MYANMAR LETTER NA -> 'n' */ + { 0x15, 0x70 }, /* MYANMAR LETTER PA -> 'p' */ + { 0x17, 0x62 }, /* MYANMAR LETTER BA -> 'b' */ + { 0x19, 0x6D }, /* MYANMAR LETTER MA -> 'm' */ + { 0x1A, 0x79 }, /* MYANMAR LETTER YA -> 'y' */ + { 0x1B, 0x72 }, /* MYANMAR LETTER RA -> 'r' */ + { 0x1C, 0x6C }, /* MYANMAR LETTER LA -> 'l' */ + { 0x1D, 0x77 }, /* MYANMAR LETTER WA -> 'w' */ + { 0x1E, 0x73 }, /* MYANMAR LETTER SA -> 's' */ + { 0x1F, 0x68 }, /* MYANMAR LETTER HA -> 'h' */ + { 0x21, 0x61 }, /* MYANMAR LETTER A -> 'a' */ + { 0x23, 0x69 }, /* MYANMAR LETTER I -> 'i' */ + { 0x25, 0x75 }, /* MYANMAR LETTER U -> 'u' */ + { 0x27, 0x65 }, /* MYANMAR LETTER E -> 'e' */ + { 0x29, 0x6F }, /* MYANMAR LETTER O -> 'o' */ + { 0x2D, 0x69 }, /* MYANMAR VOWEL SIGN I -> 'i' */ + { 0x2F, 0x75 }, /* MYANMAR 
VOWEL SIGN U -> 'u' */ + { 0x31, 0x65 }, /* MYANMAR VOWEL SIGN E -> 'e' */ + { 0x36, 0x4E }, /* MYANMAR SIGN ANUSVARA -> 'N' */ + { 0x37, 0x27 }, /* MYANMAR SIGN DOT BELOW -> ''' */ + { 0x38, 0x3A }, /* MYANMAR SIGN VISARGA -> ':' */ + { 0x40, 0x30 }, /* MYANMAR DIGIT ZERO -> '0' */ + { 0x41, 0x31 }, /* MYANMAR DIGIT ONE -> '1' */ + { 0x42, 0x32 }, /* MYANMAR DIGIT TWO -> '2' */ + { 0x43, 0x33 }, /* MYANMAR DIGIT THREE -> '3' */ + { 0x44, 0x34 }, /* MYANMAR DIGIT FOUR -> '4' */ + { 0x45, 0x35 }, /* MYANMAR DIGIT FIVE -> '5' */ + { 0x46, 0x36 }, /* MYANMAR DIGIT SIX -> '6' */ + { 0x47, 0x37 }, /* MYANMAR DIGIT SEVEN -> '7' */ + { 0x48, 0x38 }, /* MYANMAR DIGIT EIGHT -> '8' */ + { 0x49, 0x39 }, /* MYANMAR DIGIT NINE -> '9' */ + { 0x52, 0x52 }, /* MYANMAR LETTER VOCALIC R -> 'R' */ + { 0x54, 0x4C }, /* MYANMAR LETTER VOCALIC L -> 'L' */ + { 0x56, 0x52 }, /* MYANMAR VOWEL SIGN VOCALIC R -> 'R' */ + { 0x58, 0x4C }, /* MYANMAR VOWEL SIGN VOCALIC L -> 'L' */ + { 0xA0, 0x41 }, /* GEORGIAN CAPITAL LETTER AN -> 'A' */ + { 0xA1, 0x42 }, /* GEORGIAN CAPITAL LETTER BAN -> 'B' */ + { 0xA2, 0x47 }, /* GEORGIAN CAPITAL LETTER GAN -> 'G' */ + { 0xA3, 0x44 }, /* GEORGIAN CAPITAL LETTER DON -> 'D' */ + { 0xA4, 0x45 }, /* GEORGIAN CAPITAL LETTER EN -> 'E' */ + { 0xA5, 0x56 }, /* GEORGIAN CAPITAL LETTER VIN -> 'V' */ + { 0xA6, 0x5A }, /* GEORGIAN CAPITAL LETTER ZEN -> 'Z' */ + { 0xA8, 0x49 }, /* GEORGIAN CAPITAL LETTER IN -> 'I' */ + { 0xA9, 0x4B }, /* GEORGIAN CAPITAL LETTER KAN -> 'K' */ + { 0xAA, 0x4C }, /* GEORGIAN CAPITAL LETTER LAS -> 'L' */ + { 0xAB, 0x4D }, /* GEORGIAN CAPITAL LETTER MAN -> 'M' */ + { 0xAC, 0x4E }, /* GEORGIAN CAPITAL LETTER NAR -> 'N' */ + { 0xAD, 0x4F }, /* GEORGIAN CAPITAL LETTER ON -> 'O' */ + { 0xAE, 0x50 }, /* GEORGIAN CAPITAL LETTER PAR -> 'P' */ + { 0xB0, 0x52 }, /* GEORGIAN CAPITAL LETTER RAE -> 'R' */ + { 0xB1, 0x53 }, /* GEORGIAN CAPITAL LETTER SAN -> 'S' */ + { 0xB2, 0x54 }, /* GEORGIAN CAPITAL LETTER TAR -> 'T' */ + { 0xB3, 0x55 }, /* GEORGIAN 
CAPITAL LETTER UN -> 'U' */ + { 0xB7, 0x51 }, /* GEORGIAN CAPITAL LETTER QAR -> 'Q' */ + { 0xBC, 0x43 }, /* GEORGIAN CAPITAL LETTER CIL -> 'C' */ + { 0xBE, 0x58 }, /* GEORGIAN CAPITAL LETTER XAN -> 'X' */ + { 0xBF, 0x4A }, /* GEORGIAN CAPITAL LETTER JHAN -> 'J' */ + { 0xC0, 0x48 }, /* GEORGIAN CAPITAL LETTER HAE -> 'H' */ + { 0xC1, 0x45 }, /* GEORGIAN CAPITAL LETTER HE -> 'E' */ + { 0xC2, 0x59 }, /* GEORGIAN CAPITAL LETTER HIE -> 'Y' */ + { 0xC3, 0x57 }, /* GEORGIAN CAPITAL LETTER WE -> 'W' */ + { 0xD0, 0x61 }, /* GEORGIAN LETTER AN -> 'a' */ + { 0xD1, 0x62 }, /* GEORGIAN LETTER BAN -> 'b' */ + { 0xD2, 0x67 }, /* GEORGIAN LETTER GAN -> 'g' */ + { 0xD3, 0x64 }, /* GEORGIAN LETTER DON -> 'd' */ + { 0xD4, 0x65 }, /* GEORGIAN LETTER EN -> 'e' */ + { 0xD5, 0x76 }, /* GEORGIAN LETTER VIN -> 'v' */ + { 0xD6, 0x7A }, /* GEORGIAN LETTER ZEN -> 'z' */ + { 0xD8, 0x69 }, /* GEORGIAN LETTER IN -> 'i' */ + { 0xD9, 0x6B }, /* GEORGIAN LETTER KAN -> 'k' */ + { 0xDA, 0x6C }, /* GEORGIAN LETTER LAS -> 'l' */ + { 0xDB, 0x6D }, /* GEORGIAN LETTER MAN -> 'm' */ + { 0xDC, 0x6E }, /* GEORGIAN LETTER NAR -> 'n' */ + { 0xDD, 0x6F }, /* GEORGIAN LETTER ON -> 'o' */ + { 0xDE, 0x70 }, /* GEORGIAN LETTER PAR -> 'p' */ + { 0xE0, 0x72 }, /* GEORGIAN LETTER RAE -> 'r' */ + { 0xE1, 0x73 }, /* GEORGIAN LETTER SAN -> 's' */ + { 0xE2, 0x74 }, /* GEORGIAN LETTER TAR -> 't' */ + { 0xE3, 0x75 }, /* GEORGIAN LETTER UN -> 'u' */ + { 0xE7, 0x71 }, /* GEORGIAN LETTER QAR -> 'q' */ + { 0xEC, 0x63 }, /* GEORGIAN LETTER CIL -> 'c' */ + { 0xEE, 0x78 }, /* GEORGIAN LETTER XAN -> 'x' */ + { 0xEF, 0x6A }, /* GEORGIAN LETTER JHAN -> 'j' */ + { 0xF0, 0x68 }, /* GEORGIAN LETTER HAE -> 'h' */ + { 0xF1, 0x65 }, /* GEORGIAN LETTER HE -> 'e' */ + { 0xF2, 0x79 }, /* GEORGIAN LETTER HIE -> 'y' */ + { 0xF3, 0x77 }, /* GEORGIAN LETTER WE -> 'w' */ + { 0xF6, 0x66 }, /* GEORGIAN LETTER FI -> 'f' */ + /* Entries for page 0x11 */ + { 0x00, 0x67 }, /* HANGUL CHOSEONG KIYEOK -> 'g' */ + { 0x02, 0x6E }, /* HANGUL CHOSEONG NIEUN -> 
'n' */ + { 0x03, 0x64 }, /* HANGUL CHOSEONG TIKEUT -> 'd' */ + { 0x05, 0x72 }, /* HANGUL CHOSEONG RIEUL -> 'r' */ + { 0x06, 0x6D }, /* HANGUL CHOSEONG MIEUM -> 'm' */ + { 0x07, 0x62 }, /* HANGUL CHOSEONG PIEUP -> 'b' */ + { 0x09, 0x73 }, /* HANGUL CHOSEONG SIOS -> 's' */ + { 0x0C, 0x6A }, /* HANGUL CHOSEONG CIEUC -> 'j' */ + { 0x0E, 0x63 }, /* HANGUL CHOSEONG CHIEUCH -> 'c' */ + { 0x0F, 0x6B }, /* HANGUL CHOSEONG KHIEUKH -> 'k' */ + { 0x10, 0x74 }, /* HANGUL CHOSEONG THIEUTH -> 't' */ + { 0x11, 0x70 }, /* HANGUL CHOSEONG PHIEUPH -> 'p' */ + { 0x12, 0x68 }, /* HANGUL CHOSEONG HIEUH -> 'h' */ + { 0x35, 0x73 }, /* HANGUL CHOSEONG SIOS-IEUNG -> 's' */ + { 0x40, 0x5A }, /* HANGUL CHOSEONG PANSIOS -> 'Z' */ + { 0x41, 0x67 }, /* HANGUL CHOSEONG IEUNG-KIYEOK -> 'g' */ + { 0x42, 0x64 }, /* HANGUL CHOSEONG IEUNG-TIKEUT -> 'd' */ + { 0x43, 0x6D }, /* HANGUL CHOSEONG IEUNG-MIEUM -> 'm' */ + { 0x44, 0x62 }, /* HANGUL CHOSEONG IEUNG-PIEUP -> 'b' */ + { 0x45, 0x73 }, /* HANGUL CHOSEONG IEUNG-SIOS -> 's' */ + { 0x46, 0x5A }, /* HANGUL CHOSEONG IEUNG-PANSIOS -> 'Z' */ + { 0x48, 0x6A }, /* HANGUL CHOSEONG IEUNG-CIEUC -> 'j' */ + { 0x49, 0x63 }, /* HANGUL CHOSEONG IEUNG-CHIEUCH -> 'c' */ + { 0x4A, 0x74 }, /* HANGUL CHOSEONG IEUNG-THIEUTH -> 't' */ + { 0x4B, 0x70 }, /* HANGUL CHOSEONG IEUNG-PHIEUPH -> 'p' */ + { 0x4C, 0x4E }, /* HANGUL CHOSEONG YESIEUNG -> 'N' */ + { 0x4D, 0x6A }, /* HANGUL CHOSEONG CIEUC-IEUNG -> 'j' */ + { 0x59, 0x51 }, /* HANGUL CHOSEONG YEORINHIEUH -> 'Q' */ + { 0x61, 0x61 }, /* HANGUL JUNGSEONG A -> 'a' */ + { 0x66, 0x65 }, /* HANGUL JUNGSEONG E -> 'e' */ + { 0x69, 0x6F }, /* HANGUL JUNGSEONG O -> 'o' */ + { 0x6E, 0x75 }, /* HANGUL JUNGSEONG U -> 'u' */ + { 0x75, 0x69 }, /* HANGUL JUNGSEONG I -> 'i' */ + { 0x9E, 0x55 }, /* HANGUL JUNGSEONG ARAEA -> 'U' */ + { 0xA8, 0x67 }, /* HANGUL JONGSEONG KIYEOK -> 'g' */ + { 0xAB, 0x6E }, /* HANGUL JONGSEONG NIEUN -> 'n' */ + { 0xAE, 0x64 }, /* HANGUL JONGSEONG TIKEUT -> 'd' */ + { 0xAF, 0x6C }, /* HANGUL JONGSEONG RIEUL -> 
'l' */ + { 0xB7, 0x6D }, /* HANGUL JONGSEONG MIEUM -> 'm' */ + { 0xB8, 0x62 }, /* HANGUL JONGSEONG PIEUP -> 'b' */ + { 0xBA, 0x73 }, /* HANGUL JONGSEONG SIOS -> 's' */ + { 0xBD, 0x6A }, /* HANGUL JONGSEONG CIEUC -> 'j' */ + { 0xBE, 0x63 }, /* HANGUL JONGSEONG CHIEUCH -> 'c' */ + { 0xBF, 0x6B }, /* HANGUL JONGSEONG KHIEUKH -> 'k' */ + { 0xC0, 0x74 }, /* HANGUL JONGSEONG THIEUTH -> 't' */ + { 0xC1, 0x70 }, /* HANGUL JONGSEONG PHIEUPH -> 'p' */ + { 0xC2, 0x68 }, /* HANGUL JONGSEONG HIEUH -> 'h' */ + { 0xEB, 0x5A }, /* HANGUL JONGSEONG PANSIOS -> 'Z' */ + { 0xEC, 0x67 }, /* HANGUL JONGSEONG IEUNG-KIYEOK -> 'g' */ + { 0xF0, 0x4E }, /* HANGUL JONGSEONG YESIEUNG -> 'N' */ + { 0xF9, 0x51 }, /* HANGUL JONGSEONG YEORINHIEUH -> 'Q' */ + /* Entries for page 0x13 */ + { 0x61, 0x20 }, /* ETHIOPIC WORDSPACE -> ' ' */ + { 0x62, 0x2E }, /* ETHIOPIC FULL STOP -> '.' */ + { 0x63, 0x2C }, /* ETHIOPIC COMMA -> ',' */ + { 0x64, 0x3B }, /* ETHIOPIC SEMICOLON -> ';' */ + { 0x65, 0x3A }, /* ETHIOPIC COLON -> ':' */ + { 0x67, 0x3F }, /* ETHIOPIC QUESTION MARK -> '?' 
*/ + { 0x69, 0x31 }, /* ETHIOPIC DIGIT ONE -> '1' */ + { 0x6A, 0x32 }, /* ETHIOPIC DIGIT TWO -> '2' */ + { 0x6B, 0x33 }, /* ETHIOPIC DIGIT THREE -> '3' */ + { 0x6C, 0x34 }, /* ETHIOPIC DIGIT FOUR -> '4' */ + { 0x6D, 0x35 }, /* ETHIOPIC DIGIT FIVE -> '5' */ + { 0x6E, 0x36 }, /* ETHIOPIC DIGIT SIX -> '6' */ + { 0x6F, 0x37 }, /* ETHIOPIC DIGIT SEVEN -> '7' */ + { 0x70, 0x38 }, /* ETHIOPIC DIGIT EIGHT -> '8' */ + { 0x71, 0x39 }, /* ETHIOPIC DIGIT NINE -> '9' */ + { 0xA0, 0x61 }, /* CHEROKEE LETTER A -> 'a' */ + { 0xA1, 0x65 }, /* CHEROKEE LETTER E -> 'e' */ + { 0xA2, 0x69 }, /* CHEROKEE LETTER I -> 'i' */ + { 0xA3, 0x6F }, /* CHEROKEE LETTER O -> 'o' */ + { 0xA4, 0x75 }, /* CHEROKEE LETTER U -> 'u' */ + { 0xA5, 0x76 }, /* CHEROKEE LETTER V -> 'v' */ + { 0xCD, 0x73 }, /* CHEROKEE LETTER S -> 's' */ + /* Entries for page 0x14 */ + { 0x01, 0x65 }, /* CANADIAN SYLLABICS E -> 'e' */ + { 0x03, 0x69 }, /* CANADIAN SYLLABICS I -> 'i' */ + { 0x05, 0x6F }, /* CANADIAN SYLLABICS O -> 'o' */ + { 0x09, 0x69 }, /* CANADIAN SYLLABICS CARRIER I -> 'i' */ + { 0x0A, 0x61 }, /* CANADIAN SYLLABICS A -> 'a' */ + { 0x1D, 0x77 }, /* CANADIAN SYLLABICS Y-CREE W -> 'w' */ + { 0x1E, 0x27 }, /* CANADIAN SYLLABICS GLOTTAL STOP -> ''' */ + { 0x1F, 0x74 }, /* CANADIAN SYLLABICS FINAL ACUTE -> 't' */ + { 0x20, 0x6B }, /* CANADIAN SYLLABICS FINAL GRAVE -> 'k' */ + { 0x22, 0x73 }, /* CANADIAN SYLLABICS FINAL TOP HALF RING -> 's' */ + { 0x23, 0x6E }, /* CANADIAN SYLLABICS FINAL RIGHT HALF RING -> 'n' */ + { 0x24, 0x77 }, /* CANADIAN SYLLABICS FINAL RING -> 'w' */ + { 0x25, 0x6E }, /* CANADIAN SYLLABICS FINAL DOUBLE ACUTE -> 'n' */ + { 0x27, 0x77 }, /* CANADIAN SYLLABICS FINAL MIDDLE DOT -> 'w' */ + { 0x28, 0x63 }, /* CANADIAN SYLLABICS FINAL SHORT HORIZONTAL STROKE -> 'c' */ + { 0x29, 0x3F }, /* CANADIAN SYLLABICS FINAL PLUS -> '?' 
*/ + { 0x2A, 0x6C }, /* CANADIAN SYLLABICS FINAL DOWN TACK -> 'l' */ + { 0x49, 0x70 }, /* CANADIAN SYLLABICS P -> 'p' */ + { 0x4A, 0x70 }, /* CANADIAN SYLLABICS WEST-CREE P -> 'p' */ + { 0x4B, 0x68 }, /* CANADIAN SYLLABICS CARRIER H -> 'h' */ + { 0x66, 0x74 }, /* CANADIAN SYLLABICS T -> 't' */ + { 0x83, 0x6B }, /* CANADIAN SYLLABICS K -> 'k' */ + { 0xA1, 0x63 }, /* CANADIAN SYLLABICS C -> 'c' */ + { 0xBB, 0x6D }, /* CANADIAN SYLLABICS M -> 'm' */ + { 0xBC, 0x6D }, /* CANADIAN SYLLABICS WEST-CREE M -> 'm' */ + { 0xBE, 0x6D }, /* CANADIAN SYLLABICS ATHAPASCAN M -> 'm' */ + { 0xBF, 0x6D }, /* CANADIAN SYLLABICS SAYISI M -> 'm' */ + { 0xD0, 0x6E }, /* CANADIAN SYLLABICS N -> 'n' */ + { 0xEA, 0x00 }, /* CANADIAN SYLLABICS L -> ... */ + { 0xEC, 0x6C }, /* CANADIAN SYLLABICS MEDIAL L -> 'l' */ + /* Entries for page 0x15 */ + { 0x05, 0x73 }, /* CANADIAN SYLLABICS S -> 's' */ + { 0x06, 0x73 }, /* CANADIAN SYLLABICS ATHAPASCAN S -> 's' */ + { 0x08, 0x73 }, /* CANADIAN SYLLABICS BLACKFOOT S -> 's' */ + { 0x3E, 0x00 }, /* CANADIAN SYLLABICS Y -> ... */ + { 0x40, 0x79 }, /* CANADIAN SYLLABICS WEST-CREE Y -> 'y' */ + { 0x50, 0x00 }, /* CANADIAN SYLLABICS R -> ... 
*/ + { 0x52, 0x72 }, /* CANADIAN SYLLABICS MEDIAL R -> 'r' */ + { 0x5D, 0x66 }, /* CANADIAN SYLLABICS F -> 'f' */ + { 0x7B, 0x68 }, /* CANADIAN SYLLABICS NUNAVIK H -> 'h' */ + { 0x7C, 0x68 }, /* CANADIAN SYLLABICS NUNAVUT H -> 'h' */ + { 0x85, 0x71 }, /* CANADIAN SYLLABICS Q -> 'q' */ + { 0xAF, 0x62 }, /* CANADIAN SYLLABICS AIVILIK B -> 'b' */ + { 0xB0, 0x65 }, /* CANADIAN SYLLABICS BLACKFOOT E -> 'e' */ + { 0xB1, 0x69 }, /* CANADIAN SYLLABICS BLACKFOOT I -> 'i' */ + { 0xB2, 0x6F }, /* CANADIAN SYLLABICS BLACKFOOT O -> 'o' */ + { 0xB3, 0x61 }, /* CANADIAN SYLLABICS BLACKFOOT A -> 'a' */ + { 0xEE, 0x70 }, /* CANADIAN SYLLABICS CARRIER P -> 'p' */ + /* Entries for page 0x16 */ + { 0x46, 0x7A }, /* CANADIAN SYLLABICS CARRIER Z -> 'z' */ + { 0x47, 0x7A }, /* CANADIAN SYLLABICS CARRIER INITIAL Z -> 'z' */ + { 0x6D, 0x58 }, /* CANADIAN SYLLABICS CHI SIGN -> 'X' */ + { 0x6E, 0x2E }, /* CANADIAN SYLLABICS FULL STOP -> '.' */ + { 0x80, 0x20 }, /* OGHAM SPACE MARK -> ' ' */ + { 0x81, 0x62 }, /* OGHAM LETTER BEITH -> 'b' */ + { 0x82, 0x6C }, /* OGHAM LETTER LUIS -> 'l' */ + { 0x83, 0x66 }, /* OGHAM LETTER FEARN -> 'f' */ + { 0x84, 0x73 }, /* OGHAM LETTER SAIL -> 's' */ + { 0x85, 0x6E }, /* OGHAM LETTER NION -> 'n' */ + { 0x86, 0x68 }, /* OGHAM LETTER UATH -> 'h' */ + { 0x87, 0x64 }, /* OGHAM LETTER DAIR -> 'd' */ + { 0x88, 0x74 }, /* OGHAM LETTER TINNE -> 't' */ + { 0x89, 0x63 }, /* OGHAM LETTER COLL -> 'c' */ + { 0x8A, 0x71 }, /* OGHAM LETTER CEIRT -> 'q' */ + { 0x8B, 0x6D }, /* OGHAM LETTER MUIN -> 'm' */ + { 0x8C, 0x67 }, /* OGHAM LETTER GORT -> 'g' */ + { 0x8E, 0x7A }, /* OGHAM LETTER STRAIF -> 'z' */ + { 0x8F, 0x72 }, /* OGHAM LETTER RUIS -> 'r' */ + { 0x90, 0x61 }, /* OGHAM LETTER AILM -> 'a' */ + { 0x91, 0x6F }, /* OGHAM LETTER ONN -> 'o' */ + { 0x92, 0x75 }, /* OGHAM LETTER UR -> 'u' */ + { 0x93, 0x65 }, /* OGHAM LETTER EADHADH -> 'e' */ + { 0x94, 0x69 }, /* OGHAM LETTER IODHADH -> 'i' */ + { 0x98, 0x70 }, /* OGHAM LETTER IFIN -> 'p' */ + { 0x99, 0x78 }, /* OGHAM 
LETTER EAMHANCHOLL -> 'x' */ + { 0x9A, 0x70 }, /* OGHAM LETTER PEITH -> 'p' */ + { 0x9B, 0x3C }, /* OGHAM FEATHER MARK -> '<' */ + { 0x9C, 0x3E }, /* OGHAM REVERSED FEATHER MARK -> '>' */ + { 0xA0, 0x66 }, /* RUNIC LETTER FEHU FEOH FE F -> 'f' */ + { 0xA1, 0x76 }, /* RUNIC LETTER V -> 'v' */ + { 0xA2, 0x75 }, /* RUNIC LETTER URUZ UR U -> 'u' */ + { 0xA4, 0x79 }, /* RUNIC LETTER Y -> 'y' */ + { 0xA5, 0x77 }, /* RUNIC LETTER W -> 'w' */ + { 0xA8, 0x61 }, /* RUNIC LETTER ANSUZ A -> 'a' */ + { 0xA9, 0x6F }, /* RUNIC LETTER OS O -> 'o' */ + { 0xAC, 0x00 }, /* RUNIC LETTER LONG-BRANCH-OSS O -> ... */ + { 0xAE, 0x6F }, /* RUNIC LETTER O -> 'o' */ + { 0xB1, 0x72 }, /* RUNIC LETTER RAIDO RAD REID R -> 'r' */ + { 0xB2, 0x6B }, /* RUNIC LETTER KAUNA -> 'k' */ + { 0xB3, 0x63 }, /* RUNIC LETTER CEN -> 'c' */ + { 0xB4, 0x6B }, /* RUNIC LETTER KAUN K -> 'k' */ + { 0xB5, 0x67 }, /* RUNIC LETTER G -> 'g' */ + { 0xB7, 0x67 }, /* RUNIC LETTER GEBO GYFU G -> 'g' */ + { 0xB8, 0x67 }, /* RUNIC LETTER GAR -> 'g' */ + { 0xB9, 0x77 }, /* RUNIC LETTER WUNJO WYNN W -> 'w' */ + { 0xBA, 0x00 }, /* RUNIC LETTER HAGLAZ H -> ... */ + { 0xBD, 0x68 }, /* RUNIC LETTER SHORT-TWIG-HAGALL H -> 'h' */ + { 0xBE, 0x00 }, /* RUNIC LETTER NAUDIZ NYD NAUD N -> ... */ + { 0xC0, 0x6E }, /* RUNIC LETTER DOTTED-N -> 'n' */ + { 0xC1, 0x69 }, /* RUNIC LETTER ISAZ IS ISS I -> 'i' */ + { 0xC2, 0x65 }, /* RUNIC LETTER E -> 'e' */ + { 0xC3, 0x6A }, /* RUNIC LETTER JERAN J -> 'j' */ + { 0xC4, 0x67 }, /* RUNIC LETTER GER -> 'g' */ + { 0xC6, 0x61 }, /* RUNIC LETTER SHORT-TWIG-AR A -> 'a' */ + { 0xC8, 0x70 }, /* RUNIC LETTER PERTHO PEORTH P -> 'p' */ + { 0xC9, 0x7A }, /* RUNIC LETTER ALGIZ EOLHX -> 'z' */ + { 0xCA, 0x00 }, /* RUNIC LETTER SOWILO S -> ... 
*/ + { 0xCC, 0x73 }, /* RUNIC LETTER SHORT-TWIG-SOL S -> 's' */ + { 0xCD, 0x63 }, /* RUNIC LETTER C -> 'c' */ + { 0xCE, 0x7A }, /* RUNIC LETTER Z -> 'z' */ + { 0xCF, 0x74 }, /* RUNIC LETTER TIWAZ TIR TYR T -> 't' */ + { 0xD0, 0x74 }, /* RUNIC LETTER SHORT-TWIG-TYR T -> 't' */ + { 0xD1, 0x64 }, /* RUNIC LETTER D -> 'd' */ + { 0xD2, 0x62 }, /* RUNIC LETTER BERKANAN BEORC BJARKAN B -> 'b' */ + { 0xD3, 0x62 }, /* RUNIC LETTER SHORT-TWIG-BJARKAN B -> 'b' */ + { 0xD4, 0x70 }, /* RUNIC LETTER DOTTED-P -> 'p' */ + { 0xD5, 0x70 }, /* RUNIC LETTER OPEN-P -> 'p' */ + { 0xD6, 0x65 }, /* RUNIC LETTER EHWAZ EH E -> 'e' */ + { 0xD7, 0x00 }, /* RUNIC LETTER MANNAZ MAN M -> ... */ + { 0xD9, 0x6D }, /* RUNIC LETTER SHORT-TWIG-MADR M -> 'm' */ + { 0xDA, 0x6C }, /* RUNIC LETTER LAUKAZ LAGU LOGR L -> 'l' */ + { 0xDB, 0x6C }, /* RUNIC LETTER DOTTED-L -> 'l' */ + { 0xDE, 0x64 }, /* RUNIC LETTER DAGAZ DAEG D -> 'd' */ + { 0xDF, 0x6F }, /* RUNIC LETTER OTHALAN ETHEL O -> 'o' */ + { 0xE5, 0x73 }, /* RUNIC LETTER STAN -> 's' */ + { 0xE9, 0x71 }, /* RUNIC LETTER Q -> 'q' */ + { 0xEA, 0x78 }, /* RUNIC LETTER X -> 'x' */ + { 0xEB, 0x2E }, /* RUNIC SINGLE PUNCTUATION -> '.' 
*/ + { 0xEC, 0x3A }, /* RUNIC MULTIPLE PUNCTUATION -> ':' */ + { 0xED, 0x2B }, /* RUNIC CROSS PUNCTUATION -> '+' */ + /* Entries for page 0x17 */ + { 0x80, 0x6B }, /* KHMER LETTER KA -> 'k' */ + { 0x82, 0x67 }, /* KHMER LETTER KO -> 'g' */ + { 0x85, 0x63 }, /* KHMER LETTER CA -> 'c' */ + { 0x87, 0x6A }, /* KHMER LETTER CO -> 'j' */ + { 0x8A, 0x74 }, /* KHMER LETTER DA -> 't' */ + { 0x8C, 0x64 }, /* KHMER LETTER DO -> 'd' */ + { 0x8F, 0x74 }, /* KHMER LETTER TA -> 't' */ + { 0x91, 0x64 }, /* KHMER LETTER TO -> 'd' */ + { 0x93, 0x6E }, /* KHMER LETTER NO -> 'n' */ + { 0x94, 0x70 }, /* KHMER LETTER BA -> 'p' */ + { 0x96, 0x62 }, /* KHMER LETTER PO -> 'b' */ + { 0x98, 0x6D }, /* KHMER LETTER MO -> 'm' */ + { 0x99, 0x79 }, /* KHMER LETTER YO -> 'y' */ + { 0x9A, 0x72 }, /* KHMER LETTER RO -> 'r' */ + { 0x9B, 0x6C }, /* KHMER LETTER LO -> 'l' */ + { 0x9C, 0x76 }, /* KHMER LETTER VO -> 'v' */ + { 0x9F, 0x73 }, /* KHMER LETTER SA -> 's' */ + { 0xA0, 0x68 }, /* KHMER LETTER HA -> 'h' */ + { 0xA1, 0x6C }, /* KHMER LETTER LA -> 'l' */ + { 0xA2, 0x71 }, /* KHMER LETTER QA -> 'q' */ + { 0xA3, 0x61 }, /* KHMER INDEPENDENT VOWEL QAQ -> 'a' */ + { 0xA5, 0x69 }, /* KHMER INDEPENDENT VOWEL QI -> 'i' */ + { 0xA7, 0x75 }, /* KHMER INDEPENDENT VOWEL QU -> 'u' */ + { 0xAF, 0x65 }, /* KHMER INDEPENDENT VOWEL QE -> 'e' */ + { 0xB4, 0x61 }, /* KHMER VOWEL INHERENT AQ -> 'a' */ + { 0xB7, 0x69 }, /* KHMER VOWEL SIGN I -> 'i' */ + { 0xB9, 0x79 }, /* KHMER VOWEL SIGN Y -> 'y' */ + { 0xBB, 0x75 }, /* KHMER VOWEL SIGN U -> 'u' */ + { 0xC1, 0x65 }, /* KHMER VOWEL SIGN E -> 'e' */ + { 0xC6, 0x4D }, /* KHMER SIGN NIKAHIT -> 'M' */ + { 0xC7, 0x48 }, /* KHMER SIGN REAHMUK -> 'H' */ + { 0xCC, 0x72 }, /* KHMER SIGN ROBAT -> 'r' */ + { 0xCE, 0x21 }, /* KHMER SIGN KAKABAT -> '!' */ + { 0xD4, 0x2E }, /* KHMER SIGN KHAN -> '.' 
*/ + { 0xD6, 0x3A }, /* KHMER SIGN CAMNUC PII KUUH -> ':' */ + { 0xD7, 0x2B }, /* KHMER SIGN LEK TOO -> '+' */ + { 0xDC, 0x27 }, /* KHMER SIGN AVAKRAHASANYA -> ''' */ + { 0xE0, 0x30 }, /* KHMER DIGIT ZERO -> '0' */ + { 0xE1, 0x31 }, /* KHMER DIGIT ONE -> '1' */ + { 0xE2, 0x32 }, /* KHMER DIGIT TWO -> '2' */ + { 0xE3, 0x33 }, /* KHMER DIGIT THREE -> '3' */ + { 0xE4, 0x34 }, /* KHMER DIGIT FOUR -> '4' */ + { 0xE5, 0x35 }, /* KHMER DIGIT FIVE -> '5' */ + { 0xE6, 0x36 }, /* KHMER DIGIT SIX -> '6' */ + { 0xE7, 0x37 }, /* KHMER DIGIT SEVEN -> '7' */ + { 0xE8, 0x38 }, /* KHMER DIGIT EIGHT -> '8' */ + { 0xE9, 0x39 }, /* KHMER DIGIT NINE -> '9' */ + /* Entries for page 0x18 */ + { 0x07, 0x2D }, /* MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER -> '-' */ + { 0x10, 0x30 }, /* MONGOLIAN DIGIT ZERO -> '0' */ + { 0x11, 0x31 }, /* MONGOLIAN DIGIT ONE -> '1' */ + { 0x12, 0x32 }, /* MONGOLIAN DIGIT TWO -> '2' */ + { 0x13, 0x33 }, /* MONGOLIAN DIGIT THREE -> '3' */ + { 0x14, 0x34 }, /* MONGOLIAN DIGIT FOUR -> '4' */ + { 0x15, 0x35 }, /* MONGOLIAN DIGIT FIVE -> '5' */ + { 0x16, 0x36 }, /* MONGOLIAN DIGIT SIX -> '6' */ + { 0x17, 0x37 }, /* MONGOLIAN DIGIT SEVEN -> '7' */ + { 0x18, 0x38 }, /* MONGOLIAN DIGIT EIGHT -> '8' */ + { 0x19, 0x39 }, /* MONGOLIAN DIGIT NINE -> '9' */ + { 0x20, 0x61 }, /* MONGOLIAN LETTER A -> 'a' */ + { 0x21, 0x65 }, /* MONGOLIAN LETTER E -> 'e' */ + { 0x22, 0x69 }, /* MONGOLIAN LETTER I -> 'i' */ + { 0x23, 0x6F }, /* MONGOLIAN LETTER O -> 'o' */ + { 0x24, 0x75 }, /* MONGOLIAN LETTER U -> 'u' */ + { 0x25, 0x4F }, /* MONGOLIAN LETTER OE -> 'O' */ + { 0x26, 0x55 }, /* MONGOLIAN LETTER UE -> 'U' */ + { 0x28, 0x6E }, /* MONGOLIAN LETTER NA -> 'n' */ + { 0x2A, 0x62 }, /* MONGOLIAN LETTER BA -> 'b' */ + { 0x2B, 0x70 }, /* MONGOLIAN LETTER PA -> 'p' */ + { 0x2C, 0x71 }, /* MONGOLIAN LETTER QA -> 'q' */ + { 0x2D, 0x67 }, /* MONGOLIAN LETTER GA -> 'g' */ + { 0x2E, 0x6D }, /* MONGOLIAN LETTER MA -> 'm' */ + { 0x2F, 0x6C }, /* MONGOLIAN LETTER LA -> 'l' */ + { 0x30, 0x73 }, /* 
MONGOLIAN LETTER SA -> 's' */ + { 0x32, 0x74 }, /* MONGOLIAN LETTER TA -> 't' */ + { 0x33, 0x64 }, /* MONGOLIAN LETTER DA -> 'd' */ + { 0x35, 0x6A }, /* MONGOLIAN LETTER JA -> 'j' */ + { 0x36, 0x79 }, /* MONGOLIAN LETTER YA -> 'y' */ + { 0x37, 0x72 }, /* MONGOLIAN LETTER RA -> 'r' */ + { 0x38, 0x77 }, /* MONGOLIAN LETTER WA -> 'w' */ + { 0x39, 0x66 }, /* MONGOLIAN LETTER FA -> 'f' */ + { 0x3A, 0x6B }, /* MONGOLIAN LETTER KA -> 'k' */ + { 0x3D, 0x7A }, /* MONGOLIAN LETTER ZA -> 'z' */ + { 0x3E, 0x68 }, /* MONGOLIAN LETTER HAA -> 'h' */ + { 0x43, 0x2D }, /* MONGOLIAN LETTER TODO LONG VOWEL SIGN -> '-' */ + { 0x44, 0x65 }, /* MONGOLIAN LETTER TODO E -> 'e' */ + { 0x45, 0x69 }, /* MONGOLIAN LETTER TODO I -> 'i' */ + { 0x46, 0x6F }, /* MONGOLIAN LETTER TODO O -> 'o' */ + { 0x47, 0x75 }, /* MONGOLIAN LETTER TODO U -> 'u' */ + { 0x48, 0x4F }, /* MONGOLIAN LETTER TODO OE -> 'O' */ + { 0x49, 0x55 }, /* MONGOLIAN LETTER TODO UE -> 'U' */ + { 0x4B, 0x62 }, /* MONGOLIAN LETTER TODO BA -> 'b' */ + { 0x4C, 0x70 }, /* MONGOLIAN LETTER TODO PA -> 'p' */ + { 0x4D, 0x71 }, /* MONGOLIAN LETTER TODO QA -> 'q' */ + { 0x4E, 0x67 }, /* MONGOLIAN LETTER TODO GA -> 'g' */ + { 0x4F, 0x6D }, /* MONGOLIAN LETTER TODO MA -> 'm' */ + { 0x50, 0x74 }, /* MONGOLIAN LETTER TODO TA -> 't' */ + { 0x51, 0x64 }, /* MONGOLIAN LETTER TODO DA -> 'd' */ + { 0x53, 0x6A }, /* MONGOLIAN LETTER TODO JA -> 'j' */ + { 0x55, 0x79 }, /* MONGOLIAN LETTER TODO YA -> 'y' */ + { 0x56, 0x77 }, /* MONGOLIAN LETTER TODO WA -> 'w' */ + { 0x57, 0x6B }, /* MONGOLIAN LETTER TODO KA -> 'k' */ + { 0x58, 0x67 }, /* MONGOLIAN LETTER TODO GAA -> 'g' */ + { 0x59, 0x68 }, /* MONGOLIAN LETTER TODO HAA -> 'h' */ + { 0x5D, 0x65 }, /* MONGOLIAN LETTER SIBE E -> 'e' */ + { 0x5E, 0x69 }, /* MONGOLIAN LETTER SIBE I -> 'i' */ + { 0x60, 0x55 }, /* MONGOLIAN LETTER SIBE UE -> 'U' */ + { 0x61, 0x75 }, /* MONGOLIAN LETTER SIBE U -> 'u' */ + { 0x63, 0x6B }, /* MONGOLIAN LETTER SIBE KA -> 'k' */ + { 0x64, 0x67 }, /* MONGOLIAN LETTER SIBE GA -> 
'g' */ + { 0x65, 0x68 }, /* MONGOLIAN LETTER SIBE HA -> 'h' */ + { 0x66, 0x70 }, /* MONGOLIAN LETTER SIBE PA -> 'p' */ + { 0x68, 0x74 }, /* MONGOLIAN LETTER SIBE TA -> 't' */ + { 0x69, 0x64 }, /* MONGOLIAN LETTER SIBE DA -> 'd' */ + { 0x6A, 0x6A }, /* MONGOLIAN LETTER SIBE JA -> 'j' */ + { 0x6B, 0x66 }, /* MONGOLIAN LETTER SIBE FA -> 'f' */ + { 0x6C, 0x67 }, /* MONGOLIAN LETTER SIBE GAA -> 'g' */ + { 0x6D, 0x68 }, /* MONGOLIAN LETTER SIBE HAA -> 'h' */ + { 0x6F, 0x7A }, /* MONGOLIAN LETTER SIBE ZA -> 'z' */ + { 0x70, 0x72 }, /* MONGOLIAN LETTER SIBE RAA -> 'r' */ + { 0x73, 0x69 }, /* MONGOLIAN LETTER MANCHU I -> 'i' */ + { 0x74, 0x6B }, /* MONGOLIAN LETTER MANCHU KA -> 'k' */ + { 0x75, 0x72 }, /* MONGOLIAN LETTER MANCHU RA -> 'r' */ + { 0x76, 0x66 }, /* MONGOLIAN LETTER MANCHU FA -> 'f' */ + { 0x81, 0x48 }, /* MONGOLIAN LETTER ALI GALI VISARGA ONE -> 'H' */ + { 0x82, 0x58 }, /* MONGOLIAN LETTER ALI GALI DAMARU -> 'X' */ + { 0x83, 0x57 }, /* MONGOLIAN LETTER ALI GALI UBADAMA -> 'W' */ + { 0x84, 0x4D }, /* MONGOLIAN LETTER ALI GALI INVERTED UBADAMA -> 'M' */ + { 0x87, 0x61 }, /* MONGOLIAN LETTER ALI GALI A -> 'a' */ + { 0x88, 0x69 }, /* MONGOLIAN LETTER ALI GALI I -> 'i' */ + { 0x89, 0x6B }, /* MONGOLIAN LETTER ALI GALI KA -> 'k' */ + { 0x8B, 0x63 }, /* MONGOLIAN LETTER ALI GALI CA -> 'c' */ + { 0x90, 0x74 }, /* MONGOLIAN LETTER ALI GALI TA -> 't' */ + { 0x91, 0x64 }, /* MONGOLIAN LETTER ALI GALI DA -> 'd' */ + { 0x92, 0x70 }, /* MONGOLIAN LETTER ALI GALI PA -> 'p' */ + { 0x96, 0x7A }, /* MONGOLIAN LETTER ALI GALI ZA -> 'z' */ + { 0x97, 0x61 }, /* MONGOLIAN LETTER ALI GALI AH -> 'a' */ + { 0x98, 0x74 }, /* MONGOLIAN LETTER TODO ALI GALI TA -> 't' */ + { 0x9C, 0x63 }, /* MONGOLIAN LETTER MANCHU ALI GALI CA -> 'c' */ + { 0xA0, 0x74 }, /* MONGOLIAN LETTER MANCHU ALI GALI TA -> 't' */ + { 0xA5, 0x7A }, /* MONGOLIAN LETTER MANCHU ALI GALI ZA -> 'z' */ + { 0xA6, 0x75 }, /* MONGOLIAN LETTER ALI GALI HALF U -> 'u' */ + { 0xA7, 0x79 }, /* MONGOLIAN LETTER ALI GALI HALF YA -> 
'y' */ + { 0xA9, 0x27 }, /* MONGOLIAN LETTER ALI GALI DAGALGA -> ''' */ + /* Entries for page 0x1D */ + { 0x00, 0x41 }, /* LATIN LETTER SMALL CAPITAL A -> 'A' */ + { 0x03, 0x42 }, /* LATIN LETTER SMALL CAPITAL BARRED B -> 'B' */ + { 0x04, 0x43 }, /* LATIN LETTER SMALL CAPITAL C -> 'C' */ + { 0x05, 0x44 }, /* LATIN LETTER SMALL CAPITAL D -> 'D' */ + { 0x06, 0x44 }, /* LATIN LETTER SMALL CAPITAL ETH -> 'D' */ + { 0x07, 0x45 }, /* LATIN LETTER SMALL CAPITAL E -> 'E' */ + { 0x08, 0x65 }, /* LATIN SMALL LETTER TURNED OPEN E -> 'e' */ + { 0x09, 0x69 }, /* LATIN SMALL LETTER TURNED I -> 'i' */ + { 0x0A, 0x4A }, /* LATIN LETTER SMALL CAPITAL J -> 'J' */ + { 0x0B, 0x4B }, /* LATIN LETTER SMALL CAPITAL K -> 'K' */ + { 0x0C, 0x4C }, /* LATIN LETTER SMALL CAPITAL L WITH STROKE -> 'L' */ + { 0x0D, 0x4D }, /* LATIN LETTER SMALL CAPITAL M -> 'M' */ + { 0x0E, 0x4E }, /* LATIN LETTER SMALL CAPITAL REVERSED N -> 'N' */ + { 0x0F, 0x4F }, /* LATIN LETTER SMALL CAPITAL O -> 'O' */ + { 0x11, 0x4F }, /* LATIN SMALL LETTER SIDEWAYS O -> 'O' */ + { 0x13, 0x4F }, /* LATIN SMALL LETTER SIDEWAYS O WITH STROKE -> 'O' */ + { 0x18, 0x50 }, /* LATIN LETTER SMALL CAPITAL P -> 'P' */ + { 0x19, 0x52 }, /* LATIN LETTER SMALL CAPITAL REVERSED R -> 'R' */ + { 0x1A, 0x52 }, /* LATIN LETTER SMALL CAPITAL TURNED R -> 'R' */ + { 0x1B, 0x54 }, /* LATIN LETTER SMALL CAPITAL T -> 'T' */ + { 0x1C, 0x55 }, /* LATIN LETTER SMALL CAPITAL U -> 'U' */ + { 0x1D, 0x75 }, /* LATIN SMALL LETTER SIDEWAYS U -> 'u' */ + { 0x1E, 0x75 }, /* LATIN SMALL LETTER SIDEWAYS DIAERESIZED U -> 'u' */ + { 0x1F, 0x6D }, /* LATIN SMALL LETTER SIDEWAYS TURNED M -> 'm' */ + { 0x20, 0x56 }, /* LATIN LETTER SMALL CAPITAL V -> 'V' */ + { 0x21, 0x57 }, /* LATIN LETTER SMALL CAPITAL W -> 'W' */ + { 0x22, 0x5A }, /* LATIN LETTER SMALL CAPITAL Z -> 'Z' */ + { 0x2C, 0x41 }, /* MODIFIER LETTER CAPITAL A -> 'A' */ + { 0x2E, 0x42 }, /* MODIFIER LETTER CAPITAL B -> 'B' */ + { 0x2F, 0x42 }, /* MODIFIER LETTER CAPITAL BARRED B -> 'B' */ + { 0x30, 0x44 
}, /* MODIFIER LETTER CAPITAL D -> 'D' */ + { 0x31, 0x45 }, /* MODIFIER LETTER CAPITAL E -> 'E' */ + { 0x32, 0x45 }, /* MODIFIER LETTER CAPITAL REVERSED E -> 'E' */ + { 0x33, 0x47 }, /* MODIFIER LETTER CAPITAL G -> 'G' */ + { 0x34, 0x48 }, /* MODIFIER LETTER CAPITAL H -> 'H' */ + { 0x35, 0x49 }, /* MODIFIER LETTER CAPITAL I -> 'I' */ + { 0x36, 0x4A }, /* MODIFIER LETTER CAPITAL J -> 'J' */ + { 0x37, 0x4B }, /* MODIFIER LETTER CAPITAL K -> 'K' */ + { 0x38, 0x4C }, /* MODIFIER LETTER CAPITAL L -> 'L' */ + { 0x39, 0x4D }, /* MODIFIER LETTER CAPITAL M -> 'M' */ + { 0x3A, 0x4E }, /* MODIFIER LETTER CAPITAL N -> 'N' */ + { 0x3B, 0x4E }, /* MODIFIER LETTER CAPITAL REVERSED N -> 'N' */ + { 0x3C, 0x4F }, /* MODIFIER LETTER CAPITAL O -> 'O' */ + { 0x3E, 0x50 }, /* MODIFIER LETTER CAPITAL P -> 'P' */ + { 0x3F, 0x52 }, /* MODIFIER LETTER CAPITAL R -> 'R' */ + { 0x40, 0x54 }, /* MODIFIER LETTER CAPITAL T -> 'T' */ + { 0x41, 0x55 }, /* MODIFIER LETTER CAPITAL U -> 'U' */ + { 0x42, 0x57 }, /* MODIFIER LETTER CAPITAL W -> 'W' */ + { 0x43, 0x00 }, /* MODIFIER LETTER SMALL A -> ... 
*/ + { 0x45, 0x61 }, /* MODIFIER LETTER SMALL ALPHA -> 'a' */ + { 0x47, 0x62 }, /* MODIFIER LETTER SMALL B -> 'b' */ + { 0x48, 0x64 }, /* MODIFIER LETTER SMALL D -> 'd' */ + { 0x49, 0x65 }, /* MODIFIER LETTER SMALL E -> 'e' */ + { 0x4B, 0x65 }, /* MODIFIER LETTER SMALL OPEN E -> 'e' */ + { 0x4C, 0x65 }, /* MODIFIER LETTER SMALL TURNED OPEN E -> 'e' */ + { 0x4D, 0x67 }, /* MODIFIER LETTER SMALL G -> 'g' */ + { 0x4E, 0x69 }, /* MODIFIER LETTER SMALL TURNED I -> 'i' */ + { 0x4F, 0x6B }, /* MODIFIER LETTER SMALL K -> 'k' */ + { 0x50, 0x6D }, /* MODIFIER LETTER SMALL M -> 'm' */ + { 0x52, 0x6F }, /* MODIFIER LETTER SMALL O -> 'o' */ + { 0x56, 0x70 }, /* MODIFIER LETTER SMALL P -> 'p' */ + { 0x57, 0x74 }, /* MODIFIER LETTER SMALL T -> 't' */ + { 0x58, 0x75 }, /* MODIFIER LETTER SMALL U -> 'u' */ + { 0x59, 0x75 }, /* MODIFIER LETTER SMALL SIDEWAYS U -> 'u' */ + { 0x5A, 0x6D }, /* MODIFIER LETTER SMALL TURNED M -> 'm' */ + { 0x5B, 0x76 }, /* MODIFIER LETTER SMALL V -> 'v' */ + { 0x5D, 0x62 }, /* MODIFIER LETTER SMALL BETA -> 'b' */ + { 0x5E, 0x67 }, /* MODIFIER LETTER SMALL GREEK GAMMA -> 'g' */ + { 0x5F, 0x64 }, /* MODIFIER LETTER SMALL DELTA -> 'd' */ + { 0x60, 0x66 }, /* MODIFIER LETTER SMALL GREEK PHI -> 'f' */ + { 0x62, 0x69 }, /* LATIN SUBSCRIPT SMALL LETTER I -> 'i' */ + { 0x63, 0x72 }, /* LATIN SUBSCRIPT SMALL LETTER R -> 'r' */ + { 0x64, 0x75 }, /* LATIN SUBSCRIPT SMALL LETTER U -> 'u' */ + { 0x65, 0x76 }, /* LATIN SUBSCRIPT SMALL LETTER V -> 'v' */ + { 0x66, 0x62 }, /* GREEK SUBSCRIPT SMALL LETTER BETA -> 'b' */ + { 0x67, 0x67 }, /* GREEK SUBSCRIPT SMALL LETTER GAMMA -> 'g' */ + { 0x68, 0x72 }, /* GREEK SUBSCRIPT SMALL LETTER RHO -> 'r' */ + { 0x69, 0x66 }, /* GREEK SUBSCRIPT SMALL LETTER PHI -> 'f' */ + { 0x6C, 0x62 }, /* LATIN SMALL LETTER B WITH MIDDLE TILDE -> 'b' */ + { 0x6D, 0x64 }, /* LATIN SMALL LETTER D WITH MIDDLE TILDE -> 'd' */ + { 0x6E, 0x66 }, /* LATIN SMALL LETTER F WITH MIDDLE TILDE -> 'f' */ + { 0x6F, 0x6D }, /* LATIN SMALL LETTER M WITH MIDDLE 
TILDE -> 'm' */ + { 0x70, 0x6E }, /* LATIN SMALL LETTER N WITH MIDDLE TILDE -> 'n' */ + { 0x71, 0x70 }, /* LATIN SMALL LETTER P WITH MIDDLE TILDE -> 'p' */ + { 0x72, 0x72 }, /* LATIN SMALL LETTER R WITH MIDDLE TILDE -> 'r' */ + { 0x73, 0x72 }, /* LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE -> 'r' */ + { 0x74, 0x73 }, /* LATIN SMALL LETTER S WITH MIDDLE TILDE -> 's' */ + { 0x75, 0x74 }, /* LATIN SMALL LETTER T WITH MIDDLE TILDE -> 't' */ + { 0x76, 0x7A }, /* LATIN SMALL LETTER Z WITH MIDDLE TILDE -> 'z' */ + { 0x77, 0x67 }, /* LATIN SMALL LETTER TURNED G -> 'g' */ + { 0x7D, 0x70 }, /* LATIN SMALL LETTER P WITH STROKE -> 'p' */ + { 0x80, 0x62 }, /* LATIN SMALL LETTER B WITH PALATAL HOOK -> 'b' */ + { 0x81, 0x64 }, /* LATIN SMALL LETTER D WITH PALATAL HOOK -> 'd' */ + { 0x82, 0x66 }, /* LATIN SMALL LETTER F WITH PALATAL HOOK -> 'f' */ + { 0x83, 0x67 }, /* LATIN SMALL LETTER G WITH PALATAL HOOK -> 'g' */ + { 0x84, 0x6B }, /* LATIN SMALL LETTER K WITH PALATAL HOOK -> 'k' */ + { 0x85, 0x6C }, /* LATIN SMALL LETTER L WITH PALATAL HOOK -> 'l' */ + { 0x86, 0x6D }, /* LATIN SMALL LETTER M WITH PALATAL HOOK -> 'm' */ + { 0x87, 0x6E }, /* LATIN SMALL LETTER N WITH PALATAL HOOK -> 'n' */ + { 0x88, 0x70 }, /* LATIN SMALL LETTER P WITH PALATAL HOOK -> 'p' */ + { 0x89, 0x72 }, /* LATIN SMALL LETTER R WITH PALATAL HOOK -> 'r' */ + { 0x8A, 0x73 }, /* LATIN SMALL LETTER S WITH PALATAL HOOK -> 's' */ + { 0x8C, 0x76 }, /* LATIN SMALL LETTER V WITH PALATAL HOOK -> 'v' */ + { 0x8D, 0x78 }, /* LATIN SMALL LETTER X WITH PALATAL HOOK -> 'x' */ + { 0x8E, 0x7A }, /* LATIN SMALL LETTER Z WITH PALATAL HOOK -> 'z' */ + /* Entries for page 0x1E */ + { 0x00, 0x41 }, /* LATIN CAPITAL LETTER A WITH RING BELOW -> 'A' */ + { 0x01, 0x61 }, /* LATIN SMALL LETTER A WITH RING BELOW -> 'a' */ + { 0x02, 0x42 }, /* LATIN CAPITAL LETTER B WITH DOT ABOVE -> 'B' */ + { 0x03, 0x62 }, /* LATIN SMALL LETTER B WITH DOT ABOVE -> 'b' */ + { 0x04, 0x42 }, /* LATIN CAPITAL LETTER B WITH DOT BELOW -> 'B' */ + { 
0x05, 0x62 }, /* LATIN SMALL LETTER B WITH DOT BELOW -> 'b' */ + { 0x06, 0x42 }, /* LATIN CAPITAL LETTER B WITH LINE BELOW -> 'B' */ + { 0x07, 0x62 }, /* LATIN SMALL LETTER B WITH LINE BELOW -> 'b' */ + { 0x08, 0x43 }, /* LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE -> 'C' */ + { 0x09, 0x63 }, /* LATIN SMALL LETTER C WITH CEDILLA AND ACUTE -> 'c' */ + { 0x0A, 0x44 }, /* LATIN CAPITAL LETTER D WITH DOT ABOVE -> 'D' */ + { 0x0B, 0x64 }, /* LATIN SMALL LETTER D WITH DOT ABOVE -> 'd' */ + { 0x0C, 0x44 }, /* LATIN CAPITAL LETTER D WITH DOT BELOW -> 'D' */ + { 0x0D, 0x64 }, /* LATIN SMALL LETTER D WITH DOT BELOW -> 'd' */ + { 0x0E, 0x44 }, /* LATIN CAPITAL LETTER D WITH LINE BELOW -> 'D' */ + { 0x0F, 0x64 }, /* LATIN SMALL LETTER D WITH LINE BELOW -> 'd' */ + { 0x10, 0x44 }, /* LATIN CAPITAL LETTER D WITH CEDILLA -> 'D' */ + { 0x11, 0x64 }, /* LATIN SMALL LETTER D WITH CEDILLA -> 'd' */ + { 0x12, 0x44 }, /* LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW -> 'D' */ + { 0x13, 0x64 }, /* LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW -> 'd' */ + { 0x14, 0x45 }, /* LATIN CAPITAL LETTER E WITH MACRON AND GRAVE -> 'E' */ + { 0x15, 0x65 }, /* LATIN SMALL LETTER E WITH MACRON AND GRAVE -> 'e' */ + { 0x16, 0x45 }, /* LATIN CAPITAL LETTER E WITH MACRON AND ACUTE -> 'E' */ + { 0x17, 0x65 }, /* LATIN SMALL LETTER E WITH MACRON AND ACUTE -> 'e' */ + { 0x18, 0x45 }, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW -> 'E' */ + { 0x19, 0x65 }, /* LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW -> 'e' */ + { 0x1A, 0x45 }, /* LATIN CAPITAL LETTER E WITH TILDE BELOW -> 'E' */ + { 0x1B, 0x65 }, /* LATIN SMALL LETTER E WITH TILDE BELOW -> 'e' */ + { 0x1C, 0x45 }, /* LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE -> 'E' */ + { 0x1D, 0x65 }, /* LATIN SMALL LETTER E WITH CEDILLA AND BREVE -> 'e' */ + { 0x1E, 0x46 }, /* LATIN CAPITAL LETTER F WITH DOT ABOVE -> 'F' */ + { 0x1F, 0x66 }, /* LATIN SMALL LETTER F WITH DOT ABOVE -> 'f' */ + { 0x20, 0x47 }, /* LATIN CAPITAL LETTER G WITH MACRON -> 'G' */ + { 0x21, 
0x67 }, /* LATIN SMALL LETTER G WITH MACRON -> 'g' */ + { 0x22, 0x48 }, /* LATIN CAPITAL LETTER H WITH DOT ABOVE -> 'H' */ + { 0x23, 0x68 }, /* LATIN SMALL LETTER H WITH DOT ABOVE -> 'h' */ + { 0x24, 0x48 }, /* LATIN CAPITAL LETTER H WITH DOT BELOW -> 'H' */ + { 0x25, 0x68 }, /* LATIN SMALL LETTER H WITH DOT BELOW -> 'h' */ + { 0x26, 0x48 }, /* LATIN CAPITAL LETTER H WITH DIAERESIS -> 'H' */ + { 0x27, 0x68 }, /* LATIN SMALL LETTER H WITH DIAERESIS -> 'h' */ + { 0x28, 0x48 }, /* LATIN CAPITAL LETTER H WITH CEDILLA -> 'H' */ + { 0x29, 0x68 }, /* LATIN SMALL LETTER H WITH CEDILLA -> 'h' */ + { 0x2A, 0x48 }, /* LATIN CAPITAL LETTER H WITH BREVE BELOW -> 'H' */ + { 0x2B, 0x68 }, /* LATIN SMALL LETTER H WITH BREVE BELOW -> 'h' */ + { 0x2C, 0x49 }, /* LATIN CAPITAL LETTER I WITH TILDE BELOW -> 'I' */ + { 0x2D, 0x69 }, /* LATIN SMALL LETTER I WITH TILDE BELOW -> 'i' */ + { 0x2E, 0x49 }, /* LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE -> 'I' */ + { 0x2F, 0x69 }, /* LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE -> 'i' */ + { 0x30, 0x4B }, /* LATIN CAPITAL LETTER K WITH ACUTE -> 'K' */ + { 0x31, 0x6B }, /* LATIN SMALL LETTER K WITH ACUTE -> 'k' */ + { 0x32, 0x4B }, /* LATIN CAPITAL LETTER K WITH DOT BELOW -> 'K' */ + { 0x33, 0x6B }, /* LATIN SMALL LETTER K WITH DOT BELOW -> 'k' */ + { 0x34, 0x4B }, /* LATIN CAPITAL LETTER K WITH LINE BELOW -> 'K' */ + { 0x35, 0x6B }, /* LATIN SMALL LETTER K WITH LINE BELOW -> 'k' */ + { 0x36, 0x4C }, /* LATIN CAPITAL LETTER L WITH DOT BELOW -> 'L' */ + { 0x37, 0x6C }, /* LATIN SMALL LETTER L WITH DOT BELOW -> 'l' */ + { 0x38, 0x4C }, /* LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON -> 'L' */ + { 0x39, 0x6C }, /* LATIN SMALL LETTER L WITH DOT BELOW AND MACRON -> 'l' */ + { 0x3A, 0x4C }, /* LATIN CAPITAL LETTER L WITH LINE BELOW -> 'L' */ + { 0x3B, 0x6C }, /* LATIN SMALL LETTER L WITH LINE BELOW -> 'l' */ + { 0x3C, 0x4C }, /* LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW -> 'L' */ + { 0x3D, 0x6C }, /* LATIN SMALL LETTER L WITH CIRCUMFLEX 
BELOW -> 'l' */ + { 0x3E, 0x4D }, /* LATIN CAPITAL LETTER M WITH ACUTE -> 'M' */ + { 0x3F, 0x6D }, /* LATIN SMALL LETTER M WITH ACUTE -> 'm' */ + { 0x40, 0x4D }, /* LATIN CAPITAL LETTER M WITH DOT ABOVE -> 'M' */ + { 0x41, 0x6D }, /* LATIN SMALL LETTER M WITH DOT ABOVE -> 'm' */ + { 0x42, 0x4D }, /* LATIN CAPITAL LETTER M WITH DOT BELOW -> 'M' */ + { 0x43, 0x6D }, /* LATIN SMALL LETTER M WITH DOT BELOW -> 'm' */ + { 0x44, 0x4E }, /* LATIN CAPITAL LETTER N WITH DOT ABOVE -> 'N' */ + { 0x45, 0x6E }, /* LATIN SMALL LETTER N WITH DOT ABOVE -> 'n' */ + { 0x46, 0x4E }, /* LATIN CAPITAL LETTER N WITH DOT BELOW -> 'N' */ + { 0x47, 0x6E }, /* LATIN SMALL LETTER N WITH DOT BELOW -> 'n' */ + { 0x48, 0x4E }, /* LATIN CAPITAL LETTER N WITH LINE BELOW -> 'N' */ + { 0x49, 0x6E }, /* LATIN SMALL LETTER N WITH LINE BELOW -> 'n' */ + { 0x4A, 0x4E }, /* LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW -> 'N' */ + { 0x4B, 0x6E }, /* LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW -> 'n' */ + { 0x4C, 0x4F }, /* LATIN CAPITAL LETTER O WITH TILDE AND ACUTE -> 'O' */ + { 0x4D, 0x6F }, /* LATIN SMALL LETTER O WITH TILDE AND ACUTE -> 'o' */ + { 0x4E, 0x4F }, /* LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS -> 'O' */ + { 0x4F, 0x6F }, /* LATIN SMALL LETTER O WITH TILDE AND DIAERESIS -> 'o' */ + { 0x50, 0x4F }, /* LATIN CAPITAL LETTER O WITH MACRON AND GRAVE -> 'O' */ + { 0x51, 0x6F }, /* LATIN SMALL LETTER O WITH MACRON AND GRAVE -> 'o' */ + { 0x52, 0x4F }, /* LATIN CAPITAL LETTER O WITH MACRON AND ACUTE -> 'O' */ + { 0x53, 0x6F }, /* LATIN SMALL LETTER O WITH MACRON AND ACUTE -> 'o' */ + { 0x54, 0x50 }, /* LATIN CAPITAL LETTER P WITH ACUTE -> 'P' */ + { 0x55, 0x70 }, /* LATIN SMALL LETTER P WITH ACUTE -> 'p' */ + { 0x56, 0x50 }, /* LATIN CAPITAL LETTER P WITH DOT ABOVE -> 'P' */ + { 0x57, 0x70 }, /* LATIN SMALL LETTER P WITH DOT ABOVE -> 'p' */ + { 0x58, 0x52 }, /* LATIN CAPITAL LETTER R WITH DOT ABOVE -> 'R' */ + { 0x59, 0x72 }, /* LATIN SMALL LETTER R WITH DOT ABOVE -> 'r' */ + { 0x5A, 0x52 }, 
/* LATIN CAPITAL LETTER R WITH DOT BELOW -> 'R' */ + { 0x5B, 0x72 }, /* LATIN SMALL LETTER R WITH DOT BELOW -> 'r' */ + { 0x5C, 0x52 }, /* LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON -> 'R' */ + { 0x5D, 0x72 }, /* LATIN SMALL LETTER R WITH DOT BELOW AND MACRON -> 'r' */ + { 0x5E, 0x52 }, /* LATIN CAPITAL LETTER R WITH LINE BELOW -> 'R' */ + { 0x5F, 0x72 }, /* LATIN SMALL LETTER R WITH LINE BELOW -> 'r' */ + { 0x60, 0x53 }, /* LATIN CAPITAL LETTER S WITH DOT ABOVE -> 'S' */ + { 0x61, 0x73 }, /* LATIN SMALL LETTER S WITH DOT ABOVE -> 's' */ + { 0x62, 0x53 }, /* LATIN CAPITAL LETTER S WITH DOT BELOW -> 'S' */ + { 0x63, 0x73 }, /* LATIN SMALL LETTER S WITH DOT BELOW -> 's' */ + { 0x64, 0x53 }, /* LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE -> 'S' */ + { 0x65, 0x73 }, /* LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE -> 's' */ + { 0x66, 0x53 }, /* LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE -> 'S' */ + { 0x67, 0x73 }, /* LATIN SMALL LETTER S WITH CARON AND DOT ABOVE -> 's' */ + { 0x68, 0x53 }, /* LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE -> 'S' */ + { 0x69, 0x73 }, /* LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE -> 's' */ + { 0x6A, 0x54 }, /* LATIN CAPITAL LETTER T WITH DOT ABOVE -> 'T' */ + { 0x6B, 0x74 }, /* LATIN SMALL LETTER T WITH DOT ABOVE -> 't' */ + { 0x6C, 0x54 }, /* LATIN CAPITAL LETTER T WITH DOT BELOW -> 'T' */ + { 0x6D, 0x74 }, /* LATIN SMALL LETTER T WITH DOT BELOW -> 't' */ + { 0x6E, 0x54 }, /* LATIN CAPITAL LETTER T WITH LINE BELOW -> 'T' */ + { 0x6F, 0x74 }, /* LATIN SMALL LETTER T WITH LINE BELOW -> 't' */ + { 0x70, 0x54 }, /* LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW -> 'T' */ + { 0x71, 0x74 }, /* LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW -> 't' */ + { 0x72, 0x55 }, /* LATIN CAPITAL LETTER U WITH DIAERESIS BELOW -> 'U' */ + { 0x73, 0x75 }, /* LATIN SMALL LETTER U WITH DIAERESIS BELOW -> 'u' */ + { 0x74, 0x55 }, /* LATIN CAPITAL LETTER U WITH TILDE BELOW -> 'U' */ + { 0x75, 0x75 }, /* LATIN SMALL LETTER U WITH TILDE BELOW 
-> 'u' */ + { 0x76, 0x55 }, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW -> 'U' */ + { 0x77, 0x75 }, /* LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW -> 'u' */ + { 0x78, 0x55 }, /* LATIN CAPITAL LETTER U WITH TILDE AND ACUTE -> 'U' */ + { 0x79, 0x75 }, /* LATIN SMALL LETTER U WITH TILDE AND ACUTE -> 'u' */ + { 0x7A, 0x55 }, /* LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS -> 'U' */ + { 0x7B, 0x75 }, /* LATIN SMALL LETTER U WITH MACRON AND DIAERESIS -> 'u' */ + { 0x7C, 0x56 }, /* LATIN CAPITAL LETTER V WITH TILDE -> 'V' */ + { 0x7D, 0x76 }, /* LATIN SMALL LETTER V WITH TILDE -> 'v' */ + { 0x7E, 0x56 }, /* LATIN CAPITAL LETTER V WITH DOT BELOW -> 'V' */ + { 0x7F, 0x76 }, /* LATIN SMALL LETTER V WITH DOT BELOW -> 'v' */ + { 0x80, 0x57 }, /* LATIN CAPITAL LETTER W WITH GRAVE -> 'W' */ + { 0x81, 0x77 }, /* LATIN SMALL LETTER W WITH GRAVE -> 'w' */ + { 0x82, 0x57 }, /* LATIN CAPITAL LETTER W WITH ACUTE -> 'W' */ + { 0x83, 0x77 }, /* LATIN SMALL LETTER W WITH ACUTE -> 'w' */ + { 0x84, 0x57 }, /* LATIN CAPITAL LETTER W WITH DIAERESIS -> 'W' */ + { 0x85, 0x77 }, /* LATIN SMALL LETTER W WITH DIAERESIS -> 'w' */ + { 0x86, 0x57 }, /* LATIN CAPITAL LETTER W WITH DOT ABOVE -> 'W' */ + { 0x87, 0x77 }, /* LATIN SMALL LETTER W WITH DOT ABOVE -> 'w' */ + { 0x88, 0x57 }, /* LATIN CAPITAL LETTER W WITH DOT BELOW -> 'W' */ + { 0x89, 0x77 }, /* LATIN SMALL LETTER W WITH DOT BELOW -> 'w' */ + { 0x8A, 0x58 }, /* LATIN CAPITAL LETTER X WITH DOT ABOVE -> 'X' */ + { 0x8B, 0x78 }, /* LATIN SMALL LETTER X WITH DOT ABOVE -> 'x' */ + { 0x8C, 0x58 }, /* LATIN CAPITAL LETTER X WITH DIAERESIS -> 'X' */ + { 0x8D, 0x78 }, /* LATIN SMALL LETTER X WITH DIAERESIS -> 'x' */ + { 0x8E, 0x59 }, /* LATIN CAPITAL LETTER Y WITH DOT ABOVE -> 'Y' */ + { 0x8F, 0x79 }, /* LATIN SMALL LETTER Y WITH DOT ABOVE -> 'y' */ + { 0x90, 0x5A }, /* LATIN CAPITAL LETTER Z WITH CIRCUMFLEX -> 'Z' */ + { 0x91, 0x7A }, /* LATIN SMALL LETTER Z WITH CIRCUMFLEX -> 'z' */ + { 0x92, 0x5A }, /* LATIN CAPITAL LETTER Z WITH DOT BELOW 
-> 'Z' */ + { 0x93, 0x7A }, /* LATIN SMALL LETTER Z WITH DOT BELOW -> 'z' */ + { 0x94, 0x5A }, /* LATIN CAPITAL LETTER Z WITH LINE BELOW -> 'Z' */ + { 0x95, 0x7A }, /* LATIN SMALL LETTER Z WITH LINE BELOW -> 'z' */ + { 0x96, 0x68 }, /* LATIN SMALL LETTER H WITH LINE BELOW -> 'h' */ + { 0x97, 0x74 }, /* LATIN SMALL LETTER T WITH DIAERESIS -> 't' */ + { 0x98, 0x77 }, /* LATIN SMALL LETTER W WITH RING ABOVE -> 'w' */ + { 0x99, 0x79 }, /* LATIN SMALL LETTER Y WITH RING ABOVE -> 'y' */ + { 0x9A, 0x61 }, /* LATIN SMALL LETTER A WITH RIGHT HALF RING -> 'a' */ + { 0x9B, 0x53 }, /* LATIN SMALL LETTER LONG S WITH DOT ABOVE -> 'S' */ + { 0xA0, 0x41 }, /* LATIN CAPITAL LETTER A WITH DOT BELOW -> 'A' */ + { 0xA1, 0x61 }, /* LATIN SMALL LETTER A WITH DOT BELOW -> 'a' */ + { 0xA2, 0x41 }, /* LATIN CAPITAL LETTER A WITH HOOK ABOVE -> 'A' */ + { 0xA3, 0x61 }, /* LATIN SMALL LETTER A WITH HOOK ABOVE -> 'a' */ + { 0xA4, 0x41 }, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE -> 'A' */ + { 0xA5, 0x61 }, /* LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE -> 'a' */ + { 0xA6, 0x41 }, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE -> 'A' */ + { 0xA7, 0x61 }, /* LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE -> 'a' */ + { 0xA8, 0x41 }, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE -> 'A' */ + { 0xA9, 0x61 }, /* LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE -> 'a' */ + { 0xAA, 0x41 }, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE -> 'A' */ + { 0xAB, 0x61 }, /* LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE -> 'a' */ + { 0xAC, 0x41 }, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW -> 'A' */ + { 0xAD, 0x61 }, /* LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW -> 'a' */ + { 0xAE, 0x41 }, /* LATIN CAPITAL LETTER A WITH BREVE AND ACUTE -> 'A' */ + { 0xAF, 0x61 }, /* LATIN SMALL LETTER A WITH BREVE AND ACUTE -> 'a' */ + { 0xB0, 0x41 }, /* LATIN CAPITAL LETTER A WITH BREVE AND GRAVE -> 'A' */ + { 0xB1, 0x61 }, /* LATIN SMALL LETTER A WITH BREVE AND GRAVE 
-> 'a' */ + { 0xB2, 0x41 }, /* LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE -> 'A' */ + { 0xB3, 0x61 }, /* LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE -> 'a' */ + { 0xB4, 0x41 }, /* LATIN CAPITAL LETTER A WITH BREVE AND TILDE -> 'A' */ + { 0xB5, 0x61 }, /* LATIN SMALL LETTER A WITH BREVE AND TILDE -> 'a' */ + { 0xB6, 0x41 }, /* LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW -> 'A' */ + { 0xB7, 0x61 }, /* LATIN SMALL LETTER A WITH BREVE AND DOT BELOW -> 'a' */ + { 0xB8, 0x45 }, /* LATIN CAPITAL LETTER E WITH DOT BELOW -> 'E' */ + { 0xB9, 0x65 }, /* LATIN SMALL LETTER E WITH DOT BELOW -> 'e' */ + { 0xBA, 0x45 }, /* LATIN CAPITAL LETTER E WITH HOOK ABOVE -> 'E' */ + { 0xBB, 0x65 }, /* LATIN SMALL LETTER E WITH HOOK ABOVE -> 'e' */ + { 0xBC, 0x45 }, /* LATIN CAPITAL LETTER E WITH TILDE -> 'E' */ + { 0xBD, 0x65 }, /* LATIN SMALL LETTER E WITH TILDE -> 'e' */ + { 0xBE, 0x45 }, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE -> 'E' */ + { 0xBF, 0x65 }, /* LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE -> 'e' */ + { 0xC0, 0x45 }, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE -> 'E' */ + { 0xC1, 0x65 }, /* LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE -> 'e' */ + { 0xC2, 0x45 }, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE -> 'E' */ + { 0xC3, 0x65 }, /* LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE -> 'e' */ + { 0xC4, 0x45 }, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE -> 'E' */ + { 0xC5, 0x65 }, /* LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE -> 'e' */ + { 0xC6, 0x45 }, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW -> 'E' */ + { 0xC7, 0x65 }, /* LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW -> 'e' */ + { 0xC8, 0x49 }, /* LATIN CAPITAL LETTER I WITH HOOK ABOVE -> 'I' */ + { 0xC9, 0x69 }, /* LATIN SMALL LETTER I WITH HOOK ABOVE -> 'i' */ + { 0xCA, 0x49 }, /* LATIN CAPITAL LETTER I WITH DOT BELOW -> 'I' */ + { 0xCB, 0x69 }, /* LATIN SMALL LETTER I WITH DOT BELOW -> 'i' */ + { 0xCC, 0x4F }, /* LATIN CAPITAL LETTER O 
WITH DOT BELOW -> 'O' */ + { 0xCD, 0x6F }, /* LATIN SMALL LETTER O WITH DOT BELOW -> 'o' */ + { 0xCE, 0x4F }, /* LATIN CAPITAL LETTER O WITH HOOK ABOVE -> 'O' */ + { 0xCF, 0x6F }, /* LATIN SMALL LETTER O WITH HOOK ABOVE -> 'o' */ + { 0xD0, 0x4F }, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE -> 'O' */ + { 0xD1, 0x6F }, /* LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE -> 'o' */ + { 0xD2, 0x4F }, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE -> 'O' */ + { 0xD3, 0x6F }, /* LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE -> 'o' */ + { 0xD4, 0x4F }, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE -> 'O' */ + { 0xD5, 0x6F }, /* LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE -> 'o' */ + { 0xD6, 0x4F }, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE -> 'O' */ + { 0xD7, 0x6F }, /* LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE -> 'o' */ + { 0xD8, 0x4F }, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW -> 'O' */ + { 0xD9, 0x6F }, /* LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW -> 'o' */ + { 0xDA, 0x4F }, /* LATIN CAPITAL LETTER O WITH HORN AND ACUTE -> 'O' */ + { 0xDB, 0x6F }, /* LATIN SMALL LETTER O WITH HORN AND ACUTE -> 'o' */ + { 0xDC, 0x4F }, /* LATIN CAPITAL LETTER O WITH HORN AND GRAVE -> 'O' */ + { 0xDD, 0x6F }, /* LATIN SMALL LETTER O WITH HORN AND GRAVE -> 'o' */ + { 0xDE, 0x4F }, /* LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE -> 'O' */ + { 0xDF, 0x6F }, /* LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE -> 'o' */ + { 0xE0, 0x4F }, /* LATIN CAPITAL LETTER O WITH HORN AND TILDE -> 'O' */ + { 0xE1, 0x6F }, /* LATIN SMALL LETTER O WITH HORN AND TILDE -> 'o' */ + { 0xE2, 0x4F }, /* LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW -> 'O' */ + { 0xE3, 0x6F }, /* LATIN SMALL LETTER O WITH HORN AND DOT BELOW -> 'o' */ + { 0xE4, 0x55 }, /* LATIN CAPITAL LETTER U WITH DOT BELOW -> 'U' */ + { 0xE5, 0x75 }, /* LATIN SMALL LETTER U WITH DOT BELOW -> 'u' */ + { 0xE6, 0x55 }, /* LATIN CAPITAL LETTER U WITH HOOK ABOVE -> 'U' */ + { 
0xE7, 0x75 }, /* LATIN SMALL LETTER U WITH HOOK ABOVE -> 'u' */ + { 0xE8, 0x55 }, /* LATIN CAPITAL LETTER U WITH HORN AND ACUTE -> 'U' */ + { 0xE9, 0x75 }, /* LATIN SMALL LETTER U WITH HORN AND ACUTE -> 'u' */ + { 0xEA, 0x55 }, /* LATIN CAPITAL LETTER U WITH HORN AND GRAVE -> 'U' */ + { 0xEB, 0x75 }, /* LATIN SMALL LETTER U WITH HORN AND GRAVE -> 'u' */ + { 0xEC, 0x55 }, /* LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE -> 'U' */ + { 0xED, 0x75 }, /* LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE -> 'u' */ + { 0xEE, 0x55 }, /* LATIN CAPITAL LETTER U WITH HORN AND TILDE -> 'U' */ + { 0xEF, 0x75 }, /* LATIN SMALL LETTER U WITH HORN AND TILDE -> 'u' */ + { 0xF0, 0x55 }, /* LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW -> 'U' */ + { 0xF1, 0x75 }, /* LATIN SMALL LETTER U WITH HORN AND DOT BELOW -> 'u' */ + { 0xF2, 0x59 }, /* LATIN CAPITAL LETTER Y WITH GRAVE -> 'Y' */ + { 0xF3, 0x79 }, /* LATIN SMALL LETTER Y WITH GRAVE -> 'y' */ + { 0xF4, 0x59 }, /* LATIN CAPITAL LETTER Y WITH DOT BELOW -> 'Y' */ + { 0xF5, 0x79 }, /* LATIN SMALL LETTER Y WITH DOT BELOW -> 'y' */ + { 0xF6, 0x59 }, /* LATIN CAPITAL LETTER Y WITH HOOK ABOVE -> 'Y' */ + { 0xF7, 0x79 }, /* LATIN SMALL LETTER Y WITH HOOK ABOVE -> 'y' */ + { 0xF8, 0x59 }, /* LATIN CAPITAL LETTER Y WITH TILDE -> 'Y' */ + { 0xF9, 0x79 }, /* LATIN SMALL LETTER Y WITH TILDE -> 'y' */ + /* Entries for page 0x1F */ + { 0x00, 0x00 }, /* GREEK SMALL LETTER ALPHA WITH PSILI -> ... */ + { 0x07, 0x61 }, /* GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI -> 'a' */ + { 0x08, 0x00 }, /* GREEK CAPITAL LETTER ALPHA WITH PSILI -> ... */ + { 0x0F, 0x41 }, /* GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI -> 'A' */ + { 0x10, 0x00 }, /* GREEK SMALL LETTER EPSILON WITH PSILI -> ... */ + { 0x15, 0x65 }, /* GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA -> 'e' */ + { 0x18, 0x00 }, /* GREEK CAPITAL LETTER EPSILON WITH PSILI -> ... 
*/ + { 0x1D, 0x45 }, /* GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA -> 'E' */ + { 0x20, 0x00 }, /* GREEK SMALL LETTER ETA WITH PSILI -> ... */ + { 0x27, 0x65 }, /* GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI -> 'e' */ + { 0x28, 0x00 }, /* GREEK CAPITAL LETTER ETA WITH PSILI -> ... */ + { 0x2F, 0x45 }, /* GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI -> 'E' */ + { 0x30, 0x00 }, /* GREEK SMALL LETTER IOTA WITH PSILI -> ... */ + { 0x37, 0x69 }, /* GREEK SMALL LETTER IOTA WITH DASIA AND PERISPOMENI -> 'i' */ + { 0x38, 0x00 }, /* GREEK CAPITAL LETTER IOTA WITH PSILI -> ... */ + { 0x3F, 0x49 }, /* GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI -> 'I' */ + { 0x40, 0x00 }, /* GREEK SMALL LETTER OMICRON WITH PSILI -> ... */ + { 0x45, 0x6F }, /* GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA -> 'o' */ + { 0x48, 0x00 }, /* GREEK CAPITAL LETTER OMICRON WITH PSILI -> ... */ + { 0x4D, 0x4F }, /* GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA -> 'O' */ + { 0x50, 0x00 }, /* GREEK SMALL LETTER UPSILON WITH PSILI -> ... */ + { 0x57, 0x75 }, /* GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI -> 'u' */ + { 0x59, 0x55 }, /* GREEK CAPITAL LETTER UPSILON WITH DASIA -> 'U' */ + { 0x5B, 0x55 }, /* GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA -> 'U' */ + { 0x5D, 0x55 }, /* GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA -> 'U' */ + { 0x5F, 0x55 }, /* GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI -> 'U' */ + { 0x60, 0x00 }, /* GREEK SMALL LETTER OMEGA WITH PSILI -> ... */ + { 0x67, 0x6F }, /* GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI -> 'o' */ + { 0x68, 0x00 }, /* GREEK CAPITAL LETTER OMEGA WITH PSILI -> ... */ + { 0x6F, 0x4F }, /* GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI -> 'O' */ + { 0x70, 0x61 }, /* GREEK SMALL LETTER ALPHA WITH VARIA -> 'a' */ + { 0x71, 0x61 }, /* GREEK SMALL LETTER ALPHA WITH OXIA -> 'a' */ + { 0x72, 0x00 }, /* GREEK SMALL LETTER EPSILON WITH VARIA -> ... 
*/ + { 0x75, 0x65 }, /* GREEK SMALL LETTER ETA WITH OXIA -> 'e' */ + { 0x76, 0x69 }, /* GREEK SMALL LETTER IOTA WITH VARIA -> 'i' */ + { 0x77, 0x69 }, /* GREEK SMALL LETTER IOTA WITH OXIA -> 'i' */ + { 0x78, 0x6F }, /* GREEK SMALL LETTER OMICRON WITH VARIA -> 'o' */ + { 0x79, 0x6F }, /* GREEK SMALL LETTER OMICRON WITH OXIA -> 'o' */ + { 0x7A, 0x75 }, /* GREEK SMALL LETTER UPSILON WITH VARIA -> 'u' */ + { 0x7B, 0x75 }, /* GREEK SMALL LETTER UPSILON WITH OXIA -> 'u' */ + { 0x7C, 0x6F }, /* GREEK SMALL LETTER OMEGA WITH VARIA -> 'o' */ + { 0x7D, 0x6F }, /* GREEK SMALL LETTER OMEGA WITH OXIA -> 'o' */ + { 0x80, 0x00 }, /* GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI -> ... */ + { 0x87, 0x61 }, /* GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI -> 'a' */ + { 0x88, 0x00 }, /* GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI -> ... */ + { 0x8F, 0x41 }, /* GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI -> 'A' */ + { 0x90, 0x00 }, /* GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI -> ... */ + { 0x97, 0x65 }, /* GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI -> 'e' */ + { 0x98, 0x00 }, /* GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI -> ... */ + { 0x9F, 0x45 }, /* GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI -> 'E' */ + { 0xA0, 0x00 }, /* GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI -> ... */ + { 0xA7, 0x6F }, /* GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI -> 'o' */ + { 0xA8, 0x00 }, /* GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI -> ... */ + { 0xAF, 0x4F }, /* GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI -> 'O' */ + { 0xB0, 0x00 }, /* GREEK SMALL LETTER ALPHA WITH VRACHY -> ... 
*/ + { 0xB4, 0x61 }, /* GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI -> 'a' */ + { 0xB6, 0x61 }, /* GREEK SMALL LETTER ALPHA WITH PERISPOMENI -> 'a' */ + { 0xB7, 0x61 }, /* GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI -> 'a' */ + { 0xB8, 0x00 }, /* GREEK CAPITAL LETTER ALPHA WITH VRACHY -> ... */ + { 0xBC, 0x41 }, /* GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI -> 'A' */ + { 0xBD, 0x27 }, /* GREEK KORONIS -> ''' */ + { 0xBE, 0x69 }, /* GREEK PROSGEGRAMMENI -> 'i' */ + { 0xBF, 0x27 }, /* GREEK PSILI -> ''' */ + { 0xC0, 0x7E }, /* GREEK PERISPOMENI -> '~' */ + { 0xC2, 0x00 }, /* GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI -> ... */ + { 0xC4, 0x65 }, /* GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI -> 'e' */ + { 0xC6, 0x65 }, /* GREEK SMALL LETTER ETA WITH PERISPOMENI -> 'e' */ + { 0xC7, 0x65 }, /* GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI -> 'e' */ + { 0xC8, 0x00 }, /* GREEK CAPITAL LETTER EPSILON WITH VARIA -> ... */ + { 0xCC, 0x45 }, /* GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI -> 'E' */ + { 0xD0, 0x00 }, /* GREEK SMALL LETTER IOTA WITH VRACHY -> ... */ + { 0xD3, 0x69 }, /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA -> 'i' */ + { 0xD6, 0x69 }, /* GREEK SMALL LETTER IOTA WITH PERISPOMENI -> 'i' */ + { 0xD7, 0x69 }, /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI -> 'i' */ + { 0xD8, 0x00 }, /* GREEK CAPITAL LETTER IOTA WITH VRACHY -> ... */ + { 0xDB, 0x49 }, /* GREEK CAPITAL LETTER IOTA WITH OXIA -> 'I' */ + { 0xE0, 0x00 }, /* GREEK SMALL LETTER UPSILON WITH VRACHY -> ... 
*/ + { 0xE3, 0x75 }, /* GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA -> 'u' */ + { 0xE4, 0x52 }, /* GREEK SMALL LETTER RHO WITH PSILI -> 'R' */ + { 0xE5, 0x52 }, /* GREEK SMALL LETTER RHO WITH DASIA -> 'R' */ + { 0xE6, 0x75 }, /* GREEK SMALL LETTER UPSILON WITH PERISPOMENI -> 'u' */ + { 0xE7, 0x75 }, /* GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI -> 'u' */ + { 0xE8, 0x00 }, /* GREEK CAPITAL LETTER UPSILON WITH VRACHY -> ... */ + { 0xEB, 0x55 }, /* GREEK CAPITAL LETTER UPSILON WITH OXIA -> 'U' */ + { 0xEC, 0x52 }, /* GREEK CAPITAL LETTER RHO WITH DASIA -> 'R' */ + { 0xEF, 0x60 }, /* GREEK VARIA -> '`' */ + { 0xF2, 0x00 }, /* GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI -> ... */ + { 0xF4, 0x6F }, /* GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI -> 'o' */ + { 0xF6, 0x6F }, /* GREEK SMALL LETTER OMEGA WITH PERISPOMENI -> 'o' */ + { 0xF7, 0x6F }, /* GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI -> 'o' */ + { 0xF8, 0x00 }, /* GREEK CAPITAL LETTER OMICRON WITH VARIA -> ... */ + { 0xFC, 0x4F }, /* GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI -> 'O' */ + { 0xFD, 0x27 }, /* GREEK OXIA -> ''' */ + { 0xFE, 0x60 }, /* GREEK DASIA -> '`' */ + /* Entries for page 0x20 */ + { 0x00, 0x00 }, /* EN QUAD -> ... */ + { 0x0B, 0x20 }, /* ZERO WIDTH SPACE -> ' ' */ + { 0x10, 0x00 }, /* HYPHEN -> ... */ + { 0x15, 0x2D }, /* HORIZONTAL BAR -> '-' */ + { 0x17, 0x5F }, /* DOUBLE LOW LINE -> '_' */ + { 0x18, 0x27 }, /* LEFT SINGLE QUOTATION MARK -> ''' */ + { 0x19, 0x27 }, /* RIGHT SINGLE QUOTATION MARK -> ''' */ + { 0x1A, 0x2C }, /* SINGLE LOW-9 QUOTATION MARK -> ',' */ + { 0x1B, 0x27 }, /* SINGLE HIGH-REVERSED-9 QUOTATION MARK -> ''' */ + { 0x1C, 0x00 }, /* LEFT DOUBLE QUOTATION MARK -> ... */ + { 0x1F, 0x22 }, /* DOUBLE HIGH-REVERSED-9 QUOTATION MARK -> '"' */ + { 0x20, 0x2B }, /* DAGGER -> '+' */ + { 0x22, 0x2A }, /* BULLET -> '*' */ + { 0x23, 0x3E }, /* TRIANGULAR BULLET -> '>' */ + { 0x24, 0x2E }, /* ONE DOT LEADER -> '.' 
*/ + { 0x26, 0x2E }, /* HORIZONTAL ELLIPSIS -> '.' */ + { 0x27, 0x2E }, /* HYPHENATION POINT -> '.' */ + { 0x2F, 0x20 }, /* NARROW NO-BREAK SPACE -> ' ' */ + { 0x32, 0x27 }, /* PRIME -> ''' */ + { 0x33, 0x22 }, /* DOUBLE PRIME -> '"' */ + { 0x35, 0x60 }, /* REVERSED PRIME -> '`' */ + { 0x38, 0x5E }, /* CARET -> '^' */ + { 0x39, 0x3C }, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK -> '<' */ + { 0x3A, 0x3E }, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK -> '>' */ + { 0x3B, 0x2A }, /* REFERENCE MARK -> '*' */ + { 0x3C, 0x21 }, /* DOUBLE EXCLAMATION MARK -> '!' */ + { 0x3D, 0x3F }, /* INTERROBANG -> '?' */ + { 0x3E, 0x2D }, /* OVERLINE -> '-' */ + { 0x3F, 0x5F }, /* UNDERTIE -> '_' */ + { 0x40, 0x2D }, /* CHARACTER TIE -> '-' */ + { 0x41, 0x5E }, /* CARET INSERTION POINT -> '^' */ + { 0x42, 0x2A }, /* ASTERISM -> '*' */ + { 0x43, 0x2D }, /* HYPHEN BULLET -> '-' */ + { 0x44, 0x2F }, /* FRACTION SLASH -> '/' */ + { 0x47, 0x3F }, /* DOUBLE QUESTION MARK -> '?' */ + { 0x48, 0x3F }, /* QUESTION EXCLAMATION MARK -> '?' */ + { 0x49, 0x21 }, /* EXCLAMATION QUESTION MARK -> '!' 
*/ + { 0x4A, 0x26 }, /* TIRONIAN SIGN ET -> '&' */ + { 0x4B, 0x50 }, /* REVERSED PILCROW SIGN -> 'P' */ + { 0x4C, 0x3C }, /* BLACK LEFTWARDS BULLET -> '<' */ + { 0x4D, 0x3E }, /* BLACK RIGHTWARDS BULLET -> '>' */ + { 0x4E, 0x2A }, /* LOW ASTERISK -> '*' */ + { 0x4F, 0x3B }, /* REVERSED SEMICOLON -> ';' */ + { 0x51, 0x2A }, /* TWO ASTERISKS ALIGNED VERTICALLY -> '*' */ + { 0x52, 0x2D }, /* COMMERCIAL MINUS SIGN -> '-' */ + { 0x53, 0x7E }, /* SWUNG DASH -> '~' */ + { 0x55, 0x2A }, /* FLOWER PUNCTUATION MARK -> '*' */ + { 0x5B, 0x3A }, /* FOUR DOT MARK -> ':' */ + { 0x5F, 0x20 }, /* MEDIUM MATHEMATICAL SPACE -> ' ' */ + { 0x70, 0x30 }, /* SUPERSCRIPT ZERO -> '0' */ + { 0x71, 0x69 }, /* SUPERSCRIPT LATIN SMALL LETTER I -> 'i' */ + { 0x74, 0x34 }, /* SUPERSCRIPT FOUR -> '4' */ + { 0x75, 0x35 }, /* SUPERSCRIPT FIVE -> '5' */ + { 0x76, 0x36 }, /* SUPERSCRIPT SIX -> '6' */ + { 0x77, 0x37 }, /* SUPERSCRIPT SEVEN -> '7' */ + { 0x78, 0x38 }, /* SUPERSCRIPT EIGHT -> '8' */ + { 0x79, 0x39 }, /* SUPERSCRIPT NINE -> '9' */ + { 0x7A, 0x2B }, /* SUPERSCRIPT PLUS SIGN -> '+' */ + { 0x7B, 0x2D }, /* SUPERSCRIPT MINUS -> '-' */ + { 0x7C, 0x3D }, /* SUPERSCRIPT EQUALS SIGN -> '=' */ + { 0x7D, 0x28 }, /* SUPERSCRIPT LEFT PARENTHESIS -> '(' */ + { 0x7E, 0x29 }, /* SUPERSCRIPT RIGHT PARENTHESIS -> ')' */ + { 0x7F, 0x6E }, /* SUPERSCRIPT LATIN SMALL LETTER N -> 'n' */ + { 0x80, 0x30 }, /* SUBSCRIPT ZERO -> '0' */ + { 0x81, 0x31 }, /* SUBSCRIPT ONE -> '1' */ + { 0x82, 0x32 }, /* SUBSCRIPT TWO -> '2' */ + { 0x83, 0x33 }, /* SUBSCRIPT THREE -> '3' */ + { 0x84, 0x34 }, /* SUBSCRIPT FOUR -> '4' */ + { 0x85, 0x35 }, /* SUBSCRIPT FIVE -> '5' */ + { 0x86, 0x36 }, /* SUBSCRIPT SIX -> '6' */ + { 0x87, 0x37 }, /* SUBSCRIPT SEVEN -> '7' */ + { 0x88, 0x38 }, /* SUBSCRIPT EIGHT -> '8' */ + { 0x89, 0x39 }, /* SUBSCRIPT NINE -> '9' */ + { 0x8A, 0x2B }, /* SUBSCRIPT PLUS SIGN -> '+' */ + { 0x8B, 0x2D }, /* SUBSCRIPT MINUS -> '-' */ + { 0x8C, 0x3D }, /* SUBSCRIPT EQUALS SIGN -> '=' */ + { 0x8D, 0x28 }, /* 
SUBSCRIPT LEFT PARENTHESIS -> '(' */ + { 0x8E, 0x29 }, /* SUBSCRIPT RIGHT PARENTHESIS -> ')' */ + { 0x90, 0x61 }, /* LATIN SUBSCRIPT SMALL LETTER A -> 'a' */ + { 0x91, 0x65 }, /* LATIN SUBSCRIPT SMALL LETTER E -> 'e' */ + { 0x92, 0x6F }, /* LATIN SUBSCRIPT SMALL LETTER O -> 'o' */ + { 0x93, 0x78 }, /* LATIN SUBSCRIPT SMALL LETTER X -> 'x' */ + { 0x95, 0x68 }, /* LATIN SUBSCRIPT SMALL LETTER H -> 'h' */ + { 0x96, 0x6B }, /* LATIN SUBSCRIPT SMALL LETTER K -> 'k' */ + { 0x97, 0x6C }, /* LATIN SUBSCRIPT SMALL LETTER L -> 'l' */ + { 0x98, 0x6D }, /* LATIN SUBSCRIPT SMALL LETTER M -> 'm' */ + { 0x99, 0x6E }, /* LATIN SUBSCRIPT SMALL LETTER N -> 'n' */ + { 0x9A, 0x70 }, /* LATIN SUBSCRIPT SMALL LETTER P -> 'p' */ + { 0x9B, 0x73 }, /* LATIN SUBSCRIPT SMALL LETTER S -> 's' */ + { 0x9C, 0x74 }, /* LATIN SUBSCRIPT SMALL LETTER T -> 't' */ + { 0xA4, 0x4C }, /* LIRA SIGN -> 'L' */ + { 0xA6, 0x4E }, /* NAIRA SIGN -> 'N' */ + { 0xA9, 0x57 }, /* WON SIGN -> 'W' */ + { 0xAB, 0x44 }, /* DONG SIGN -> 'D' */ + { 0xAC, 0x45 }, /* EURO SIGN -> 'E' */ + { 0xAD, 0x4B }, /* KIP SIGN -> 'K' */ + { 0xAE, 0x54 }, /* TUGRIK SIGN -> 'T' */ + { 0xB1, 0x50 }, /* PESO SIGN -> 'P' */ + { 0xB2, 0x47 }, /* GUARANI SIGN -> 'G' */ + { 0xB3, 0x41 }, /* AUSTRAL SIGN -> 'A' */ + { 0xB6, 0x4C }, /* LIVRE TOURNOIS SIGN -> 'L' */ + { 0xB8, 0x54 }, /* TENGE SIGN -> 'T' */ + { 0xBA, 0x4C }, /* TURKISH LIRA SIGN -> 'L' */ + { 0xBB, 0x4D }, /* NORDIC MARK SIGN -> 'M' */ + { 0xBC, 0x6D }, /* MANAT SIGN -> 'm' */ + { 0xBD, 0x52 }, /* RUBLE SIGN -> 'R' */ + { 0xBE, 0x6C }, /* LARI SIGN -> 'l' */ + /* Entries for page 0x21 */ + { 0x02, 0x43 }, /* DOUBLE-STRUCK CAPITAL C -> 'C' */ + { 0x03, 0x43 }, /* DEGREE CELSIUS -> 'C' */ + { 0x09, 0x46 }, /* DEGREE FAHRENHEIT -> 'F' */ + { 0x0A, 0x67 }, /* SCRIPT SMALL G -> 'g' */ + { 0x0B, 0x00 }, /* SCRIPT CAPITAL H -> ... 
*/ + { 0x0D, 0x48 }, /* DOUBLE-STRUCK CAPITAL H -> 'H' */ + { 0x0E, 0x68 }, /* PLANCK CONSTANT -> 'h' */ + { 0x10, 0x49 }, /* SCRIPT CAPITAL I -> 'I' */ + { 0x11, 0x49 }, /* BLACK-LETTER CAPITAL I -> 'I' */ + { 0x12, 0x4C }, /* SCRIPT CAPITAL L -> 'L' */ + { 0x13, 0x6C }, /* SCRIPT SMALL L -> 'l' */ + { 0x15, 0x4E }, /* DOUBLE-STRUCK CAPITAL N -> 'N' */ + { 0x19, 0x50 }, /* DOUBLE-STRUCK CAPITAL P -> 'P' */ + { 0x1A, 0x51 }, /* DOUBLE-STRUCK CAPITAL Q -> 'Q' */ + { 0x1B, 0x00 }, /* SCRIPT CAPITAL R -> ... */ + { 0x1D, 0x52 }, /* DOUBLE-STRUCK CAPITAL R -> 'R' */ + { 0x22, 0x54 }, /* TRADE MARK SIGN -> 'T' */ + { 0x24, 0x5A }, /* DOUBLE-STRUCK CAPITAL Z -> 'Z' */ + { 0x28, 0x5A }, /* BLACK-LETTER CAPITAL Z -> 'Z' */ + { 0x2A, 0x4B }, /* KELVIN SIGN -> 'K' */ + { 0x2B, 0x41 }, /* ANGSTROM SIGN -> 'A' */ + { 0x2C, 0x42 }, /* SCRIPT CAPITAL B -> 'B' */ + { 0x2D, 0x43 }, /* BLACK-LETTER CAPITAL C -> 'C' */ + { 0x2E, 0x65 }, /* ESTIMATED SYMBOL -> 'e' */ + { 0x2F, 0x65 }, /* SCRIPT SMALL E -> 'e' */ + { 0x30, 0x45 }, /* SCRIPT CAPITAL E -> 'E' */ + { 0x31, 0x46 }, /* SCRIPT CAPITAL F -> 'F' */ + { 0x32, 0x46 }, /* TURNED CAPITAL F -> 'F' */ + { 0x33, 0x4D }, /* SCRIPT CAPITAL M -> 'M' */ + { 0x34, 0x6F }, /* SCRIPT SMALL O -> 'o' */ + { 0x39, 0x69 }, /* INFORMATION SOURCE -> 'i' */ + { 0x45, 0x44 }, /* DOUBLE-STRUCK ITALIC CAPITAL D -> 'D' */ + { 0x46, 0x64 }, /* DOUBLE-STRUCK ITALIC SMALL D -> 'd' */ + { 0x47, 0x65 }, /* DOUBLE-STRUCK ITALIC SMALL E -> 'e' */ + { 0x48, 0x69 }, /* DOUBLE-STRUCK ITALIC SMALL I -> 'i' */ + { 0x49, 0x6A }, /* DOUBLE-STRUCK ITALIC SMALL J -> 'j' */ + { 0x4E, 0x46 }, /* TURNED SMALL F -> 'F' */ + { 0x60, 0x49 }, /* ROMAN NUMERAL ONE -> 'I' */ + { 0x64, 0x56 }, /* ROMAN NUMERAL FIVE -> 'V' */ + { 0x69, 0x58 }, /* ROMAN NUMERAL TEN -> 'X' */ + { 0x6C, 0x4C }, /* ROMAN NUMERAL FIFTY -> 'L' */ + { 0x6D, 0x43 }, /* ROMAN NUMERAL ONE HUNDRED -> 'C' */ + { 0x6E, 0x44 }, /* ROMAN NUMERAL FIVE HUNDRED -> 'D' */ + { 0x6F, 0x4D }, /* ROMAN NUMERAL ONE 
THOUSAND -> 'M' */ + { 0x70, 0x69 }, /* SMALL ROMAN NUMERAL ONE -> 'i' */ + { 0x74, 0x76 }, /* SMALL ROMAN NUMERAL FIVE -> 'v' */ + { 0x79, 0x78 }, /* SMALL ROMAN NUMERAL TEN -> 'x' */ + { 0x7C, 0x6C }, /* SMALL ROMAN NUMERAL FIFTY -> 'l' */ + { 0x7D, 0x63 }, /* SMALL ROMAN NUMERAL ONE HUNDRED -> 'c' */ + { 0x7E, 0x64 }, /* SMALL ROMAN NUMERAL FIVE HUNDRED -> 'd' */ + { 0x7F, 0x6D }, /* SMALL ROMAN NUMERAL ONE THOUSAND -> 'm' */ + { 0x83, 0x29 }, /* ROMAN NUMERAL REVERSED ONE HUNDRED -> ')' */ + { 0x90, 0x3C }, /* LEFTWARDS ARROW -> '<' */ + { 0x91, 0x5E }, /* UPWARDS ARROW -> '^' */ + { 0x92, 0x3E }, /* RIGHTWARDS ARROW -> '>' */ + { 0x93, 0x76 }, /* DOWNWARDS ARROW -> 'v' */ + { 0x94, 0x2D }, /* LEFT RIGHT ARROW -> '-' */ + { 0x95, 0x7C }, /* UP DOWN ARROW -> '|' */ + { 0x96, 0x5C }, /* NORTH WEST ARROW -> '\' */ + { 0x97, 0x2F }, /* NORTH EAST ARROW -> '/' */ + { 0x98, 0x5C }, /* SOUTH EAST ARROW -> '\' */ + { 0x99, 0x2F }, /* SOUTH WEST ARROW -> '/' */ + { 0x9A, 0x21 }, /* LEFTWARDS ARROW WITH STROKE -> '!' */ + { 0x9B, 0x21 }, /* RIGHTWARDS ARROW WITH STROKE -> '!' */ + { 0x9C, 0x7E }, /* LEFTWARDS WAVE ARROW -> '~' */ + { 0x9D, 0x7E }, /* RIGHTWARDS WAVE ARROW -> '~' */ + { 0x9E, 0x2D }, /* LEFTWARDS TWO HEADED ARROW -> '-' */ + { 0x9F, 0x7C }, /* UPWARDS TWO HEADED ARROW -> '|' */ + { 0xA0, 0x2D }, /* RIGHTWARDS TWO HEADED ARROW -> '-' */ + { 0xA1, 0x7C }, /* DOWNWARDS TWO HEADED ARROW -> '|' */ + { 0xA2, 0x00 }, /* LEFTWARDS ARROW WITH TAIL -> ... */ + { 0xA4, 0x2D }, /* LEFTWARDS ARROW FROM BAR -> '-' */ + { 0xA5, 0x7C }, /* UPWARDS ARROW FROM BAR -> '|' */ + { 0xA6, 0x2D }, /* RIGHTWARDS ARROW FROM BAR -> '-' */ + { 0xA7, 0x7C }, /* DOWNWARDS ARROW FROM BAR -> '|' */ + { 0xA8, 0x7C }, /* UP DOWN ARROW WITH BASE -> '|' */ + { 0xA9, 0x00 }, /* LEFTWARDS ARROW WITH HOOK -> ... */ + { 0xAD, 0x2D }, /* LEFT RIGHT WAVE ARROW -> '-' */ + { 0xAE, 0x21 }, /* LEFT RIGHT ARROW WITH STROKE -> '!' */ + { 0xAF, 0x00 }, /* DOWNWARDS ZIGZAG ARROW -> ... 
*/ + { 0xB5, 0x7C }, /* DOWNWARDS ARROW WITH CORNER LEFTWARDS -> '|' */ + { 0xB6, 0x5E }, /* ANTICLOCKWISE TOP SEMICIRCLE ARROW -> '^' */ + { 0xB7, 0x56 }, /* CLOCKWISE TOP SEMICIRCLE ARROW -> 'V' */ + { 0xB8, 0x5C }, /* NORTH WEST ARROW TO LONG BAR -> '\' */ + { 0xB9, 0x3D }, /* LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR -> '=' */ + { 0xBA, 0x56 }, /* ANTICLOCKWISE OPEN CIRCLE ARROW -> 'V' */ + { 0xBB, 0x5E }, /* CLOCKWISE OPEN CIRCLE ARROW -> '^' */ + { 0xBC, 0x2D }, /* LEFTWARDS HARPOON WITH BARB UPWARDS -> '-' */ + { 0xBD, 0x2D }, /* LEFTWARDS HARPOON WITH BARB DOWNWARDS -> '-' */ + { 0xBE, 0x7C }, /* UPWARDS HARPOON WITH BARB RIGHTWARDS -> '|' */ + { 0xBF, 0x7C }, /* UPWARDS HARPOON WITH BARB LEFTWARDS -> '|' */ + { 0xC0, 0x2D }, /* RIGHTWARDS HARPOON WITH BARB UPWARDS -> '-' */ + { 0xC1, 0x2D }, /* RIGHTWARDS HARPOON WITH BARB DOWNWARDS -> '-' */ + { 0xC2, 0x7C }, /* DOWNWARDS HARPOON WITH BARB RIGHTWARDS -> '|' */ + { 0xC3, 0x7C }, /* DOWNWARDS HARPOON WITH BARB LEFTWARDS -> '|' */ + { 0xC4, 0x3D }, /* RIGHTWARDS ARROW OVER LEFTWARDS ARROW -> '=' */ + { 0xC5, 0x7C }, /* UPWARDS ARROW LEFTWARDS OF DOWNWARDS ARROW -> '|' */ + { 0xC6, 0x3D }, /* LEFTWARDS ARROW OVER RIGHTWARDS ARROW -> '=' */ + { 0xC7, 0x3D }, /* LEFTWARDS PAIRED ARROWS -> '=' */ + { 0xC8, 0x7C }, /* UPWARDS PAIRED ARROWS -> '|' */ + { 0xC9, 0x3D }, /* RIGHTWARDS PAIRED ARROWS -> '=' */ + { 0xCA, 0x7C }, /* DOWNWARDS PAIRED ARROWS -> '|' */ + { 0xCB, 0x3D }, /* LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON -> '=' */ + { 0xCC, 0x3D }, /* RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON -> '=' */ + { 0xCD, 0x00 }, /* LEFTWARDS DOUBLE ARROW WITH STROKE -> ... */ + { 0xCF, 0x21 }, /* RIGHTWARDS DOUBLE ARROW WITH STROKE -> '!' 
*/ + { 0xD0, 0x3C }, /* LEFTWARDS DOUBLE ARROW -> '<' */ + { 0xD1, 0x5E }, /* UPWARDS DOUBLE ARROW -> '^' */ + { 0xD2, 0x3E }, /* RIGHTWARDS DOUBLE ARROW -> '>' */ + { 0xD3, 0x76 }, /* DOWNWARDS DOUBLE ARROW -> 'v' */ + { 0xD4, 0x3D }, /* LEFT RIGHT DOUBLE ARROW -> '=' */ + { 0xD5, 0x7C }, /* UP DOWN DOUBLE ARROW -> '|' */ + { 0xD6, 0x5C }, /* NORTH WEST DOUBLE ARROW -> '\' */ + { 0xD7, 0x2F }, /* NORTH EAST DOUBLE ARROW -> '/' */ + { 0xD8, 0x5C }, /* SOUTH EAST DOUBLE ARROW -> '\' */ + { 0xD9, 0x2F }, /* SOUTH WEST DOUBLE ARROW -> '/' */ + { 0xDA, 0x3D }, /* LEFTWARDS TRIPLE ARROW -> '=' */ + { 0xDB, 0x3D }, /* RIGHTWARDS TRIPLE ARROW -> '=' */ + { 0xDC, 0x7E }, /* LEFTWARDS SQUIGGLE ARROW -> '~' */ + { 0xDD, 0x7E }, /* RIGHTWARDS SQUIGGLE ARROW -> '~' */ + { 0xDE, 0x7C }, /* UPWARDS ARROW WITH DOUBLE STROKE -> '|' */ + { 0xDF, 0x7C }, /* DOWNWARDS ARROW WITH DOUBLE STROKE -> '|' */ + { 0xE0, 0x2D }, /* LEFTWARDS DASHED ARROW -> '-' */ + { 0xE1, 0x7C }, /* UPWARDS DASHED ARROW -> '|' */ + { 0xE2, 0x2D }, /* RIGHTWARDS DASHED ARROW -> '-' */ + { 0xE3, 0x7C }, /* DOWNWARDS DASHED ARROW -> '|' */ + { 0xE4, 0x00 }, /* LEFTWARDS ARROW TO BAR -> ... */ + { 0xE6, 0x2D }, /* LEFTWARDS WHITE ARROW -> '-' */ + { 0xE7, 0x7C }, /* UPWARDS WHITE ARROW -> '|' */ + { 0xE8, 0x2D }, /* RIGHTWARDS WHITE ARROW -> '-' */ + { 0xE9, 0x00 }, /* DOWNWARDS WHITE ARROW -> ... */ + { 0xEF, 0x7C }, /* UPWARDS WHITE DOUBLE ARROW ON PEDESTAL -> '|' */ + { 0xF0, 0x2D }, /* RIGHTWARDS WHITE ARROW FROM WALL -> '-' */ + { 0xF1, 0x5C }, /* NORTH WEST ARROW TO CORNER -> '\' */ + { 0xF2, 0x5C }, /* SOUTH EAST ARROW TO CORNER -> '\' */ + { 0xF3, 0x7C }, /* UP DOWN WHITE ARROW -> '|' */ + /* Entries for page 0x22 */ + { 0x04, 0x21 }, /* THERE DOES NOT EXIST -> '!' */ + { 0x09, 0x21 }, /* NOT AN ELEMENT OF -> '!' */ + { 0x0C, 0x21 }, /* DOES NOT CONTAIN AS MEMBER -> '!' 
*/ + { 0x12, 0x2D }, /* MINUS SIGN -> '-' */ + { 0x15, 0x2F }, /* DIVISION SLASH -> '/' */ + { 0x16, 0x5C }, /* SET MINUS -> '\' */ + { 0x17, 0x2A }, /* ASTERISK OPERATOR -> '*' */ + { 0x18, 0x6F }, /* RING OPERATOR -> 'o' */ + { 0x19, 0x2E }, /* BULLET OPERATOR -> '.' */ + { 0x23, 0x7C }, /* DIVIDES -> '|' */ + { 0x24, 0x21 }, /* DOES NOT DIVIDE -> '!' */ + { 0x26, 0x21 }, /* NOT PARALLEL TO -> '!' */ + { 0x36, 0x3A }, /* RATIO -> ':' */ + { 0x3C, 0x7E }, /* TILDE OPERATOR -> '~' */ + { 0x41, 0x23 }, /* NOT TILDE -> '#' */ + { 0x44, 0x23 }, /* NOT ASYMPTOTICALLY EQUAL TO -> '#' */ + { 0x49, 0x23 }, /* NOT ALMOST EQUAL TO -> '#' */ + { 0x60, 0x23 }, /* NOT EQUAL TO -> '#' */ + { 0x62, 0x23 }, /* NOT IDENTICAL TO -> '#' */ + { 0x64, 0x3C }, /* LESS-THAN OR EQUAL TO -> '<' */ + { 0x65, 0x3E }, /* GREATER-THAN OR EQUAL TO -> '>' */ + { 0x68, 0x23 }, /* LESS-THAN BUT NOT EQUAL TO -> '#' */ + { 0x69, 0x23 }, /* GREATER-THAN BUT NOT EQUAL TO -> '#' */ + { 0x6D, 0x23 }, /* NOT EQUIVALENT TO -> '#' */ + { 0x6E, 0x21 }, /* NOT LESS-THAN -> '!' */ + { 0x6F, 0x21 }, /* NOT GREATER-THAN -> '!' */ + { 0x80, 0x21 }, /* DOES NOT PRECEDE -> '!' */ + { 0x81, 0x21 }, /* DOES NOT SUCCEED -> '!' */ + { 0x84, 0x21 }, /* NOT A SUBSET OF -> '!' */ + { 0x85, 0x21 }, /* NOT A SUPERSET OF -> '!' 
*/ + { 0x8A, 0x23 }, /* SUBSET OF WITH NOT EQUAL TO -> '#' */ + { 0x8B, 0x23 }, /* SUPERSET OF WITH NOT EQUAL TO -> '#' */ + { 0x9B, 0x2A }, /* CIRCLED ASTERISK OPERATOR -> '*' */ + { 0xC6, 0x2A }, /* STAR OPERATOR -> '*' */ + /* Entries for page 0x23 */ + { 0x03, 0x5E }, /* UP ARROWHEAD -> '^' */ + { 0x29, 0x3C }, /* LEFT-POINTING ANGLE BRACKET -> '<' */ + { 0x5F, 0x2A }, /* APL FUNCTIONAL SYMBOL CIRCLE STAR -> '*' */ + { 0x63, 0x2A }, /* APL FUNCTIONAL SYMBOL STAR DIAERESIS -> '*' */ + /* Entries for page 0x24 */ + { 0x60, 0x31 }, /* CIRCLED DIGIT ONE -> '1' */ + { 0x61, 0x32 }, /* CIRCLED DIGIT TWO -> '2' */ + { 0x62, 0x33 }, /* CIRCLED DIGIT THREE -> '3' */ + { 0x63, 0x34 }, /* CIRCLED DIGIT FOUR -> '4' */ + { 0x64, 0x35 }, /* CIRCLED DIGIT FIVE -> '5' */ + { 0x65, 0x36 }, /* CIRCLED DIGIT SIX -> '6' */ + { 0x66, 0x37 }, /* CIRCLED DIGIT SEVEN -> '7' */ + { 0x67, 0x38 }, /* CIRCLED DIGIT EIGHT -> '8' */ + { 0x68, 0x39 }, /* CIRCLED DIGIT NINE -> '9' */ + { 0xB6, 0x41 }, /* CIRCLED LATIN CAPITAL LETTER A -> 'A' */ + { 0xB7, 0x42 }, /* CIRCLED LATIN CAPITAL LETTER B -> 'B' */ + { 0xB8, 0x43 }, /* CIRCLED LATIN CAPITAL LETTER C -> 'C' */ + { 0xB9, 0x44 }, /* CIRCLED LATIN CAPITAL LETTER D -> 'D' */ + { 0xBA, 0x45 }, /* CIRCLED LATIN CAPITAL LETTER E -> 'E' */ + { 0xBB, 0x46 }, /* CIRCLED LATIN CAPITAL LETTER F -> 'F' */ + { 0xBC, 0x47 }, /* CIRCLED LATIN CAPITAL LETTER G -> 'G' */ + { 0xBD, 0x48 }, /* CIRCLED LATIN CAPITAL LETTER H -> 'H' */ + { 0xBE, 0x49 }, /* CIRCLED LATIN CAPITAL LETTER I -> 'I' */ + { 0xBF, 0x4A }, /* CIRCLED LATIN CAPITAL LETTER J -> 'J' */ + { 0xC0, 0x4B }, /* CIRCLED LATIN CAPITAL LETTER K -> 'K' */ + { 0xC1, 0x4C }, /* CIRCLED LATIN CAPITAL LETTER L -> 'L' */ + { 0xC2, 0x4D }, /* CIRCLED LATIN CAPITAL LETTER M -> 'M' */ + { 0xC3, 0x4E }, /* CIRCLED LATIN CAPITAL LETTER N -> 'N' */ + { 0xC4, 0x4F }, /* CIRCLED LATIN CAPITAL LETTER O -> 'O' */ + { 0xC5, 0x50 }, /* CIRCLED LATIN CAPITAL LETTER P -> 'P' */ + { 0xC6, 0x51 }, /* CIRCLED LATIN 
CAPITAL LETTER Q -> 'Q' */ + { 0xC7, 0x52 }, /* CIRCLED LATIN CAPITAL LETTER R -> 'R' */ + { 0xC8, 0x53 }, /* CIRCLED LATIN CAPITAL LETTER S -> 'S' */ + { 0xC9, 0x54 }, /* CIRCLED LATIN CAPITAL LETTER T -> 'T' */ + { 0xCA, 0x55 }, /* CIRCLED LATIN CAPITAL LETTER U -> 'U' */ + { 0xCB, 0x56 }, /* CIRCLED LATIN CAPITAL LETTER V -> 'V' */ + { 0xCC, 0x57 }, /* CIRCLED LATIN CAPITAL LETTER W -> 'W' */ + { 0xCD, 0x58 }, /* CIRCLED LATIN CAPITAL LETTER X -> 'X' */ + { 0xCE, 0x59 }, /* CIRCLED LATIN CAPITAL LETTER Y -> 'Y' */ + { 0xCF, 0x5A }, /* CIRCLED LATIN CAPITAL LETTER Z -> 'Z' */ + { 0xD0, 0x61 }, /* CIRCLED LATIN SMALL LETTER A -> 'a' */ + { 0xD1, 0x62 }, /* CIRCLED LATIN SMALL LETTER B -> 'b' */ + { 0xD2, 0x63 }, /* CIRCLED LATIN SMALL LETTER C -> 'c' */ + { 0xD3, 0x64 }, /* CIRCLED LATIN SMALL LETTER D -> 'd' */ + { 0xD4, 0x65 }, /* CIRCLED LATIN SMALL LETTER E -> 'e' */ + { 0xD5, 0x66 }, /* CIRCLED LATIN SMALL LETTER F -> 'f' */ + { 0xD6, 0x67 }, /* CIRCLED LATIN SMALL LETTER G -> 'g' */ + { 0xD7, 0x68 }, /* CIRCLED LATIN SMALL LETTER H -> 'h' */ + { 0xD8, 0x69 }, /* CIRCLED LATIN SMALL LETTER I -> 'i' */ + { 0xD9, 0x6A }, /* CIRCLED LATIN SMALL LETTER J -> 'j' */ + { 0xDA, 0x6B }, /* CIRCLED LATIN SMALL LETTER K -> 'k' */ + { 0xDB, 0x6C }, /* CIRCLED LATIN SMALL LETTER L -> 'l' */ + { 0xDC, 0x6D }, /* CIRCLED LATIN SMALL LETTER M -> 'm' */ + { 0xDD, 0x6E }, /* CIRCLED LATIN SMALL LETTER N -> 'n' */ + { 0xDE, 0x6F }, /* CIRCLED LATIN SMALL LETTER O -> 'o' */ + { 0xDF, 0x70 }, /* CIRCLED LATIN SMALL LETTER P -> 'p' */ + { 0xE0, 0x71 }, /* CIRCLED LATIN SMALL LETTER Q -> 'q' */ + { 0xE1, 0x72 }, /* CIRCLED LATIN SMALL LETTER R -> 'r' */ + { 0xE2, 0x73 }, /* CIRCLED LATIN SMALL LETTER S -> 's' */ + { 0xE3, 0x74 }, /* CIRCLED LATIN SMALL LETTER T -> 't' */ + { 0xE4, 0x75 }, /* CIRCLED LATIN SMALL LETTER U -> 'u' */ + { 0xE5, 0x76 }, /* CIRCLED LATIN SMALL LETTER V -> 'v' */ + { 0xE6, 0x77 }, /* CIRCLED LATIN SMALL LETTER W -> 'w' */ + { 0xE7, 0x78 }, /* CIRCLED LATIN 
SMALL LETTER X -> 'x' */ + { 0xE8, 0x79 }, /* CIRCLED LATIN SMALL LETTER Y -> 'y' */ + { 0xE9, 0x7A }, /* CIRCLED LATIN SMALL LETTER Z -> 'z' */ + { 0xEA, 0x30 }, /* CIRCLED DIGIT ZERO -> '0' */ + { 0xF5, 0x31 }, /* DOUBLE CIRCLED DIGIT ONE -> '1' */ + { 0xF6, 0x32 }, /* DOUBLE CIRCLED DIGIT TWO -> '2' */ + { 0xF7, 0x33 }, /* DOUBLE CIRCLED DIGIT THREE -> '3' */ + { 0xF8, 0x34 }, /* DOUBLE CIRCLED DIGIT FOUR -> '4' */ + { 0xF9, 0x35 }, /* DOUBLE CIRCLED DIGIT FIVE -> '5' */ + { 0xFA, 0x36 }, /* DOUBLE CIRCLED DIGIT SIX -> '6' */ + { 0xFB, 0x37 }, /* DOUBLE CIRCLED DIGIT SEVEN -> '7' */ + { 0xFC, 0x38 }, /* DOUBLE CIRCLED DIGIT EIGHT -> '8' */ + { 0xFD, 0x39 }, /* DOUBLE CIRCLED DIGIT NINE -> '9' */ + { 0xFF, 0x30 }, /* NEGATIVE CIRCLED DIGIT ZERO -> '0' */ + /* Entries for page 0x25 */ + { 0x00, 0x2D }, /* BOX DRAWINGS LIGHT HORIZONTAL -> '-' */ + { 0x01, 0x2D }, /* BOX DRAWINGS HEAVY HORIZONTAL -> '-' */ + { 0x02, 0x7C }, /* BOX DRAWINGS LIGHT VERTICAL -> '|' */ + { 0x03, 0x7C }, /* BOX DRAWINGS HEAVY VERTICAL -> '|' */ + { 0x04, 0x2D }, /* BOX DRAWINGS LIGHT TRIPLE DASH HORIZONTAL -> '-' */ + { 0x05, 0x2D }, /* BOX DRAWINGS HEAVY TRIPLE DASH HORIZONTAL -> '-' */ + { 0x06, 0x7C }, /* BOX DRAWINGS LIGHT TRIPLE DASH VERTICAL -> '|' */ + { 0x07, 0x7C }, /* BOX DRAWINGS HEAVY TRIPLE DASH VERTICAL -> '|' */ + { 0x08, 0x2D }, /* BOX DRAWINGS LIGHT QUADRUPLE DASH HORIZONTAL -> '-' */ + { 0x09, 0x2D }, /* BOX DRAWINGS HEAVY QUADRUPLE DASH HORIZONTAL -> '-' */ + { 0x0A, 0x7C }, /* BOX DRAWINGS LIGHT QUADRUPLE DASH VERTICAL -> '|' */ + { 0x0B, 0x7C }, /* BOX DRAWINGS HEAVY QUADRUPLE DASH VERTICAL -> '|' */ + { 0x0C, 0x00 }, /* BOX DRAWINGS LIGHT DOWN AND RIGHT -> ... 
*/ + { 0x4B, 0x2B }, /* BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL -> '+' */ + { 0x4C, 0x2D }, /* BOX DRAWINGS LIGHT DOUBLE DASH HORIZONTAL -> '-' */ + { 0x4D, 0x2D }, /* BOX DRAWINGS HEAVY DOUBLE DASH HORIZONTAL -> '-' */ + { 0x4E, 0x7C }, /* BOX DRAWINGS LIGHT DOUBLE DASH VERTICAL -> '|' */ + { 0x4F, 0x7C }, /* BOX DRAWINGS HEAVY DOUBLE DASH VERTICAL -> '|' */ + { 0x50, 0x2D }, /* BOX DRAWINGS DOUBLE HORIZONTAL -> '-' */ + { 0x51, 0x7C }, /* BOX DRAWINGS DOUBLE VERTICAL -> '|' */ + { 0x52, 0x00 }, /* BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE -> ... */ + { 0x70, 0x2B }, /* BOX DRAWINGS LIGHT ARC UP AND RIGHT -> '+' */ + { 0x71, 0x2F }, /* BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT -> '/' */ + { 0x72, 0x5C }, /* BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT -> '\' */ + { 0x73, 0x58 }, /* BOX DRAWINGS LIGHT DIAGONAL CROSS -> 'X' */ + { 0x74, 0x2D }, /* BOX DRAWINGS LIGHT LEFT -> '-' */ + { 0x75, 0x7C }, /* BOX DRAWINGS LIGHT UP -> '|' */ + { 0x76, 0x2D }, /* BOX DRAWINGS LIGHT RIGHT -> '-' */ + { 0x77, 0x7C }, /* BOX DRAWINGS LIGHT DOWN -> '|' */ + { 0x78, 0x2D }, /* BOX DRAWINGS HEAVY LEFT -> '-' */ + { 0x79, 0x7C }, /* BOX DRAWINGS HEAVY UP -> '|' */ + { 0x7A, 0x2D }, /* BOX DRAWINGS HEAVY RIGHT -> '-' */ + { 0x7B, 0x7C }, /* BOX DRAWINGS HEAVY DOWN -> '|' */ + { 0x7C, 0x2D }, /* BOX DRAWINGS LIGHT LEFT AND HEAVY RIGHT -> '-' */ + { 0x7D, 0x7C }, /* BOX DRAWINGS LIGHT UP AND HEAVY DOWN -> '|' */ + { 0x7E, 0x2D }, /* BOX DRAWINGS HEAVY LEFT AND LIGHT RIGHT -> '-' */ + { 0x7F, 0x7C }, /* BOX DRAWINGS HEAVY UP AND LIGHT DOWN -> '|' */ + { 0x80, 0x00 }, /* UPPER HALF BLOCK -> ... */ + { 0x93, 0x23 }, /* DARK SHADE -> '#' */ + { 0x94, 0x2D }, /* UPPER ONE EIGHTH BLOCK -> '-' */ + { 0x95, 0x7C }, /* RIGHT ONE EIGHTH BLOCK -> '|' */ + { 0x96, 0x00 }, /* QUADRANT LOWER LEFT -> ... */ + { 0xB1, 0x23 }, /* WHITE PARALLELOGRAM -> '#' */ + { 0xB2, 0x00 }, /* BLACK UP-POINTING TRIANGLE -> ... 
*/ + { 0xB5, 0x5E }, /* WHITE UP-POINTING SMALL TRIANGLE -> '^' */ + { 0xB6, 0x00 }, /* BLACK RIGHT-POINTING TRIANGLE -> ... */ + { 0xBB, 0x3E }, /* WHITE RIGHT-POINTING POINTER -> '>' */ + { 0xBC, 0x00 }, /* BLACK DOWN-POINTING TRIANGLE -> ... */ + { 0xBF, 0x56 }, /* WHITE DOWN-POINTING SMALL TRIANGLE -> 'V' */ + { 0xC0, 0x00 }, /* BLACK LEFT-POINTING TRIANGLE -> ... */ + { 0xC5, 0x3C }, /* WHITE LEFT-POINTING POINTER -> '<' */ + { 0xC6, 0x00 }, /* BLACK DIAMOND -> ... */ + { 0xE6, 0x2A }, /* WHITE BULLET -> '*' */ + { 0xE7, 0x00 }, /* SQUARE WITH LEFT HALF BLACK -> ... */ + { 0xEB, 0x23 }, /* WHITE SQUARE WITH VERTICAL BISECTING LINE -> '#' */ + { 0xEC, 0x00 }, /* WHITE UP-POINTING TRIANGLE WITH DOT -> ... */ + { 0xEE, 0x5E }, /* UP-POINTING TRIANGLE WITH RIGHT HALF BLACK -> '^' */ + { 0xEF, 0x4F }, /* LARGE CIRCLE -> 'O' */ + { 0xF0, 0x00 }, /* WHITE SQUARE WITH UPPER LEFT QUADRANT -> ... */ + { 0xF7, 0x23 }, /* WHITE CIRCLE WITH UPPER RIGHT QUADRANT -> '#' */ + /* Entries for page 0x26 */ + { 0x05, 0x2A }, /* BLACK STAR -> '*' */ + { 0x06, 0x2A }, /* WHITE STAR -> '*' */ + { 0x2A, 0x2A }, /* STAR AND CRESCENT -> '*' */ + { 0x6F, 0x23 }, /* MUSIC SHARP SIGN -> '#' */ + { 0x98, 0x2A }, /* FLOWER -> '*' */ + { 0x9D, 0x2A }, /* OUTLINED WHITE STAR -> '*' */ + /* Entries for page 0x27 */ + { 0x13, 0x76 }, /* CHECK MARK -> 'v' */ + { 0x14, 0x56 }, /* HEAVY CHECK MARK -> 'V' */ + { 0x15, 0x78 }, /* MULTIPLICATION X -> 'x' */ + { 0x16, 0x58 }, /* HEAVY MULTIPLICATION X -> 'X' */ + { 0x17, 0x78 }, /* BALLOT X -> 'x' */ + { 0x18, 0x58 }, /* HEAVY BALLOT X -> 'X' */ + { 0x21, 0x00 }, /* STAR OF DAVID -> ... */ + { 0x46, 0x2A }, /* HEAVY CHEVRON SNOWFLAKE -> '*' */ + { 0x49, 0x00 }, /* BALLOON-SPOKED ASTERISK -> ... 
*/ + { 0x4B, 0x2A }, /* HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK -> '*' */ + { 0x58, 0x7C }, /* LIGHT VERTICAL BAR -> '|' */ + { 0x5C, 0x27 }, /* HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT -> ''' */ + { 0x5D, 0x22 }, /* HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT -> '"' */ + { 0x5E, 0x22 }, /* HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT -> '"' */ + { 0x5F, 0x2C }, /* HEAVY LOW SINGLE COMMA QUOTATION MARK ORNAMENT -> ',' */ + { 0x62, 0x21 }, /* HEAVY EXCLAMATION MARK ORNAMENT -> '!' */ + { 0xE6, 0x5B }, /* MATHEMATICAL LEFT WHITE SQUARE BRACKET -> '[' */ + { 0xE8, 0x3C }, /* MATHEMATICAL LEFT ANGLE BRACKET -> '<' */ + /* Entries for page 0x28 */ + { 0x00, 0x20 }, /* BRAILLE PATTERN BLANK -> ' ' */ + { 0x01, 0x61 }, /* BRAILLE PATTERN DOTS-1 -> 'a' */ + { 0x02, 0x31 }, /* BRAILLE PATTERN DOTS-2 -> '1' */ + { 0x03, 0x62 }, /* BRAILLE PATTERN DOTS-12 -> 'b' */ + { 0x04, 0x27 }, /* BRAILLE PATTERN DOTS-3 -> ''' */ + { 0x05, 0x6B }, /* BRAILLE PATTERN DOTS-13 -> 'k' */ + { 0x06, 0x32 }, /* BRAILLE PATTERN DOTS-23 -> '2' */ + { 0x07, 0x6C }, /* BRAILLE PATTERN DOTS-123 -> 'l' */ + { 0x08, 0x40 }, /* BRAILLE PATTERN DOTS-4 -> '@' */ + { 0x09, 0x63 }, /* BRAILLE PATTERN DOTS-14 -> 'c' */ + { 0x0A, 0x69 }, /* BRAILLE PATTERN DOTS-24 -> 'i' */ + { 0x0B, 0x66 }, /* BRAILLE PATTERN DOTS-124 -> 'f' */ + { 0x0C, 0x2F }, /* BRAILLE PATTERN DOTS-34 -> '/' */ + { 0x0D, 0x6D }, /* BRAILLE PATTERN DOTS-134 -> 'm' */ + { 0x0E, 0x73 }, /* BRAILLE PATTERN DOTS-234 -> 's' */ + { 0x0F, 0x70 }, /* BRAILLE PATTERN DOTS-1234 -> 'p' */ + { 0x10, 0x22 }, /* BRAILLE PATTERN DOTS-5 -> '"' */ + { 0x11, 0x65 }, /* BRAILLE PATTERN DOTS-15 -> 'e' */ + { 0x12, 0x33 }, /* BRAILLE PATTERN DOTS-25 -> '3' */ + { 0x13, 0x68 }, /* BRAILLE PATTERN DOTS-125 -> 'h' */ + { 0x14, 0x39 }, /* BRAILLE PATTERN DOTS-35 -> '9' */ + { 0x15, 0x6F }, /* BRAILLE PATTERN DOTS-135 -> 'o' */ + { 0x16, 0x36 }, /* BRAILLE PATTERN DOTS-235 -> '6' */ + { 0x17, 0x72 }, /* BRAILLE PATTERN DOTS-1235 -> 'r' */ + { 0x18, 
0x5E }, /* BRAILLE PATTERN DOTS-45 -> '^' */ + { 0x19, 0x64 }, /* BRAILLE PATTERN DOTS-145 -> 'd' */ + { 0x1A, 0x6A }, /* BRAILLE PATTERN DOTS-245 -> 'j' */ + { 0x1B, 0x67 }, /* BRAILLE PATTERN DOTS-1245 -> 'g' */ + { 0x1C, 0x3E }, /* BRAILLE PATTERN DOTS-345 -> '>' */ + { 0x1D, 0x6E }, /* BRAILLE PATTERN DOTS-1345 -> 'n' */ + { 0x1E, 0x74 }, /* BRAILLE PATTERN DOTS-2345 -> 't' */ + { 0x1F, 0x71 }, /* BRAILLE PATTERN DOTS-12345 -> 'q' */ + { 0x20, 0x2C }, /* BRAILLE PATTERN DOTS-6 -> ',' */ + { 0x21, 0x2A }, /* BRAILLE PATTERN DOTS-16 -> '*' */ + { 0x22, 0x35 }, /* BRAILLE PATTERN DOTS-26 -> '5' */ + { 0x23, 0x3C }, /* BRAILLE PATTERN DOTS-126 -> '<' */ + { 0x24, 0x2D }, /* BRAILLE PATTERN DOTS-36 -> '-' */ + { 0x25, 0x75 }, /* BRAILLE PATTERN DOTS-136 -> 'u' */ + { 0x26, 0x38 }, /* BRAILLE PATTERN DOTS-236 -> '8' */ + { 0x27, 0x76 }, /* BRAILLE PATTERN DOTS-1236 -> 'v' */ + { 0x28, 0x2E }, /* BRAILLE PATTERN DOTS-46 -> '.' */ + { 0x29, 0x25 }, /* BRAILLE PATTERN DOTS-146 -> '%' */ + { 0x2A, 0x5B }, /* BRAILLE PATTERN DOTS-246 -> '[' */ + { 0x2B, 0x24 }, /* BRAILLE PATTERN DOTS-1246 -> '$' */ + { 0x2C, 0x2B }, /* BRAILLE PATTERN DOTS-346 -> '+' */ + { 0x2D, 0x78 }, /* BRAILLE PATTERN DOTS-1346 -> 'x' */ + { 0x2E, 0x21 }, /* BRAILLE PATTERN DOTS-2346 -> '!' */ + { 0x2F, 0x26 }, /* BRAILLE PATTERN DOTS-12346 -> '&' */ + { 0x30, 0x3B }, /* BRAILLE PATTERN DOTS-56 -> ';' */ + { 0x31, 0x3A }, /* BRAILLE PATTERN DOTS-156 -> ':' */ + { 0x32, 0x34 }, /* BRAILLE PATTERN DOTS-256 -> '4' */ + { 0x33, 0x5C }, /* BRAILLE PATTERN DOTS-1256 -> '\' */ + { 0x34, 0x30 }, /* BRAILLE PATTERN DOTS-356 -> '0' */ + { 0x35, 0x7A }, /* BRAILLE PATTERN DOTS-1356 -> 'z' */ + { 0x36, 0x37 }, /* BRAILLE PATTERN DOTS-2356 -> '7' */ + { 0x37, 0x28 }, /* BRAILLE PATTERN DOTS-12356 -> '(' */ + { 0x38, 0x5F }, /* BRAILLE PATTERN DOTS-456 -> '_' */ + { 0x39, 0x3F }, /* BRAILLE PATTERN DOTS-1456 -> '?' 
*/ + { 0x3A, 0x77 }, /* BRAILLE PATTERN DOTS-2456 -> 'w' */ + { 0x3B, 0x5D }, /* BRAILLE PATTERN DOTS-12456 -> ']' */ + { 0x3C, 0x23 }, /* BRAILLE PATTERN DOTS-3456 -> '#' */ + { 0x3D, 0x79 }, /* BRAILLE PATTERN DOTS-13456 -> 'y' */ + { 0x3E, 0x29 }, /* BRAILLE PATTERN DOTS-23456 -> ')' */ + { 0x3F, 0x3D }, /* BRAILLE PATTERN DOTS-123456 -> '=' */ + /* Entries for page 0x29 */ + { 0x83, 0x7B }, /* LEFT WHITE CURLY BRACKET -> '{' */ + /* Entries for page 0x2C */ + { 0x60, 0x4C }, /* LATIN CAPITAL LETTER L WITH DOUBLE BAR -> 'L' */ + { 0x61, 0x6C }, /* LATIN SMALL LETTER L WITH DOUBLE BAR -> 'l' */ + { 0x62, 0x4C }, /* LATIN CAPITAL LETTER L WITH MIDDLE TILDE -> 'L' */ + { 0x63, 0x50 }, /* LATIN CAPITAL LETTER P WITH STROKE -> 'P' */ + { 0x64, 0x52 }, /* LATIN CAPITAL LETTER R WITH TAIL -> 'R' */ + { 0x65, 0x61 }, /* LATIN SMALL LETTER A WITH STROKE -> 'a' */ + { 0x66, 0x74 }, /* LATIN SMALL LETTER T WITH DIAGONAL STROKE -> 't' */ + { 0x67, 0x48 }, /* LATIN CAPITAL LETTER H WITH DESCENDER -> 'H' */ + { 0x68, 0x68 }, /* LATIN SMALL LETTER H WITH DESCENDER -> 'h' */ + { 0x69, 0x4B }, /* LATIN CAPITAL LETTER K WITH DESCENDER -> 'K' */ + { 0x6A, 0x6B }, /* LATIN SMALL LETTER K WITH DESCENDER -> 'k' */ + { 0x6B, 0x5A }, /* LATIN CAPITAL LETTER Z WITH DESCENDER -> 'Z' */ + { 0x6C, 0x7A }, /* LATIN SMALL LETTER Z WITH DESCENDER -> 'z' */ + { 0x6E, 0x4D }, /* LATIN CAPITAL LETTER M WITH HOOK -> 'M' */ + { 0x6F, 0x41 }, /* LATIN CAPITAL LETTER TURNED A -> 'A' */ + /* Entries for page 0x2E */ + { 0x00, 0x72 }, /* RIGHT ANGLE SUBSTITUTION MARKER -> 'r' */ + { 0x06, 0x54 }, /* RAISED INTERPOLATION MARKER -> 'T' */ + { 0x09, 0x73 }, /* LEFT TRANSPOSITION BRACKET -> 's' */ + { 0x0C, 0x5C }, /* LEFT RAISED OMISSION BRACKET -> '\' */ + { 0x0D, 0x2F }, /* RIGHT RAISED OMISSION BRACKET -> '/' */ + { 0x12, 0x3E }, /* HYPODIASTOLE -> '>' */ + { 0x13, 0x25 }, /* DOTTED OBELOS -> '%' */ + { 0x16, 0x3E }, /* DOTTED RIGHT-POINTING ANGLE -> '>' */ + { 0x17, 0x3D }, /* DOUBLE OBLIQUE HYPHEN 
-> '=' */ + { 0x19, 0x2F }, /* PALM BRANCH -> '/' */ + { 0x1A, 0x2D }, /* HYPHEN WITH DIAERESIS -> '-' */ + { 0x1B, 0x7E }, /* TILDE WITH RING ABOVE -> '~' */ + { 0x1C, 0x5C }, /* LEFT LOW PARAPHRASE BRACKET -> '\' */ + { 0x1D, 0x2F }, /* RIGHT LOW PARAPHRASE BRACKET -> '/' */ + { 0x1E, 0x7E }, /* TILDE WITH DOT ABOVE -> '~' */ + { 0x1F, 0x7E }, /* TILDE WITH DOT BELOW -> '~' */ + { 0x2E, 0x3F }, /* REVERSED QUESTION MARK -> '?' */ + { 0x2F, 0x27 }, /* VERTICAL TILDE -> ''' */ + { 0x30, 0x6F }, /* RING POINT -> 'o' */ + { 0x31, 0x2E }, /* WORD SEPARATOR MIDDLE DOT -> '.' */ + { 0x32, 0x2C }, /* TURNED COMMA -> ',' */ + { 0x33, 0x2E }, /* RAISED DOT -> '.' */ + { 0x34, 0x2C }, /* RAISED COMMA -> ',' */ + { 0x35, 0x3B }, /* TURNED SEMICOLON -> ';' */ + { 0x3C, 0x78 }, /* STENOGRAPHIC FULL STOP -> 'x' */ + { 0x3D, 0x7C }, /* VERTICAL SIX DOTS -> '|' */ + { 0x40, 0x3D }, /* DOUBLE HYPHEN -> '=' */ + { 0x41, 0x2C }, /* REVERSED COMMA -> ',' */ + { 0x42, 0x22 }, /* DOUBLE LOW-REVERSED-9 QUOTATION MARK -> '"' */ + /* Entries for page 0x30 */ + { 0x00, 0x20 }, /* IDEOGRAPHIC SPACE -> ' ' */ + { 0x03, 0x22 }, /* DITTO MARK -> '"' */ + { 0x05, 0x22 }, /* IDEOGRAPHIC ITERATION MARK -> '"' */ + { 0x06, 0x2F }, /* IDEOGRAPHIC CLOSING MARK -> '/' */ + { 0x07, 0x30 }, /* IDEOGRAPHIC NUMBER ZERO -> '0' */ + { 0x08, 0x3C }, /* LEFT ANGLE BRACKET -> '<' */ + { 0x0C, 0x5B }, /* LEFT CORNER BRACKET -> '[' */ + { 0x0E, 0x7B }, /* LEFT WHITE CORNER BRACKET -> '{' */ + { 0x12, 0x40 }, /* POSTAL MARK -> '@' */ + { 0x14, 0x5B }, /* LEFT TORTOISE SHELL BRACKET -> '[' */ + { 0x20, 0x40 }, /* POSTAL MARK FACE -> '@' */ + { 0x21, 0x31 }, /* HANGZHOU NUMERAL ONE -> '1' */ + { 0x22, 0x32 }, /* HANGZHOU NUMERAL TWO -> '2' */ + { 0x23, 0x33 }, /* HANGZHOU NUMERAL THREE -> '3' */ + { 0x24, 0x34 }, /* HANGZHOU NUMERAL FOUR -> '4' */ + { 0x25, 0x35 }, /* HANGZHOU NUMERAL FIVE -> '5' */ + { 0x26, 0x36 }, /* HANGZHOU NUMERAL SIX -> '6' */ + { 0x27, 0x37 }, /* HANGZHOU NUMERAL SEVEN -> '7' */ + { 0x28, 
0x38 }, /* HANGZHOU NUMERAL EIGHT -> '8' */ + { 0x29, 0x39 }, /* HANGZHOU NUMERAL NINE -> '9' */ + { 0x30, 0x7E }, /* WAVY DASH -> '~' */ + { 0x31, 0x00 }, /* VERTICAL KANA REPEAT MARK -> ... */ + { 0x34, 0x2B }, /* VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF -> '+' */ + { 0x36, 0x40 }, /* CIRCLED POSTAL MARK -> '@' */ + { 0x41, 0x61 }, /* HIRAGANA LETTER SMALL A -> 'a' */ + { 0x42, 0x61 }, /* HIRAGANA LETTER A -> 'a' */ + { 0x43, 0x69 }, /* HIRAGANA LETTER SMALL I -> 'i' */ + { 0x44, 0x69 }, /* HIRAGANA LETTER I -> 'i' */ + { 0x45, 0x75 }, /* HIRAGANA LETTER SMALL U -> 'u' */ + { 0x46, 0x75 }, /* HIRAGANA LETTER U -> 'u' */ + { 0x47, 0x65 }, /* HIRAGANA LETTER SMALL E -> 'e' */ + { 0x48, 0x65 }, /* HIRAGANA LETTER E -> 'e' */ + { 0x49, 0x6F }, /* HIRAGANA LETTER SMALL O -> 'o' */ + { 0x4A, 0x6F }, /* HIRAGANA LETTER O -> 'o' */ + { 0x93, 0x6E }, /* HIRAGANA LETTER N -> 'n' */ + { 0x9D, 0x22 }, /* HIRAGANA ITERATION MARK -> '"' */ + { 0x9E, 0x22 }, /* HIRAGANA VOICED ITERATION MARK -> '"' */ + { 0xA0, 0x3D }, /* KATAKANA-HIRAGANA DOUBLE HYPHEN -> '=' */ + { 0xA1, 0x61 }, /* KATAKANA LETTER SMALL A -> 'a' */ + { 0xA2, 0x61 }, /* KATAKANA LETTER A -> 'a' */ + { 0xA3, 0x69 }, /* KATAKANA LETTER SMALL I -> 'i' */ + { 0xA4, 0x69 }, /* KATAKANA LETTER I -> 'i' */ + { 0xA5, 0x75 }, /* KATAKANA LETTER SMALL U -> 'u' */ + { 0xA6, 0x75 }, /* KATAKANA LETTER U -> 'u' */ + { 0xA7, 0x65 }, /* KATAKANA LETTER SMALL E -> 'e' */ + { 0xA8, 0x65 }, /* KATAKANA LETTER E -> 'e' */ + { 0xA9, 0x6F }, /* KATAKANA LETTER SMALL O -> 'o' */ + { 0xAA, 0x6F }, /* KATAKANA LETTER O -> 'o' */ + { 0xF3, 0x6E }, /* KATAKANA LETTER N -> 'n' */ + { 0xFB, 0x2A }, /* KATAKANA MIDDLE DOT -> '*' */ + { 0xFC, 0x2D }, /* KATAKANA-HIRAGANA PROLONGED SOUND MARK -> '-' */ + { 0xFD, 0x22 }, /* KATAKANA ITERATION MARK -> '"' */ + { 0xFE, 0x22 }, /* KATAKANA VOICED ITERATION MARK -> '"' */ + /* Entries for page 0x31 */ + { 0x05, 0x42 }, /* BOPOMOFO LETTER B -> 'B' */ + { 0x06, 0x50 }, /* BOPOMOFO 
LETTER P -> 'P' */ + { 0x07, 0x4D }, /* BOPOMOFO LETTER M -> 'M' */ + { 0x08, 0x46 }, /* BOPOMOFO LETTER F -> 'F' */ + { 0x09, 0x44 }, /* BOPOMOFO LETTER D -> 'D' */ + { 0x0A, 0x54 }, /* BOPOMOFO LETTER T -> 'T' */ + { 0x0B, 0x4E }, /* BOPOMOFO LETTER N -> 'N' */ + { 0x0C, 0x4C }, /* BOPOMOFO LETTER L -> 'L' */ + { 0x0D, 0x47 }, /* BOPOMOFO LETTER G -> 'G' */ + { 0x0E, 0x4B }, /* BOPOMOFO LETTER K -> 'K' */ + { 0x0F, 0x48 }, /* BOPOMOFO LETTER H -> 'H' */ + { 0x10, 0x4A }, /* BOPOMOFO LETTER J -> 'J' */ + { 0x11, 0x51 }, /* BOPOMOFO LETTER Q -> 'Q' */ + { 0x12, 0x58 }, /* BOPOMOFO LETTER X -> 'X' */ + { 0x16, 0x52 }, /* BOPOMOFO LETTER R -> 'R' */ + { 0x17, 0x5A }, /* BOPOMOFO LETTER Z -> 'Z' */ + { 0x18, 0x43 }, /* BOPOMOFO LETTER C -> 'C' */ + { 0x19, 0x53 }, /* BOPOMOFO LETTER S -> 'S' */ + { 0x1A, 0x41 }, /* BOPOMOFO LETTER A -> 'A' */ + { 0x1B, 0x4F }, /* BOPOMOFO LETTER O -> 'O' */ + { 0x1C, 0x45 }, /* BOPOMOFO LETTER E -> 'E' */ + { 0x27, 0x49 }, /* BOPOMOFO LETTER I -> 'I' */ + { 0x28, 0x55 }, /* BOPOMOFO LETTER U -> 'U' */ + { 0x2A, 0x56 }, /* BOPOMOFO LETTER V -> 'V' */ + { 0x31, 0x67 }, /* HANGUL LETTER KIYEOK -> 'g' */ + { 0x34, 0x6E }, /* HANGUL LETTER NIEUN -> 'n' */ + { 0x37, 0x64 }, /* HANGUL LETTER TIKEUT -> 'd' */ + { 0x39, 0x72 }, /* HANGUL LETTER RIEUL -> 'r' */ + { 0x41, 0x6D }, /* HANGUL LETTER MIEUM -> 'm' */ + { 0x42, 0x62 }, /* HANGUL LETTER PIEUP -> 'b' */ + { 0x45, 0x73 }, /* HANGUL LETTER SIOS -> 's' */ + { 0x48, 0x6A }, /* HANGUL LETTER CIEUC -> 'j' */ + { 0x4A, 0x63 }, /* HANGUL LETTER CHIEUCH -> 'c' */ + { 0x4B, 0x6B }, /* HANGUL LETTER KHIEUKH -> 'k' */ + { 0x4C, 0x74 }, /* HANGUL LETTER THIEUTH -> 't' */ + { 0x4D, 0x70 }, /* HANGUL LETTER PHIEUPH -> 'p' */ + { 0x4E, 0x68 }, /* HANGUL LETTER HIEUH -> 'h' */ + { 0x4F, 0x61 }, /* HANGUL LETTER A -> 'a' */ + { 0x54, 0x65 }, /* HANGUL LETTER E -> 'e' */ + { 0x57, 0x6F }, /* HANGUL LETTER O -> 'o' */ + { 0x5C, 0x75 }, /* HANGUL LETTER U -> 'u' */ + { 0x63, 0x69 }, /* HANGUL LETTER I -> 
'i' */ + { 0x7F, 0x5A }, /* HANGUL LETTER PANSIOS -> 'Z' */ + { 0x81, 0x4E }, /* HANGUL LETTER YESIEUNG -> 'N' */ + { 0x86, 0x51 }, /* HANGUL LETTER YEORINHIEUH -> 'Q' */ + { 0x8D, 0x55 }, /* HANGUL LETTER ARAEA -> 'U' */ + { 0xB4, 0x50 }, /* BOPOMOFO FINAL LETTER P -> 'P' */ + { 0xB5, 0x54 }, /* BOPOMOFO FINAL LETTER T -> 'T' */ + { 0xB6, 0x4B }, /* BOPOMOFO FINAL LETTER K -> 'K' */ + { 0xB7, 0x48 }, /* BOPOMOFO FINAL LETTER H -> 'H' */ + /* Entries for page 0x32 */ + { 0xD0, 0x61 }, /* CIRCLED KATAKANA A -> 'a' */ + { 0xD1, 0x69 }, /* CIRCLED KATAKANA I -> 'i' */ + { 0xD2, 0x75 }, /* CIRCLED KATAKANA U -> 'u' */ + { 0xD3, 0x75 }, /* CIRCLED KATAKANA E -> 'u' */ + { 0xD4, 0x6F }, /* CIRCLED KATAKANA O -> 'o' */ + /* Entries for page 0xA0 */ + { 0x02, 0x69 }, /* YI SYLLABLE I -> 'i' */ + { 0x0A, 0x61 }, /* YI SYLLABLE A -> 'a' */ + { 0x11, 0x6F }, /* YI SYLLABLE O -> 'o' */ + { 0x14, 0x65 }, /* YI SYLLABLE E -> 'e' */ + /* Entries for page 0xC5 */ + { 0x44, 0x61 }, /* HANGUL SYLLABLE A -> 'a' */ + { 0xD0, 0x65 }, /* HANGUL SYLLABLE E -> 'e' */ + /* Entries for page 0xC6 */ + { 0x24, 0x6F }, /* HANGUL SYLLABLE O -> 'o' */ + { 0xB0, 0x75 }, /* HANGUL SYLLABLE U -> 'u' */ + /* Entries for page 0xC7 */ + { 0x74, 0x69 }, /* HANGUL SYLLABLE I -> 'i' */ + /* Entries for page 0xFB */ + { 0x1D, 0x69 }, /* HEBREW LETTER YOD WITH HIRIQ -> 'i' */ + { 0x20, 0x60 }, /* HEBREW LETTER ALTERNATIVE AYIN -> '`' */ + { 0x21, 0x41 }, /* HEBREW LETTER WIDE ALEF -> 'A' */ + { 0x22, 0x64 }, /* HEBREW LETTER WIDE DALET -> 'd' */ + { 0x23, 0x68 }, /* HEBREW LETTER WIDE HE -> 'h' */ + { 0x25, 0x6C }, /* HEBREW LETTER WIDE LAMED -> 'l' */ + { 0x26, 0x6D }, /* HEBREW LETTER WIDE FINAL MEM -> 'm' */ + { 0x27, 0x72 }, /* HEBREW LETTER WIDE RESH -> 'r' */ + { 0x28, 0x74 }, /* HEBREW LETTER WIDE TAV -> 't' */ + { 0x29, 0x2B }, /* HEBREW LETTER ALTERNATIVE PLUS SIGN -> '+' */ + { 0x2B, 0x53 }, /* HEBREW LETTER SHIN WITH SIN DOT -> 'S' */ + { 0x2D, 0x53 }, /* HEBREW LETTER SHIN WITH DAGESH AND SIN 
DOT -> 'S' */ + { 0x2E, 0x61 }, /* HEBREW LETTER ALEF WITH PATAH -> 'a' */ + { 0x2F, 0x61 }, /* HEBREW LETTER ALEF WITH QAMATS -> 'a' */ + { 0x30, 0x41 }, /* HEBREW LETTER ALEF WITH MAPIQ -> 'A' */ + { 0x31, 0x62 }, /* HEBREW LETTER BET WITH DAGESH -> 'b' */ + { 0x32, 0x67 }, /* HEBREW LETTER GIMEL WITH DAGESH -> 'g' */ + { 0x33, 0x64 }, /* HEBREW LETTER DALET WITH DAGESH -> 'd' */ + { 0x34, 0x68 }, /* HEBREW LETTER HE WITH MAPIQ -> 'h' */ + { 0x35, 0x76 }, /* HEBREW LETTER VAV WITH DAGESH -> 'v' */ + { 0x36, 0x7A }, /* HEBREW LETTER ZAYIN WITH DAGESH -> 'z' */ + { 0x38, 0x74 }, /* HEBREW LETTER TET WITH DAGESH -> 't' */ + { 0x39, 0x79 }, /* HEBREW LETTER YOD WITH DAGESH -> 'y' */ + { 0x3C, 0x6C }, /* HEBREW LETTER LAMED WITH DAGESH -> 'l' */ + { 0x3E, 0x6D }, /* HEBREW LETTER MEM WITH DAGESH -> 'm' */ + { 0x40, 0x6E }, /* HEBREW LETTER NUN WITH DAGESH -> 'n' */ + { 0x41, 0x73 }, /* HEBREW LETTER SAMEKH WITH DAGESH -> 's' */ + { 0x43, 0x70 }, /* HEBREW LETTER FINAL PE WITH DAGESH -> 'p' */ + { 0x44, 0x70 }, /* HEBREW LETTER PE WITH DAGESH -> 'p' */ + { 0x47, 0x6B }, /* HEBREW LETTER QOF WITH DAGESH -> 'k' */ + { 0x48, 0x72 }, /* HEBREW LETTER RESH WITH DAGESH -> 'r' */ + { 0x4A, 0x74 }, /* HEBREW LETTER TAV WITH DAGESH -> 't' */ + { 0x4B, 0x6F }, /* HEBREW LETTER VAV WITH HOLAM -> 'o' */ + { 0x4C, 0x76 }, /* HEBREW LETTER BET WITH RAFE -> 'v' */ + { 0x4E, 0x66 }, /* HEBREW LETTER PE WITH RAFE -> 'f' */ + /* Entries for page 0xFE */ + { 0x23, 0x7E }, /* COMBINING DOUBLE TILDE RIGHT HALF -> '~' */ + { 0x32, 0x2D }, /* PRESENTATION FORM FOR VERTICAL EN DASH -> '-' */ + { 0x33, 0x5F }, /* PRESENTATION FORM FOR VERTICAL LOW LINE -> '_' */ + { 0x34, 0x5F }, /* PRESENTATION FORM FOR VERTICAL WAVY LOW LINE -> '_' */ + { 0x35, 0x28 }, /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS -> '(' */ + { 0x37, 0x7B }, /* PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET -> '{' */ + { 0x39, 0x5B }, /* PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET -> '[' */ + { 0x3F, 
0x3C }, /* PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET -> '<' */ + { 0x41, 0x5B }, /* PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET -> '[' */ + { 0x43, 0x7B }, /* PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET -> '{' */ + { 0x44, 0x7D }, /* PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET -> '}' */ + { 0x50, 0x2C }, /* SMALL COMMA -> ',' */ + { 0x51, 0x2C }, /* SMALL IDEOGRAPHIC COMMA -> ',' */ + { 0x52, 0x2E }, /* SMALL FULL STOP -> '.' */ + { 0x54, 0x3B }, /* SMALL SEMICOLON -> ';' */ + { 0x55, 0x3A }, /* SMALL COLON -> ':' */ + { 0x56, 0x3F }, /* SMALL QUESTION MARK -> '?' */ + { 0x57, 0x21 }, /* SMALL EXCLAMATION MARK -> '!' */ + { 0x58, 0x2D }, /* SMALL EM DASH -> '-' */ + { 0x59, 0x28 }, /* SMALL LEFT PARENTHESIS -> '(' */ + { 0x5A, 0x29 }, /* SMALL RIGHT PARENTHESIS -> ')' */ + { 0x5B, 0x7B }, /* SMALL LEFT CURLY BRACKET -> '{' */ + { 0x5C, 0x7D }, /* SMALL RIGHT CURLY BRACKET -> '}' */ + { 0x5D, 0x7B }, /* SMALL LEFT TORTOISE SHELL BRACKET -> '{' */ + { 0x5E, 0x7D }, /* SMALL RIGHT TORTOISE SHELL BRACKET -> '}' */ + { 0x5F, 0x23 }, /* SMALL NUMBER SIGN -> '#' */ + { 0x60, 0x26 }, /* SMALL AMPERSAND -> '&' */ + { 0x61, 0x2A }, /* SMALL ASTERISK -> '*' */ + { 0x62, 0x2B }, /* SMALL PLUS SIGN -> '+' */ + { 0x63, 0x2D }, /* SMALL HYPHEN-MINUS -> '-' */ + { 0x64, 0x3C }, /* SMALL LESS-THAN SIGN -> '<' */ + { 0x65, 0x3E }, /* SMALL GREATER-THAN SIGN -> '>' */ + { 0x66, 0x3D }, /* SMALL EQUALS SIGN -> '=' */ + { 0x68, 0x5C }, /* SMALL REVERSE SOLIDUS -> '\' */ + { 0x69, 0x24 }, /* SMALL DOLLAR SIGN -> '$' */ + { 0x6A, 0x25 }, /* SMALL PERCENT SIGN -> '%' */ + { 0x6B, 0x40 }, /* SMALL COMMERCIAL AT -> '@' */ + /* Entries for page 0xFF */ + { 0x01, 0x21 }, /* FULLWIDTH EXCLAMATION MARK -> '!' 
*/ + { 0x02, 0x22 }, /* FULLWIDTH QUOTATION MARK -> '"' */ + { 0x03, 0x23 }, /* FULLWIDTH NUMBER SIGN -> '#' */ + { 0x04, 0x24 }, /* FULLWIDTH DOLLAR SIGN -> '$' */ + { 0x05, 0x25 }, /* FULLWIDTH PERCENT SIGN -> '%' */ + { 0x06, 0x26 }, /* FULLWIDTH AMPERSAND -> '&' */ + { 0x07, 0x27 }, /* FULLWIDTH APOSTROPHE -> ''' */ + { 0x08, 0x28 }, /* FULLWIDTH LEFT PARENTHESIS -> '(' */ + { 0x09, 0x29 }, /* FULLWIDTH RIGHT PARENTHESIS -> ')' */ + { 0x0A, 0x2A }, /* FULLWIDTH ASTERISK -> '*' */ + { 0x0B, 0x2B }, /* FULLWIDTH PLUS SIGN -> '+' */ + { 0x0C, 0x2C }, /* FULLWIDTH COMMA -> ',' */ + { 0x0D, 0x2D }, /* FULLWIDTH HYPHEN-MINUS -> '-' */ + { 0x0E, 0x2E }, /* FULLWIDTH FULL STOP -> '.' */ + { 0x0F, 0x2F }, /* FULLWIDTH SOLIDUS -> '/' */ + { 0x10, 0x30 }, /* FULLWIDTH DIGIT ZERO -> '0' */ + { 0x11, 0x31 }, /* FULLWIDTH DIGIT ONE -> '1' */ + { 0x12, 0x32 }, /* FULLWIDTH DIGIT TWO -> '2' */ + { 0x13, 0x33 }, /* FULLWIDTH DIGIT THREE -> '3' */ + { 0x14, 0x34 }, /* FULLWIDTH DIGIT FOUR -> '4' */ + { 0x15, 0x35 }, /* FULLWIDTH DIGIT FIVE -> '5' */ + { 0x16, 0x36 }, /* FULLWIDTH DIGIT SIX -> '6' */ + { 0x17, 0x37 }, /* FULLWIDTH DIGIT SEVEN -> '7' */ + { 0x18, 0x38 }, /* FULLWIDTH DIGIT EIGHT -> '8' */ + { 0x19, 0x39 }, /* FULLWIDTH DIGIT NINE -> '9' */ + { 0x1A, 0x3A }, /* FULLWIDTH COLON -> ':' */ + { 0x1B, 0x3B }, /* FULLWIDTH SEMICOLON -> ';' */ + { 0x1C, 0x3C }, /* FULLWIDTH LESS-THAN SIGN -> '<' */ + { 0x1D, 0x3D }, /* FULLWIDTH EQUALS SIGN -> '=' */ + { 0x1E, 0x3E }, /* FULLWIDTH GREATER-THAN SIGN -> '>' */ + { 0x1F, 0x3F }, /* FULLWIDTH QUESTION MARK -> '?' 
*/ + { 0x20, 0x40 }, /* FULLWIDTH COMMERCIAL AT -> '@' */ + { 0x21, 0x41 }, /* FULLWIDTH LATIN CAPITAL LETTER A -> 'A' */ + { 0x22, 0x42 }, /* FULLWIDTH LATIN CAPITAL LETTER B -> 'B' */ + { 0x23, 0x43 }, /* FULLWIDTH LATIN CAPITAL LETTER C -> 'C' */ + { 0x24, 0x44 }, /* FULLWIDTH LATIN CAPITAL LETTER D -> 'D' */ + { 0x25, 0x45 }, /* FULLWIDTH LATIN CAPITAL LETTER E -> 'E' */ + { 0x26, 0x46 }, /* FULLWIDTH LATIN CAPITAL LETTER F -> 'F' */ + { 0x27, 0x47 }, /* FULLWIDTH LATIN CAPITAL LETTER G -> 'G' */ + { 0x28, 0x48 }, /* FULLWIDTH LATIN CAPITAL LETTER H -> 'H' */ + { 0x29, 0x49 }, /* FULLWIDTH LATIN CAPITAL LETTER I -> 'I' */ + { 0x2A, 0x4A }, /* FULLWIDTH LATIN CAPITAL LETTER J -> 'J' */ + { 0x2B, 0x4B }, /* FULLWIDTH LATIN CAPITAL LETTER K -> 'K' */ + { 0x2C, 0x4C }, /* FULLWIDTH LATIN CAPITAL LETTER L -> 'L' */ + { 0x2D, 0x4D }, /* FULLWIDTH LATIN CAPITAL LETTER M -> 'M' */ + { 0x2E, 0x4E }, /* FULLWIDTH LATIN CAPITAL LETTER N -> 'N' */ + { 0x2F, 0x4F }, /* FULLWIDTH LATIN CAPITAL LETTER O -> 'O' */ + { 0x30, 0x50 }, /* FULLWIDTH LATIN CAPITAL LETTER P -> 'P' */ + { 0x31, 0x51 }, /* FULLWIDTH LATIN CAPITAL LETTER Q -> 'Q' */ + { 0x32, 0x52 }, /* FULLWIDTH LATIN CAPITAL LETTER R -> 'R' */ + { 0x33, 0x53 }, /* FULLWIDTH LATIN CAPITAL LETTER S -> 'S' */ + { 0x34, 0x54 }, /* FULLWIDTH LATIN CAPITAL LETTER T -> 'T' */ + { 0x35, 0x55 }, /* FULLWIDTH LATIN CAPITAL LETTER U -> 'U' */ + { 0x36, 0x56 }, /* FULLWIDTH LATIN CAPITAL LETTER V -> 'V' */ + { 0x37, 0x57 }, /* FULLWIDTH LATIN CAPITAL LETTER W -> 'W' */ + { 0x38, 0x58 }, /* FULLWIDTH LATIN CAPITAL LETTER X -> 'X' */ + { 0x39, 0x59 }, /* FULLWIDTH LATIN CAPITAL LETTER Y -> 'Y' */ + { 0x3A, 0x5A }, /* FULLWIDTH LATIN CAPITAL LETTER Z -> 'Z' */ + { 0x3B, 0x5B }, /* FULLWIDTH LEFT SQUARE BRACKET -> '[' */ + { 0x3C, 0x5C }, /* FULLWIDTH REVERSE SOLIDUS -> '\' */ + { 0x3D, 0x5D }, /* FULLWIDTH RIGHT SQUARE BRACKET -> ']' */ + { 0x3E, 0x5E }, /* FULLWIDTH CIRCUMFLEX ACCENT -> '^' */ + { 0x3F, 0x5F }, /* FULLWIDTH LOW 
LINE -> '_' */ + { 0x40, 0x60 }, /* FULLWIDTH GRAVE ACCENT -> '`' */ + { 0x41, 0x61 }, /* FULLWIDTH LATIN SMALL LETTER A -> 'a' */ + { 0x42, 0x62 }, /* FULLWIDTH LATIN SMALL LETTER B -> 'b' */ + { 0x43, 0x63 }, /* FULLWIDTH LATIN SMALL LETTER C -> 'c' */ + { 0x44, 0x64 }, /* FULLWIDTH LATIN SMALL LETTER D -> 'd' */ + { 0x45, 0x65 }, /* FULLWIDTH LATIN SMALL LETTER E -> 'e' */ + { 0x46, 0x66 }, /* FULLWIDTH LATIN SMALL LETTER F -> 'f' */ + { 0x47, 0x67 }, /* FULLWIDTH LATIN SMALL LETTER G -> 'g' */ + { 0x48, 0x68 }, /* FULLWIDTH LATIN SMALL LETTER H -> 'h' */ + { 0x49, 0x69 }, /* FULLWIDTH LATIN SMALL LETTER I -> 'i' */ + { 0x4A, 0x6A }, /* FULLWIDTH LATIN SMALL LETTER J -> 'j' */ + { 0x4B, 0x6B }, /* FULLWIDTH LATIN SMALL LETTER K -> 'k' */ + { 0x4C, 0x6C }, /* FULLWIDTH LATIN SMALL LETTER L -> 'l' */ + { 0x4D, 0x6D }, /* FULLWIDTH LATIN SMALL LETTER M -> 'm' */ + { 0x4E, 0x6E }, /* FULLWIDTH LATIN SMALL LETTER N -> 'n' */ + { 0x4F, 0x6F }, /* FULLWIDTH LATIN SMALL LETTER O -> 'o' */ + { 0x50, 0x70 }, /* FULLWIDTH LATIN SMALL LETTER P -> 'p' */ + { 0x51, 0x71 }, /* FULLWIDTH LATIN SMALL LETTER Q -> 'q' */ + { 0x52, 0x72 }, /* FULLWIDTH LATIN SMALL LETTER R -> 'r' */ + { 0x53, 0x73 }, /* FULLWIDTH LATIN SMALL LETTER S -> 's' */ + { 0x54, 0x74 }, /* FULLWIDTH LATIN SMALL LETTER T -> 't' */ + { 0x55, 0x75 }, /* FULLWIDTH LATIN SMALL LETTER U -> 'u' */ + { 0x56, 0x76 }, /* FULLWIDTH LATIN SMALL LETTER V -> 'v' */ + { 0x57, 0x77 }, /* FULLWIDTH LATIN SMALL LETTER W -> 'w' */ + { 0x58, 0x78 }, /* FULLWIDTH LATIN SMALL LETTER X -> 'x' */ + { 0x59, 0x79 }, /* FULLWIDTH LATIN SMALL LETTER Y -> 'y' */ + { 0x5A, 0x7A }, /* FULLWIDTH LATIN SMALL LETTER Z -> 'z' */ + { 0x5B, 0x7B }, /* FULLWIDTH LEFT CURLY BRACKET -> '{' */ + { 0x5C, 0x7C }, /* FULLWIDTH VERTICAL LINE -> '|' */ + { 0x5D, 0x7D }, /* FULLWIDTH RIGHT CURLY BRACKET -> '}' */ + { 0x5E, 0x7E }, /* FULLWIDTH TILDE -> '~' */ + { 0x61, 0x2E }, /* HALFWIDTH IDEOGRAPHIC FULL STOP -> '.' 
*/ + { 0x62, 0x5B }, /* HALFWIDTH LEFT CORNER BRACKET -> '[' */ + { 0x63, 0x5D }, /* HALFWIDTH RIGHT CORNER BRACKET -> ']' */ + { 0x64, 0x2C }, /* HALFWIDTH IDEOGRAPHIC COMMA -> ',' */ + { 0x65, 0x2A }, /* HALFWIDTH KATAKANA MIDDLE DOT -> '*' */ + { 0x67, 0x61 }, /* HALFWIDTH KATAKANA LETTER SMALL A -> 'a' */ + { 0x68, 0x69 }, /* HALFWIDTH KATAKANA LETTER SMALL I -> 'i' */ + { 0x69, 0x75 }, /* HALFWIDTH KATAKANA LETTER SMALL U -> 'u' */ + { 0x6A, 0x65 }, /* HALFWIDTH KATAKANA LETTER SMALL E -> 'e' */ + { 0x6B, 0x6F }, /* HALFWIDTH KATAKANA LETTER SMALL O -> 'o' */ + { 0x70, 0x2B }, /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK -> '+' */ + { 0x71, 0x61 }, /* HALFWIDTH KATAKANA LETTER A -> 'a' */ + { 0x72, 0x69 }, /* HALFWIDTH KATAKANA LETTER I -> 'i' */ + { 0x73, 0x75 }, /* HALFWIDTH KATAKANA LETTER U -> 'u' */ + { 0x74, 0x65 }, /* HALFWIDTH KATAKANA LETTER E -> 'e' */ + { 0x75, 0x6F }, /* HALFWIDTH KATAKANA LETTER O -> 'o' */ + { 0x9D, 0x6E }, /* HALFWIDTH KATAKANA LETTER N -> 'n' */ + { 0x9E, 0x3A }, /* HALFWIDTH KATAKANA VOICED SOUND MARK -> ':' */ + { 0x9F, 0x3B }, /* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK -> ';' */ + { 0xA1, 0x67 }, /* HALFWIDTH HANGUL LETTER KIYEOK -> 'g' */ + { 0xA4, 0x6E }, /* HALFWIDTH HANGUL LETTER NIEUN -> 'n' */ + { 0xA7, 0x64 }, /* HALFWIDTH HANGUL LETTER TIKEUT -> 'd' */ + { 0xA9, 0x72 }, /* HALFWIDTH HANGUL LETTER RIEUL -> 'r' */ + { 0xB1, 0x6D }, /* HALFWIDTH HANGUL LETTER MIEUM -> 'm' */ + { 0xB2, 0x62 }, /* HALFWIDTH HANGUL LETTER PIEUP -> 'b' */ + { 0xB5, 0x73 }, /* HALFWIDTH HANGUL LETTER SIOS -> 's' */ + { 0xB8, 0x6A }, /* HALFWIDTH HANGUL LETTER CIEUC -> 'j' */ + { 0xBA, 0x63 }, /* HALFWIDTH HANGUL LETTER CHIEUCH -> 'c' */ + { 0xBB, 0x6B }, /* HALFWIDTH HANGUL LETTER KHIEUKH -> 'k' */ + { 0xBC, 0x74 }, /* HALFWIDTH HANGUL LETTER THIEUTH -> 't' */ + { 0xBD, 0x70 }, /* HALFWIDTH HANGUL LETTER PHIEUPH -> 'p' */ + { 0xBE, 0x68 }, /* HALFWIDTH HANGUL LETTER HIEUH -> 'h' */ + { 0xC2, 0x61 }, /* HALFWIDTH HANGUL LETTER A -> 
'a' */ + { 0xC7, 0x65 }, /* HALFWIDTH HANGUL LETTER E -> 'e' */ + { 0xCC, 0x6F }, /* HALFWIDTH HANGUL LETTER O -> 'o' */ + { 0xD3, 0x75 }, /* HALFWIDTH HANGUL LETTER U -> 'u' */ + { 0xDC, 0x69 }, /* HALFWIDTH HANGUL LETTER I -> 'i' */ + { 0xE2, 0x21 }, /* FULLWIDTH NOT SIGN -> '!' */ + { 0xE3, 0x2D }, /* FULLWIDTH MACRON -> '-' */ + { 0xE4, 0x7C }, /* FULLWIDTH BROKEN BAR -> '|' */ + { 0xE8, 0x7C }, /* HALFWIDTH FORMS LIGHT VERTICAL -> '|' */ + { 0xE9, 0x3C }, /* HALFWIDTH LEFTWARDS ARROW -> '<' */ + { 0xEA, 0x5E }, /* HALFWIDTH UPWARDS ARROW -> '^' */ + { 0xEB, 0x3E }, /* HALFWIDTH RIGHTWARDS ARROW -> '>' */ + { 0xEC, 0x76 }, /* HALFWIDTH DOWNWARDS ARROW -> 'v' */ + { 0xED, 0x23 }, /* HALFWIDTH BLACK SQUARE -> '#' */ + { 0xEE, 0x4F }, /* HALFWIDTH WHITE CIRCLE -> 'O' */ + { 0xF9, 0x7B }, /* INTERLINEAR ANNOTATION ANCHOR -> '{' */ + { 0xFA, 0x7C }, /* INTERLINEAR ANNOTATION SEPARATOR -> '|' */ + { 0xFB, 0x7D }, /* INTERLINEAR ANNOTATION TERMINATOR -> '}' */ +}; + +#define UCS_PAGE_ENTRY_RANGE_MARKER 0 From fe26933cf1e151ce0a5d858c263e8dacb2f12cee Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 7 May 2025 10:13:21 -0400 Subject: [PATCH 099/105] vt: add ucs_get_fallback() This is the code querying the newly introduced tables. 
Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250507141535.40655-7-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/Makefile | 3 +- drivers/tty/vt/ucs.c | 84 ++++++++++++++++++++++++++++++++++++++ include/linux/consolemap.h | 6 +++ 3 files changed, 92 insertions(+), 1 deletion(-) diff --git a/drivers/tty/vt/Makefile b/drivers/tty/vt/Makefile index 509362a3e11e..ae746dcdeec8 100644 --- a/drivers/tty/vt/Makefile +++ b/drivers/tty/vt/Makefile @@ -36,7 +36,8 @@ $(obj)/defkeymap.c: $(obj)/%.c: $(src)/%.map endif -$(obj)/ucs.o: $(src)/ucs.c $(obj)/ucs_width_table.h $(obj)/ucs_recompose_table.h +$(obj)/ucs.o: $(src)/ucs.c $(obj)/ucs_width_table.h \ + $(obj)/ucs_recompose_table.h $(obj)/ucs_fallback_table.h # You may uncomment one of those to have the UCS tables be regenerated # during the build process. By default the _shipped versions are used. diff --git a/drivers/tty/vt/ucs.c b/drivers/tty/vt/ucs.c index b0b23830170d..6c15c5deda5b 100644 --- a/drivers/tty/vt/ucs.c +++ b/drivers/tty/vt/ucs.c @@ -157,3 +157,87 @@ u32 ucs_recompose(u32 base, u32 mark) return result ? result->recomposed : 0; } + +/* + * The fallback table structures implement a 2-level lookup. 
+ */ + +struct ucs_page_desc { + u8 page; /* Page index (high byte of code points) */ + u8 count; /* Number of entries in this page */ + u16 start; /* Start index in entries array */ +}; + +struct ucs_page_entry { + u8 offset; /* Offset within page (0-255) */ + u8 fallback; /* Fallback character or range start marker */ +}; + +#include "ucs_fallback_table.h" + +static int ucs_page_desc_cmp(const void *key, const void *element) +{ + u8 page = *(u8 *)key; + const struct ucs_page_desc *entry = element; + + if (page < entry->page) + return -1; + if (page > entry->page) + return 1; + return 0; +} + +static int ucs_page_entry_cmp(const void *key, const void *element) +{ + u8 offset = *(u8 *)key; + const struct ucs_page_entry *entry = element; + + if (offset < entry->offset) + return -1; + if (entry->fallback == UCS_PAGE_ENTRY_RANGE_MARKER) { + if (offset > entry[1].offset) + return 1; + } else { + if (offset > entry->offset) + return 1; + } + return 0; +} + +/** + * ucs_get_fallback() - Get a substitution for the provided Unicode character + * @cp: Unicode code point (UCS-4) + * + * Get a simpler fallback character for the provided Unicode character. + * This is used for terminal display when corresponding glyph is unavailable. + * The substitution may not be as good as the actual glyph for the original + * character but still way more helpful than a squared question mark.
+ * + * Return: Fallback Unicode code point, or 0 if none is available + */ +u32 ucs_get_fallback(u32 cp) +{ + const struct ucs_page_desc *page; + const struct ucs_page_entry *entry; + u8 page_idx = cp >> 8, offset = cp; + + if (!UCS_IS_BMP(cp)) + return 0; + + page = __inline_bsearch(&page_idx, ucs_fallback_pages, + ARRAY_SIZE(ucs_fallback_pages), + sizeof(*ucs_fallback_pages), + ucs_page_desc_cmp); + if (!page) + return 0; + + entry = __inline_bsearch(&offset, ucs_fallback_entries + page->start, + page->count, sizeof(*ucs_fallback_entries), + ucs_page_entry_cmp); + if (!entry) + return 0; + + if (entry->fallback == UCS_PAGE_ENTRY_RANGE_MARKER) + entry++; + return entry->fallback; +} diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 8167494229db..6180b803795c 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -31,6 +31,7 @@ void console_map_init(void); bool ucs_is_double_width(uint32_t cp); bool ucs_is_zero_width(uint32_t cp); u32 ucs_recompose(u32 base, u32 mark); +u32 ucs_get_fallback(u32 cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -75,6 +76,11 @@ static inline u32 ucs_recompose(u32 base, u32 mark) { return 0; } + +static inline u32 ucs_get_fallback(u32 cp) +{ + return 0; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ From 6802f3591fdc732d2460eed3112b02cc07acab8f Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 7 May 2025 10:13:22 -0400 Subject: [PATCH 100/105] vt: make use of ucs_get_fallback() when glyph is unavailable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Attempt to display a fallback character when given character doesn't have an available glyph. The substitution may not be as good as the original character but still way more helpful than a squared question mark. Example substitutions: À -> A, ç -> c, ø -> o, ─ -> -, © -> C, etc. 
See gen_ucs_fallback_table.py for a comprehensive list. Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250507141535.40655-8-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 556af82a9231..efb761454166 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -3007,6 +3007,19 @@ static int vc_get_glyph(struct vc_data *vc, int tc) return tc; } + /* + * The Unicode screen memory is allocated only when required. + * This is one such case: we're about to "cheat" with the displayed + * character meaning the simple screen buffer won't hold the original + * information, whereas the Unicode screen buffer always does. + */ + vc_uniscr_check(vc); + + /* Try getting a simpler fallback character. */ + tc = ucs_get_fallback(tc); + if (tc) + return vc_get_glyph(vc, tc); + /* Display U+FFFD (Unicode Replacement Character). */ return conv_uni_to_pc(vc, UCS_REPLACEMENT); } From 63f0d28dcabe2733ddb8d0b3813a52bb585b7642 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 7 May 2025 10:13:23 -0400 Subject: [PATCH 101/105] vt: process the full-width ASCII fallback range programmatically This shaves about 170 bytes from ucs.o. 
Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250507141535.40655-9-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/gen_ucs_fallback_table.py | 8 ++ drivers/tty/vt/ucs.c | 8 ++ drivers/tty/vt/ucs_fallback_table.h_shipped | 96 +-------------------- 3 files changed, 17 insertions(+), 95 deletions(-) diff --git a/drivers/tty/vt/gen_ucs_fallback_table.py b/drivers/tty/vt/gen_ucs_fallback_table.py index 80257c6df440..6e09c1cb6d4b 100755 --- a/drivers/tty/vt/gen_ucs_fallback_table.py +++ b/drivers/tty/vt/gen_ucs_fallback_table.py @@ -197,6 +197,14 @@ def get_special_overrides(): # Exclude U+2028 (LINE SEPARATOR) overrides[0x2028] = 0 # LINE SEPARATOR (unidecode: '\n') + # Full-width to ASCII mapping (covering all printable ASCII 33-126) + # 0xFF01 (!) to 0xFF5E (~) -> ASCII 33 (!) to 126 (~) + # Those are excluded here to reduce the table size. + # It is more efficient to process them programmatically in + # ucs.c:ucs_get_fallback(). + for cp in range(0xFF01, 0xFF5E + 1): + overrides[cp] = 0 # Double-width ASCII characters + return overrides def organize_by_pages(fallback_map): diff --git a/drivers/tty/vt/ucs.c b/drivers/tty/vt/ucs.c index 6c15c5deda5b..6ead622b7713 100644 --- a/drivers/tty/vt/ucs.c +++ b/drivers/tty/vt/ucs.c @@ -224,6 +224,14 @@ u32 ucs_get_fallback(u32 cp) if (!UCS_IS_BMP(cp)) return 0; + /* + * Full-width to ASCII mapping (covering all printable ASCII 33-126) + * 0xFF01 (!) to 0xFF5E (~) -> ASCII 33 (!) to 126 (~) + * We process them programmatically to reduce the table size. 
+ */ + if (cp >= 0xFF01 && cp <= 0xFF5E) + return cp - 0xFF01 + 33; + page = __inline_bsearch(&page_idx, ucs_fallback_pages, ARRAY_SIZE(ucs_fallback_pages), sizeof(*ucs_fallback_pages), diff --git a/drivers/tty/vt/ucs_fallback_table.h_shipped b/drivers/tty/vt/ucs_fallback_table.h_shipped index 7fa803511eb5..2da5a8fe1cf1 100644 --- a/drivers/tty/vt/ucs_fallback_table.h_shipped +++ b/drivers/tty/vt/ucs_fallback_table.h_shipped @@ -60,7 +60,7 @@ static const struct ucs_page_desc ucs_fallback_pages[] = { { 0xC7, 1, 3105 }, { 0xFB, 35, 3106 }, { 0xFE, 37, 3141 }, - { 0xFF, 144, 3178 }, + { 0xFF, 50, 3178 }, }; /* Page entries array (referenced by page descriptors) */ @@ -3291,100 +3291,6 @@ static const struct ucs_page_entry ucs_fallback_entries[] = { { 0x6A, 0x25 }, /* SMALL PERCENT SIGN -> '%' */ { 0x6B, 0x40 }, /* SMALL COMMERCIAL AT -> '@' */ /* Entries for page 0xFF */ - { 0x01, 0x21 }, /* FULLWIDTH EXCLAMATION MARK -> '!' */ - { 0x02, 0x22 }, /* FULLWIDTH QUOTATION MARK -> '"' */ - { 0x03, 0x23 }, /* FULLWIDTH NUMBER SIGN -> '#' */ - { 0x04, 0x24 }, /* FULLWIDTH DOLLAR SIGN -> '$' */ - { 0x05, 0x25 }, /* FULLWIDTH PERCENT SIGN -> '%' */ - { 0x06, 0x26 }, /* FULLWIDTH AMPERSAND -> '&' */ - { 0x07, 0x27 }, /* FULLWIDTH APOSTROPHE -> ''' */ - { 0x08, 0x28 }, /* FULLWIDTH LEFT PARENTHESIS -> '(' */ - { 0x09, 0x29 }, /* FULLWIDTH RIGHT PARENTHESIS -> ')' */ - { 0x0A, 0x2A }, /* FULLWIDTH ASTERISK -> '*' */ - { 0x0B, 0x2B }, /* FULLWIDTH PLUS SIGN -> '+' */ - { 0x0C, 0x2C }, /* FULLWIDTH COMMA -> ',' */ - { 0x0D, 0x2D }, /* FULLWIDTH HYPHEN-MINUS -> '-' */ - { 0x0E, 0x2E }, /* FULLWIDTH FULL STOP -> '.' 
*/ - { 0x0F, 0x2F }, /* FULLWIDTH SOLIDUS -> '/' */ - { 0x10, 0x30 }, /* FULLWIDTH DIGIT ZERO -> '0' */ - { 0x11, 0x31 }, /* FULLWIDTH DIGIT ONE -> '1' */ - { 0x12, 0x32 }, /* FULLWIDTH DIGIT TWO -> '2' */ - { 0x13, 0x33 }, /* FULLWIDTH DIGIT THREE -> '3' */ - { 0x14, 0x34 }, /* FULLWIDTH DIGIT FOUR -> '4' */ - { 0x15, 0x35 }, /* FULLWIDTH DIGIT FIVE -> '5' */ - { 0x16, 0x36 }, /* FULLWIDTH DIGIT SIX -> '6' */ - { 0x17, 0x37 }, /* FULLWIDTH DIGIT SEVEN -> '7' */ - { 0x18, 0x38 }, /* FULLWIDTH DIGIT EIGHT -> '8' */ - { 0x19, 0x39 }, /* FULLWIDTH DIGIT NINE -> '9' */ - { 0x1A, 0x3A }, /* FULLWIDTH COLON -> ':' */ - { 0x1B, 0x3B }, /* FULLWIDTH SEMICOLON -> ';' */ - { 0x1C, 0x3C }, /* FULLWIDTH LESS-THAN SIGN -> '<' */ - { 0x1D, 0x3D }, /* FULLWIDTH EQUALS SIGN -> '=' */ - { 0x1E, 0x3E }, /* FULLWIDTH GREATER-THAN SIGN -> '>' */ - { 0x1F, 0x3F }, /* FULLWIDTH QUESTION MARK -> '?' */ - { 0x20, 0x40 }, /* FULLWIDTH COMMERCIAL AT -> '@' */ - { 0x21, 0x41 }, /* FULLWIDTH LATIN CAPITAL LETTER A -> 'A' */ - { 0x22, 0x42 }, /* FULLWIDTH LATIN CAPITAL LETTER B -> 'B' */ - { 0x23, 0x43 }, /* FULLWIDTH LATIN CAPITAL LETTER C -> 'C' */ - { 0x24, 0x44 }, /* FULLWIDTH LATIN CAPITAL LETTER D -> 'D' */ - { 0x25, 0x45 }, /* FULLWIDTH LATIN CAPITAL LETTER E -> 'E' */ - { 0x26, 0x46 }, /* FULLWIDTH LATIN CAPITAL LETTER F -> 'F' */ - { 0x27, 0x47 }, /* FULLWIDTH LATIN CAPITAL LETTER G -> 'G' */ - { 0x28, 0x48 }, /* FULLWIDTH LATIN CAPITAL LETTER H -> 'H' */ - { 0x29, 0x49 }, /* FULLWIDTH LATIN CAPITAL LETTER I -> 'I' */ - { 0x2A, 0x4A }, /* FULLWIDTH LATIN CAPITAL LETTER J -> 'J' */ - { 0x2B, 0x4B }, /* FULLWIDTH LATIN CAPITAL LETTER K -> 'K' */ - { 0x2C, 0x4C }, /* FULLWIDTH LATIN CAPITAL LETTER L -> 'L' */ - { 0x2D, 0x4D }, /* FULLWIDTH LATIN CAPITAL LETTER M -> 'M' */ - { 0x2E, 0x4E }, /* FULLWIDTH LATIN CAPITAL LETTER N -> 'N' */ - { 0x2F, 0x4F }, /* FULLWIDTH LATIN CAPITAL LETTER O -> 'O' */ - { 0x30, 0x50 }, /* FULLWIDTH LATIN CAPITAL LETTER P -> 'P' */ - { 0x31, 0x51 }, /* 
FULLWIDTH LATIN CAPITAL LETTER Q -> 'Q' */ - { 0x32, 0x52 }, /* FULLWIDTH LATIN CAPITAL LETTER R -> 'R' */ - { 0x33, 0x53 }, /* FULLWIDTH LATIN CAPITAL LETTER S -> 'S' */ - { 0x34, 0x54 }, /* FULLWIDTH LATIN CAPITAL LETTER T -> 'T' */ - { 0x35, 0x55 }, /* FULLWIDTH LATIN CAPITAL LETTER U -> 'U' */ - { 0x36, 0x56 }, /* FULLWIDTH LATIN CAPITAL LETTER V -> 'V' */ - { 0x37, 0x57 }, /* FULLWIDTH LATIN CAPITAL LETTER W -> 'W' */ - { 0x38, 0x58 }, /* FULLWIDTH LATIN CAPITAL LETTER X -> 'X' */ - { 0x39, 0x59 }, /* FULLWIDTH LATIN CAPITAL LETTER Y -> 'Y' */ - { 0x3A, 0x5A }, /* FULLWIDTH LATIN CAPITAL LETTER Z -> 'Z' */ - { 0x3B, 0x5B }, /* FULLWIDTH LEFT SQUARE BRACKET -> '[' */ - { 0x3C, 0x5C }, /* FULLWIDTH REVERSE SOLIDUS -> '\' */ - { 0x3D, 0x5D }, /* FULLWIDTH RIGHT SQUARE BRACKET -> ']' */ - { 0x3E, 0x5E }, /* FULLWIDTH CIRCUMFLEX ACCENT -> '^' */ - { 0x3F, 0x5F }, /* FULLWIDTH LOW LINE -> '_' */ - { 0x40, 0x60 }, /* FULLWIDTH GRAVE ACCENT -> '`' */ - { 0x41, 0x61 }, /* FULLWIDTH LATIN SMALL LETTER A -> 'a' */ - { 0x42, 0x62 }, /* FULLWIDTH LATIN SMALL LETTER B -> 'b' */ - { 0x43, 0x63 }, /* FULLWIDTH LATIN SMALL LETTER C -> 'c' */ - { 0x44, 0x64 }, /* FULLWIDTH LATIN SMALL LETTER D -> 'd' */ - { 0x45, 0x65 }, /* FULLWIDTH LATIN SMALL LETTER E -> 'e' */ - { 0x46, 0x66 }, /* FULLWIDTH LATIN SMALL LETTER F -> 'f' */ - { 0x47, 0x67 }, /* FULLWIDTH LATIN SMALL LETTER G -> 'g' */ - { 0x48, 0x68 }, /* FULLWIDTH LATIN SMALL LETTER H -> 'h' */ - { 0x49, 0x69 }, /* FULLWIDTH LATIN SMALL LETTER I -> 'i' */ - { 0x4A, 0x6A }, /* FULLWIDTH LATIN SMALL LETTER J -> 'j' */ - { 0x4B, 0x6B }, /* FULLWIDTH LATIN SMALL LETTER K -> 'k' */ - { 0x4C, 0x6C }, /* FULLWIDTH LATIN SMALL LETTER L -> 'l' */ - { 0x4D, 0x6D }, /* FULLWIDTH LATIN SMALL LETTER M -> 'm' */ - { 0x4E, 0x6E }, /* FULLWIDTH LATIN SMALL LETTER N -> 'n' */ - { 0x4F, 0x6F }, /* FULLWIDTH LATIN SMALL LETTER O -> 'o' */ - { 0x50, 0x70 }, /* FULLWIDTH LATIN SMALL LETTER P -> 'p' */ - { 0x51, 0x71 }, /* FULLWIDTH LATIN SMALL 
LETTER Q -> 'q' */ - { 0x52, 0x72 }, /* FULLWIDTH LATIN SMALL LETTER R -> 'r' */ - { 0x53, 0x73 }, /* FULLWIDTH LATIN SMALL LETTER S -> 's' */ - { 0x54, 0x74 }, /* FULLWIDTH LATIN SMALL LETTER T -> 't' */ - { 0x55, 0x75 }, /* FULLWIDTH LATIN SMALL LETTER U -> 'u' */ - { 0x56, 0x76 }, /* FULLWIDTH LATIN SMALL LETTER V -> 'v' */ - { 0x57, 0x77 }, /* FULLWIDTH LATIN SMALL LETTER W -> 'w' */ - { 0x58, 0x78 }, /* FULLWIDTH LATIN SMALL LETTER X -> 'x' */ - { 0x59, 0x79 }, /* FULLWIDTH LATIN SMALL LETTER Y -> 'y' */ - { 0x5A, 0x7A }, /* FULLWIDTH LATIN SMALL LETTER Z -> 'z' */ - { 0x5B, 0x7B }, /* FULLWIDTH LEFT CURLY BRACKET -> '{' */ - { 0x5C, 0x7C }, /* FULLWIDTH VERTICAL LINE -> '|' */ - { 0x5D, 0x7D }, /* FULLWIDTH RIGHT CURLY BRACKET -> '}' */ - { 0x5E, 0x7E }, /* FULLWIDTH TILDE -> '~' */ { 0x61, 0x2E }, /* HALFWIDTH IDEOGRAPHIC FULL STOP -> '.' */ { 0x62, 0x5B }, /* HALFWIDTH LEFT CORNER BRACKET -> '[' */ { 0x63, 0x5D }, /* HALFWIDTH RIGHT CORNER BRACKET -> ']' */ From c4c7ead7b86c1e7f11c64915b7e5bb6d2e242691 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 15 May 2025 11:30:52 -0400 Subject: [PATCH 102/105] vt: remove VT_RESIZE and VT_RESIZEX from vt_compat_ioctl() They are listed among those cmd values that "treat 'arg' as an integer" which is wrong. They should instead fall into the default case. Probably nobody ever relied on that code since 2009 but still.
Fixes: e92166517e3c ("tty: handle VT specific compat ioctls in vt driver") Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/pr214s15-36r8-6732-2pop-159nq85o48r7@syhkavp.arg Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt_ioctl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 4b91072f3a4e..1f2bdd2e1cc5 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -1103,8 +1103,6 @@ long vt_compat_ioctl(struct tty_struct *tty, case VT_WAITACTIVE: case VT_RELDISP: case VT_DISALLOCATE: - case VT_RESIZE: - case VT_RESIZEX: return vt_ioctl(tty, cmd, arg); /* From 80fa7a03378588582eb40f89b6f418c0c256cf24 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 20 May 2025 13:16:43 -0400 Subject: [PATCH 103/105] vt: bracketed paste support This is comprised of 3 aspects: - Take note of when applications advertise bracketed paste support via "\e[?2004h" and "\e[?2004l". - Insert bracketed paste markers ("\e[200~" and "\e[201~") around pasted content in paste_selection() when bracketed paste is active. - Add TIOCL_GETBRACKETEDPASTE to return bracketed paste status so user space daemons implementing cut-and-paste functionality (e.g. gpm, BRLTTY) may know when to insert bracketed paste markers. 
Link: https://en.wikipedia.org/wiki/Bracketed-paste Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20250520171851.1219676-2-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/selection.c | 31 +++++++++++++++++++++++++++---- drivers/tty/vt/vt.c | 15 +++++++++++++++ include/linux/console_struct.h | 1 + include/uapi/linux/tiocl.h | 1 + 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 791e2f1f7c0b..24b0a53e5a79 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -403,6 +403,12 @@ int paste_selection(struct tty_struct *tty) DECLARE_WAITQUEUE(wait, current); int ret = 0; + bool bp = vc->vc_bracketed_paste; + static const char bracketed_paste_start[] = "\033[200~"; + static const char bracketed_paste_end[] = "\033[201~"; + const char *bps = bp ? bracketed_paste_start : NULL; + const char *bpe = bp ? bracketed_paste_end : NULL; + console_lock(); poke_blanked_console(); console_unlock(); @@ -414,7 +420,7 @@ int paste_selection(struct tty_struct *tty) add_wait_queue(&vc->paste_wait, &wait); mutex_lock(&vc_sel.lock); - while (vc_sel.buffer && vc_sel.buf_len > pasted) { + while (vc_sel.buffer && (vc_sel.buf_len > pasted || bpe)) { set_current_state(TASK_INTERRUPTIBLE); if (signal_pending(current)) { ret = -EINTR; @@ -427,10 +433,27 @@ int paste_selection(struct tty_struct *tty) continue; } __set_current_state(TASK_RUNNING); + + if (bps) { + bps += tty_ldisc_receive_buf(ld, bps, NULL, strlen(bps)); + if (*bps != '\0') + continue; + bps = NULL; + } + count = vc_sel.buf_len - pasted; - count = tty_ldisc_receive_buf(ld, vc_sel.buffer + pasted, NULL, - count); - pasted += count; + if (count) { + pasted += tty_ldisc_receive_buf(ld, vc_sel.buffer + pasted, + NULL, count); + if (vc_sel.buf_len > pasted) + continue; + } + + if (bpe) { + bpe += tty_ldisc_receive_buf(ld, bpe, NULL, strlen(bpe)); + if (*bpe == '\0') + bpe = NULL; + } 
} mutex_unlock(&vc_sel.lock); remove_wait_queue(&vc->paste_wait, &wait); diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index efb761454166..ed39d9cb4432 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1870,6 +1870,14 @@ int mouse_reporting(void) return vc_cons[fg_console].d->vc_report_mouse; } +/* invoked via ioctl(TIOCLINUX) */ +static int get_bracketed_paste(struct tty_struct *tty) +{ + struct vc_data *vc = tty->driver_data; + + return vc->vc_bracketed_paste; +} + enum { CSI_DEC_hl_CURSOR_KEYS = 1, /* CKM: cursor keys send ^[Ox/^[[x */ CSI_DEC_hl_132_COLUMNS = 3, /* COLM: 80/132 mode switch */ @@ -1880,6 +1888,7 @@ enum { CSI_DEC_hl_MOUSE_X10 = 9, CSI_DEC_hl_SHOW_CURSOR = 25, /* TCEM */ CSI_DEC_hl_MOUSE_VT200 = 1000, + CSI_DEC_hl_BRACKETED_PASTE = 2004, }; /* console_lock is held */ @@ -1932,6 +1941,9 @@ static void csi_DEC_hl(struct vc_data *vc, bool on_off) case CSI_DEC_hl_MOUSE_VT200: vc->vc_report_mouse = on_off ? 2 : 0; break; + case CSI_DEC_hl_BRACKETED_PASTE: + vc->vc_bracketed_paste = on_off; + break; } } @@ -2157,6 +2169,7 @@ static void reset_terminal(struct vc_data *vc, int do_clear) vc->state.charset = 0; vc->vc_need_wrap = 0; vc->vc_report_mouse = 0; + vc->vc_bracketed_paste = 0; vc->vc_utf = default_utf8; vc->vc_utf_count = 0; @@ -3483,6 +3496,8 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) break; case TIOCL_BLANKEDSCREEN: return console_blanked; + case TIOCL_GETBRACKETEDPASTE: + return get_bracketed_paste(tty); default: return -EINVAL; } diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index 20f564e98552..59b4fec5f254 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -145,6 +145,7 @@ struct vc_data { unsigned int vc_need_wrap : 1; unsigned int vc_can_do_color : 1; unsigned int vc_report_mouse : 2; + unsigned int vc_bracketed_paste : 1; unsigned char vc_utf : 1; /* Unicode UTF-8 encoding */ unsigned char vc_utf_count; int vc_utf_char; diff --git 
a/include/uapi/linux/tiocl.h b/include/uapi/linux/tiocl.h index b32acc229024..88faba506c3d 100644 --- a/include/uapi/linux/tiocl.h +++ b/include/uapi/linux/tiocl.h @@ -36,5 +36,6 @@ struct tiocl_selection { #define TIOCL_BLANKSCREEN 14 /* keep screen blank even if a key is pressed */ #define TIOCL_BLANKEDSCREEN 15 /* return which vt was blanked */ #define TIOCL_GETKMSGREDIRECT 17 /* get the vt the kernel messages are restricted to */ +#define TIOCL_GETBRACKETEDPASTE 18 /* get whether paste may be bracketed */ #endif /* _LINUX_TIOCL_H */ From 81cf4d7d2379df853a0cbb8486286783c7380ac3 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 20 May 2025 13:16:44 -0400 Subject: [PATCH 104/105] vt: add VT_GETCONSIZECSRPOS to retrieve console size and cursor position The console dimension and cursor position are available through the /dev/vcsa interface already. However the /dev/vcsa header format uses single-byte fields therefore those values are clamped to 255. As surprising as this may seem, some people do use 240-column 67-row screens (a 1920x1080 monitor with 8x16 pixel fonts) which is getting close to the limit. Monitors with higher resolution are not uncommon these days (3840x2160 producing a 480x135 character display) and it is just a matter of time before someone with, say, a braille display using the Linux VT console and BRLTTY on such a screen reports a bug about missing and oddly misaligned screen content. Let's add VT_GETCONSIZECSRPOS for the retrieval of console size and cursor position without byte-sized limitations. The actual console size limit as encoded in vt.c is 32767x32767 so using a short here is appropriate. Then this can be used to get the cursor position when /dev/vcsa reports 255. The screen dimension may already be obtained using TIOCGWINSZ and adding the same information to VT_GETCONSIZECSRPOS might be redundant.
However applications that care about cursor position also care about display size and having 2 separate system calls to obtain them separately is wasteful. Also, the cursor position can be queried by writing "\e[6n" to a tty and reading back the result but that may be done only by the actual application using that tty and not a sideline observer. Signed-off-by: Nicolas Pitre Link: https://lore.kernel.org/r/20250520171851.1219676-3-nico@fluxnic.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt_ioctl.c | 16 ++++++++++++++++ include/uapi/linux/vt.h | 11 +++++++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 1f2bdd2e1cc5..61342e06970a 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -951,6 +951,22 @@ int vt_ioctl(struct tty_struct *tty, (unsigned short __user *)arg); case VT_WAITEVENT: return vt_event_wait_ioctl((struct vt_event __user *)arg); + + case VT_GETCONSIZECSRPOS: + { + struct vt_consizecsrpos concsr; + + console_lock(); + concsr.con_cols = vc->vc_cols; + concsr.con_rows = vc->vc_rows; + concsr.csr_col = vc->state.x; + concsr.csr_row = vc->state.y; + console_unlock(); + if (copy_to_user(up, &concsr, sizeof(concsr))) + return -EFAULT; + return 0; + } + default: return -ENOIOCTLCMD; } diff --git a/include/uapi/linux/vt.h b/include/uapi/linux/vt.h index e9d39c48520a..e5b0c492aa18 100644 --- a/include/uapi/linux/vt.h +++ b/include/uapi/linux/vt.h @@ -2,6 +2,8 @@ #ifndef _UAPI_LINUX_VT_H #define _UAPI_LINUX_VT_H +#include <linux/ioctl.h> +#include <linux/types.h> /* * These constants are also useful for user-level apps (e.g., VC @@ -84,4 +86,13 @@ struct vt_setactivate { #define VT_SETACTIVATE 0x560F /* Activate and set the mode of a console */ +/* get console size and cursor position */ +struct vt_consizecsrpos { + __u16 con_rows; /* number of console rows */ + __u16 con_cols; /* number of console columns */ + __u16 csr_row; /* current cursor's row */ + __u16 csr_col; /* current cursor's column */
+}; +#define VT_GETCONSIZECSRPOS _IOR('V', 0x10, struct vt_consizecsrpos) + #endif /* _UAPI_LINUX_VT_H */ From b495021a973e2468497689bd3e29b736747b896f Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 22 May 2025 07:38:35 +0200 Subject: [PATCH 105/105] tty: serial: 8250_omap: fix TX with DMA for am33xx Commit 1788cf6a91d9 ("tty: serial: switch from circ_buf to kfifo") introduced an error in the TX DMA handling for 8250_omap. When the OMAP_DMA_TX_KICK flag is set, the "skip_byte" is pulled from the kfifo and emitted directly in order to start the DMA. While the kfifo is updated, dma->tx_size is not decreased. This leads to uart_xmit_advance() called in omap_8250_dma_tx_complete() advancing the kfifo by one too much. In practice, transmitting N bytes has been seen to result in the last N-1 bytes being sent repeatedly. This change fixes the problem by moving all of the dma setup after the OMAP_DMA_TX_KICK handling and using kfifo_len() instead of the DMA size for the 4-byte cutoff check. This slightly changes the behaviour at buffer wraparound, but it still transmits the correct bytes somehow. Now, the "skip_byte" would no longer be accounted to the stats. As previously, dma->tx_size included also this skip byte, up->icount.tx was updated by aforementioned uart_xmit_advance() in omap_8250_dma_tx_complete(). Fix this by using the uart_fifo_out() helper instead of bare kfifo_get(). 
Based on patch by Mans Rullgard Signed-off-by: "Jiri Slaby (SUSE)" Fixes: 1788cf6a91d9 ("tty: serial: switch from circ_buf to kfifo") Link: https://lore.kernel.org/all/20250506150748.3162-1-mans@mansr.com/ Reported-by: Mans Rullgard Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250522053835.3495975-1-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 2a0ce11f405d..72ae08d6204f 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -1173,16 +1173,6 @@ static int omap_8250_tx_dma(struct uart_8250_port *p) return 0; } - sg_init_table(&sg, 1); - ret = kfifo_dma_out_prepare_mapped(&tport->xmit_fifo, &sg, 1, - UART_XMIT_SIZE, dma->tx_addr); - if (ret != 1) { - serial8250_clear_THRI(p); - return 0; - } - - dma->tx_size = sg_dma_len(&sg); - if (priv->habit & OMAP_DMA_TX_KICK) { unsigned char c; u8 tx_lvl; @@ -1207,18 +1197,22 @@ static int omap_8250_tx_dma(struct uart_8250_port *p) ret = -EBUSY; goto err; } - if (dma->tx_size < 4) { + if (kfifo_len(&tport->xmit_fifo) < 4) { ret = -EINVAL; goto err; } - if (!kfifo_get(&tport->xmit_fifo, &c)) { + if (!uart_fifo_out(&p->port, &c, 1)) { ret = -EINVAL; goto err; } skip_byte = c; - /* now we need to recompute due to kfifo_get */ - kfifo_dma_out_prepare_mapped(&tport->xmit_fifo, &sg, 1, - UART_XMIT_SIZE, dma->tx_addr); + } + + sg_init_table(&sg, 1); + ret = kfifo_dma_out_prepare_mapped(&tport->xmit_fifo, &sg, 1, UART_XMIT_SIZE, dma->tx_addr); + if (ret != 1) { + ret = -EINVAL; + goto err; } desc = dmaengine_prep_slave_sg(dma->txchan, &sg, 1, DMA_MEM_TO_DEV, @@ -1228,6 +1222,7 @@ static int omap_8250_tx_dma(struct uart_8250_port *p) goto err; } + dma->tx_size = sg_dma_len(&sg); dma->tx_running = 1; desc->callback = omap_8250_dma_tx_complete;