From 43bf9d9755bd21970d8382dc88f071f74fc18fbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 9 Aug 2024 17:38:53 +0200 Subject: [PATCH 1/4] unicode: constify utf8 data table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All users already handle the table as const data. Move the table itself into .rodata to guard against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240809-unicode-const-v1-1-69968a258092@weissschuh.net Signed-off-by: Gabriel Krisman Bertazi --- fs/unicode/mkutf8data.c | 2 +- fs/unicode/utf8data.c_shipped | 2 +- fs/unicode/utf8n.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/unicode/mkutf8data.c b/fs/unicode/mkutf8data.c index 77b685db8275..57e0e290ce6f 100644 --- a/fs/unicode/mkutf8data.c +++ b/fs/unicode/mkutf8data.c @@ -3338,7 +3338,7 @@ static void write_file(void) } fprintf(file, "};\n"); fprintf(file, "\n"); - fprintf(file, "struct utf8data_table utf8_data_table = {\n"); + fprintf(file, "const struct utf8data_table utf8_data_table = {\n"); fprintf(file, "\t.utf8agetab = utf8agetab,\n"); fprintf(file, "\t.utf8agetab_size = ARRAY_SIZE(utf8agetab),\n"); fprintf(file, "\n"); diff --git a/fs/unicode/utf8data.c_shipped b/fs/unicode/utf8data.c_shipped index dafa5fed761d..73a93d49b3ba 100644 --- a/fs/unicode/utf8data.c_shipped +++ b/fs/unicode/utf8data.c_shipped @@ -4107,7 +4107,7 @@ static const unsigned char utf8data[64256] = { 0x81,0x80,0xcf,0x86,0x85,0x84,0xcf,0x86,0xcf,0x06,0x02,0x00,0x00,0x00,0x00,0x00 }; -struct utf8data_table utf8_data_table = { +const struct utf8data_table utf8_data_table = { .utf8agetab = utf8agetab, .utf8agetab_size = ARRAY_SIZE(utf8agetab), diff --git a/fs/unicode/utf8n.h b/fs/unicode/utf8n.h index bd00d587747a..fc703aa4b28e 100644 --- a/fs/unicode/utf8n.h +++ b/fs/unicode/utf8n.h @@ -78,6 +78,6 @@ struct utf8data_table { const 
unsigned char *utf8data; }; -extern struct utf8data_table utf8_data_table; +extern const struct utf8data_table utf8_data_table; #endif /* UTF8NORM_H */ From 156bb2c569cd869583c593d27a5bd69e7b2a4264 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Mon, 2 Sep 2024 19:55:03 -0300 Subject: [PATCH 2/4] unicode: Fix utf8_load() error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit utf8_load() requests the symbol "utf8_data_table" and then checks if the requested UTF-8 version is supported. If it's unsupported, it tries to put the data table using symbol_put(). If an unsupported version is requested, symbol_put() fails like this: kernel BUG at kernel/module/main.c:786! RIP: 0010:__symbol_put+0x93/0xb0 Call Trace: ? __die_body.cold+0x19/0x27 ? die+0x2e/0x50 ? do_trap+0xca/0x110 ? do_error_trap+0x65/0x80 ? __symbol_put+0x93/0xb0 ? exc_invalid_op+0x51/0x70 ? __symbol_put+0x93/0xb0 ? asm_exc_invalid_op+0x1a/0x20 ? __pfx_cmp_name+0x10/0x10 ? __symbol_put+0x93/0xb0 ? __symbol_put+0x62/0xb0 utf8_load+0xf8/0x150 That happens because symbol_put() expects the unique string that identifies the symbol, instead of a pointer to the loaded symbol. Fix that by using such a string. 
Fixes: 2b3d04787012 ("unicode: Add utf8-data module") Signed-off-by: André Almeida Reviewed-by: Theodore Ts'o Link: https://lore.kernel.org/r/20240902225511.757831-2-andrealmeid@igalia.com Signed-off-by: Gabriel Krisman Bertazi --- fs/unicode/utf8-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c index 8395066341a4..0400824ef493 100644 --- a/fs/unicode/utf8-core.c +++ b/fs/unicode/utf8-core.c @@ -198,7 +198,7 @@ struct unicode_map *utf8_load(unsigned int version) return um; out_symbol_put: - symbol_put(um->tables); + symbol_put(utf8_data_table); out_free_um: kfree(um); return ERR_PTR(-EINVAL); From 66715f005bdea3b58393ffe8c9be7d692b240558 Mon Sep 17 00:00:00 2001 From: Gan Jie Date: Thu, 12 Sep 2024 11:19:32 +0800 Subject: [PATCH 3/4] unicode: change the reference of database file Commit 2b3d04787012 ("unicode: Add utf8-data module") changed the database file from 'utf8data.h' to 'utf8data.c' to build a separate module, but it seems the corresponding update to README.utf8data was forgotten, which may cause confusion. Update the README.utf8data and the default 'UTF8_NAME' in 'mkutf8data.c'. Signed-off-by: Gan Jie Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240912031932.1161-1-ganjie182@gmail.com Signed-off-by: Gabriel Krisman Bertazi --- fs/unicode/README.utf8data | 8 ++++---- fs/unicode/mkutf8data.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/unicode/README.utf8data b/fs/unicode/README.utf8data index c73786807d3b..f75567e28138 100644 --- a/fs/unicode/README.utf8data +++ b/fs/unicode/README.utf8data @@ -1,4 +1,4 @@ -The utf8data.h file in this directory is generated from the Unicode +The utf8data.c file in this directory is generated from the Unicode Character Database for version 12.1.0 of the Unicode standard. 
The full set of files can be found here: @@ -45,13 +45,13 @@ Then, build under fs/unicode/ with REGENERATE_UTF8DATA=1: make REGENERATE_UTF8DATA=1 fs/unicode/ -After sanity checking the newly generated utf8data.h file (the +After sanity checking the newly generated utf8data.c file (the version generated from the 12.1.0 UCD should be 4,109 lines long, and have a total size of 324k) and/or comparing it with the older version -of utf8data.h_shipped, rename it to utf8data.h_shipped. +of utf8data.c_shipped, rename it to utf8data.c_shipped. If you are a kernel developer updating to a newer version of the Unicode Character Database, please update this README.utf8data file with the version of the UCD that was used, the md5sum and sha1sums of -the *.txt files, before checking in the new versions of the utf8data.h +the *.txt files, before checking in the new versions of the utf8data.c and README.utf8data files. diff --git a/fs/unicode/mkutf8data.c b/fs/unicode/mkutf8data.c index 57e0e290ce6f..401f5d3aeb0c 100644 --- a/fs/unicode/mkutf8data.c +++ b/fs/unicode/mkutf8data.c @@ -36,7 +36,7 @@ #define FOLD_NAME "CaseFolding.txt" #define NORM_NAME "NormalizationCorrections.txt" #define TEST_NAME "NormalizationTest.txt" -#define UTF8_NAME "utf8data.h" +#define UTF8_NAME "utf8data.c" const char *age_name = AGE_NAME; const char *ccc_name = CCC_NAME; From 6b56a63d286f6f57066c4b5648d8fbec9510beae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Thu, 10 Oct 2024 17:02:42 -0300 Subject: [PATCH 4/4] MAINTAINERS: Add Unicode tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unicode subsystem tree is missing from MAINTAINERS, add it. 
Signed-off-by: André Almeida Signed-off-by: Gabriel Krisman Bertazi --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8766f3e5e87e..fa9340bfd640 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23443,6 +23443,7 @@ UNICODE SUBSYSTEM M: Gabriel Krisman Bertazi L: linux-fsdevel@vger.kernel.org S: Supported +T: git git://git.kernel.org/pub/scm/linux/kernel/git/krisman/unicode.git F: fs/unicode/ UNIFDEF