lib/charset: utf8_get() should return error
utf8_get() should return an error when it hits an illegal UTF-8 sequence instead of silently converting the input to a question mark. Correct utf8_get() and its unit test. console_read_unicode() now ignores illegal UTF-8 sequences. Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
This commit is contained in:
@@ -32,7 +32,7 @@ static struct capitalization_table capitalization_table[] =
|
|||||||
*
|
*
|
||||||
* @read_u8: - stream reader
|
* @read_u8: - stream reader
|
||||||
* @src: - string buffer passed to stream reader, optional
|
* @src: - string buffer passed to stream reader, optional
|
||||||
* Return: - Unicode code point
|
* Return: - Unicode code point, or -1
|
||||||
*/
|
*/
|
||||||
static int get_code(u8 (*read_u8)(void *data), void *data)
|
static int get_code(u8 (*read_u8)(void *data), void *data)
|
||||||
{
|
{
|
||||||
@@ -78,7 +78,7 @@ static int get_code(u8 (*read_u8)(void *data), void *data)
|
|||||||
}
|
}
|
||||||
return ch;
|
return ch;
|
||||||
error:
|
error:
|
||||||
return '?';
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -120,15 +120,22 @@ static u8 read_console(void *data)
|
|||||||
|
|
||||||
int console_read_unicode(s32 *code)
|
int console_read_unicode(s32 *code)
|
||||||
{
|
{
|
||||||
|
for (;;) {
|
||||||
|
s32 c;
|
||||||
|
|
||||||
if (!tstc()) {
|
if (!tstc()) {
|
||||||
/* No input available */
|
/* No input available */
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Read Unicode code */
|
/* Read Unicode code */
|
||||||
*code = get_code(read_console, NULL);
|
c = get_code(read_console, NULL);
|
||||||
|
if (c > 0) {
|
||||||
|
*code = c;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
s32 utf8_get(const char **src)
|
s32 utf8_get(const char **src)
|
||||||
{
|
{
|
||||||
|
@@ -52,6 +52,7 @@ static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96,
|
|||||||
static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00};
|
static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00};
|
||||||
static const char j2[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00};
|
static const char j2[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00};
|
||||||
static const char j3[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00};
|
static const char j3[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00};
|
||||||
|
static const char j4[] = {0xa1, 0x00};
|
||||||
|
|
||||||
static int unicode_test_u16_strlen(struct unit_test_state *uts)
|
static int unicode_test_u16_strlen(struct unit_test_state *uts)
|
||||||
{
|
{
|
||||||
@@ -165,6 +166,12 @@ static int unicode_test_utf8_get(struct unit_test_state *uts)
|
|||||||
ut_asserteq(0x0001048d, code);
|
ut_asserteq(0x0001048d, code);
|
||||||
ut_asserteq_ptr(s, d4 + 4);
|
ut_asserteq_ptr(s, d4 + 4);
|
||||||
|
|
||||||
|
/* Check illegal character */
|
||||||
|
s = j4;
|
||||||
|
code = utf8_get((const char **)&s);
|
||||||
|
ut_asserteq(-1, code);
|
||||||
|
ut_asserteq_ptr(j4 + 1, s);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
UNICODE_TEST(unicode_test_utf8_get);
|
UNICODE_TEST(unicode_test_utf8_get);
|
||||||
|
Reference in New Issue
Block a user