From df6d27b33a86e2ecdc5a8e1deff275d19b2cbde1 Mon Sep 17 00:00:00 2001
From: Thomas Haller
Date: Tue, 16 May 2017 18:50:21 +0200
Subject: [PATCH] shared: add nm_utils_str_utf8safe_*() API to sanitize UTF-8 strings

Use C-style backslash escaping to sanitize non-UTF-8 strings.
The functions are compatible with glib's g_strcompress() and
g_strescape(). The difference is only that g_strescape() escapes
all non-printable, non ASCII character as well, while
nm_utils_str_utf8safe_escape() -- depending on the flags -- preserves
valid UTF-8 sequence except backslash. The flags allow to optionally
escape ASCII control characters and all non-ASCII (valid UTF-8)
characters. But the option to preserve valid UTF-8 (non-ASCII)
characters verbatim, is what distinguishes from g_strescape().
---
 libnm-core/tests/test-general.c   |  95 ++++++++++++++++++++
 shared/nm-utils/nm-shared-utils.c | 138 ++++++++++++++++++++++++++++++
 shared/nm-utils/nm-shared-utils.h |  16 ++++
 3 files changed, 249 insertions(+)

diff --git a/libnm-core/tests/test-general.c b/libnm-core/tests/test-general.c
index 30f80303b..7ec63f9a8 100644
--- a/libnm-core/tests/test-general.c
+++ b/libnm-core/tests/test-general.c
@@ -5323,6 +5323,100 @@ static void test_nm_utils_enum (void)
 
 /*****************************************************************************/
 
+static void
+do_test_utils_str_utf8safe (const char *str, const char *expected, NMUtilsStrUtf8SafeFlags flags)
+{
+	const char *str_safe, *s;
+	gs_free char *str2 = NULL;
+	gs_free char *str3 = NULL;
+	/* Escape @str with @flags, compare with @expected (NULL: must come back unchanged), then check every unescape variant. */
+	str_safe = nm_utils_str_utf8safe_escape (str, flags, &str2);
+
+	str3 = nm_utils_str_utf8safe_escape_cp (str, flags);
+	g_assert_cmpstr (str3, ==, str_safe);
+	g_assert ((!str && !str3) || (str != str3)); /* the _cp variant never hands back @str itself */
+	g_clear_pointer (&str3, g_free);
+	/* NULL @expected: nothing needed escaping, so @str itself is returned and nothing was allocated. */
+	if (expected == NULL) {
+		g_assert (str_safe == str);
+		g_assert (!str2);
+		if (str) {
+			g_assert (!strchr (str, '\\'));
+			g_assert (g_utf8_validate (str, -1, NULL));
+		}
+
+		g_assert (str == nm_utils_str_utf8safe_unescape (str_safe, &str3));
+		g_assert (!str3);
+
+		str3 = nm_utils_str_utf8safe_unescape_cp (str_safe);
+		if (str) {
+			g_assert (str3 != str);
+			g_assert_cmpstr (str3, ==, str);
+		} else
+			g_assert (!str3);
+		g_clear_pointer (&str3, g_free);
+		return;
+	}
+	/* Otherwise a new string must have been allocated, and @str must contain a reason for escaping. */
+	g_assert (str);
+	g_assert (str_safe != str);
+	g_assert (str_safe == str2);
+	g_assert (   strchr (str, '\\')
+	          || !g_utf8_validate (str, -1, NULL)
+	          || (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII)
+	              && NM_STRCHAR_ANY (str, ch, (guchar) ch >= 127))
+	          || (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL)
+	              && NM_STRCHAR_ANY (str, ch, (guchar) ch < ' ')));
+	g_assert (g_utf8_validate (str_safe, -1, NULL));
+	/* The escaped form must round-trip through g_strcompress() and through both unescape variants. */
+	str3 = g_strcompress (str_safe);
+	g_assert_cmpstr (str, ==, str3);
+	g_clear_pointer (&str3, g_free);
+
+	str3 = nm_utils_str_utf8safe_unescape_cp (str_safe);
+	g_assert (str3 != str);
+	g_assert_cmpstr (str3, ==, str);
+	g_clear_pointer (&str3, g_free);
+
+	s = nm_utils_str_utf8safe_unescape (str_safe, &str3);
+	g_assert (str3 != str);
+	g_assert (s == str3);
+	g_assert_cmpstr (str3, ==, str);
+	g_clear_pointer (&str3, g_free);
+
+	g_assert_cmpstr (str_safe, ==, expected);
+}
+/* Runs the helper over NULL, empty, ASCII, valid and invalid UTF-8 inputs, and once with each escape flag. */
+static void
+test_utils_str_utf8safe (void)
+{
+	do_test_utils_str_utf8safe (NULL, NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("\314", "\\314", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("\314\315x\315\315x", "\\314\\315x\\315\\315x", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("\314\315xx", "\\314\\315xx", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("\314xx", "\\314xx", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("\xa0", "\\240", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("\xe2\x91\xa0", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("\xe2\xe2\x91\xa0", "\\342\xe2\x91\xa0", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("\xe2\xe2\x91\xa0\xa0", "\\342\xe2\x91\xa0\\240", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("a", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("ab", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("ab\314", "ab\\314", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("ab\314adsf", "ab\\314adsf", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("abadsf", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("abäb", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("x\xa0", "x\\240", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("Ä\304ab\\äb", "Ä\\304ab\\\\äb", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("Äab\\äb", "Äab\\\\äb", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("ÄÄab\\äb", "ÄÄab\\\\äb", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("㈞abä㈞b", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("abäb", "ab\\303\\244b", NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII);
+	do_test_utils_str_utf8safe ("ab\ab", "ab\\007b", NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL);
+}
+
+/*****************************************************************************/
+
 static int
 _test_nm_in_set_get (int *call_counter, gboolean allow_called, int value)
 {
@@ -5680,6 +5774,7 @@ int main (int argc, char **argv)
 	nmtst_init (&argc, &argv, TRUE);
 
 	/* The tests */
+	g_test_add_func ("/core/general/test_utils_str_utf8safe", test_utils_str_utf8safe);
 	g_test_add_func ("/core/general/test_nm_in_set", test_nm_in_set);
 	g_test_add_func ("/core/general/test_nm_in_strset", test_nm_in_strset);
 	g_test_add_func ("/core/general/test_setting_vpn_items", test_setting_vpn_items);
diff --git a/shared/nm-utils/nm-shared-utils.c b/shared/nm-utils/nm-shared-utils.c
index 8bd81dafd..9a87536e5
100644
--- a/shared/nm-utils/nm-shared-utils.c
+++ b/shared/nm-utils/nm-shared-utils.c
@@ -397,3 +397,141 @@ nm_g_object_set_property (GObject *object,
 }
 
 /*****************************************************************************/
+
+/* Appends the byte @ch to @s as a backslash followed by exactly three octal digits. */
+static void
+_str_append_escape (GString *s, char ch)
+{
+	const guint byte = (guchar) ch;
+
+	g_string_append_printf (s, "\\%03o", byte);
+}
+
+/**
+ * nm_utils_str_utf8safe_escape:
+ * @str: NUL terminated input string, possibly in UTF-8 encoding
+ * @flags: #NMUtilsStrUtf8SafeFlags flags
+ * @to_free: (out): set to the newly allocated string if a copy
+ *   had to be made, otherwise set to %NULL.
+ *
+ * Takes the NUL terminated string @str, which is possibly not valid
+ * UTF-8, and sanitizes it with backslash escaping (C escaping, like
+ * g_strescape()). The result is valid
+ * UTF-8.
+ *
+ * The operation can be reverted with g_strcompress() or
+ * nm_utils_str_utf8safe_unescape().
+ *
+ * Depending on @flags, valid UTF-8 characters are not escaped at all
+ * (except the escape character '\\'). This is the difference to g_strescape(),
+ * which escapes all non-ASCII characters. Valid UTF-8 characters are thus
+ * passed on as-is and the result can be shown directly to the user
+ * as UTF-8 -- with the exception of the backslash escape character,
+ * invalid UTF-8 sequences, and whatever else @flags selects.
+ *
+ * Returns: the escaped input string, as valid UTF-8. If no escaping
+ * is necessary, the input @str itself is returned. Otherwise, a newly
+ * allocated string is returned (and stored in @to_free), which must be
+ * freed by the caller with g_free(). The escaping can be reverted by g_strcompress().
+ **/
+const char *
+nm_utils_str_utf8safe_escape (const char *str, NMUtilsStrUtf8SafeFlags flags, char **to_free)
+{
+	const char *p = NULL;
+	GString *s;
+
+	g_return_val_if_fail (to_free, NULL);
+
+	*to_free = NULL;
+	if (!str || !str[0])
+		return str;
+	/* Fast path: valid UTF-8 without any byte that has to be escaped is returned as-is. */
+	if (   g_utf8_validate (str, -1, &p)
+	    && !NM_STRCHAR_ANY (str, ch,
+	                        (   ch == '\\' \
+	                         || (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) \
+	                             && ((guchar) ch) < ' ') \
+	                         || (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII) \
+	                             && ((guchar) ch) >= 127))))
+		return str;
+
+	s = g_string_sized_new ((p - str) + strlen (p) + 5);
+	/* Invariant: [str, p) is valid UTF-8; @p is the first invalid byte or the terminating NUL. */
+	do {
+		for (; str < p; str++) {
+			char ch = str[0];
+			/* Compare as guchar: where char is signed, a non-ASCII byte is negative and would pass for a control character. */
+			if (ch == '\\')
+				g_string_append (s, "\\\\");
+			else if (   (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) \
+			             && ((guchar) ch) < ' ') \
+			         || (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII) \
+			             && ((guchar) ch) >= 127))
+				_str_append_escape (s, ch);
+			else
+				g_string_append_c (s, ch);
+		}
+		/* Either the string ends here, or @p is an invalid byte, which is always escaped. */
+		if (p[0] == '\0')
+			break;
+		_str_append_escape (s, p[0]);
+		/* Resume validation right behind the invalid byte. */
+		str = &p[1];
+		g_utf8_validate (str, -1, &p);
+	} while (TRUE);
+
+	*to_free = g_string_free (s, FALSE);
+	return *to_free;
+}
+
+/* Reverts the escaping with g_strcompress(); a @str without backslash is returned itself, with @to_free set to NULL. */
+const char *
+nm_utils_str_utf8safe_unescape (const char *str, char **to_free)
+{
+	g_return_val_if_fail (to_free, NULL);
+
+	*to_free = NULL;
+	if (str && strchr (str, '\\'))
+		return (*to_free = g_strcompress (str));
+	return str;
+}
+
+/**
+ * nm_utils_str_utf8safe_escape_cp:
+ * @str: NUL terminated input string, possibly in utf-8 encoding
+ * @flags: #NMUtilsStrUtf8SafeFlags flags
+ *
+ * Like nm_utils_str_utf8safe_escape(), except the returned value
+ * is always a newly allocated copy and must be freed by the caller.
+ *
+ * Returns: the escaped input string in UTF-8 encoding. The returned
+ * value should be freed with g_free(); %NULL is returned for a %NULL @str.
+ * The escaping can be reverted by g_strcompress().
+ **/
+char *
+nm_utils_str_utf8safe_escape_cp (const char *str, NMUtilsStrUtf8SafeFlags flags)
+{
+	char *escaped;
+
+	nm_utils_str_utf8safe_escape (str, flags, &escaped);
+	return escaped ? escaped : g_strdup (str);
+}
+
+/* Unescapes @str with g_strcompress() into a newly allocated string; a NULL @str yields NULL. */
+char *
+nm_utils_str_utf8safe_unescape_cp (const char *str)
+{
+	return !str ? NULL : g_strcompress (str);
+}
+
+/* Like nm_utils_str_utf8safe_escape(), but consumes @str: it is freed when an escaped copy is returned instead. */
+char *
+nm_utils_str_utf8safe_escape_take (char *str, NMUtilsStrUtf8SafeFlags flags)
+{
+	char *escaped;
+
+	nm_utils_str_utf8safe_escape (str, flags, &escaped);
+	if (escaped)
+		g_free (str);
+	return escaped ? escaped : str;
+}
diff --git a/shared/nm-utils/nm-shared-utils.h b/shared/nm-utils/nm-shared-utils.h
index 3776c1590..438a7a9e7 100644
--- a/shared/nm-utils/nm-shared-utils.h
+++ b/shared/nm-utils/nm-shared-utils.h
@@ -97,4 +97,20 @@ gboolean nm_g_object_set_property (GObject *object,
 
 /*****************************************************************************/
 
+typedef enum {
+	NM_UTILS_STR_UTF8_SAFE_FLAG_NONE             = 0,      /* escape only backslashes and invalid UTF-8 bytes */
+	NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL      = 0x0001, /* additionally escape characters < ' ' */
+	NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII = 0x0002, /* additionally escape bytes >= 127 */
+} NMUtilsStrUtf8SafeFlags;
+
+const char *nm_utils_str_utf8safe_escape   (const char *str, NMUtilsStrUtf8SafeFlags flags, char **to_free);
+const char *nm_utils_str_utf8safe_unescape (const char *str, char **to_free);
+
+char *nm_utils_str_utf8safe_escape_cp   (const char *str, NMUtilsStrUtf8SafeFlags flags);
+char *nm_utils_str_utf8safe_unescape_cp (const char *str);
+
+char *nm_utils_str_utf8safe_escape_take (char *str, NMUtilsStrUtf8SafeFlags flags);
+
+/*****************************************************************************/
+
 #endif /* __NM_SHARED_UTILS_H__ */