shared: add nm_utils_str_utf8safe_*() API to sanitize UTF-8 strings
Use C-style backslash escaping to sanitize non-UTF-8 strings. The functions are compatible with glib's g_strcompress() and g_strescape(). The only difference is that g_strescape() also escapes all non-printable and non-ASCII characters, while nm_utils_str_utf8safe_escape() -- depending on the flags -- preserves valid UTF-8 sequences, except for the backslash. The flags optionally allow escaping ASCII control characters and all non-ASCII (valid UTF-8) characters. The option to preserve valid (non-ASCII) UTF-8 characters verbatim is what distinguishes these functions from g_strescape().
This commit is contained in:
@@ -5323,6 +5323,100 @@ static void test_nm_utils_enum (void)
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
static void
|
||||
do_test_utils_str_utf8safe (const char *str, const char *expected, NMUtilsStrUtf8SafeFlags flags)
|
||||
{
|
||||
const char *str_safe, *s;
|
||||
gs_free char *str2 = NULL;
|
||||
gs_free char *str3 = NULL;
|
||||
|
||||
str_safe = nm_utils_str_utf8safe_escape (str, flags, &str2);
|
||||
|
||||
str3 = nm_utils_str_utf8safe_escape_cp (str, flags);
|
||||
g_assert_cmpstr (str3, ==, str_safe);
|
||||
g_assert ((!str && !str3) || (str != str3));
|
||||
g_clear_pointer (&str3, g_free);
|
||||
|
||||
if (expected == NULL) {
|
||||
g_assert (str_safe == str);
|
||||
g_assert (!str2);
|
||||
if (str) {
|
||||
g_assert (!strchr (str, '\\'));
|
||||
g_assert (g_utf8_validate (str, -1, NULL));
|
||||
}
|
||||
|
||||
g_assert (str == nm_utils_str_utf8safe_unescape (str_safe, &str3));
|
||||
g_assert (!str3);
|
||||
|
||||
str3 = nm_utils_str_utf8safe_unescape_cp (str_safe);
|
||||
if (str) {
|
||||
g_assert (str3 != str);
|
||||
g_assert_cmpstr (str3, ==, str);
|
||||
} else
|
||||
g_assert (!str3);
|
||||
g_clear_pointer (&str3, g_free);
|
||||
return;
|
||||
}
|
||||
|
||||
g_assert (str);
|
||||
g_assert (str_safe != str);
|
||||
g_assert (str_safe == str2);
|
||||
g_assert ( strchr (str, '\\')
|
||||
|| !g_utf8_validate (str, -1, NULL)
|
||||
|| ( NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII)
|
||||
&& NM_STRCHAR_ANY (str, ch, (guchar) ch >= 127))
|
||||
|| ( NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL)
|
||||
&& NM_STRCHAR_ANY (str, ch, (guchar) ch < ' ')));
|
||||
g_assert (g_utf8_validate (str_safe, -1, NULL));
|
||||
|
||||
str3 = g_strcompress (str_safe);
|
||||
g_assert_cmpstr (str, ==, str3);
|
||||
g_clear_pointer (&str3, g_free);
|
||||
|
||||
str3 = nm_utils_str_utf8safe_unescape_cp (str_safe);
|
||||
g_assert (str3 != str);
|
||||
g_assert_cmpstr (str3, ==, str);
|
||||
g_clear_pointer (&str3, g_free);
|
||||
|
||||
s = nm_utils_str_utf8safe_unescape (str_safe, &str3);
|
||||
g_assert (str3 != str);
|
||||
g_assert (s == str3);
|
||||
g_assert_cmpstr (str3, ==, str);
|
||||
g_clear_pointer (&str3, g_free);
|
||||
|
||||
g_assert_cmpstr (str_safe, ==, expected);
|
||||
}
|
||||
|
||||
static void
|
||||
test_utils_str_utf8safe (void)
|
||||
{
|
||||
do_test_utils_str_utf8safe (NULL, NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("\314", "\\314", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("\314\315x\315\315x", "\\314\\315x\\315\\315x", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("\314\315xx", "\\314\\315xx", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("\314xx", "\\314xx", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("\xa0", "\\240", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("\xe2\x91\xa0", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("\xe2\xe2\x91\xa0", "\\342\xe2\x91\xa0", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("\xe2\xe2\x91\xa0\xa0", "\\342\xe2\x91\xa0\\240", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("a", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("ab", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("ab\314", "ab\\314", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("ab\314adsf", "ab\\314adsf", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("abadsf", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("abäb", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("x\xa0", "x\\240", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("Ä\304ab\\äb", "Ä\\304ab\\\\äb", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("Äab\\äb", "Äab\\\\äb", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("ÄÄab\\äb", "ÄÄab\\\\äb", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("㈞abä㈞b", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
|
||||
do_test_utils_str_utf8safe ("abäb", "ab\\303\\244b", NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII);
|
||||
do_test_utils_str_utf8safe ("ab\ab", "ab\\007b", NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL);
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
static int
|
||||
_test_nm_in_set_get (int *call_counter, gboolean allow_called, int value)
|
||||
{
|
||||
@@ -5680,6 +5774,7 @@ int main (int argc, char **argv)
|
||||
nmtst_init (&argc, &argv, TRUE);
|
||||
|
||||
/* The tests */
|
||||
g_test_add_func ("/core/general/test_utils_str_utf8safe", test_utils_str_utf8safe);
|
||||
g_test_add_func ("/core/general/test_nm_in_set", test_nm_in_set);
|
||||
g_test_add_func ("/core/general/test_nm_in_strset", test_nm_in_strset);
|
||||
g_test_add_func ("/core/general/test_setting_vpn_items", test_setting_vpn_items);
|
||||
|
@@ -397,3 +397,141 @@ nm_g_object_set_property (GObject *object,
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
static void
|
||||
_str_append_escape (GString *s, char ch)
|
||||
{
|
||||
g_string_append_c (s, '\\');
|
||||
g_string_append_c (s, '0' + ((((guchar) ch) >> 6) & 07));
|
||||
g_string_append_c (s, '0' + ((((guchar) ch) >> 3) & 07));
|
||||
g_string_append_c (s, '0' + ( ((guchar) ch) & 07));
|
||||
}
|
||||
|
||||
/**
|
||||
* nm_utils_str_utf8safe_escape:
|
||||
* @str: NUL terminated input string, possibly in utf-8 encoding
|
||||
* @flags: #NMUtilsStrUtf8SafeFlags flags
|
||||
* @to_free: (out): return the pointer location of the string
|
||||
* if a copying was necessary.
|
||||
*
|
||||
* Returns the possible non-UTF-8 NUL terminated string @str
|
||||
* and uses backslash escaping (C escaping, like g_strescape())
|
||||
* to sanitize non UTF-8 characters. The result is valid
|
||||
* UTF-8.
|
||||
*
|
||||
* The operation can be reverted with g_strcompress() or
|
||||
* nm_utils_str_utf8safe_unescape().
|
||||
*
|
||||
* Depending on @flags, valid UTF-8 characters are not escaped at all
|
||||
* (except the escape character '\\'). This is the difference to g_strescape(),
|
||||
* which escapes all non-ASCII characters. This allows to pass on
|
||||
* valid UTF-8 characters as-is and can be directly shown to the user
|
||||
* as UTF-8 -- with exception of the backslash escape character,
|
||||
* invalid UTF-8 sequences, and other (depending on @flags).
|
||||
*
|
||||
* Returns: the escaped input string, as valid UTF-8. If no escaping
|
||||
* is necessary, it returns the input @str. Otherwise, an allocated
|
||||
* string @to_free is returned which must be freed by the caller
|
||||
* with g_free. The escaping can be reverted by g_strcompress().
|
||||
**/
|
||||
const char *
|
||||
nm_utils_str_utf8safe_escape (const char *str, NMUtilsStrUtf8SafeFlags flags, char **to_free)
|
||||
{
|
||||
const char *p = NULL;
|
||||
GString *s;
|
||||
|
||||
g_return_val_if_fail (to_free, NULL);
|
||||
|
||||
*to_free = NULL;
|
||||
if (!str || !str[0])
|
||||
return str;
|
||||
|
||||
if ( g_utf8_validate (str, -1, &p)
|
||||
&& !NM_STRCHAR_ANY (str, ch,
|
||||
( ch == '\\' \
|
||||
|| ( NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) \
|
||||
&& ch < ' ') \
|
||||
|| ( NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII) \
|
||||
&& ((guchar) ch) >= 127))))
|
||||
return str;
|
||||
|
||||
s = g_string_sized_new ((p - str) + strlen (p) + 5);
|
||||
|
||||
do {
|
||||
for (; str < p; str++) {
|
||||
char ch = str[0];
|
||||
|
||||
if (ch == '\\')
|
||||
g_string_append (s, "\\\\");
|
||||
else if ( ( NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) \
|
||||
&& ch < ' ') \
|
||||
|| ( NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII) \
|
||||
&& ((guchar) ch) >= 127))
|
||||
_str_append_escape (s, ch);
|
||||
else
|
||||
g_string_append_c (s, ch);
|
||||
}
|
||||
|
||||
if (p[0] == '\0')
|
||||
break;
|
||||
_str_append_escape (s, p[0]);
|
||||
|
||||
str = &p[1];
|
||||
g_utf8_validate (str, -1, &p);
|
||||
} while (TRUE);
|
||||
|
||||
*to_free = g_string_free (s, FALSE);
|
||||
return *to_free;
|
||||
}
|
||||
|
||||
/**
 * nm_utils_str_utf8safe_unescape:
 * @str: NUL terminated, backslash escaped input string (may be %NULL)
 * @to_free: (out): set to the newly allocated result if unescaping
 *   was necessary, otherwise set to %NULL.
 *
 * Reverts the backslash escaping by calling g_strcompress(). If @str is
 * %NULL or contains no backslash, no copy is made.
 *
 * Returns: @str itself if there was nothing to unescape, otherwise the
 *   allocated string that is also stored in @to_free and must be freed
 *   by the caller with g_free().
 **/
const char *
nm_utils_str_utf8safe_unescape (const char *str, char **to_free)
{
	g_return_val_if_fail (to_free, NULL);

	if (str && strchr (str, '\\')) {
		*to_free = g_strcompress (str);
		return *to_free;
	}

	/* Nothing to unescape: hand back the input without allocating. */
	*to_free = NULL;
	return str;
}
|
||||
|
||||
/**
|
||||
* nm_utils_str_utf8safe_escape_cp:
|
||||
* @str: NUL terminated input string, possibly in utf-8 encoding
|
||||
* @flags: #NMUtilsStrUtf8SafeFlags flags
|
||||
*
|
||||
* Like nm_utils_str_utf8safe_escape(), except the returned value
|
||||
* is always a copy of the input and must be freed by the caller.
|
||||
*
|
||||
* Returns: the escaped input string in UTF-8 encoding. The returned
|
||||
* value should be freed with g_free().
|
||||
* The escaping can be reverted by g_strcompress().
|
||||
**/
|
||||
char *
|
||||
nm_utils_str_utf8safe_escape_cp (const char *str, NMUtilsStrUtf8SafeFlags flags)
|
||||
{
|
||||
char *s;
|
||||
|
||||
nm_utils_str_utf8safe_escape (str, flags, &s);
|
||||
return s ?: g_strdup (str);
|
||||
}
|
||||
|
||||
char *
|
||||
nm_utils_str_utf8safe_unescape_cp (const char *str)
|
||||
{
|
||||
return str ? g_strcompress (str) : NULL;
|
||||
}
|
||||
|
||||
char *
|
||||
nm_utils_str_utf8safe_escape_take (char *str, NMUtilsStrUtf8SafeFlags flags)
|
||||
{
|
||||
char *str_to_free;
|
||||
|
||||
nm_utils_str_utf8safe_escape (str, flags, &str_to_free);
|
||||
if (str_to_free) {
|
||||
g_free (str);
|
||||
return str_to_free;
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
@@ -97,4 +97,20 @@ gboolean nm_g_object_set_property (GObject *object,
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
/* Flags for the nm_utils_str_utf8safe_escape*() functions. */
typedef enum {
	/* Escape only what is mandatory: backslashes and invalid UTF-8 bytes. */
	NM_UTILS_STR_UTF8_SAFE_FLAG_NONE             = 0,

	/* Additionally escape ASCII control characters (below the space character). */
	NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL      = 1 << 0,

	/* Additionally escape all bytes with value 127 or higher, even if they
	 * form valid UTF-8 sequences. */
	NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII = 1 << 1,
} NMUtilsStrUtf8SafeFlags;
|
||||
|
||||
const char *nm_utils_str_utf8safe_escape (const char *str, NMUtilsStrUtf8SafeFlags flags, char **to_free);
|
||||
const char *nm_utils_str_utf8safe_unescape (const char *str, char **to_free);
|
||||
|
||||
char *nm_utils_str_utf8safe_escape_cp (const char *str, NMUtilsStrUtf8SafeFlags flags);
|
||||
char *nm_utils_str_utf8safe_unescape_cp (const char *str);
|
||||
|
||||
char *nm_utils_str_utf8safe_escape_take (char *str, NMUtilsStrUtf8SafeFlags flags);
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
#endif /* __NM_SHARED_UTILS_H__ */
|
||||
|
Reference in New Issue
Block a user