libnm: make caching of encodings in nm_utils_ssid_to_utf8() thread safe
libnm's data structures (like NMConnection) are commonly not thread safe. However, it must be possible for operations on *different* data to run concurrently in a thread-safe manner. That means we need to take care of our global variables. nm_utils_ssid_to_utf8() uses a list of encodings, which gets cached. - Replace the GHashTables with a static list. Since it doesn't cost anything, keep the list sorted and look entries up via binary search.
This commit is contained in:
@@ -66,4 +66,8 @@ void _nm_team_settings_property_from_dbus_link_watchers(
|
||||
GVariant *_nm_utils_ip4_dns_to_variant(const char *const *dns, gssize len);
|
||||
GVariant *_nm_utils_ip6_dns_to_variant(const char *const *dns, gssize len);
|
||||
|
||||
/* Accessors for the encoding lookups. Each returns a NULL-terminated-style
 * "const char *const *" list as produced by the corresponding file-local
 * function; test_system_encodings() calls all three. */
const char *const *nmtst_system_encodings_for_lang(const char *lang);
|
||||
const char *const *nmtst_system_encodings_get_default(void);
|
||||
const char *const *nmtst_system_encodings_get(void);
|
||||
|
||||
#endif
|
||||
|
@@ -328,169 +328,181 @@ good:
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
struct IsoLangToEncodings {
|
||||
const char *lang;
|
||||
const char *const *encodings;
|
||||
};
|
||||
typedef const char *const StrvArray4Type[4];
|
||||
|
||||
#define LANG_ENCODINGS(l, ...) \
|
||||
#define LL(l, ...) \
|
||||
{ \
|
||||
.lang = l, .encodings = NM_MAKE_STRV(__VA_ARGS__), \
|
||||
.name = l, .value = {__VA_ARGS__, NULL}, \
|
||||
}
|
||||
|
||||
/* 5-letter language codes */
|
||||
static const struct IsoLangToEncodings isoLangEntries5[] = {
|
||||
/* Simplified Chinese */
|
||||
LANG_ENCODINGS("zh_cn", "euc-cn", "gb2312", "gb18030"), /* PRC */
|
||||
LANG_ENCODINGS("zh_sg", "euc-cn", "gb2312", "gb18030"), /* Singapore */
|
||||
|
||||
/* Traditional Chinese */
|
||||
LANG_ENCODINGS("zh_tw", "big5", "euc-tw"), /* Taiwan */
|
||||
LANG_ENCODINGS("zh_hk", "big5", "euc-tw", "big5-hkcs"), /* Hong Kong */
|
||||
LANG_ENCODINGS("zh_mo", "big5", "euc-tw"), /* Macau */
|
||||
|
||||
LANG_ENCODINGS(NULL, NULL)};
|
||||
static _NM_UTILS_STRING_TABLE_LOOKUP_DEFINE(
|
||||
_iso_lang_entries_5_lookup,
|
||||
StrvArray4Type,
|
||||
const char *const *,
|
||||
{ nm_assert(name); },
|
||||
{ return NULL; },
|
||||
,
|
||||
LL("zh_cn", "euc-cn", "gb2312", "gb18030"), /* Simplified Chinese, PRC */
|
||||
LL("zh_hk", "big5", "euc-tw", "big5-hkcs"), /* Traditional Chinese, Hong Kong */
|
||||
LL("zh_mo", "big5", "euc-tw"), /* Traditional Chinese, Macau */
|
||||
LL("zh_sg", "euc-cn", "gb2312", "gb18030"), /* Simplified Chinese, Singapore */
|
||||
LL("zh_tw", "big5", "euc-tw"), /* Traditional Chinese, Taiwan */
|
||||
);
|
||||
|
||||
/* 2-letter language codes; we don't care about the other 3 in this table */
|
||||
static const struct IsoLangToEncodings isoLangEntries2[] = {
|
||||
/* Japanese */
|
||||
LANG_ENCODINGS("ja", "euc-jp", "shift_jis", "iso-2022-jp"),
|
||||
static _NM_UTILS_STRING_TABLE_LOOKUP_DEFINE(
|
||||
_iso_lang_entries_2_lookup,
|
||||
StrvArray4Type,
|
||||
const char *const *,
|
||||
{ nm_assert(name); },
|
||||
{ return NULL; },
|
||||
,
|
||||
LL("ar", "iso-8859-6", "windows-1256"), /* Arabic */
|
||||
LL("be", "koi8-r", "windows-1251", "iso-8859-5"), /* Cyrillic, Belorussian */
|
||||
LL("bg", "windows-1251", "koi8-r", "iso-8859-5"), /* Cyrillic, Bulgarian */
|
||||
LL("cs", "iso-8859-2", "windows-1250"), /* Central European, Czech */
|
||||
LL("el", "iso-8859-7", "windows-1253"), /* Greek */
|
||||
LL("et", "iso-8859-4", "windows-1257"), /* Baltic, Estonian */
|
||||
LL("he", "iso-8859-8", "windows-1255"), /* Hebrew */
|
||||
LL("hr", "iso-8859-2", "windows-1250"), /* Central European, Croatian */
|
||||
LL("hu", "iso-8859-2", "windows-1250"), /* Central European, Hungarian */
|
||||
LL("iw", "iso-8859-8", "windows-1255"), /* Hebrew */
|
||||
LL("ja", "euc-jp", "shift_jis", "iso-2022-jp"), /* Japanese */
|
||||
LL("ko", "euc-kr", "iso-2022-kr", "johab"), /* Korean */
|
||||
LL("lt", "iso-8859-4", "windows-1257"), /* Baltic, Lithuanian */
|
||||
LL("lv", "iso-8859-4", "windows-1257"), /* Baltic, Latvian */
|
||||
LL("mk", "koi8-r", "windows-1251", "iso-8859-5"), /* Cyrillic, Macedonian */
|
||||
LL("pl", "iso-8859-2", "windows-1250"), /* Central European, Polish */
|
||||
LL("ro", "iso-8859-2", "windows-1250"), /* Central European, Romanian */
|
||||
LL("ru", "koi8-r", "windows-1251", "iso-8859-5"), /* Cyrillic, Russian */
|
||||
LL("sh", "iso-8859-2", "windows-1250"), /* Central European, Serbo-Croatian */
|
||||
LL("sk", "iso-8859-2", "windows-1250"), /* Central European, Slovakian */
|
||||
LL("sl", "iso-8859-2", "windows-1250"), /* Central European, Slovenian */
|
||||
LL("sr", "koi8-r", "windows-1251", "iso-8859-5"), /* Cyrillic, Serbian */
|
||||
LL("th", "iso-8859-11", "windows-874"), /* Thai */
|
||||
LL("tr", "iso-8859-9", "windows-1254"), /* Turkish */
|
||||
LL("uk", "koi8-u", "koi8-r", "windows-1251"), /* Cyrillic, Ukrainian */
|
||||
);
|
||||
|
||||
/* Korean */
|
||||
LANG_ENCODINGS("ko", "euc-kr", "iso-2022-kr", "johab"),
|
||||
|
||||
/* Thai */
|
||||
LANG_ENCODINGS("th", "iso-8859-11", "windows-874"),
|
||||
|
||||
/* Central European */
|
||||
LANG_ENCODINGS("hu", "iso-8859-2", "windows-1250"), /* Hungarian */
|
||||
LANG_ENCODINGS("cs", "iso-8859-2", "windows-1250"), /* Czech */
|
||||
LANG_ENCODINGS("hr", "iso-8859-2", "windows-1250"), /* Croatian */
|
||||
LANG_ENCODINGS("pl", "iso-8859-2", "windows-1250"), /* Polish */
|
||||
LANG_ENCODINGS("ro", "iso-8859-2", "windows-1250"), /* Romanian */
|
||||
LANG_ENCODINGS("sk", "iso-8859-2", "windows-1250"), /* Slovakian */
|
||||
LANG_ENCODINGS("sl", "iso-8859-2", "windows-1250"), /* Slovenian */
|
||||
LANG_ENCODINGS("sh", "iso-8859-2", "windows-1250"), /* Serbo-Croatian */
|
||||
|
||||
/* Cyrillic */
|
||||
LANG_ENCODINGS("ru", "koi8-r", "windows-1251", "iso-8859-5"), /* Russian */
|
||||
LANG_ENCODINGS("be", "koi8-r", "windows-1251", "iso-8859-5"), /* Belorussian */
|
||||
LANG_ENCODINGS("bg", "windows-1251", "koi8-r", "iso-8859-5"), /* Bulgarian */
|
||||
LANG_ENCODINGS("mk", "koi8-r", "windows-1251", "iso-8859-5"), /* Macedonian */
|
||||
LANG_ENCODINGS("sr", "koi8-r", "windows-1251", "iso-8859-5"), /* Serbian */
|
||||
LANG_ENCODINGS("uk", "koi8-u", "koi8-r", "windows-1251"), /* Ukrainian */
|
||||
|
||||
/* Arabic */
|
||||
LANG_ENCODINGS("ar", "iso-8859-6", "windows-1256"),
|
||||
|
||||
/* Baltic */
|
||||
LANG_ENCODINGS("et", "iso-8859-4", "windows-1257"), /* Estonian */
|
||||
LANG_ENCODINGS("lt", "iso-8859-4", "windows-1257"), /* Lithuanian */
|
||||
LANG_ENCODINGS("lv", "iso-8859-4", "windows-1257"), /* Latvian */
|
||||
|
||||
/* Greek */
|
||||
LANG_ENCODINGS("el", "iso-8859-7", "windows-1253"),
|
||||
|
||||
/* Hebrew */
|
||||
LANG_ENCODINGS("he", "iso-8859-8", "windows-1255"),
|
||||
LANG_ENCODINGS("iw", "iso-8859-8", "windows-1255"),
|
||||
|
||||
/* Turkish */
|
||||
LANG_ENCODINGS("tr", "iso-8859-9", "windows-1254"),
|
||||
|
||||
/* Table end */
|
||||
LANG_ENCODINGS(NULL, NULL)};
|
||||
|
||||
static GHashTable *langToEncodings5 = NULL;
|
||||
static GHashTable *langToEncodings2 = NULL;
|
||||
|
||||
static void
|
||||
init_lang_to_encodings_hash(void)
|
||||
static const char *const *
|
||||
_system_encodings_for_lang(const char *lang)
|
||||
{
|
||||
struct IsoLangToEncodings *enc;
|
||||
char tmp_lang[3];
|
||||
const char *const *e;
|
||||
|
||||
if (G_UNLIKELY(langToEncodings5 == NULL)) {
|
||||
/* Five-letter codes */
|
||||
enc = (struct IsoLangToEncodings *) &isoLangEntries5[0];
|
||||
langToEncodings5 = g_hash_table_new(nm_str_hash, g_str_equal);
|
||||
while (enc->lang) {
|
||||
g_hash_table_insert(langToEncodings5, (gpointer) enc->lang, (gpointer) enc->encodings);
|
||||
enc++;
|
||||
}
|
||||
nm_assert(lang);
|
||||
|
||||
if (lang[0] == '\0' || lang[1] == '\0') {
|
||||
/* need at least two characters. */
|
||||
nm_assert(!_iso_lang_entries_5_lookup(lang));
|
||||
nm_assert(!_iso_lang_entries_2_lookup(lang));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (G_UNLIKELY(langToEncodings2 == NULL)) {
|
||||
/* Two-letter codes */
|
||||
enc = (struct IsoLangToEncodings *) &isoLangEntries2[0];
|
||||
langToEncodings2 = g_hash_table_new(nm_str_hash, g_str_equal);
|
||||
while (enc->lang) {
|
||||
g_hash_table_insert(langToEncodings2, (gpointer) enc->lang, (gpointer) enc->encodings);
|
||||
enc++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (lang[2] != '\0') {
|
||||
nm_assert(!_iso_lang_entries_2_lookup(lang));
|
||||
|
||||
static gboolean
|
||||
get_encodings_for_lang(const char *lang, const char *const **encodings)
|
||||
{
|
||||
gs_free char *tmp_lang = NULL;
|
||||
if (lang[3] != '\0' && lang[4] != '\0' && lang[5] == '\0') {
|
||||
/* lang is 5 characters long. Try it. */
|
||||
if ((e = _iso_lang_entries_5_lookup(lang)))
|
||||
return e;
|
||||
} else
|
||||
nm_assert(!_iso_lang_entries_5_lookup(lang));
|
||||
|
||||
g_return_val_if_fail(lang, FALSE);
|
||||
g_return_val_if_fail(encodings, FALSE);
|
||||
|
||||
init_lang_to_encodings_hash();
|
||||
|
||||
if ((*encodings = g_hash_table_lookup(langToEncodings5, lang)))
|
||||
return TRUE;
|
||||
|
||||
/* Truncate tmp_lang to length of 2 */
|
||||
if (strlen(lang) > 2) {
|
||||
tmp_lang = g_strdup(lang);
|
||||
/* extract the first 2 characters and ignore the rest. */
|
||||
tmp_lang[0] = lang[0];
|
||||
tmp_lang[1] = lang[1];
|
||||
tmp_lang[2] = '\0';
|
||||
if ((*encodings = g_hash_table_lookup(langToEncodings2, tmp_lang)))
|
||||
return TRUE;
|
||||
lang = tmp_lang;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
if ((e = _iso_lang_entries_2_lookup(lang)))
|
||||
return e;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Non-static wrapper around _system_encodings_for_lang(): forwards @lang
 * unchanged and returns whatever the file-local function returns. */
const char *const *
|
||||
nmtst_system_encodings_for_lang(const char *lang)
|
||||
{
|
||||
return _system_encodings_for_lang(lang);
|
||||
}
|
||||
|
||||
static const char *const *
|
||||
get_system_encodings(void)
|
||||
_system_encodings_get_default(void)
|
||||
{
|
||||
static const char *const *cached_encodings;
|
||||
static char *default_encodings[4];
|
||||
const char *const *encodings = NULL;
|
||||
char *lang;
|
||||
static gsize init_once = 0;
|
||||
static const char *default_encodings[4];
|
||||
|
||||
if (cached_encodings)
|
||||
return cached_encodings;
|
||||
if (g_once_init_enter(&init_once)) {
|
||||
const char *e_default = NULL;
|
||||
int i;
|
||||
|
||||
g_get_charset(&e_default);
|
||||
|
||||
i = 0;
|
||||
if (e_default)
|
||||
default_encodings[i++] = e_default;
|
||||
if (!nm_streq0(e_default, "iso-8859-1"))
|
||||
default_encodings[i++] = "iso-8859-1";
|
||||
if (!nm_streq0(e_default, "windows-1251"))
|
||||
default_encodings[i++] = "windows-1251";
|
||||
default_encodings[i++] = NULL;
|
||||
nm_assert(i <= G_N_ELEMENTS(default_encodings));
|
||||
|
||||
g_once_init_leave(&init_once, 1);
|
||||
}
|
||||
|
||||
return default_encodings;
|
||||
}
|
||||
|
||||
/* Non-static wrapper around _system_encodings_get_default(): takes no
 * arguments and returns the file-local function's result as-is. */
const char *const *
|
||||
nmtst_system_encodings_get_default(void)
|
||||
{
|
||||
return _system_encodings_get_default();
|
||||
}
|
||||
|
||||
static const char *const *
|
||||
_system_encodings_get(void)
|
||||
{
|
||||
static const char *const *cached = NULL;
|
||||
const char *const *e;
|
||||
|
||||
again:
|
||||
if (!(e = g_atomic_pointer_get(&cached))) {
|
||||
const char *lang;
|
||||
|
||||
/* Use environment variables as encoding hint */
|
||||
lang = getenv("LC_ALL");
|
||||
if (!lang)
|
||||
lang = getenv("LC_CTYPE");
|
||||
if (!lang)
|
||||
lang = getenv("LANG");
|
||||
lang = getenv("LC_ALL") ?: getenv("LC_CTYPE") ?: getenv("LANG");
|
||||
|
||||
if (lang) {
|
||||
gs_free char *lang_down = NULL;
|
||||
char *dot;
|
||||
|
||||
lang = g_ascii_strdown(lang, -1);
|
||||
if ((dot = strchr(lang, '.')))
|
||||
lang_down = g_ascii_strdown(lang, -1);
|
||||
if ((dot = strchr(lang_down, '.')))
|
||||
*dot = '\0';
|
||||
|
||||
get_encodings_for_lang(lang, &encodings);
|
||||
g_free(lang);
|
||||
}
|
||||
if (!encodings) {
|
||||
g_get_charset((const char **) &default_encodings[0]);
|
||||
default_encodings[1] = "iso-8859-1";
|
||||
default_encodings[2] = "windows-1251";
|
||||
default_encodings[3] = NULL;
|
||||
encodings = (const char *const *) default_encodings;
|
||||
e = _system_encodings_for_lang(lang_down);
|
||||
}
|
||||
|
||||
cached_encodings = encodings;
|
||||
return cached_encodings;
|
||||
if (!e)
|
||||
e = _system_encodings_get_default();
|
||||
|
||||
/* in any case, @e is now a static buffer, that we may cache. */
|
||||
nm_assert(e);
|
||||
|
||||
if (!g_atomic_pointer_compare_and_exchange(&cached, NULL, e))
|
||||
goto again;
|
||||
}
|
||||
|
||||
return e;
|
||||
}
|
||||
|
||||
/* Non-static wrapper around _system_encodings_get(): takes no arguments
 * and returns the file-local function's result as-is. */
const char *const *
|
||||
nmtst_system_encodings_get(void)
|
||||
{
|
||||
return _system_encodings_get();
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
@@ -567,7 +579,7 @@ nm_utils_ssid_to_utf8(const guint8 *ssid, gsize len)
|
||||
if (g_utf8_validate((const char *) ssid, len, NULL))
|
||||
return g_strndup((const char *) ssid, len);
|
||||
|
||||
encodings = get_system_encodings();
|
||||
encodings = _system_encodings_get();
|
||||
|
||||
for (e = encodings; *e; e++) {
|
||||
converted = g_convert((const char *) ssid, len, "UTF-8", *e, NULL, NULL, NULL);
|
||||
|
@@ -10666,6 +10666,98 @@ test_vpn_connection_state_reason(void)
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
/* Test for the language-to-encodings lookup:
 *  1. inputs that must yield NULL ("", "zh", "zh_cx"),
 *  2. every expected language entry, checked via the local LL() macro,
 *  3. the default and cached system encoding lists are non-NULL,
 *  4. N_RUN lookups of randomly generated strings. */
static void
|
||||
test_system_encodings(void)
|
||||
{
|
||||
const int N_RUN = 10000;
|
||||
int i_run;
|
||||
|
||||
/* Empty string, a bare "zh", and an unlisted 5-letter "zh_cx" must not match. */
g_assert(nmtst_system_encodings_for_lang("") == NULL);
|
||||
g_assert(nmtst_system_encodings_for_lang("zh") == NULL);
|
||||
g_assert(nmtst_system_encodings_for_lang("zh_cx") == NULL);
|
||||
|
||||
/* LL(lang, encodings...): hands the lookup result for @lang together with the
 * expected encodings to nmtst_assert_strv(). When @lang is exactly 2 characters
 * long, the same check is repeated for @lang with an extra "x" appended. */
#define LL(lang, ...) \
|
||||
G_STMT_START \
|
||||
{ \
|
||||
const char *const _lang = "" lang ""; \
|
||||
\
|
||||
nmtst_assert_strv(nmtst_system_encodings_for_lang(_lang), __VA_ARGS__); \
|
||||
\
|
||||
if (strlen(_lang) == 2) { \
|
||||
gs_free char *_lang2 = g_strdup_printf("%s%s", _lang, "x"); \
|
||||
\
|
||||
nmtst_assert_strv(nmtst_system_encodings_for_lang(_lang2), __VA_ARGS__); \
|
||||
} \
|
||||
} \
|
||||
G_STMT_END
|
||||
|
||||
LL("zh_cn", "euc-cn", "gb2312", "gb18030");
|
||||
LL("zh_hk", "big5", "euc-tw", "big5-hkcs");
|
||||
LL("zh_mo", "big5", "euc-tw");
|
||||
LL("zh_sg", "euc-cn", "gb2312", "gb18030");
|
||||
LL("zh_tw", "big5", "euc-tw");
|
||||
|
||||
LL("ar", "iso-8859-6", "windows-1256");
|
||||
LL("be", "koi8-r", "windows-1251", "iso-8859-5");
|
||||
LL("bg", "windows-1251", "koi8-r", "iso-8859-5");
|
||||
LL("cs", "iso-8859-2", "windows-1250");
|
||||
LL("el", "iso-8859-7", "windows-1253");
|
||||
LL("et", "iso-8859-4", "windows-1257");
|
||||
LL("he", "iso-8859-8", "windows-1255");
|
||||
LL("hr", "iso-8859-2", "windows-1250");
|
||||
LL("hu", "iso-8859-2", "windows-1250");
|
||||
LL("iw", "iso-8859-8", "windows-1255");
|
||||
LL("ja", "euc-jp", "shift_jis", "iso-2022-jp");
|
||||
LL("ko", "euc-kr", "iso-2022-kr", "johab");
|
||||
LL("lt", "iso-8859-4", "windows-1257");
|
||||
LL("lv", "iso-8859-4", "windows-1257");
|
||||
LL("mk", "koi8-r", "windows-1251", "iso-8859-5");
|
||||
LL("pl", "iso-8859-2", "windows-1250");
|
||||
LL("ro", "iso-8859-2", "windows-1250");
|
||||
LL("ru", "koi8-r", "windows-1251", "iso-8859-5");
|
||||
LL("sh", "iso-8859-2", "windows-1250");
|
||||
LL("sk", "iso-8859-2", "windows-1250");
|
||||
LL("sl", "iso-8859-2", "windows-1250");
|
||||
LL("sr", "koi8-r", "windows-1251", "iso-8859-5");
|
||||
LL("th", "iso-8859-11", "windows-874");
|
||||
LL("tr", "iso-8859-9", "windows-1254");
|
||||
LL("uk", "koi8-u", "koi8-r", "windows-1251");
|
||||
|
||||
/* Both the default list and the overall system list must be non-NULL. */
g_assert(nmtst_system_encodings_get_default());
|
||||
g_assert(nmtst_system_encodings_get());
|
||||
|
||||
/* Randomized lookups: the first third of the runs use 2-character strings,
 * the second third 5-character strings, and the remainder a random length
 * in the range 0 .. G_N_ELEMENTS(buf) - 1. */
for (i_run = 0; i_run < N_RUN; i_run++) {
|
||||
char buf[7];
|
||||
int n_buf;
|
||||
int i_buf;
|
||||
const char *const *e;
|
||||
|
||||
if (i_run < N_RUN / 3)
|
||||
n_buf = 2;
|
||||
else if (i_run < 2 * N_RUN / 3)
|
||||
n_buf = 5;
|
||||
else
|
||||
n_buf = nmtst_get_rand_uint32() % G_N_ELEMENTS(buf);
|
||||
|
||||
/* Fill with random bytes, re-drawing any NUL so that strlen(buf) == n_buf. */
for (i_buf = 0; i_buf < n_buf; i_buf++) {
|
||||
do {
|
||||
buf[i_buf] = (char) nmtst_get_rand_uint32();
|
||||
} while (buf[i_buf] == '\0');
|
||||
}
|
||||
g_assert(i_buf < G_N_ELEMENTS(buf));
|
||||
buf[i_buf] = '\0';
|
||||
|
||||
g_assert_cmpint(n_buf, <, G_N_ELEMENTS(buf));
|
||||
g_assert_cmpint(strlen(buf), ==, n_buf);
|
||||
|
||||
/* The lookup must not fail on arbitrary input; a match is only accepted
 * for strings of at least 2 characters. */
e = nmtst_system_encodings_for_lang(buf);
|
||||
if (e)
|
||||
g_assert_cmpint(n_buf, >=, 2);
|
||||
}
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
NMTST_DEFINE();
|
||||
|
||||
int
|
||||
@@ -11008,5 +11100,7 @@ main(int argc, char **argv)
|
||||
g_test_add_func("/core/general/test_vpn_connection_state_reason",
|
||||
test_vpn_connection_state_reason);
|
||||
|
||||
g_test_add_func("/core/general/test_system_encodings", test_system_encodings);
|
||||
|
||||
return g_test_run();
|
||||
}
|
||||
|
Reference in New Issue
Block a user