charsets: refactor coding style

Mostly switches to GLib types such as gchar and gint, and bounds array
iteration with G_N_ELEMENTS() instead of custom end-of-array sentinel entries.
This commit is contained in:
Aleksander Morgado
2020-08-20 11:32:18 +02:00
parent eb5443b197
commit 81162df15d
2 changed files with 170 additions and 162 deletions

View File

@@ -27,14 +27,14 @@
#include "mm-log.h" #include "mm-log.h"
typedef struct { typedef struct {
const char *gsm_name; const gchar *gsm_name;
const char *other_name; const gchar *other_name;
const char *iconv_from_name; const gchar *iconv_from_name;
const char *iconv_to_name; const gchar *iconv_to_name;
MMModemCharset charset; MMModemCharset charset;
} CharsetEntry; } CharsetEntry;
static CharsetEntry charset_map[] = { static const CharsetEntry charset_map[] = {
{ "UTF-8", "UTF8", "UTF-8", "UTF-8//TRANSLIT", MM_MODEM_CHARSET_UTF8 }, { "UTF-8", "UTF8", "UTF-8", "UTF-8//TRANSLIT", MM_MODEM_CHARSET_UTF8 },
{ "UCS2", NULL, "UCS-2BE", "UCS-2BE//TRANSLIT", MM_MODEM_CHARSET_UCS2 }, { "UCS2", NULL, "UCS-2BE", "UCS-2BE//TRANSLIT", MM_MODEM_CHARSET_UCS2 },
{ "IRA", "ASCII", "ASCII", "ASCII//TRANSLIT", MM_MODEM_CHARSET_IRA }, { "IRA", "ASCII", "ASCII", "ASCII//TRANSLIT", MM_MODEM_CHARSET_IRA },
@@ -44,72 +44,63 @@ static CharsetEntry charset_map[] = {
{ "PCDN", "CP850", "CP850", "CP850//TRANSLIT", MM_MODEM_CHARSET_PCDN }, { "PCDN", "CP850", "CP850", "CP850//TRANSLIT", MM_MODEM_CHARSET_PCDN },
{ "HEX", NULL, NULL, NULL, MM_MODEM_CHARSET_HEX }, { "HEX", NULL, NULL, NULL, MM_MODEM_CHARSET_HEX },
{ "UTF-16", "UTF16", "UTF-16BE", "UTF-16BE//TRANSLIT", MM_MODEM_CHARSET_UTF16 }, { "UTF-16", "UTF16", "UTF-16BE", "UTF-16BE//TRANSLIT", MM_MODEM_CHARSET_UTF16 },
{ NULL, NULL, NULL, NULL, MM_MODEM_CHARSET_UNKNOWN }
}; };
const char *
mm_modem_charset_to_string (MMModemCharset charset)
{
CharsetEntry *iter = &charset_map[0];
g_return_val_if_fail (charset != MM_MODEM_CHARSET_UNKNOWN, NULL);
while (iter->gsm_name) {
if (iter->charset == charset)
return iter->gsm_name;
iter++;
}
g_warn_if_reached ();
return NULL;
}
MMModemCharset MMModemCharset
mm_modem_charset_from_string (const char *string) mm_modem_charset_from_string (const gchar *string)
{ {
CharsetEntry *iter = &charset_map[0]; guint i;
g_return_val_if_fail (string != NULL, MM_MODEM_CHARSET_UNKNOWN); g_return_val_if_fail (string != NULL, MM_MODEM_CHARSET_UNKNOWN);
while (iter->gsm_name) { for (i = 0; i < G_N_ELEMENTS (charset_map); i++) {
if (strcasestr (string, iter->gsm_name)) if (strcasestr (string, charset_map[i].gsm_name))
return iter->charset; return charset_map[i].charset;
if (iter->other_name && strcasestr (string, iter->other_name)) if (charset_map[i].other_name && strcasestr (string, charset_map[i].other_name))
return iter->charset; return charset_map[i].charset;
iter++;
} }
return MM_MODEM_CHARSET_UNKNOWN; return MM_MODEM_CHARSET_UNKNOWN;
} }
static const char * static const CharsetEntry *
charset_iconv_to (MMModemCharset charset) lookup_charset_by_id (MMModemCharset charset)
{ {
CharsetEntry *iter = &charset_map[0]; guint i;
g_return_val_if_fail (charset != MM_MODEM_CHARSET_UNKNOWN, NULL); g_return_val_if_fail (charset != MM_MODEM_CHARSET_UNKNOWN, NULL);
for (i = 0; i < G_N_ELEMENTS (charset_map); i++) {
while (iter->gsm_name) { if (charset_map[i].charset == charset)
if (iter->charset == charset) return &charset_map[i];
return iter->iconv_to_name;
iter++;
} }
g_warn_if_reached (); g_warn_if_reached ();
return NULL; return NULL;
} }
static const char * const gchar *
mm_modem_charset_to_string (MMModemCharset charset)
{
const CharsetEntry *entry;
entry = lookup_charset_by_id (charset);
return entry ? entry->gsm_name : NULL;
}
static const gchar *
charset_iconv_to (MMModemCharset charset)
{
const CharsetEntry *entry;
entry = lookup_charset_by_id (charset);
return entry ? entry->iconv_to_name : NULL;
}
static const gchar *
charset_iconv_from (MMModemCharset charset) charset_iconv_from (MMModemCharset charset)
{ {
CharsetEntry *iter = &charset_map[0]; const CharsetEntry *entry;
g_return_val_if_fail (charset != MM_MODEM_CHARSET_UNKNOWN, NULL); entry = lookup_charset_by_id (charset);
return entry ? entry->iconv_from_name : NULL;
while (iter->gsm_name) {
if (iter->charset == charset)
return iter->iconv_from_name;
iter++;
}
g_warn_if_reached ();
return NULL;
} }
gboolean gboolean
@@ -149,9 +140,9 @@ gchar *
mm_modem_charset_byte_array_to_utf8 (GByteArray *array, mm_modem_charset_byte_array_to_utf8 (GByteArray *array,
MMModemCharset charset) MMModemCharset charset)
{ {
char *converted; const gchar *iconv_from;
const char *iconv_from; g_autofree gchar *converted = NULL;
GError *error = NULL; g_autoptr(GError) error = NULL;
g_return_val_if_fail (array != NULL, NULL); g_return_val_if_fail (array != NULL, NULL);
g_return_val_if_fail (charset != MM_MODEM_CHARSET_UNKNOWN, NULL); g_return_val_if_fail (charset != MM_MODEM_CHARSET_UNKNOWN, NULL);
@@ -162,21 +153,21 @@ mm_modem_charset_byte_array_to_utf8 (GByteArray *array,
converted = g_convert ((const gchar *)array->data, array->len, converted = g_convert ((const gchar *)array->data, array->len,
"UTF-8//TRANSLIT", iconv_from, "UTF-8//TRANSLIT", iconv_from,
NULL, NULL, &error); NULL, NULL, &error);
if (!converted || error) { if (!converted || error)
g_clear_error (&error); return NULL;
converted = NULL;
}
return converted; return g_steal_pointer (&converted);
} }
char * gchar *
mm_modem_charset_hex_to_utf8 (const char *src, MMModemCharset charset) mm_modem_charset_hex_to_utf8 (const gchar *src,
MMModemCharset charset)
{ {
char *unconverted, *converted; const gchar *iconv_from;
const char *iconv_from; g_autofree gchar *unconverted = NULL;
g_autofree gchar *converted = NULL;
g_autoptr(GError) error = NULL;
gsize unconverted_len = 0; gsize unconverted_len = 0;
GError *error = NULL;
g_return_val_if_fail (src != NULL, NULL); g_return_val_if_fail (src != NULL, NULL);
g_return_val_if_fail (charset != MM_MODEM_CHARSET_UNKNOWN, NULL); g_return_val_if_fail (charset != MM_MODEM_CHARSET_UNKNOWN, NULL);
@@ -189,29 +180,25 @@ mm_modem_charset_hex_to_utf8 (const char *src, MMModemCharset charset)
return NULL; return NULL;
if (charset == MM_MODEM_CHARSET_UTF8 || charset == MM_MODEM_CHARSET_IRA) if (charset == MM_MODEM_CHARSET_UTF8 || charset == MM_MODEM_CHARSET_IRA)
return unconverted; return g_steal_pointer (&unconverted);
converted = g_convert (unconverted, unconverted_len, converted = g_convert (unconverted, unconverted_len,
"UTF-8//TRANSLIT", iconv_from, "UTF-8//TRANSLIT", iconv_from,
NULL, NULL, &error); NULL, NULL, &error);
if (!converted || error) { if (!converted || error)
g_clear_error (&error); return NULL;
converted = NULL;
}
g_free (unconverted); return g_steal_pointer (&converted);
return converted;
} }
char * gchar *
mm_modem_charset_utf8_to_hex (const char *src, MMModemCharset charset) mm_modem_charset_utf8_to_hex (const gchar *src,
MMModemCharset charset)
{ {
const gchar *iconv_to;
g_autofree gchar *converted = NULL;
g_autoptr(GError) error = NULL;
gsize converted_len = 0; gsize converted_len = 0;
char *converted;
const char *iconv_to;
GError *error = NULL;
gchar *hex;
g_return_val_if_fail (src != NULL, NULL); g_return_val_if_fail (src != NULL, NULL);
g_return_val_if_fail (charset != MM_MODEM_CHARSET_UNKNOWN, NULL); g_return_val_if_fail (charset != MM_MODEM_CHARSET_UNKNOWN, NULL);
@@ -225,16 +212,11 @@ mm_modem_charset_utf8_to_hex (const char *src, MMModemCharset charset)
converted = g_convert (src, strlen (src), converted = g_convert (src, strlen (src),
iconv_to, "UTF-8//TRANSLIT", iconv_to, "UTF-8//TRANSLIT",
NULL, &converted_len, &error); NULL, &converted_len, &error);
if (!converted || error) { if (!converted || error)
g_clear_error (&error);
g_free (converted);
return NULL; return NULL;
}
/* Get hex representation of the string */ /* Get hex representation of the string */
hex = mm_utils_bin2hexstr ((guint8 *)converted, converted_len); return mm_utils_bin2hexstr ((guint8 *)converted, converted_len);
g_free (converted);
return hex;
} }
/* GSM 03.38 encoding conversion stuff */ /* GSM 03.38 encoding conversion stuff */
@@ -327,7 +309,8 @@ static const GsmUtf8Mapping gsm_def_utf8_alphabet[GSM_DEF_ALPHABET_SIZE] = {
}; };
static guint8 static guint8
gsm_def_char_to_utf8 (const guint8 gsm, guint8 out_utf8[2]) gsm_def_char_to_utf8 (const guint8 gsm,
guint8 out_utf8[2])
{ {
g_return_val_if_fail (gsm < GSM_DEF_ALPHABET_SIZE, 0); g_return_val_if_fail (gsm < GSM_DEF_ALPHABET_SIZE, 0);
memcpy (&out_utf8[0], &gsm_def_utf8_alphabet[gsm].chars[0], gsm_def_utf8_alphabet[gsm].len); memcpy (&out_utf8[0], &gsm_def_utf8_alphabet[gsm].chars[0], gsm_def_utf8_alphabet[gsm].len);
@@ -335,9 +318,11 @@ gsm_def_char_to_utf8 (const guint8 gsm, guint8 out_utf8[2])
} }
static gboolean static gboolean
utf8_to_gsm_def_char (const char *utf8, guint32 len, guint8 *out_gsm) utf8_to_gsm_def_char (const gchar *utf8,
guint32 len,
guint8 *out_gsm)
{ {
int i; gint i;
if (len > 0 && len < 4) { if (len > 0 && len < 4) {
for (i = 0; i < GSM_DEF_ALPHABET_SIZE; i++) { for (i = 0; i < GSM_DEF_ALPHABET_SIZE; i++) {
@@ -374,7 +359,8 @@ static const GsmUtf8Mapping gsm_ext_utf8_alphabet[GSM_EXT_ALPHABET_SIZE] = {
#define GSM_ESCAPE_CHAR 0x1b #define GSM_ESCAPE_CHAR 0x1b
static guint8 static guint8
gsm_ext_char_to_utf8 (const guint8 gsm, guint8 out_utf8[3]) gsm_ext_char_to_utf8 (const guint8 gsm,
guint8 out_utf8[3])
{ {
int i; int i;
@@ -388,7 +374,9 @@ gsm_ext_char_to_utf8 (const guint8 gsm, guint8 out_utf8[3])
} }
static gboolean static gboolean
utf8_to_gsm_ext_char (const char *utf8, guint32 len, guint8 *out_gsm) utf8_to_gsm_ext_char (const gchar *utf8,
guint32 len,
guint8 *out_gsm)
{ {
int i; int i;
@@ -406,7 +394,8 @@ utf8_to_gsm_ext_char (const char *utf8, guint32 len, guint8 *out_gsm)
} }
guint8 * guint8 *
mm_charset_gsm_unpacked_to_utf8 (const guint8 *gsm, guint32 len) mm_charset_gsm_unpacked_to_utf8 (const guint8 *gsm,
guint32 len)
{ {
guint i; guint i;
GByteArray *utf8; GByteArray *utf8;
@@ -465,12 +454,13 @@ mm_charset_gsm_unpacked_to_utf8 (const guint8 *gsm, guint32 len)
} }
guint8 * guint8 *
mm_charset_utf8_to_unpacked_gsm (const char *utf8, guint32 *out_len) mm_charset_utf8_to_unpacked_gsm (const gchar *utf8,
guint32 *out_len)
{ {
GByteArray *gsm; GByteArray *gsm;
const char *c = utf8, *next = c; const gchar *c;
const gchar *next;
static const guint8 gesc = GSM_ESCAPE_CHAR; static const guint8 gesc = GSM_ESCAPE_CHAR;
int i = 0;
g_return_val_if_fail (utf8 != NULL, NULL); g_return_val_if_fail (utf8 != NULL, NULL);
g_return_val_if_fail (g_utf8_validate (utf8, -1, NULL), NULL); g_return_val_if_fail (g_utf8_validate (utf8, -1, NULL), NULL);
@@ -486,6 +476,8 @@ mm_charset_utf8_to_unpacked_gsm (const char *utf8, guint32 *out_len)
return g_byte_array_free (gsm, FALSE); return g_byte_array_free (gsm, FALSE);
} }
next = utf8;
c = utf8;
while (next && *next) { while (next && *next) {
guint8 gch = 0x3f; /* 0x3f == '?' */ guint8 gch = 0x3f; /* 0x3f == '?' */
@@ -500,7 +492,6 @@ mm_charset_utf8_to_unpacked_gsm (const char *utf8, guint32 *out_len)
g_byte_array_append (gsm, &gch, 1); g_byte_array_append (gsm, &gch, 1);
c = next; c = next;
i++;
} }
/* Output length doesn't consider terminating NUL byte */ /* Output length doesn't consider terminating NUL byte */
@@ -513,7 +504,9 @@ mm_charset_utf8_to_unpacked_gsm (const char *utf8, guint32 *out_len)
} }
static gboolean static gboolean
gsm_is_subset (gunichar c, const char *utf8, gsize ulen) gsm_is_subset (gunichar c,
const gchar *utf8,
gsize ulen)
{ {
guint8 gsm; guint8 gsm;
@@ -525,13 +518,17 @@ gsm_is_subset (gunichar c, const char *utf8, gsize ulen)
} }
static gboolean static gboolean
ira_is_subset (gunichar c, const char *utf8, gsize ulen) ira_is_subset (gunichar c,
const gchar *utf8,
gsize ulen)
{ {
return (ulen == 1); return (ulen == 1);
} }
static gboolean static gboolean
ucs2_is_subset (gunichar c, const char *utf8, gsize ulen) ucs2_is_subset (gunichar c,
const gchar *utf8,
gsize ulen)
{ {
return (c <= 0xFFFF); return (c <= 0xFFFF);
} }
@@ -545,13 +542,17 @@ utf16_is_subset (gunichar c,
} }
static gboolean static gboolean
iso88591_is_subset (gunichar c, const char *utf8, gsize ulen) iso88591_is_subset (gunichar c,
const gchar *utf8,
gsize ulen)
{ {
return (c <= 0xFF); return (c <= 0xFF);
} }
static gboolean static gboolean
pccp437_is_subset (gunichar c, const char *utf8, gsize ulen) pccp437_is_subset (gunichar c,
const gchar *utf8,
gsize ulen)
{ {
static const gunichar t[] = { static const gunichar t[] = {
0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, 0x00ea, 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, 0x00ea,
@@ -582,7 +583,9 @@ pccp437_is_subset (gunichar c, const char *utf8, gsize ulen)
} }
static gboolean static gboolean
pcdn_is_subset (gunichar c, const char *utf8, gsize ulen) pcdn_is_subset (gunichar c,
const gchar *utf8,
gsize ulen)
{ {
static const gunichar t[] = { static const gunichar t[] = {
0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, 0x00ea, 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, 0x00ea,
@@ -614,11 +617,12 @@ pcdn_is_subset (gunichar c, const char *utf8, gsize ulen)
typedef struct { typedef struct {
MMModemCharset cs; MMModemCharset cs;
gboolean (*func) (gunichar c, const char *utf8, gsize ulen); gboolean (*func) (gunichar c,
guint charsize; const gchar *utf8,
gsize ulen);
} SubsetEntry; } SubsetEntry;
SubsetEntry subset_table[] = { const SubsetEntry subset_table[] = {
{ MM_MODEM_CHARSET_GSM, gsm_is_subset }, { MM_MODEM_CHARSET_GSM, gsm_is_subset },
{ MM_MODEM_CHARSET_IRA, ira_is_subset }, { MM_MODEM_CHARSET_IRA, ira_is_subset },
{ MM_MODEM_CHARSET_UCS2, ucs2_is_subset }, { MM_MODEM_CHARSET_UCS2, ucs2_is_subset },
@@ -626,7 +630,6 @@ SubsetEntry subset_table[] = {
{ MM_MODEM_CHARSET_8859_1, iso88591_is_subset }, { MM_MODEM_CHARSET_8859_1, iso88591_is_subset },
{ MM_MODEM_CHARSET_PCCP437, pccp437_is_subset }, { MM_MODEM_CHARSET_PCCP437, pccp437_is_subset },
{ MM_MODEM_CHARSET_PCDN, pcdn_is_subset }, { MM_MODEM_CHARSET_PCDN, pcdn_is_subset },
{ MM_MODEM_CHARSET_UNKNOWN, NULL },
}; };
/** /**
@@ -637,11 +640,11 @@ SubsetEntry subset_table[] = {
* Returns: %TRUE if the conversion is possible without errors, %FALSE otherwise. * Returns: %TRUE if the conversion is possible without errors, %FALSE otherwise.
*/ */
gboolean gboolean
mm_charset_can_convert_to (const char *utf8, mm_charset_can_convert_to (const gchar *utf8,
MMModemCharset charset) MMModemCharset charset)
{ {
const char *p = utf8; const gchar *p;
SubsetEntry *e; guint i;
g_return_val_if_fail (charset != MM_MODEM_CHARSET_UNKNOWN, FALSE); g_return_val_if_fail (charset != MM_MODEM_CHARSET_UNKNOWN, FALSE);
g_return_val_if_fail (utf8 != NULL, FALSE); g_return_val_if_fail (utf8 != NULL, FALSE);
@@ -650,11 +653,13 @@ mm_charset_can_convert_to (const char *utf8,
return TRUE; return TRUE;
/* Find the charset in our subset table */ /* Find the charset in our subset table */
for (e = &subset_table[0]; for (i = 0; i < G_N_ELEMENTS (subset_table); i++) {
e->cs != charset && e->cs != MM_MODEM_CHARSET_UNKNOWN; if (subset_table[i].cs == charset)
e++); break;
g_return_val_if_fail (e->cs != MM_MODEM_CHARSET_UNKNOWN, FALSE); }
g_return_val_if_fail (i < G_N_ELEMENTS (subset_table), FALSE);
p = utf8;
while (*p) { while (*p) {
gunichar c; gunichar c;
const char *end; const char *end;
@@ -668,7 +673,7 @@ mm_charset_can_convert_to (const char *utf8,
while (*++end); while (*++end);
} }
if (!e->func (c, p, (end - p))) if (!subset_table[i].func (c, p, (end - p)))
return FALSE; return FALSE;
p = end; p = end;
@@ -754,7 +759,8 @@ mm_charset_gsm_pack (const guint8 *src,
* the hex representation of the charset-encoded string, so we need to cope with * the hex representation of the charset-encoded string, so we need to cope with
* that case. */ * that case. */
gchar * gchar *
mm_charset_take_and_convert_to_utf8 (gchar *str, MMModemCharset charset) mm_charset_take_and_convert_to_utf8 (gchar *str,
MMModemCharset charset)
{ {
gchar *utf8 = NULL; gchar *utf8 = NULL;

View File

@@ -31,9 +31,8 @@ typedef enum {
MM_MODEM_CHARSET_UTF16 = 0x00000100, MM_MODEM_CHARSET_UTF16 = 0x00000100,
} MMModemCharset; } MMModemCharset;
const char *mm_modem_charset_to_string (MMModemCharset charset); const gchar *mm_modem_charset_to_string (MMModemCharset charset);
MMModemCharset mm_modem_charset_from_string (const gchar *string);
MMModemCharset mm_modem_charset_from_string (const char *string);
/* Append the given string to the given byte array but re-encode it /* Append the given string to the given byte array but re-encode it
* into the given charset first. The original string is assumed to be * into the given charset first. The original string is assumed to be
@@ -53,19 +52,22 @@ gchar *mm_modem_charset_byte_array_to_utf8 (GByteArray *array,
/* Take a string in hex representation ("00430052" or "A4BE11" for example) /* Take a string in hex representation ("00430052" or "A4BE11" for example)
* and convert it from the given character set to UTF-8. * and convert it from the given character set to UTF-8.
*/ */
char *mm_modem_charset_hex_to_utf8 (const char *src, MMModemCharset charset); gchar *mm_modem_charset_hex_to_utf8 (const gchar *src,
MMModemCharset charset);
/* Take a string in UTF-8 and convert it to the given charset in hex /* Take a string in UTF-8 and convert it to the given charset in hex
* representation. * representation.
*/ */
char *mm_modem_charset_utf8_to_hex (const char *src, MMModemCharset charset); gchar *mm_modem_charset_utf8_to_hex (const gchar *src,
MMModemCharset charset);
guint8 *mm_charset_utf8_to_unpacked_gsm (const char *utf8, guint32 *out_len); guint8 *mm_charset_utf8_to_unpacked_gsm (const gchar *utf8,
guint32 *out_len);
guint8 *mm_charset_gsm_unpacked_to_utf8 (const guint8 *gsm, guint32 len); guint8 *mm_charset_gsm_unpacked_to_utf8 (const guint8 *gsm,
guint32 len);
/* Checks whether conversion to the given charset may be done without errors */ /* Checks whether conversion to the given charset may be done without errors */
gboolean mm_charset_can_convert_to (const char *utf8, gboolean mm_charset_can_convert_to (const gchar *utf8,
MMModemCharset charset); MMModemCharset charset);
guint8 *mm_charset_gsm_unpack (const guint8 *gsm, guint8 *mm_charset_gsm_unpack (const guint8 *gsm,
@@ -78,8 +80,8 @@ guint8 *mm_charset_gsm_pack (const guint8 *src,
guint8 start_offset, /* in bits */ guint8 start_offset, /* in bits */
guint32 *out_packed_len); guint32 *out_packed_len);
gchar *mm_charset_take_and_convert_to_utf8 (gchar *str, MMModemCharset charset); gchar *mm_charset_take_and_convert_to_utf8 (gchar *str,
MMModemCharset charset);
gchar *mm_utf8_take_and_convert_to_charset (gchar *str, gchar *mm_utf8_take_and_convert_to_charset (gchar *str,
MMModemCharset charset); MMModemCharset charset);