From 4ebf8ca4e0577e47095969b71b10834ecb37802d Mon Sep 17 00:00:00 2001 From: Andrey Skvortsov Date: Tue, 30 Aug 2022 01:38:03 +0300 Subject: [PATCH] sms: fix spliting messages into chunks in gsm7 encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1) Not every allowed GSM7 character in UTF-8 incoding takes one byte. Some (for example, 'à') take several bytes in input string, but signle byte in GSM7. 2) Extended characters in GSM7 encoding take two bytes. Otherwise for example sending following SMS fails: ``` mmcli -m a --messaging-create-sms="text='[wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww',number='+XXXXXXXXXXX'" Successfully created new SMS: /org/freedesktop/ModemManager1/SMS/99 mmcli --send -s 99 error: couldn't send the SMS: 'GDBus.Error:org.freedesktop.libqmi.Error.Protocol.WmsEncoding: Couldn't write SMS part: QMI protocol error (58): 'WmsEncoding'' ``` ``` mmcli -m a --messaging-create-sms="text='|àààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààààà',number='+XXXXXXXXXXX'" Successfully created new SMS: /org/freedesktop/ModemManager1/SMS/72 mmcli --send -s 72 error: couldn't send the SMS: 'GDBus.Error:org.freedesktop.ModemManager1.Error.Core.InvalidArgs: Couldn't convert UTF-8 to GSM: input UTF-8 validation failed' ``` --- src/mm-charsets.c | 76 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 15 deletions(-) diff --git a/src/mm-charsets.c b/src/mm-charsets.c index 1bc6e87e..0cbad337 100644 --- a/src/mm-charsets.c +++ b/src/mm-charsets.c @@ -281,6 +281,18 @@ utf8_to_gsm_ext_char (const gchar *utf8, return FALSE; } +static guint8 +utf8_to_gsm_char (const gchar *utf8, + guint32 len, + guint8 *out_gsm) +{ + if (utf8_to_gsm_def_char (utf8, len, out_gsm)) + return 1; + if (utf8_to_gsm_ext_char (utf8, len, out_gsm)) + return 2; + return 0; +} + static guint8 * charset_gsm_unpacked_to_utf8 (const guint8 *gsm, guint32 len, @@ -980,29 +992,63 @@ util_split_text_gsm7 (const gchar *text, gsize text_len, gpointer log_object) { - gchar **out; - guint n_chunks; - guint i; - guint j; + g_autoptr(GPtrArray) chunks = NULL; + const gchar *walker; + const char *end; + const gchar *chunk_start; + glong encoded_chunk_length; + glong total_encoded_chunk_length; + + chunks = g_ptr_array_new_with_free_func ((GDestroyNotify)g_free); + + walker = text; + chunk_start = text; + encoded_chunk_length = 0; + total_encoded_chunk_length = 0; + while (walker && *walker) { + guint8 symbol[2] = {0, 0}; + glong written_bytes = 0; + + end = g_utf8_find_next_char (walker, NULL); + if (end == NULL) { + /* Find the string terminating NULL */ + end = walker; + while (*++end); + } + + written_bytes = utf8_to_gsm_char (walker, (end - walker), symbol); + + /* If more than one chunk is needed, these have to be of 140 - 6 = 134 + * bytes each, as additional space is needed for the UDH header. + * That means up to 153 input characters can be packed: + * 134 * 8 = 1072; 1072/7=153.14 + */ + if ((encoded_chunk_length + written_bytes) > 153) { + g_ptr_array_add (chunks, g_strndup (chunk_start, walker - chunk_start)); + chunk_start = walker; + encoded_chunk_length = written_bytes; + } else + encoded_chunk_length += written_bytes; + + total_encoded_chunk_length += written_bytes; + walker = g_utf8_next_char (walker); + } /* No splitting needed? */ - if (text_len <= 160) { + if (total_encoded_chunk_length <= 160) { + gchar **out; + out = g_new0 (gchar *, 2); out[0] = g_strdup (text); return out; } - /* Compute number of chunks needed */ - n_chunks = text_len / 153; - if (text_len % 153 != 0) - n_chunks++; + /* Otherwise, we do need the splitted chunks. Add the last one + * with contents plus the last trailing NULL */ + g_ptr_array_add (chunks, g_strndup (chunk_start, walker - chunk_start)); + g_ptr_array_add (chunks, NULL); - /* Fill in all chunks */ - out = g_new0 (gchar *, n_chunks + 1); - for (i = 0, j = 0; i < n_chunks; i++, j += 153) - out[i] = g_strndup (&text[j], 153); - - return out; + return (gchar **) g_ptr_array_free (g_steal_pointer (&chunks), FALSE); } static gchar **