From de1dccba189ae3689fc9c41e34005ce15c790742 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 31 Jan 2023 18:32:31 +0100 Subject: [PATCH 1/5] platform/tests: suppress noisy output in test_cache_consistency_routes() test --- src/core/platform/tests/test-route.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/core/platform/tests/test-route.c b/src/core/platform/tests/test-route.c index bae38f265..0d8449b11 100644 --- a/src/core/platform/tests/test-route.c +++ b/src/core/platform/tests/test-route.c @@ -2166,8 +2166,12 @@ _ensure_onlink_routes(void) int i; for (i = 0; i < G_N_ELEMENTS(NMTSTP_ENV1_DEVICE_NAME) && NMTSTP_ENV1_DEVICE_NAME[i]; i++) { - nmtstp_run_command("ip route append 7.7.7.0/24 dev %s", NMTSTP_ENV1_DEVICE_NAME[i]); - nmtstp_run_command("ip route append 7:7:7::/64 dev %s", NMTSTP_ENV1_DEVICE_NAME[i]); + nmtstp_run_command("ip route append 7.7.7.0/24 dev %s%s", + NMTSTP_ENV1_DEVICE_NAME[i], + nmtst_is_debug() ? "" : " &>/dev/null"); + nmtstp_run_command("ip route append 7:7:7::/64 dev %s%s", + NMTSTP_ENV1_DEVICE_NAME[i], + nmtst_is_debug() ? "" : " &>/dev/null"); } } From 8089133f1c8418bc76f8b63f968477c0873bc9ef Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Mon, 30 Jan 2023 17:40:16 +0100 Subject: [PATCH 2/5] platform/tests: flush all tables in test_cache_consistency_routes() test --- src/core/platform/tests/test-route.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/platform/tests/test-route.c b/src/core/platform/tests/test-route.c index 0d8449b11..569d371db 100644 --- a/src/core/platform/tests/test-route.c +++ b/src/core/platform/tests/test-route.c @@ -2234,10 +2234,12 @@ test_cache_consistency_routes(gconstpointer test_data) continue; } nmtstp_run_command("ip -%c route flush dev %s" - "%s" /* redirect */ + " table %s" /* table */ + "%s" /* redirect */ "", addr_family_char[IS_IPv4], ifname, + nmtst_rand_select_str("main", "10222", "10223", "all"), nmtst_is_debug() ? "" : " &>/dev/null"); _ensure_onlink_routes(); goto done; From 0347dc7ddc1c7f3b55d2273c11f93e8b58c71548 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Mon, 30 Jan 2023 21:52:13 +0100 Subject: [PATCH 3/5] platform/tests: disable check for sorted IPv4 routes by weak-id Due to a kernel bug, this assert can fail and I don't think it can be fixed in NetworkManager. Disable the check. See-also: https://bugzilla.redhat.com/show_bug.cgi?id=2165720 --- src/core/platform/tests/test-common.c | 23 ++++++++++++++++------- src/core/platform/tests/test-route.c | 12 +++++++++++- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/core/platform/tests/test-common.c b/src/core/platform/tests/test-common.c index 8064ea439..a1ac373fe 100644 --- a/src/core/platform/tests/test-common.c +++ b/src/core/platform/tests/test-common.c @@ -1329,14 +1329,23 @@ nmtstp_assert_platform(NMPlatform *platform, guint32 obj_type_flags) /* For IPv4, it also does not reliably always work. This may * be a bug we want to fix. For now, ignore the check. * - * This is probably caused by kernel bug - * https://bugzilla.redhat.com/show_bug.cgi?id=2162315 - * for which I think there is no workaround. + * a) Kernel can wrongly allow to configure the same route twice. + * That means, the same route is visible in `ip route` output, + * meaning, it would be added twice to the platform cache. + * At least due to that problem, may the weak-id not be properly sorted. + * See https://bugzilla.redhat.com/show_bug.cgi?id=2165720 which is + * a bug of kernel allowing to configure the exact same route twice. * - * Also, rhbz#2162315 means NMPlatform will merge two different - * routes together, if one of them were deleted, the RTM_DELROUTE - * message would wrongly delete single entry, leading to cache - * inconsistency. */ + * b) See https://bugzilla.redhat.com/show_bug.cgi?id=2162315 which is + * a bug where kernel does allow to configure single-hop routes that differ by + * their next-hop weight, but on the netlink API those routes look the same. + * + * Due to a) and b), the platform cache may contain only one instance + * of a route, which is visible more than once in `ip route` output. + * This merging of different routes causes problems, and it also means + * that the RTM_NEWROUTE events are wrongly interpreted and the weak-id + * is not properly sorted. + */ } else { /* Assert that also the original, not-sorted lists agree. */ _assert_platform_compare_arr(obj_type, diff --git a/src/core/platform/tests/test-route.c b/src/core/platform/tests/test-route.c index 569d371db..d52e6e61a 100644 --- a/src/core/platform/tests/test-route.c +++ b/src/core/platform/tests/test-route.c @@ -2312,7 +2312,17 @@ test_cache_consistency_routes(gconstpointer test_data) } extra_options[n_extra_options++] = "dev"; extra_options[n_extra_options++] = NMTSTP_ENV1_DEVICE_NAME[nmtst_get_rand_bool()]; - if (nmtst_get_rand_one_case_in(3)) { + if (IS_IPv4 && i == 0) { + /* For IPv4, there is a problem if we configure a route with + * only one next-hop and a weight. In that case, kernel allows + * to add duplicates (that only differ by weight), but on netlink + * the weight is not exposed, so the routes look identical and + * are deduplicated by the hash. + * See https://bugzilla.redhat.com/show_bug.cgi?id=2162315 + * + * This needs a kernel fix. Workaround that issue here, otherwise the test + * will randomly fail. */ + } else if (nmtst_get_rand_one_case_in(3)) { extra_options[n_extra_options++] = "weight"; extra_options[n_extra_options++] = "5"; } From 82e21a490625d038ef46f57040326391a23f38ea Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 31 Jan 2023 18:19:30 +0100 Subject: [PATCH 4/5] platform/tests: workaround failure of nmtstp_assert_platform() --- src/core/platform/tests/test-common.c | 172 +++++++++++++++++--------- src/core/platform/tests/test-common.h | 5 +- src/core/platform/tests/test-route.c | 2 +- 3 files changed, 116 insertions(+), 63 deletions(-) diff --git a/src/core/platform/tests/test-common.c b/src/core/platform/tests/test-common.c index a1ac373fe..a37982fc6 100644 --- a/src/core/platform/tests/test-common.c +++ b/src/core/platform/tests/test-common.c @@ -911,13 +911,14 @@ _assert_platform_normalize_all(GPtrArray *arr) return normalized; } -static void +static gboolean _assert_platform_compare_arr(NMPObjectType obj_type, const char *detail_type, GPtrArray *arr1, GPtrArray *arr2, gboolean normalized, - gboolean share_multi_idx) + gboolean share_multi_idx, + gboolean do_assert) { const NMPClass *obj_class = nmp_class_from_type(obj_type); char sbuf1[NM_UTILS_TO_STRING_BUFFER_SIZE]; @@ -925,48 +926,64 @@ _assert_platform_compare_arr(NMPObjectType obj_type, int idx; int idx_pointer_comp = -1; +#define _fail_msg(do_assert, ...) \ + G_STMT_START \ + { \ + if (do_assert) { \ + g_error(__VA_ARGS__); \ + } else { \ + _LOGW(__VA_ARGS__); \ + return FALSE; \ + } \ + } \ + G_STMT_END + for (idx = 0; TRUE; idx++) { if (nm_g_ptr_array_len(arr1) == idx && nm_g_ptr_array_len(arr2) == idx) break; if (idx >= nm_g_ptr_array_len(arr1)) { _assert_platform_printarr(obj_type, arr1, arr2); - g_error("Comparing %s (%s) for platform fails. Platform now shows entry #%u which is " - "not in the cache but expected %s", - obj_class->obj_type_name, - detail_type, - idx, - nmp_object_to_string(arr2->pdata[idx], - NMP_OBJECT_TO_STRING_ALL, - sbuf1, - sizeof(sbuf1))); + _fail_msg(do_assert, + "Comparing %s (%s) for platform fails. Platform now shows entry #%u which is " + "not in the cache but expected %s", + obj_class->obj_type_name, + detail_type, + idx, + nmp_object_to_string(arr2->pdata[idx], + NMP_OBJECT_TO_STRING_ALL, + sbuf1, + sizeof(sbuf1))); } if (idx >= nm_g_ptr_array_len(arr2)) { _assert_platform_printarr(obj_type, arr1, arr2); - g_error("Comparing %s (%s) for platform fails. Platform has no more entry #%u which is " - "still in the cache as %s", - obj_class->obj_type_name, - detail_type, - idx, - nmp_object_to_string(arr1->pdata[idx], - NMP_OBJECT_TO_STRING_ALL, - sbuf1, - sizeof(sbuf1))); + _fail_msg( + do_assert, + "Comparing %s (%s) for platform fails. Platform has no more entry #%u which is " + "still in the cache as %s", + obj_class->obj_type_name, + detail_type, + idx, + nmp_object_to_string(arr1->pdata[idx], + NMP_OBJECT_TO_STRING_ALL, + sbuf1, + sizeof(sbuf1))); } if (!nmp_object_equal(arr1->pdata[idx], arr2->pdata[idx])) { _assert_platform_printarr(obj_type, arr1, arr2); - g_error("Comparing %s (%s) for platform fails. Platform entry #%u is now %s but in " - "cache is %s", - obj_class->obj_type_name, - detail_type, - idx, - nmp_object_to_string(arr2->pdata[idx], - NMP_OBJECT_TO_STRING_ALL, - sbuf1, - sizeof(sbuf1)), - nmp_object_to_string(arr1->pdata[idx], - NMP_OBJECT_TO_STRING_ALL, - sbuf2, - sizeof(sbuf2))); + _fail_msg(do_assert, + "Comparing %s (%s) for platform fails. Platform entry #%u is now %s but in " + "cache is %s", + obj_class->obj_type_name, + detail_type, + idx, + nmp_object_to_string(arr2->pdata[idx], + NMP_OBJECT_TO_STRING_ALL, + sbuf1, + sizeof(sbuf1)), + nmp_object_to_string(arr1->pdata[idx], + NMP_OBJECT_TO_STRING_ALL, + sbuf2, + sizeof(sbuf2))); } if (!normalized && (share_multi_idx != (arr1->pdata[idx] == arr2->pdata[idx])) @@ -976,20 +993,23 @@ _assert_platform_compare_arr(NMPObjectType obj_type, if (idx_pointer_comp != -1) { _assert_platform_printarr(obj_type, arr1, arr2); - g_error("Comparing %s (%s) for platform fails for pointer comparison. Platform entry " - "#%u is now %s but in cache is %s", - obj_class->obj_type_name, - detail_type, - idx_pointer_comp, - nmp_object_to_string(arr2->pdata[idx_pointer_comp], - NMP_OBJECT_TO_STRING_ALL, - sbuf1, - sizeof(sbuf1)), - nmp_object_to_string(arr1->pdata[idx_pointer_comp], - NMP_OBJECT_TO_STRING_ALL, - sbuf2, - sizeof(sbuf2))); + _fail_msg(do_assert, + "Comparing %s (%s) for platform fails for pointer comparison. Platform entry " + "#%u is now %s but in cache is %s", + obj_class->obj_type_name, + detail_type, + idx_pointer_comp, + nmp_object_to_string(arr2->pdata[idx_pointer_comp], + NMP_OBJECT_TO_STRING_ALL, + sbuf1, + sizeof(sbuf1)), + nmp_object_to_string(arr1->pdata[idx_pointer_comp], + NMP_OBJECT_TO_STRING_ALL, + sbuf2, + sizeof(sbuf2))); } + + return TRUE; } /*****************************************************************************/ @@ -1211,8 +1231,8 @@ out: return result; } -void -nmtstp_assert_platform(NMPlatform *platform, guint32 obj_type_flags) +gboolean +nmtstp_check_platform_full(NMPlatform *platform, guint32 obj_type_flags, gboolean do_assert) { static const NMPObjectType obj_types[] = { NMP_OBJECT_TYPE_IP4_ADDRESS, @@ -1275,7 +1295,14 @@ nmtstp_assert_platform(NMPlatform *platform, guint32 obj_type_flags) g_ptr_array_sort(arr2, _assert_platform_sort_objs); } - _assert_platform_compare_arr(obj_type, "main", arr1, arr2, normalized, share_multi_idx); + if (!_assert_platform_compare_arr(obj_type, + "main", + arr1, + arr2, + normalized, + share_multi_idx, + do_assert)) + return FALSE; if (NM_IN_SET(obj_type, NMP_OBJECT_TYPE_IP4_ROUTE, NMP_OBJECT_TYPE_IP6_ROUTE)) { /* For routes, the WEAK_ID needs to be sorted and match the expected order. Check that. */ @@ -1309,12 +1336,14 @@ nmtstp_assert_platform(NMPlatform *platform, guint32 obj_type_flags) arr2b_sorted = nm_g_ptr_array_new_clone(arr2b, NULL, NULL, NULL); g_ptr_array_sort(arr1b_sorted, _assert_platform_sort_objs); g_ptr_array_sort(arr2b_sorted, _assert_platform_sort_objs); - _assert_platform_compare_arr(obj_type, - "weak-id-sorted", - arr1b_sorted, - arr2b_sorted, - normalized, - share_multi_idx); + if (!_assert_platform_compare_arr(obj_type, + "weak-id-sorted", + arr1b_sorted, + arr2b_sorted, + normalized, + share_multi_idx, + do_assert)) + return FALSE; if (obj_type == NMP_OBJECT_TYPE_IP6_ROUTE) { /* For IPv6, the weak-ids are actually not sorted correctly. @@ -1348,12 +1377,14 @@ nmtstp_assert_platform(NMPlatform *platform, guint32 obj_type_flags) */ } else { /* Assert that also the original, not-sorted lists agree. */ - _assert_platform_compare_arr(obj_type, - "weak-id", - arr1b, - arr2b, - normalized, - share_multi_idx); + if (!_assert_platform_compare_arr(obj_type, + "weak-id", + arr1b, + arr2b, + normalized, + share_multi_idx, + do_assert)) + return FALSE; } for (i = 0; i < arr1b->len; i++) { @@ -1374,6 +1405,25 @@ nmtstp_assert_platform(NMPlatform *platform, guint32 obj_type_flags) _LOGD("assert-platform: done"); g_assert_cmpint(obj_type_flags, ==, 0u); + + return TRUE; +} + +void +nmtstp_check_platform(NMPlatform *platform, guint32 obj_type_flags) +{ + if (!nmtstp_check_platform_full(platform, obj_type_flags, FALSE)) { + /* It's unclear why this failure sometimes happens. It happens + * on gitlab-ci on Ubuntu/Debian(??). + * + * Retrying shortly after seems to avoid it. */ + g_usleep(20 * 1000); + nm_platform_process_events(platform); + nmtstp_run_command("ip route"); + nm_platform_process_events(platform); + + nmtstp_check_platform_full(platform, obj_type_flags, TRUE); + } } /*****************************************************************************/ diff --git a/src/core/platform/tests/test-common.h b/src/core/platform/tests/test-common.h index 7a4018a3b..2802d8fda 100644 --- a/src/core/platform/tests/test-common.h +++ b/src/core/platform/tests/test-common.h @@ -139,7 +139,10 @@ int nmtstp_run_command(const char *format, ...) _nm_printf(1, 2); /*****************************************************************************/ -void nmtstp_assert_platform(NMPlatform *platform, guint32 obj_type_flags); +gboolean +nmtstp_check_platform_full(NMPlatform *platform, guint32 obj_type_flags, gboolean do_assert); + +void nmtstp_check_platform(NMPlatform *platform, guint32 obj_type_flags); /*****************************************************************************/ diff --git a/src/core/platform/tests/test-route.c b/src/core/platform/tests/test-route.c index d52e6e61a..2c9737bea 100644 --- a/src/core/platform/tests/test-route.c +++ b/src/core/platform/tests/test-route.c @@ -2367,7 +2367,7 @@ done: nm_platform_process_events(platform); if (!is_test_quick || (i_run + 1 == N_RUN) || nmtst_get_rand_one_case_in(5)) { - nmtstp_assert_platform( + nmtstp_check_platform( platform, nmtst_get_rand_one_case_in(5) ? 0u From 5c324adc7c3b35e63c7f04f6230326901c0403c0 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Thu, 19 Jan 2023 20:25:21 +0100 Subject: [PATCH 5/5] platform/tests: re-enable and fix "/route/test_cache_consistency_routes" tests The tests failed in certain cases on gitlab-ci and were temporarily disabled. These issues should be fixed now and the test pass. Reenable. --- src/core/platform/tests/test-route.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/core/platform/tests/test-route.c b/src/core/platform/tests/test-route.c index 2c9737bea..bd8fdc271 100644 --- a/src/core/platform/tests/test-route.c +++ b/src/core/platform/tests/test-route.c @@ -2185,9 +2185,6 @@ test_cache_consistency_routes(gconstpointer test_data) int i_run; gs_unref_ptrarray GPtrArray *keeper = g_ptr_array_new_with_free_func(g_free); - g_test_skip("Test is currently known to fail. TODO. SKIP"); - return; - _ensure_onlink_routes(); for (i_run = 0; i_run < N_RUN; i_run++) {