platform/tests: merge branch 'th/platform-cache-test-2'

https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/1525
This commit is contained in:
Thomas Haller
2023-02-01 22:44:09 +01:00
3 changed files with 152 additions and 77 deletions

View File

@@ -911,13 +911,14 @@ _assert_platform_normalize_all(GPtrArray *arr)
return normalized;
}
static void
static gboolean
_assert_platform_compare_arr(NMPObjectType obj_type,
const char *detail_type,
GPtrArray *arr1,
GPtrArray *arr2,
gboolean normalized,
gboolean share_multi_idx)
gboolean share_multi_idx,
gboolean do_assert)
{
const NMPClass *obj_class = nmp_class_from_type(obj_type);
char sbuf1[NM_UTILS_TO_STRING_BUFFER_SIZE];
@@ -925,48 +926,64 @@ _assert_platform_compare_arr(NMPObjectType obj_type,
int idx;
int idx_pointer_comp = -1;
#define _fail_msg(do_assert, ...) \
G_STMT_START \
{ \
if (do_assert) { \
g_error(__VA_ARGS__); \
} else { \
_LOGW(__VA_ARGS__); \
return FALSE; \
} \
} \
G_STMT_END
for (idx = 0; TRUE; idx++) {
if (nm_g_ptr_array_len(arr1) == idx && nm_g_ptr_array_len(arr2) == idx)
break;
if (idx >= nm_g_ptr_array_len(arr1)) {
_assert_platform_printarr(obj_type, arr1, arr2);
g_error("Comparing %s (%s) for platform fails. Platform now shows entry #%u which is "
"not in the cache but expected %s",
obj_class->obj_type_name,
detail_type,
idx,
nmp_object_to_string(arr2->pdata[idx],
NMP_OBJECT_TO_STRING_ALL,
sbuf1,
sizeof(sbuf1)));
_fail_msg(do_assert,
"Comparing %s (%s) for platform fails. Platform now shows entry #%u which is "
"not in the cache but expected %s",
obj_class->obj_type_name,
detail_type,
idx,
nmp_object_to_string(arr2->pdata[idx],
NMP_OBJECT_TO_STRING_ALL,
sbuf1,
sizeof(sbuf1)));
}
if (idx >= nm_g_ptr_array_len(arr2)) {
_assert_platform_printarr(obj_type, arr1, arr2);
g_error("Comparing %s (%s) for platform fails. Platform has no more entry #%u which is "
"still in the cache as %s",
obj_class->obj_type_name,
detail_type,
idx,
nmp_object_to_string(arr1->pdata[idx],
NMP_OBJECT_TO_STRING_ALL,
sbuf1,
sizeof(sbuf1)));
_fail_msg(
do_assert,
"Comparing %s (%s) for platform fails. Platform has no more entry #%u which is "
"still in the cache as %s",
obj_class->obj_type_name,
detail_type,
idx,
nmp_object_to_string(arr1->pdata[idx],
NMP_OBJECT_TO_STRING_ALL,
sbuf1,
sizeof(sbuf1)));
}
if (!nmp_object_equal(arr1->pdata[idx], arr2->pdata[idx])) {
_assert_platform_printarr(obj_type, arr1, arr2);
g_error("Comparing %s (%s) for platform fails. Platform entry #%u is now %s but in "
"cache is %s",
obj_class->obj_type_name,
detail_type,
idx,
nmp_object_to_string(arr2->pdata[idx],
NMP_OBJECT_TO_STRING_ALL,
sbuf1,
sizeof(sbuf1)),
nmp_object_to_string(arr1->pdata[idx],
NMP_OBJECT_TO_STRING_ALL,
sbuf2,
sizeof(sbuf2)));
_fail_msg(do_assert,
"Comparing %s (%s) for platform fails. Platform entry #%u is now %s but in "
"cache is %s",
obj_class->obj_type_name,
detail_type,
idx,
nmp_object_to_string(arr2->pdata[idx],
NMP_OBJECT_TO_STRING_ALL,
sbuf1,
sizeof(sbuf1)),
nmp_object_to_string(arr1->pdata[idx],
NMP_OBJECT_TO_STRING_ALL,
sbuf2,
sizeof(sbuf2)));
}
if (!normalized && (share_multi_idx != (arr1->pdata[idx] == arr2->pdata[idx]))
@@ -976,20 +993,23 @@ _assert_platform_compare_arr(NMPObjectType obj_type,
if (idx_pointer_comp != -1) {
_assert_platform_printarr(obj_type, arr1, arr2);
g_error("Comparing %s (%s) for platform fails for pointer comparison. Platform entry "
"#%u is now %s but in cache is %s",
obj_class->obj_type_name,
detail_type,
idx_pointer_comp,
nmp_object_to_string(arr2->pdata[idx_pointer_comp],
NMP_OBJECT_TO_STRING_ALL,
sbuf1,
sizeof(sbuf1)),
nmp_object_to_string(arr1->pdata[idx_pointer_comp],
NMP_OBJECT_TO_STRING_ALL,
sbuf2,
sizeof(sbuf2)));
_fail_msg(do_assert,
"Comparing %s (%s) for platform fails for pointer comparison. Platform entry "
"#%u is now %s but in cache is %s",
obj_class->obj_type_name,
detail_type,
idx_pointer_comp,
nmp_object_to_string(arr2->pdata[idx_pointer_comp],
NMP_OBJECT_TO_STRING_ALL,
sbuf1,
sizeof(sbuf1)),
nmp_object_to_string(arr1->pdata[idx_pointer_comp],
NMP_OBJECT_TO_STRING_ALL,
sbuf2,
sizeof(sbuf2)));
}
return TRUE;
}
/*****************************************************************************/
@@ -1211,8 +1231,8 @@ out:
return result;
}
void
nmtstp_assert_platform(NMPlatform *platform, guint32 obj_type_flags)
gboolean
nmtstp_check_platform_full(NMPlatform *platform, guint32 obj_type_flags, gboolean do_assert)
{
static const NMPObjectType obj_types[] = {
NMP_OBJECT_TYPE_IP4_ADDRESS,
@@ -1275,7 +1295,14 @@ nmtstp_assert_platform(NMPlatform *platform, guint32 obj_type_flags)
g_ptr_array_sort(arr2, _assert_platform_sort_objs);
}
_assert_platform_compare_arr(obj_type, "main", arr1, arr2, normalized, share_multi_idx);
if (!_assert_platform_compare_arr(obj_type,
"main",
arr1,
arr2,
normalized,
share_multi_idx,
do_assert))
return FALSE;
if (NM_IN_SET(obj_type, NMP_OBJECT_TYPE_IP4_ROUTE, NMP_OBJECT_TYPE_IP6_ROUTE)) {
/* For routes, the WEAK_ID needs to be sorted and match the expected order. Check that. */
@@ -1309,12 +1336,14 @@ nmtstp_assert_platform(NMPlatform *platform, guint32 obj_type_flags)
arr2b_sorted = nm_g_ptr_array_new_clone(arr2b, NULL, NULL, NULL);
g_ptr_array_sort(arr1b_sorted, _assert_platform_sort_objs);
g_ptr_array_sort(arr2b_sorted, _assert_platform_sort_objs);
_assert_platform_compare_arr(obj_type,
"weak-id-sorted",
arr1b_sorted,
arr2b_sorted,
normalized,
share_multi_idx);
if (!_assert_platform_compare_arr(obj_type,
"weak-id-sorted",
arr1b_sorted,
arr2b_sorted,
normalized,
share_multi_idx,
do_assert))
return FALSE;
if (obj_type == NMP_OBJECT_TYPE_IP6_ROUTE) {
/* For IPv6, the weak-ids are actually not sorted correctly.
@@ -1329,22 +1358,33 @@ nmtstp_assert_platform(NMPlatform *platform, guint32 obj_type_flags)
/* For IPv4, it also does not reliably always work. This may
* be a bug we want to fix. For now, ignore the check.
*
* This is probably caused by kernel bug
* https://bugzilla.redhat.com/show_bug.cgi?id=2162315
* for which I think there is no workaround.
* a) Kernel can wrongly allow to configure the same route twice.
* That means, the same route is visible in `ip route` output,
* meaning, it would be added twice to the platform cache.
* At least due to that problem, may the weak-id not be properly sorted.
* See https://bugzilla.redhat.com/show_bug.cgi?id=2165720 which is
* a bug of kernel allowing to configure the exact same route twice.
*
* Also, rhbz#2162315 means NMPlatform will merge two different
* routes together, if one of them were deleted, the RTM_DELROUTE
* message would wrongly delete single entry, leading to cache
* inconsistency. */
* b) See https://bugzilla.redhat.com/show_bug.cgi?id=2162315 which is
* a bug where kernel does allow to configure single-hop routes that differ by
* their next-hop weight, but on the netlink API those routes look the same.
*
* Due to a) and b), the platform cache may contain only one instance
* of a route, which is visible more than once in `ip route` output.
* This merging of different routes causes problems, and it also means
* that the RTM_NEWROUTE events are wrongly interpreted and the weak-id
* is not properly sorted.
*/
} else {
/* Assert that also the original, not-sorted lists agree. */
_assert_platform_compare_arr(obj_type,
"weak-id",
arr1b,
arr2b,
normalized,
share_multi_idx);
if (!_assert_platform_compare_arr(obj_type,
"weak-id",
arr1b,
arr2b,
normalized,
share_multi_idx,
do_assert))
return FALSE;
}
for (i = 0; i < arr1b->len; i++) {
@@ -1365,6 +1405,25 @@ nmtstp_assert_platform(NMPlatform *platform, guint32 obj_type_flags)
_LOGD("assert-platform: done");
g_assert_cmpint(obj_type_flags, ==, 0u);
return TRUE;
}
void
nmtstp_check_platform(NMPlatform *platform, guint32 obj_type_flags)
{
if (!nmtstp_check_platform_full(platform, obj_type_flags, FALSE)) {
/* It's unclear why this failure sometimes happens. It happens
* on gitlab-ci on Ubuntu/Debian(??).
*
* Retrying shortly after seems to avoid it. */
g_usleep(20 * 1000);
nm_platform_process_events(platform);
nmtstp_run_command("ip route");
nm_platform_process_events(platform);
nmtstp_check_platform_full(platform, obj_type_flags, TRUE);
}
}
/*****************************************************************************/

View File

@@ -139,7 +139,10 @@ int nmtstp_run_command(const char *format, ...) _nm_printf(1, 2);
/*****************************************************************************/
void nmtstp_assert_platform(NMPlatform *platform, guint32 obj_type_flags);
gboolean
nmtstp_check_platform_full(NMPlatform *platform, guint32 obj_type_flags, gboolean do_assert);
void nmtstp_check_platform(NMPlatform *platform, guint32 obj_type_flags);
/*****************************************************************************/

View File

@@ -2166,8 +2166,12 @@ _ensure_onlink_routes(void)
int i;
for (i = 0; i < G_N_ELEMENTS(NMTSTP_ENV1_DEVICE_NAME) && NMTSTP_ENV1_DEVICE_NAME[i]; i++) {
nmtstp_run_command("ip route append 7.7.7.0/24 dev %s", NMTSTP_ENV1_DEVICE_NAME[i]);
nmtstp_run_command("ip route append 7:7:7::/64 dev %s", NMTSTP_ENV1_DEVICE_NAME[i]);
nmtstp_run_command("ip route append 7.7.7.0/24 dev %s%s",
NMTSTP_ENV1_DEVICE_NAME[i],
nmtst_is_debug() ? "" : " &>/dev/null");
nmtstp_run_command("ip route append 7:7:7::/64 dev %s%s",
NMTSTP_ENV1_DEVICE_NAME[i],
nmtst_is_debug() ? "" : " &>/dev/null");
}
}
@@ -2181,9 +2185,6 @@ test_cache_consistency_routes(gconstpointer test_data)
int i_run;
gs_unref_ptrarray GPtrArray *keeper = g_ptr_array_new_with_free_func(g_free);
g_test_skip("Test is currently known to fail. TODO. SKIP");
return;
_ensure_onlink_routes();
for (i_run = 0; i_run < N_RUN; i_run++) {
@@ -2230,10 +2231,12 @@ test_cache_consistency_routes(gconstpointer test_data)
continue;
}
nmtstp_run_command("ip -%c route flush dev %s"
"%s" /* redirect */
" table %s" /* table */
"%s" /* redirect */
"",
addr_family_char[IS_IPv4],
ifname,
nmtst_rand_select_str("main", "10222", "10223", "all"),
nmtst_is_debug() ? "" : " &>/dev/null");
_ensure_onlink_routes();
goto done;
@@ -2306,7 +2309,17 @@ test_cache_consistency_routes(gconstpointer test_data)
}
extra_options[n_extra_options++] = "dev";
extra_options[n_extra_options++] = NMTSTP_ENV1_DEVICE_NAME[nmtst_get_rand_bool()];
if (nmtst_get_rand_one_case_in(3)) {
if (IS_IPv4 && i == 0) {
/* For IPv4, there is a problem if we configure a route with
* only one next-hop and a weight. In that case, kernel allows
* to add duplicates (that only differ by weight), but on netlink
* the weight is not exposed, so the routes look identical and
* are deduplicated by the hash.
* See https://bugzilla.redhat.com/show_bug.cgi?id=2162315
*
* This needs a kernel fix. Workaround that issue here, otherwise the test
* will randomly fail. */
} else if (nmtst_get_rand_one_case_in(3)) {
extra_options[n_extra_options++] = "weight";
extra_options[n_extra_options++] = "5";
}
@@ -2351,7 +2364,7 @@ done:
nm_platform_process_events(platform);
if (!is_test_quick || (i_run + 1 == N_RUN) || nmtst_get_rand_one_case_in(5)) {
nmtstp_assert_platform(
nmtstp_check_platform(
platform,
nmtst_get_rand_one_case_in(5)
? 0u