platform: cope differently with spurious RTM_DELLINK message when unslaving bridge-slave
Unslaving from a bridge causes a wrong RTM_DELLINK event for the former slave. # ip link add dummy0 type dummy # ip link add bridge0 type bridge # ip link set bridge0 up # ip link set dummy0 master bridge0 # ip monitor link & # ip link set dummy0 nomaster 18: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop master bridge0 state DOWN group default link/ether 76:44:5f:b9:38:02 brd ff:ff:ff:ff:ff:ff 18: dummy0: <BROADCAST,NOARP> mtu 1500 master bridge0 state DOWN link/ether 76:44:5f:b9:38:02 Deleted 18: dummy0: <BROADCAST,NOARP> mtu 1500 master bridge0 state DOWN link/ether 76:44:5f:b9:38:02 18: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default link/ether 76:44:5f:b9:38:02 brd ff:ff:ff:ff:ff:ff 19: bridge0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc noqueue state DOWN group default link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff 19: bridge0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc noqueue state DOWN group default link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff Previously, during do_request_link() we would remember the link that is about to be requested (delayed_deletion) and delay processing a new RTM_DELLINK message until the end of do_request_link() -- and possibly forget about about the deletion, if RTM_DELLINK was followed by a RTM_NEWLINK. However, this hack does not catch the case where an external command unslaves the link. Instead just accept the wrong event and raise a "removed" signal right away. This brings the cache in an externally visible, wrong state that will be fixed by a following "added" signal. Still do that because working around the kernel bug is complicated. Also, we already might emit wrong "added" signals for devices that are already removed. As a consequence, a user should not consider the platform signals until all events are processed. Listeners to that signal should accept that added/removed link changes can be wrong and should preferably handle them idly, when the events have settled. It can even be worse, that a RTM_DELLINK is not fixed by a following RTM_NEWLINK: ... # ip link set dummy0 nomaster 36: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop master bridge0 state DOWN link/ether e2:f2:20:98:3a:be brd ff:ff:ff:ff:ff:ff 36: dummy0: <BROADCAST,NOARP> mtu 1500 master bridge0 state DOWN link/ether e2:f2:20:98:3a:be Deleted 36: dummy0: <BROADCAST,NOARP> mtu 1500 master bridge0 state DOWN link/ether e2:f2:20:98:3a:be 37: bridge0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc noqueue state DOWN link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff 37: bridge0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc noqueue state DOWN link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff So, when a slave is deleted, we have to refetch it too. https://bugzilla.redhat.com/show_bug.cgi?id=1285719
This commit is contained in:
@@ -2200,7 +2200,6 @@ struct _NMLinuxPlatformPrivate {
|
|||||||
} delayed_action;
|
} delayed_action;
|
||||||
|
|
||||||
GHashTable *prune_candidates;
|
GHashTable *prune_candidates;
|
||||||
GHashTable *delayed_deletion;
|
|
||||||
|
|
||||||
GHashTable *wifi_data;
|
GHashTable *wifi_data;
|
||||||
};
|
};
|
||||||
@@ -2598,39 +2597,6 @@ cache_prune_candidates_prune (NMPlatform *platform)
|
|||||||
g_hash_table_unref (prune_candidates);
|
g_hash_table_unref (prune_candidates);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
cache_delayed_deletion_prune (NMPlatform *platform)
|
|
||||||
{
|
|
||||||
NMLinuxPlatformPrivate *priv = NM_LINUX_PLATFORM_GET_PRIVATE (platform);
|
|
||||||
GPtrArray *prune_list = NULL;
|
|
||||||
GHashTableIter iter;
|
|
||||||
guint i;
|
|
||||||
NMPObject *obj;
|
|
||||||
|
|
||||||
if (g_hash_table_size (priv->delayed_deletion) == 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
g_hash_table_iter_init (&iter, priv->delayed_deletion);
|
|
||||||
while (g_hash_table_iter_next (&iter, NULL, (gpointer *) &obj)) {
|
|
||||||
if (obj) {
|
|
||||||
if (!prune_list)
|
|
||||||
prune_list = g_ptr_array_new_full (g_hash_table_size (priv->delayed_deletion), (GDestroyNotify) nmp_object_unref);
|
|
||||||
g_ptr_array_add (prune_list, nmp_object_ref (obj));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
g_hash_table_remove_all (priv->delayed_deletion);
|
|
||||||
|
|
||||||
if (prune_list) {
|
|
||||||
for (i = 0; i < prune_list->len; i++) {
|
|
||||||
obj = prune_list->pdata[i];
|
|
||||||
_LOGt ("delayed-deletion: delete %s", nmp_object_to_string (obj, NMP_OBJECT_TO_STRING_ID, NULL, 0));
|
|
||||||
cache_remove_netlink (platform, obj, NULL, NULL, NM_PLATFORM_REASON_EXTERNAL);
|
|
||||||
}
|
|
||||||
g_ptr_array_unref (prune_list);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
cache_pre_hook (NMPCache *cache, const NMPObject *old, const NMPObject *new, NMPCacheOpsType ops_type, gpointer user_data)
|
cache_pre_hook (NMPCache *cache, const NMPObject *old, const NMPObject *new, NMPCacheOpsType ops_type, gpointer user_data)
|
||||||
{
|
{
|
||||||
@@ -2805,7 +2771,22 @@ cache_pre_hook (NMPCache *cache, const NMPObject *old, const NMPObject *new, NMP
|
|||||||
if (ifindex2 > 0 && ifindex1 != ifindex2)
|
if (ifindex2 > 0 && ifindex1 != ifindex2)
|
||||||
delayed_action_schedule (platform, DELAYED_ACTION_TYPE_REFRESH_LINK, GINT_TO_POINTER (ifindex2));
|
delayed_action_schedule (platform, DELAYED_ACTION_TYPE_REFRESH_LINK, GINT_TO_POINTER (ifindex2));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
if ( ( (ops_type == NMP_CACHE_OPS_REMOVED)
|
||||||
|
|| ( (ops_type == NMP_CACHE_OPS_UPDATED)
|
||||||
|
&& new
|
||||||
|
&& !new->_link.netlink.is_in_netlink))
|
||||||
|
&& old
|
||||||
|
&& old->_link.netlink.is_in_netlink
|
||||||
|
&& old->link.master) {
|
||||||
|
/* sometimes we receive a wrong RTM_DELLINK message when unslaving
|
||||||
|
* a device. Refetch the link again to check whether the device
|
||||||
|
* is really gone.
|
||||||
|
*
|
||||||
|
* https://bugzilla.redhat.com/show_bug.cgi?id=1285719#c2 */
|
||||||
|
delayed_action_schedule (platform, DELAYED_ACTION_TYPE_REFRESH_LINK, GINT_TO_POINTER (old->link.ifindex));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case NMP_OBJECT_TYPE_IP4_ADDRESS:
|
case NMP_OBJECT_TYPE_IP4_ADDRESS:
|
||||||
@@ -2899,13 +2880,8 @@ do_request_link (NMPlatform *platform, int ifindex, const char *name, gboolean h
|
|||||||
_LOGD ("do-request-link: %d %s", ifindex, name ? name : "");
|
_LOGD ("do-request-link: %d %s", ifindex, name ? name : "");
|
||||||
|
|
||||||
if (ifindex > 0) {
|
if (ifindex > 0) {
|
||||||
NMPObject *obj;
|
|
||||||
|
|
||||||
cache_prune_candidates_record_one (platform,
|
cache_prune_candidates_record_one (platform,
|
||||||
(NMPObject *) nmp_cache_lookup_link (priv->cache, ifindex));
|
(NMPObject *) nmp_cache_lookup_link (priv->cache, ifindex));
|
||||||
obj = nmp_object_new_link (ifindex);
|
|
||||||
_LOGt ("delayed-deletion: protect object %s", nmp_object_to_string (obj, NMP_OBJECT_TO_STRING_ID, NULL, 0));
|
|
||||||
g_hash_table_insert (priv->delayed_deletion, obj, NULL);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
event_handler_read_netlink_all (platform, FALSE);
|
event_handler_read_netlink_all (platform, FALSE);
|
||||||
@@ -2926,7 +2902,6 @@ do_request_link (NMPlatform *platform, int ifindex, const char *name, gboolean h
|
|||||||
|
|
||||||
event_handler_read_netlink_all (platform, TRUE);
|
event_handler_read_netlink_all (platform, TRUE);
|
||||||
|
|
||||||
cache_delayed_deletion_prune (platform);
|
|
||||||
cache_prune_candidates_prune (platform);
|
cache_prune_candidates_prune (platform);
|
||||||
|
|
||||||
if (handle_delayed_action)
|
if (handle_delayed_action)
|
||||||
@@ -3084,30 +3059,12 @@ event_notification (struct nl_msg *msg, gpointer user_data)
|
|||||||
switch (msghdr->nlmsg_type) {
|
switch (msghdr->nlmsg_type) {
|
||||||
|
|
||||||
case RTM_NEWLINK:
|
case RTM_NEWLINK:
|
||||||
if (NMP_OBJECT_GET_TYPE (obj) == NMP_OBJECT_TYPE_LINK) {
|
|
||||||
if (g_hash_table_lookup (priv->delayed_deletion, obj) != NULL) {
|
|
||||||
/* the object is scheduled for delayed deletion. Replace that object
|
|
||||||
* by clearing the value from priv->delayed_deletion. */
|
|
||||||
_LOGt ("delayed-deletion: clear delayed deletion of protected object %s", nmp_object_to_string (obj, NMP_OBJECT_TO_STRING_ID, NULL, 0));
|
|
||||||
g_hash_table_insert (priv->delayed_deletion, nmp_object_ref (obj), NULL);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* fall-through */
|
|
||||||
case RTM_NEWADDR:
|
case RTM_NEWADDR:
|
||||||
case RTM_NEWROUTE:
|
case RTM_NEWROUTE:
|
||||||
cache_update_netlink (platform, obj, &obj_cache, NULL, NM_PLATFORM_REASON_EXTERNAL);
|
cache_update_netlink (platform, obj, &obj_cache, NULL, NM_PLATFORM_REASON_EXTERNAL);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case RTM_DELLINK:
|
case RTM_DELLINK:
|
||||||
if ( NMP_OBJECT_GET_TYPE (obj) == NMP_OBJECT_TYPE_LINK
|
|
||||||
&& g_hash_table_contains (priv->delayed_deletion, obj)) {
|
|
||||||
/* We sometimes receive spurious RTM_DELLINK events. In this case, we want to delay
|
|
||||||
* the deletion of the object until later. */
|
|
||||||
_LOGt ("delayed-deletion: delay deletion of protected object %s", nmp_object_to_string (obj, NMP_OBJECT_TO_STRING_ID, NULL, 0));
|
|
||||||
g_hash_table_insert (priv->delayed_deletion, nmp_object_ref (obj), nmp_object_ref (obj));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
/* fall-through */
|
|
||||||
case RTM_DELADDR:
|
case RTM_DELADDR:
|
||||||
case RTM_DELROUTE:
|
case RTM_DELROUTE:
|
||||||
cache_remove_netlink (platform, obj, &obj_cache, NULL, NM_PLATFORM_REASON_EXTERNAL);
|
cache_remove_netlink (platform, obj, &obj_cache, NULL, NM_PLATFORM_REASON_EXTERNAL);
|
||||||
@@ -5311,10 +5268,6 @@ nm_linux_platform_init (NMLinuxPlatform *self)
|
|||||||
|
|
||||||
self->priv = priv;
|
self->priv = priv;
|
||||||
|
|
||||||
priv->delayed_deletion = g_hash_table_new_full ((GHashFunc) nmp_object_id_hash,
|
|
||||||
(GEqualFunc) nmp_object_id_equal,
|
|
||||||
(GDestroyNotify) nmp_object_unref,
|
|
||||||
(GDestroyNotify) nmp_object_unref);
|
|
||||||
priv->cache = nmp_cache_new ();
|
priv->cache = nmp_cache_new ();
|
||||||
priv->delayed_action.list_master_connected = g_ptr_array_new ();
|
priv->delayed_action.list_master_connected = g_ptr_array_new ();
|
||||||
priv->delayed_action.list_refresh_link = g_ptr_array_new ();
|
priv->delayed_action.list_refresh_link = g_ptr_array_new ();
|
||||||
@@ -5417,7 +5370,6 @@ dispose (GObject *object)
|
|||||||
nm_clear_g_source (&priv->delayed_action.idle_id);
|
nm_clear_g_source (&priv->delayed_action.idle_id);
|
||||||
|
|
||||||
g_clear_pointer (&priv->prune_candidates, g_hash_table_unref);
|
g_clear_pointer (&priv->prune_candidates, g_hash_table_unref);
|
||||||
g_clear_pointer (&priv->delayed_deletion, g_hash_table_unref);
|
|
||||||
|
|
||||||
G_OBJECT_CLASS (nm_linux_platform_parent_class)->dispose (object);
|
G_OBJECT_CLASS (nm_linux_platform_parent_class)->dispose (object);
|
||||||
}
|
}
|
||||||
|
@@ -289,10 +289,14 @@ test_slave (int master, int type, SignalData *master_changed)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Release */
|
/* Release */
|
||||||
|
ensure_no_signal (link_added);
|
||||||
ensure_no_signal (link_changed);
|
ensure_no_signal (link_changed);
|
||||||
|
ensure_no_signal (link_removed);
|
||||||
g_assert (nm_platform_link_release (NM_PLATFORM_GET, master, ifindex));
|
g_assert (nm_platform_link_release (NM_PLATFORM_GET, master, ifindex));
|
||||||
g_assert_cmpint (nm_platform_link_get_master (NM_PLATFORM_GET, ifindex), ==, 0);
|
g_assert_cmpint (nm_platform_link_get_master (NM_PLATFORM_GET, ifindex), ==, 0);
|
||||||
|
accept_signals (link_added, 0, 1);
|
||||||
accept_signals (link_changed, 1, 3);
|
accept_signals (link_changed, 1, 3);
|
||||||
|
accept_signals (link_removed, 0, 1);
|
||||||
accept_signals (master_changed, 1, 2);
|
accept_signals (master_changed, 1, 2);
|
||||||
|
|
||||||
ensure_no_signal (master_changed);
|
ensure_no_signal (master_changed);
|
||||||
@@ -304,7 +308,9 @@ test_slave (int master, int type, SignalData *master_changed)
|
|||||||
ensure_no_signal (master_changed);
|
ensure_no_signal (master_changed);
|
||||||
|
|
||||||
/* Remove */
|
/* Remove */
|
||||||
|
ensure_no_signal (link_added);
|
||||||
ensure_no_signal (link_changed);
|
ensure_no_signal (link_changed);
|
||||||
|
ensure_no_signal (link_removed);
|
||||||
g_assert (nm_platform_link_delete (NM_PLATFORM_GET, ifindex));
|
g_assert (nm_platform_link_delete (NM_PLATFORM_GET, ifindex));
|
||||||
accept_signals (master_changed, 0, 1);
|
accept_signals (master_changed, 0, 1);
|
||||||
accept_signals (link_changed, 0, 1);
|
accept_signals (link_changed, 0, 1);
|
||||||
@@ -1493,6 +1499,61 @@ again:
|
|||||||
|
|
||||||
/*****************************************************************************/
|
/*****************************************************************************/
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_nl_bugs_spuroius_dellink (void)
|
||||||
|
{
|
||||||
|
const char *IFACE_BRIDGE0 = "nm-test-bridge0";
|
||||||
|
const char *IFACE_DUMMY0 = "nm-test-dummy0";
|
||||||
|
int ifindex_bridge0, ifindex_dummy0;
|
||||||
|
const NMPlatformLink *pllink;
|
||||||
|
gboolean wait_for_settle;
|
||||||
|
|
||||||
|
/* see https://bugzilla.redhat.com/show_bug.cgi?id=1285719 */
|
||||||
|
|
||||||
|
nmtstp_run_command_check ("ip link add %s type dummy", IFACE_DUMMY0);
|
||||||
|
ifindex_dummy0 = nmtstp_assert_wait_for_link (IFACE_DUMMY0, NM_LINK_TYPE_DUMMY, 100)->ifindex;
|
||||||
|
|
||||||
|
nmtstp_run_command_check ("ip link add %s type bridge", IFACE_BRIDGE0);
|
||||||
|
ifindex_bridge0 = nmtstp_assert_wait_for_link (IFACE_BRIDGE0, NM_LINK_TYPE_BRIDGE, 100)->ifindex;
|
||||||
|
|
||||||
|
nmtstp_link_set_updown (-1, ifindex_bridge0, TRUE);
|
||||||
|
|
||||||
|
nmtstp_run_command_check ("ip link set %s master %s", IFACE_DUMMY0, IFACE_BRIDGE0);
|
||||||
|
NMTST_WAIT_ASSERT (100, {
|
||||||
|
nmtstp_wait_for_signal (50);
|
||||||
|
|
||||||
|
pllink = nm_platform_link_get (NM_PLATFORM_GET, ifindex_dummy0);
|
||||||
|
g_assert (pllink);
|
||||||
|
if (pllink->master == ifindex_bridge0)
|
||||||
|
break;
|
||||||
|
});
|
||||||
|
|
||||||
|
nm_platform_process_events (NM_PLATFORM_GET);
|
||||||
|
|
||||||
|
nmtstp_run_command_check ("ip link set %s nomaster", IFACE_DUMMY0);
|
||||||
|
|
||||||
|
wait_for_settle = TRUE;
|
||||||
|
nmtstp_wait_for_signal (50);
|
||||||
|
again:
|
||||||
|
nm_platform_process_events (NM_PLATFORM_GET);
|
||||||
|
pllink = nm_platform_link_get (NM_PLATFORM_GET, ifindex_bridge0);
|
||||||
|
g_assert (pllink);
|
||||||
|
pllink = nm_platform_link_get (NM_PLATFORM_GET, ifindex_dummy0);
|
||||||
|
g_assert (pllink);
|
||||||
|
g_assert_cmpint (pllink->parent, ==, 0);
|
||||||
|
|
||||||
|
if (wait_for_settle) {
|
||||||
|
wait_for_settle = FALSE;
|
||||||
|
NMTST_WAIT (300, { nmtstp_wait_for_signal (50); });
|
||||||
|
goto again;
|
||||||
|
}
|
||||||
|
|
||||||
|
nm_platform_link_delete (NM_PLATFORM_GET, ifindex_bridge0);
|
||||||
|
nm_platform_link_delete (NM_PLATFORM_GET, ifindex_dummy0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*****************************************************************************/
|
||||||
|
|
||||||
void
|
void
|
||||||
init_tests (int *argc, char ***argv)
|
init_tests (int *argc, char ***argv)
|
||||||
{
|
{
|
||||||
@@ -1531,5 +1592,6 @@ setup_tests (void)
|
|||||||
|
|
||||||
g_test_add_func ("/link/nl-bugs/veth", test_nl_bugs_veth);
|
g_test_add_func ("/link/nl-bugs/veth", test_nl_bugs_veth);
|
||||||
g_test_add_func ("/link/nl-bugs/spurious-newlink", test_nl_bugs_spuroius_newlink);
|
g_test_add_func ("/link/nl-bugs/spurious-newlink", test_nl_bugs_spuroius_newlink);
|
||||||
|
g_test_add_func ("/link/nl-bugs/spurious-dellink", test_nl_bugs_spuroius_dellink);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user