platform: fix cache to use kernel's notion for equality of routes
Until now, NetworkManager's platform cache for routes used the quadruple network/plen,metric,ifindex for equaliy. That is not kernel's understanding of how routes behave. For example, with `ip route append` you can add two IPv4 routes that only differ by their gateway. To the previous form of platform cache, these two routes would wrongly look identical, as the cache could not contain both routes. This also easily leads to cache-inconsistencies. Now that we have NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID, fix the route's compare operator to match kernel's. Well, not entirely. Kernel understands more properties for routes then NetworkManager. Some of these properties may also be part of the ID according to kernel. To NetworkManager such routes would still look identical as they only differ in a property that is not understood. This can still cause cache-inconsistencies. The only fix here is to add support for all these properties in NetworkManager as well. However, it's less serious, because with this commit we support several of the more important properties. See also the related bug rh#1337855 for kernel. Another difficulty is that `ip route replace` and `ip route change` changes an existing route. The replaced route has the same NM_PLATFORM_IP_ROUTE_CMP_TYPE_WEAK_ID, but differ in the actual NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID: # ip -d -4 route show dev v # ip monitor route & # ip route add 192.168.5.0/24 dev v 192.168.5.0/24 dev v scope link # ip route change 192.168.5.0/24 dev v scope 10 192.168.5.0/24 dev v scope 10 # ip -d -4 route show dev v unicast 192.168.5.0/24 proto boot scope 10 Note that we only got one RTM_NEWROUTE message, although from NMPCache's point of view, a new route (with a particular ID) was added and another route (with a different ID) was deleted. The cumbersome workaround is, to keep an ordered list of the routes, and figure out which route was replaced in response to an RTM_NEWROUTE. In absence of bugs, this should work fine. However, as we only rely on events, we might wrongly introduce a cache-inconsistancy as well. See the related bug rh#1337860. Also drop nm_platform_ip4_route_get() and the like. The ID of routes is complex, so it makes little sense to look up a route directly.
This commit is contained in:
@@ -3392,7 +3392,7 @@ cache_prune_one_type (NMPlatform *platform, NMPObjectType obj_type)
|
||||
|
||||
obj = iter.current->obj;
|
||||
_LOGt ("cache-prune: prune %s", nmp_object_to_string (obj, NMP_OBJECT_TO_STRING_ALL, NULL, 0));
|
||||
cache_op = nmp_cache_remove (cache, obj, TRUE, &obj_old);
|
||||
cache_op = nmp_cache_remove (cache, obj, TRUE, TRUE, &obj_old);
|
||||
nm_assert (cache_op == NMP_CACHE_OPS_REMOVED);
|
||||
cache_on_change (platform, cache_op, obj_old, NULL);
|
||||
nm_platform_cache_update_emit_signal (platform, cache_op, obj_old, NULL);
|
||||
@@ -3645,39 +3645,6 @@ cache_on_change (NMPlatform *platform,
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
cache_post (NMPlatform *platform,
|
||||
struct nlmsghdr *msghdr,
|
||||
NMPCacheOpsType cache_op,
|
||||
const NMPObject *obj,
|
||||
const NMPObject *obj_old,
|
||||
const NMPObject *obj_new)
|
||||
{
|
||||
if (msghdr->nlmsg_type == RTM_NEWROUTE) {
|
||||
DelayedActionType action_type;
|
||||
|
||||
action_type = NMP_OBJECT_GET_TYPE (obj) == NMP_OBJECT_TYPE_IP4_ROUTE
|
||||
? DELAYED_ACTION_TYPE_REFRESH_ALL_IP4_ROUTES
|
||||
: DELAYED_ACTION_TYPE_REFRESH_ALL_IP6_ROUTES;
|
||||
if ( !delayed_action_refresh_all_in_progress (platform, action_type)
|
||||
&& nmp_cache_find_other_route_for_same_destination (nm_platform_get_cache (platform), obj)) {
|
||||
/* via `iproute route change` the user can update an existing route which effectively
|
||||
* means that a new object (with a different ID) comes into existance, replacing the
|
||||
* old on. In other words, as the ID of the object changes, we really see a new
|
||||
* object with the old one deleted.
|
||||
* However, kernel decides not to send a RTM_DELROUTE event for that.
|
||||
*
|
||||
* To hack around that, check if the update leaves us with multiple routes for the
|
||||
* same network/plen,metric part. In that case, we cannot do better then requesting
|
||||
* all routes anew, which sucks.
|
||||
*
|
||||
* One mitigation to avoid a dump is only to request a new dump, if we are not in
|
||||
* the middle of an ongoing dump (delayed_action_refresh_all_in_progress). */
|
||||
delayed_action_schedule (platform, action_type, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
static int
|
||||
@@ -3939,16 +3906,73 @@ event_valid_msg (NMPlatform *platform, struct nl_msg *msg, gboolean handle_event
|
||||
|
||||
case RTM_NEWLINK:
|
||||
case RTM_NEWADDR:
|
||||
case RTM_NEWROUTE:
|
||||
case RTM_GETLINK:
|
||||
cache_op = nmp_cache_update_netlink (cache, obj, is_dump, &obj_old, &obj_new);
|
||||
if (cache_op != NMP_CACHE_OPS_UNCHANGED) {
|
||||
cache_on_change (platform, cache_op, obj_old, obj_new);
|
||||
cache_post (platform, msghdr, cache_op, obj, obj_old, obj_new);
|
||||
nm_platform_cache_update_emit_signal (platform, cache_op, obj_old, obj_new);
|
||||
}
|
||||
break;
|
||||
|
||||
case RTM_NEWROUTE: {
|
||||
nm_auto_nmpobj const NMPObject *obj_replace = NULL;
|
||||
gboolean resync_required = FALSE;
|
||||
gboolean only_dirty = FALSE;
|
||||
|
||||
cache_op = nmp_cache_update_netlink_route (cache,
|
||||
obj,
|
||||
is_dump,
|
||||
msghdr->nlmsg_flags,
|
||||
&obj_old,
|
||||
&obj_new,
|
||||
&obj_replace,
|
||||
&resync_required);
|
||||
if (cache_op != NMP_CACHE_OPS_UNCHANGED) {
|
||||
if (obj_replace) {
|
||||
const NMDedupMultiEntry *entry_replace;
|
||||
|
||||
/* we found an object that is to be replaced by the RTM_NEWROUTE message.
|
||||
* While we invoke the signal, the platform cache might change and invalidate
|
||||
* the findings. Mitigate that (for the most part), by marking the entry as
|
||||
* dirty and only delete @obj_replace if it is still dirty afterwards.
|
||||
*
|
||||
* Yes, there is a tiny tiny chance for still getting it wrong. But in practice,
|
||||
* the signal handlers do not cause to call the platform again, so the cache
|
||||
* is not really changing. -- if they would, it would anyway be dangerous to overflow
|
||||
* the stack and it's not ensured that the processing of netlink messages is
|
||||
* reentrant (maybe it is).
|
||||
*/
|
||||
entry_replace = nmp_cache_lookup_entry (cache, obj_replace);
|
||||
nm_assert (entry_replace && entry_replace->obj == obj_replace);
|
||||
nm_dedup_multi_entry_set_dirty (entry_replace, TRUE);
|
||||
only_dirty = TRUE;
|
||||
}
|
||||
cache_on_change (platform, cache_op, obj_old, obj_new);
|
||||
nm_platform_cache_update_emit_signal (platform, cache_op, obj_old, obj_new);
|
||||
}
|
||||
|
||||
if (obj_replace) {
|
||||
/* the RTM_NEWROUTE message indicates that another route was replaced.
|
||||
* Remove it now. */
|
||||
cache_op = nmp_cache_remove (cache, obj_replace, TRUE, only_dirty, NULL);
|
||||
if (cache_op != NMP_CACHE_OPS_UNCHANGED) {
|
||||
nm_assert (cache_op == NMP_CACHE_OPS_REMOVED);
|
||||
cache_on_change (platform, cache_op, obj_replace, NULL);
|
||||
nm_platform_cache_update_emit_signal (platform, cache_op, obj_replace, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
if (resync_required) {
|
||||
/* we'd like to avoid such resyncs as they are expensive and we should only rely on the
|
||||
* netlink events. This needs investigation. */
|
||||
_LOGT ("schedule resync of routes after RTM_NEWROUTE");
|
||||
delayed_action_schedule (platform,
|
||||
delayed_action_refresh_from_object_type (NMP_OBJECT_GET_TYPE (obj)),
|
||||
NULL);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case RTM_DELLINK:
|
||||
case RTM_DELADDR:
|
||||
case RTM_DELROUTE:
|
||||
@@ -5829,6 +5853,7 @@ ip_route_add (NMPlatform *platform,
|
||||
r6 = NMP_OBJECT_CAST_IP6_ROUTE (&obj);
|
||||
nm_utils_ip6_address_clear_host_address (&r6->network, &r6->network, r6->plen);
|
||||
r6->rt_source = nmp_utils_ip_config_source_round_trip_rtprot (r6->rt_source),
|
||||
r6->metric = nm_utils_ip6_route_metric_normalize (r6->metric);
|
||||
nm_utils_ip6_address_clear_host_address (&r6->src, &r6->src, r6->src_plen);
|
||||
break;
|
||||
default:
|
||||
|
Reference in New Issue
Block a user