merge: branch 'ih/rt6_replace_resync'

platform: avoid routes resync for routes that we don't track

https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/1930
This commit is contained in:
Íñigo Huguet
2024-05-02 10:50:07 +00:00
3 changed files with 59 additions and 34 deletions

2
NEWS
View File

@@ -18,6 +18,8 @@ USE AT YOUR OWN RISK. NOT RECOMMENDED FOR PRODUCTION USE!
* Fix detection of 6 GHz band capability for WiFi devices * Fix detection of 6 GHz band capability for WiFi devices
* Allow IPv6 SLAAC and static IPv6 DNS server assignment for modem broadband * Allow IPv6 SLAAC and static IPv6 DNS server assignment for modem broadband
when IPv6 device address was not explicitly passed on by ModemManager when IPv6 device address was not explicitly passed on by ModemManager
* Fix a performance issue that was leading to 100% CPU usage by NetworkManager
if external programs were doing a big amount of routes updates.
============================================= =============================================
NetworkManager-1.46 NetworkManager-1.46

View File

@@ -3903,6 +3903,34 @@ _new_from_nl_addr(const struct nlmsghdr *nlh, gboolean id_only)
return g_steal_pointer(&obj); return g_steal_pointer(&obj);
} }
static gboolean
ip_route_is_tracked(guint8 proto, guint8 type)
{
if (proto > RTPROT_STATIC && !NM_IN_SET(proto, RTPROT_DHCP, RTPROT_RA)) {
/* We ignore certain rtm_protocol, because NetworkManager would only ever
* configure certain protocols. Other routes are not configured by NetworkManager
* and we don't track them in the platform cache.
*
* This is to help with the performance overhead of a huge number of
* routes, for example with the bird BGP software, that adds routes
* with RTPROT_BIRD protocol. */
return FALSE;
}
if (!NM_IN_SET(type,
RTN_UNICAST,
RTN_LOCAL,
RTN_BLACKHOLE,
RTN_UNREACHABLE,
RTN_PROHIBIT,
RTN_THROW)) {
/* Certain route types are ignored and not placed into the cache. */
return FALSE;
}
return TRUE;
}
/* Copied and heavily modified from libnl3's rtnl_route_parse() and parse_multipath(). */ /* Copied and heavily modified from libnl3's rtnl_route_parse() and parse_multipath(). */
static NMPObject * static NMPObject *
_new_from_nl_route(const struct nlmsghdr *nlh, gboolean id_only, ParseNlmsgIter *parse_nlmsg_iter) _new_from_nl_route(const struct nlmsghdr *nlh, gboolean id_only, ParseNlmsgIter *parse_nlmsg_iter)
@@ -3963,6 +3991,16 @@ _new_from_nl_route(const struct nlmsghdr *nlh, gboolean id_only, ParseNlmsgIter
* only handle ~supported~ routes. * only handle ~supported~ routes.
*****************************************************************/ *****************************************************************/
/* If it's a route that we don't need to track, abort here to avoid unnecessary
* memory allocations to create the nmp_object. However, if the message has the
* NLM_F_REPLACE flag, it might be replacing a route that we were tracking so we
* have to stop tracking it. That means that we have to process all messages with
* NLM_F_REPLACE. See nmp_cache_update_netlink_route().
*/
if (!ip_route_is_tracked(rtm->rtm_protocol, rtm->rtm_type)
&& !(nlh->nlmsg_flags & NLM_F_REPLACE))
return NULL;
addr_family = rtm->rtm_family; addr_family = rtm->rtm_family;
if (addr_family == AF_INET) if (addr_family == AF_INET)
@@ -5519,39 +5557,18 @@ ip_route_get_lock_flag(const NMPlatformIPRoute *route)
static gboolean static gboolean
ip_route_is_alive(const NMPlatformIPRoute *route) ip_route_is_alive(const NMPlatformIPRoute *route)
{ {
guint8 prot; guint8 proto, type;
nm_assert(route); nm_assert(route);
nm_assert(route->rt_source >= NM_IP_CONFIG_SOURCE_RTPROT_UNSPEC nm_assert(route->rt_source >= NM_IP_CONFIG_SOURCE_RTPROT_UNSPEC
&& route->rt_source <= _NM_IP_CONFIG_SOURCE_RTPROT_LAST); && route->rt_source <= _NM_IP_CONFIG_SOURCE_RTPROT_LAST);
prot = route->rt_source - 1; proto = route->rt_source - 1;
type = nm_platform_route_type_uncoerce(route->type_coerced);
nm_assert(nmp_utils_ip_config_source_from_rtprot(prot) == route->rt_source); nm_assert(nmp_utils_ip_config_source_from_rtprot(proto) == route->rt_source);
if (prot > RTPROT_STATIC && !NM_IN_SET(prot, RTPROT_DHCP, RTPROT_RA)) { return ip_route_is_tracked(proto, type);
/* We ignore certain rtm_protocol, because NetworkManager would only ever
* configure certain protocols. Other routes are not configured by NetworkManager
* and we don't track them in the platform cache.
*
* This is to help with the performance overhead of a huge number of
* routes, for example with the bird BGP software, that adds routes
* with RTPROT_BIRD protocol. */
return FALSE;
}
if (!NM_IN_SET(nm_platform_route_type_uncoerce(route->type_coerced),
RTN_UNICAST,
RTN_LOCAL,
RTN_BLACKHOLE,
RTN_UNREACHABLE,
RTN_PROHIBIT,
RTN_THROW)) {
/* Certain route types are ignored and not placed into the cache. */
return FALSE;
}
return TRUE;
} }
/* Copied and modified from libnl3's build_route_msg() and rtnl_route_build_msg(). */ /* Copied and modified from libnl3's build_route_msg() and rtnl_route_build_msg(). */

View File

@@ -2988,6 +2988,13 @@ nmp_cache_update_netlink_route(NMPCache *cache,
* Since we don't cache all routes (see "route_is_alive"), we cannot know * Since we don't cache all routes (see "route_is_alive"), we cannot know
* with certainty which route was replaced. * with certainty which route was replaced.
* *
* For example, the kernel might have 3 similar routes (same WEAK_ID), one
* of which is not tracked by us so we don't have it into the cache. If we
* receive a route replace message, we don't know to what of the 3 routes
* it affects (one of the 3 we don't even know that exists). Moreover, if
* we only have one route on cache, we don't know if the replace is for a
* different one that we don't track.
*
* Even if we would cache *all* routes (which we cannot, if kernel adds new * Even if we would cache *all* routes (which we cannot, if kernel adds new
* routing features that modify the known nmp_object_id_equal()), it would * routing features that modify the known nmp_object_id_equal()), it would
* be hard to find the right route that was replaced. Well, probably we * be hard to find the right route that was replaced. Well, probably we
@@ -3002,15 +3009,14 @@ nmp_cache_update_netlink_route(NMPCache *cache,
* [2] https://bugzilla.redhat.com/show_bug.cgi?id=1337860 * [2] https://bugzilla.redhat.com/show_bug.cgi?id=1337860
* *
* We need to resync. * We need to resync.
*/
if (NMP_OBJECT_GET_TYPE(obj_hand_over) == NMP_OBJECT_TYPE_IP4_ROUTE
&& !nmp_cache_lookup_all(cache, NMP_CACHE_ID_TYPE_ROUTES_BY_WEAK_ID, obj_hand_over)) {
/* For IPv4, we can do a small optimization. We skip the resync, if we have
* no conflicting routes (by weak-id).
* *
* This optimization does not work for IPv6 (maybe should be fixed). * However, a resync is expensive. Think of a routing daemon that updates
* hundreds of routes per second, the performance penalty is huge. We can
* optimize it: if we don't have any matching route on cache (by WEAK_ID),
* we don't have anything to replace and we don't need a full resync, but
* only to add or discard the new route as usual.
*/ */
} else { if (nmp_cache_lookup_all(cache, NMP_CACHE_ID_TYPE_ROUTES_BY_WEAK_ID, obj_hand_over)) {
entry_replace = NULL; entry_replace = NULL;
resync_required = TRUE; resync_required = TRUE;
goto out; goto out;