merge: branch 'ih/rt6_replace_resync'

platform: avoid routes resync for routes that we don't track

https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/1930
This commit is contained in:
Íñigo Huguet
2024-05-02 10:50:07 +00:00
3 changed files with 59 additions and 34 deletions

2
NEWS
View File

@@ -18,6 +18,8 @@ USE AT YOUR OWN RISK. NOT RECOMMENDED FOR PRODUCTION USE!
* Fix detection of 6 GHz band capability for WiFi devices
* Allow IPv6 SLAAC and static IPv6 DNS server assignment for modem broadband
when IPv6 device address was not explicitly passed on by ModemManager
* Fix a performance issue that was leading to 100% CPU usage by NetworkManager
if external programs were doing a big amount of routes updates.
=============================================
NetworkManager-1.46

View File

@@ -3903,6 +3903,34 @@ _new_from_nl_addr(const struct nlmsghdr *nlh, gboolean id_only)
return g_steal_pointer(&obj);
}
static gboolean
ip_route_is_tracked(guint8 proto, guint8 type)
{
if (proto > RTPROT_STATIC && !NM_IN_SET(proto, RTPROT_DHCP, RTPROT_RA)) {
/* We ignore certain rtm_protocol, because NetworkManager would only ever
* configure certain protocols. Other routes are not configured by NetworkManager
* and we don't track them in the platform cache.
*
* This is to help with the performance overhead of a huge number of
* routes, for example with the bird BGP software, that adds routes
* with RTPROT_BIRD protocol. */
return FALSE;
}
if (!NM_IN_SET(type,
RTN_UNICAST,
RTN_LOCAL,
RTN_BLACKHOLE,
RTN_UNREACHABLE,
RTN_PROHIBIT,
RTN_THROW)) {
/* Certain route types are ignored and not placed into the cache. */
return FALSE;
}
return TRUE;
}
/* Copied and heavily modified from libnl3's rtnl_route_parse() and parse_multipath(). */
static NMPObject *
_new_from_nl_route(const struct nlmsghdr *nlh, gboolean id_only, ParseNlmsgIter *parse_nlmsg_iter)
@@ -3963,6 +3991,16 @@ _new_from_nl_route(const struct nlmsghdr *nlh, gboolean id_only, ParseNlmsgIter
* only handle ~supported~ routes.
*****************************************************************/
/* If it's a route that we don't need to track, abort here to avoid unnecessary
* memory allocations to create the nmp_object. However, if the message has the
* NLM_F_REPLACE flag, it might be replacing a route that we were tracking so we
* have to stop tracking it. That means that we have to process all messages with
* NLM_F_REPLACE. See nmp_cache_update_netlink_route().
*/
if (!ip_route_is_tracked(rtm->rtm_protocol, rtm->rtm_type)
&& !(nlh->nlmsg_flags & NLM_F_REPLACE))
return NULL;
addr_family = rtm->rtm_family;
if (addr_family == AF_INET)
@@ -5519,39 +5557,18 @@ ip_route_get_lock_flag(const NMPlatformIPRoute *route)
static gboolean
ip_route_is_alive(const NMPlatformIPRoute *route)
{
guint8 prot;
guint8 proto, type;
nm_assert(route);
nm_assert(route->rt_source >= NM_IP_CONFIG_SOURCE_RTPROT_UNSPEC
&& route->rt_source <= _NM_IP_CONFIG_SOURCE_RTPROT_LAST);
prot = route->rt_source - 1;
proto = route->rt_source - 1;
type = nm_platform_route_type_uncoerce(route->type_coerced);
nm_assert(nmp_utils_ip_config_source_from_rtprot(prot) == route->rt_source);
nm_assert(nmp_utils_ip_config_source_from_rtprot(proto) == route->rt_source);
if (prot > RTPROT_STATIC && !NM_IN_SET(prot, RTPROT_DHCP, RTPROT_RA)) {
/* We ignore certain rtm_protocol, because NetworkManager would only ever
* configure certain protocols. Other routes are not configured by NetworkManager
* and we don't track them in the platform cache.
*
* This is to help with the performance overhead of a huge number of
* routes, for example with the bird BGP software, that adds routes
* with RTPROT_BIRD protocol. */
return FALSE;
}
if (!NM_IN_SET(nm_platform_route_type_uncoerce(route->type_coerced),
RTN_UNICAST,
RTN_LOCAL,
RTN_BLACKHOLE,
RTN_UNREACHABLE,
RTN_PROHIBIT,
RTN_THROW)) {
/* Certain route types are ignored and not placed into the cache. */
return FALSE;
}
return TRUE;
return ip_route_is_tracked(proto, type);
}
/* Copied and modified from libnl3's build_route_msg() and rtnl_route_build_msg(). */

View File

@@ -2988,6 +2988,13 @@ nmp_cache_update_netlink_route(NMPCache *cache,
* Since we don't cache all routes (see "route_is_alive"), we cannot know
* with certainty which route was replaced.
*
* For example, the kernel might have 3 similar routes (same WEAK_ID), one
* of which is not tracked by us so we don't have it into the cache. If we
* receive a route replace message, we don't know to what of the 3 routes
* it affects (one of the 3 we don't even know that exists). Moreover, if
* we only have one route on cache, we don't know if the replace is for a
* different one that we don't track.
*
* Even if we would cache *all* routes (which we cannot, if kernel adds new
* routing features that modify the known nmp_object_id_equal()), it would
* be hard to find the right route that was replaced. Well, probably we
@@ -3002,15 +3009,14 @@ nmp_cache_update_netlink_route(NMPCache *cache,
* [2] https://bugzilla.redhat.com/show_bug.cgi?id=1337860
*
* We need to resync.
*
* However, a resync is expensive. Think of a routing daemon that updates
* hundreds of routes per second, the performance penalty is huge. We can
* optimize it: if we don't have any matching route on cache (by WEAK_ID),
* we don't have anything to replace and we don't need a full resync, but
* only to add or discard the new route as usual.
*/
if (NMP_OBJECT_GET_TYPE(obj_hand_over) == NMP_OBJECT_TYPE_IP4_ROUTE
&& !nmp_cache_lookup_all(cache, NMP_CACHE_ID_TYPE_ROUTES_BY_WEAK_ID, obj_hand_over)) {
/* For IPv4, we can do a small optimization. We skip the resync, if we have
* no conflicting routes (by weak-id).
*
* This optimization does not work for IPv6 (maybe should be fixed).
*/
} else {
if (nmp_cache_lookup_all(cache, NMP_CACHE_ID_TYPE_ROUTES_BY_WEAK_ID, obj_hand_over)) {
entry_replace = NULL;
resync_required = TRUE;
goto out;