netns: fix configuring onlink routes for ECMP routes

Kernel enforces that all nexthops must be reachable through a route.
L3Cfg is generating dependent onlink routes to solve this problem but
the IPv4 ECMP commit is happening before that.

To solve this we introduce two boolean fields "is_new" and "is_ready" to
know in which state is the L3Cfg affected. Initially, "is_new" is TRUE
and "is_ready" is FALSE. Here we schedule a commit on idle and we set
"is_new" to FALSE. When revisiting, we set "is_ready" to TRUE and then
we set the ECMP IPv4 routes.

When a reapply kicks in we reset the L3Cfg state by setting "is_new" to
TRUE.

https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/merge_requests/1520
(cherry picked from commit 7a844ecba9)
This commit is contained in:
Fernando Fernandez Mancera
2023-01-31 10:30:04 +01:00
parent 79a9fcb166
commit d9d33e2acc
3 changed files with 84 additions and 92 deletions

View File

@@ -1180,7 +1180,10 @@ loop_done:
/* NMNetns will merge the routes. The ones that found a merge partner are true multihop /* NMNetns will merge the routes. The ones that found a merge partner are true multihop
* routes, with potentially a next hop on different interfaces. The routes * routes, with potentially a next hop on different interfaces. The routes
* that didn't find a merge partner are returned in "singlehop_routes". */ * that didn't find a merge partner are returned in "singlehop_routes". */
nm_netns_ip_route_ecmp_commit(self->priv.netns, self, &singlehop_routes); nm_netns_ip_route_ecmp_commit(self->priv.netns,
self,
&singlehop_routes,
NM_IN_SET(commit_type, NM_L3_CFG_COMMIT_TYPE_REAPPLY));
if (singlehop_routes) { if (singlehop_routes) {
for (i = 0; i < singlehop_routes->len; i++) { for (i = 0; i < singlehop_routes->len; i++) {

View File

@@ -29,7 +29,6 @@ typedef struct {
NMPGlobalTracker *global_tracker; NMPGlobalTracker *global_tracker;
GHashTable *l3cfgs; GHashTable *l3cfgs;
GHashTable *shared_ips; GHashTable *shared_ips;
GHashTable *ecmp_routes;
GHashTable *ecmp_track_by_obj; GHashTable *ecmp_track_by_obj;
GHashTable *ecmp_track_by_ecmpid; GHashTable *ecmp_track_by_ecmpid;
CList l3cfg_signal_pending_lst_head; CList l3cfg_signal_pending_lst_head;
@@ -71,12 +70,6 @@ NM_DEFINE_SINGLETON_GETTER(NMNetns, nm_netns_get, NM_TYPE_NETNS);
/*****************************************************************************/ /*****************************************************************************/
void _netns_ip_route_ecmp_update_mh(NMNetns *self,
const GPtrArray *mhrts_del,
const GPtrArray *mhrts_add);
/*****************************************************************************/
#define nm_assert_l3cfg(self, l3cfg) \ #define nm_assert_l3cfg(self, l3cfg) \
G_STMT_START \ G_STMT_START \
{ \ { \
@@ -112,6 +105,8 @@ typedef struct {
/* Calling nm_netns_ip_route_ecmp_register() will ensure that the tracked /* Calling nm_netns_ip_route_ecmp_register() will ensure that the tracked
* entry is non-dirty. This can be used to remove stale entries. */ * entry is non-dirty. This can be used to remove stale entries. */
bool dirty : 1; bool dirty : 1;
bool is_new : 1;
bool is_ready : 1;
} EcmpTrackObj; } EcmpTrackObj;
static int static int
@@ -607,6 +602,8 @@ nm_netns_ip_route_ecmp_register(NMNetns *self, NML3Cfg *l3cfg, const NMPObject *
.l3cfg = l3cfg, .l3cfg = l3cfg,
.parent_track_ecmpid = track_ecmpid, .parent_track_ecmpid = track_ecmpid,
.dirty = FALSE, .dirty = FALSE,
.is_new = TRUE,
.is_ready = FALSE,
}; };
g_hash_table_add(priv->ecmp_track_by_obj, track_obj); g_hash_table_add(priv->ecmp_track_by_obj, track_obj);
@@ -624,17 +621,19 @@ nm_netns_ip_route_ecmp_register(NMNetns *self, NML3Cfg *l3cfg, const NMPObject *
} }
void void
nm_netns_ip_route_ecmp_commit(NMNetns *self, NML3Cfg *l3cfg, GPtrArray **out_singlehop_routes) nm_netns_ip_route_ecmp_commit(NMNetns *self,
NML3Cfg *l3cfg,
GPtrArray **out_singlehop_routes,
gboolean is_reapply)
{ {
NMNetnsPrivate *priv = NM_NETNS_GET_PRIVATE(self); NMNetnsPrivate *priv = NM_NETNS_GET_PRIVATE(self);
EcmpTrackObj *track_obj; EcmpTrackObj *track_obj;
EcmpTrackObj *track_obj_safe; EcmpTrackObj *track_obj_safe;
EcmpTrackEcmpid *track_ecmpid; EcmpTrackEcmpid *track_ecmpid;
const NMPObject *route_obj; const NMPObject *route_obj;
const NMPlatformIP4Route *route; const NMPlatformIP4Route *route;
char sbuf[NM_UTILS_TO_STRING_BUFFER_SIZE]; char sbuf[NM_UTILS_TO_STRING_BUFFER_SIZE];
gs_unref_ptrarray GPtrArray *mhrts_del = NULL; gboolean already_notified = FALSE;
gs_unref_ptrarray GPtrArray *mhrts_add = NULL;
nm_assert_l3cfg(self, l3cfg); nm_assert_l3cfg(self, l3cfg);
@@ -658,6 +657,41 @@ nm_netns_ip_route_ecmp_commit(NMNetns *self, NML3Cfg *l3cfg, GPtrArray **out_sin
/* This one is still in used. Keep it, but mark dirty, so that on the /* This one is still in used. Keep it, but mark dirty, so that on the
* next update cycle, it needs to be touched again or will be deleted. */ * next update cycle, it needs to be touched again or will be deleted. */
track_obj->dirty = TRUE; track_obj->dirty = TRUE;
if (is_reapply) {
track_obj->is_new = TRUE;
track_obj->is_ready = FALSE;
}
if (track_obj->is_new) {
/* This is a new route entry that was just added. Upon first
* addition, the route is not yet ready for configuration,
* because we need to make sure that the gateway is reachable
* via an onlink route. The calling l3cfg will configure that
* route, but only after returning from this function. So we
* need to go through one more commit.
*
* We also need to make sure that we are called back right
* after l3cfg configured that route. We achieve that by
* scheduling another idle commit on "l3cfg". */
track_obj->is_new = FALSE;
if (c_list_length_is(&track_ecmpid->ecmpid_lst_head, 1)) {
/* This route has no merge partner and ends up being a
* single hop route. It will be returned and configured by
* the calling "l3cfg".
*
* Unlike for multi-hop routes, we don't need to be called
* again after the onlink route was added. We are done, and
* don't need to schedule an idle commit. */
track_obj->is_ready = TRUE;
} else {
if (!already_notified) {
already_notified = TRUE;
nm_l3cfg_commit_on_idle_schedule(l3cfg, NM_L3_CFG_COMMIT_TYPE_AUTO);
}
}
} else {
/* We see this entry the second time (or more) so it's ready. */
track_obj->is_ready = TRUE;
}
continue; continue;
} }
@@ -667,14 +701,8 @@ nm_netns_ip_route_ecmp_commit(NMNetns *self, NML3Cfg *l3cfg, GPtrArray **out_sin
if (c_list_is_empty(&track_ecmpid->ecmpid_lst_head)) { if (c_list_is_empty(&track_ecmpid->ecmpid_lst_head)) {
if (track_ecmpid->merged_obj) { if (track_ecmpid->merged_obj) {
if (NMP_OBJECT_CAST_IP4_ROUTE(track_ecmpid->merged_obj)->n_nexthops > 1) { if (NMP_OBJECT_CAST_IP4_ROUTE(track_ecmpid->merged_obj)->n_nexthops > 1)
if (!mhrts_del) nm_platform_object_delete(priv->platform, track_ecmpid->merged_obj);
mhrts_del =
g_ptr_array_new_with_free_func((GDestroyNotify) nmp_object_unref);
g_ptr_array_add(mhrts_del,
(gpointer) g_steal_pointer(&track_ecmpid->merged_obj));
} else
nm_l3cfg_commit_on_idle_schedule(l3cfg, NM_L3_CFG_COMMIT_TYPE_AUTO);
} }
g_hash_table_remove(priv->ecmp_track_by_ecmpid, track_ecmpid); g_hash_table_remove(priv->ecmp_track_by_ecmpid, track_ecmpid);
@@ -692,8 +720,10 @@ nm_netns_ip_route_ecmp_commit(NMNetns *self, NML3Cfg *l3cfg, GPtrArray **out_sin
c_list_for_each_entry (track_obj, c_list_for_each_entry (track_obj,
&l3cfg->internal_netns.ecmp_track_ifindex_lst_head, &l3cfg->internal_netns.ecmp_track_ifindex_lst_head,
ifindex_lst) { ifindex_lst) {
EcmpTrackObj *track_obj2;
nm_auto_nmpobj const NMPObject *obj_del = NULL; nm_auto_nmpobj const NMPObject *obj_del = NULL;
gboolean changed; gboolean changed;
gboolean all_is_ready;
track_ecmpid = track_obj->parent_track_ecmpid; track_ecmpid = track_obj->parent_track_ecmpid;
if (track_ecmpid->already_visited) { if (track_ecmpid->already_visited) {
@@ -703,23 +733,32 @@ nm_netns_ip_route_ecmp_commit(NMNetns *self, NML3Cfg *l3cfg, GPtrArray **out_sin
} }
track_ecmpid->already_visited = TRUE; track_ecmpid->already_visited = TRUE;
all_is_ready = TRUE;
c_list_for_each_entry (track_obj2, &track_ecmpid->ecmpid_lst_head, ecmpid_lst) {
if (!track_obj2->is_ready) {
all_is_ready = FALSE;
break;
}
}
if (!all_is_ready) {
/* Here we might have a merged_obj already which can have the wrong
* setting e.g the wrong nexthops. We leave them for the moment and
* then we reconfigure it when this entry is ready. */
continue;
}
changed = _ecmp_track_init_merged_obj(track_obj->parent_track_ecmpid, &obj_del); changed = _ecmp_track_init_merged_obj(track_obj->parent_track_ecmpid, &obj_del);
nm_assert(!obj_del || changed); nm_assert(!obj_del || changed);
route_obj = track_obj->parent_track_ecmpid->merged_obj; route_obj = track_ecmpid->merged_obj;
route = NMP_OBJECT_CAST_IP4_ROUTE(route_obj); route = NMP_OBJECT_CAST_IP4_ROUTE(route_obj);
if (obj_del) { if (obj_del) {
if (NMP_OBJECT_CAST_IP4_ROUTE(obj_del)->n_nexthops > 1) { if (NMP_OBJECT_CAST_IP4_ROUTE(obj_del)->n_nexthops > 1)
if (!mhrts_del) nm_platform_object_delete(priv->platform, obj_del);
mhrts_del = g_ptr_array_new_with_free_func((GDestroyNotify) nmp_object_unref); else if (track_obj->l3cfg != l3cfg)
g_ptr_array_add(mhrts_del, (gpointer) g_steal_pointer(&obj_del)); nm_l3cfg_commit_on_idle_schedule(track_obj->l3cfg, NM_L3_CFG_COMMIT_TYPE_AUTO);
} else {
if (track_obj->l3cfg != l3cfg) {
nm_l3cfg_commit_on_idle_schedule(track_obj->l3cfg, NM_L3_CFG_COMMIT_TYPE_AUTO);
}
}
} }
if (route->n_nexthops <= 1) { if (route->n_nexthops <= 1) {
@@ -742,57 +781,10 @@ nm_netns_ip_route_ecmp_commit(NMNetns *self, NML3Cfg *l3cfg, GPtrArray **out_sin
continue; continue;
} }
if (changed) { if (changed || is_reapply) {
_LOGT("ecmp-route: multi-hop %s", _LOGT("ecmp-route: multi-hop %s",
nmp_object_to_string(route_obj, NMP_OBJECT_TO_STRING_PUBLIC, sbuf, sizeof(sbuf))); nmp_object_to_string(route_obj, NMP_OBJECT_TO_STRING_PUBLIC, sbuf, sizeof(sbuf)));
if (!mhrts_add) { nm_platform_ip_route_add(priv->platform, NMP_NLM_FLAG_APPEND, route_obj);
/* mhrts_add doesn't own the pointers. It relies on them being alive long enough. */
mhrts_add = g_ptr_array_new();
}
g_ptr_array_add(mhrts_add, (gpointer) route_obj);
}
}
_netns_ip_route_ecmp_update_mh(self, mhrts_del, mhrts_add);
}
void
_netns_ip_route_ecmp_update_mh(NMNetns *self,
const GPtrArray *mhrts_del,
const GPtrArray *mhrts_add)
{
NMNetnsPrivate *priv = NM_NETNS_GET_PRIVATE(self);
guint i;
if (mhrts_del) {
for (i = 0; i < mhrts_del->len; i++) {
const NMPObject *obj = mhrts_del->pdata[i];
if (!g_hash_table_remove(priv->ecmp_routes, obj))
nm_assert_not_reached();
nm_platform_object_delete(priv->platform, obj);
}
}
if (mhrts_add) {
for (i = 0; i < mhrts_add->len; i++) {
const NMPObject *obj = mhrts_add->pdata[i];
nm_auto_nmpobj const NMPObject *obj_old = NULL;
gpointer unused;
if (g_hash_table_steal_extended(priv->ecmp_routes,
obj,
(gpointer *) &obj_old,
&unused)) {
if (obj != obj_old)
nm_platform_object_delete(priv->platform, obj_old);
}
if (!g_hash_table_add(priv->ecmp_routes, (gpointer) nmp_object_ref(obj)))
nm_assert_not_reached();
nm_platform_ip_route_add(priv->platform, NMP_NLM_FLAG_APPEND, obj);
} }
} }
} }
@@ -829,11 +821,6 @@ nm_netns_init(NMNetns *self)
priv->_self_signal_user_data = self; priv->_self_signal_user_data = self;
c_list_init(&priv->l3cfg_signal_pending_lst_head); c_list_init(&priv->l3cfg_signal_pending_lst_head);
priv->ecmp_routes = g_hash_table_new_full((GHashFunc) nmp_object_id_hash,
(GEqualFunc) nmp_object_id_equal,
(GDestroyNotify) nmp_object_unref,
NULL);
G_STATIC_ASSERT_EXPR(G_STRUCT_OFFSET(EcmpTrackObj, obj) == 0); G_STATIC_ASSERT_EXPR(G_STRUCT_OFFSET(EcmpTrackObj, obj) == 0);
priv->ecmp_track_by_obj = priv->ecmp_track_by_obj =
g_hash_table_new_full(nm_pdirect_hash, nm_pdirect_equal, _ecmp_routes_by_obj_free, NULL); g_hash_table_new_full(nm_pdirect_hash, nm_pdirect_equal, _ecmp_routes_by_obj_free, NULL);
@@ -920,7 +907,6 @@ dispose(GObject *object)
nm_assert(c_list_is_empty(&priv->l3cfg_signal_pending_lst_head)); nm_assert(c_list_is_empty(&priv->l3cfg_signal_pending_lst_head));
nm_assert(!priv->shared_ips); nm_assert(!priv->shared_ips);
nm_clear_pointer(&priv->ecmp_routes, g_hash_table_destroy);
nm_clear_pointer(&priv->ecmp_track_by_obj, g_hash_table_destroy); nm_clear_pointer(&priv->ecmp_track_by_obj, g_hash_table_destroy);
nm_clear_pointer(&priv->ecmp_track_by_ecmpid, g_hash_table_destroy); nm_clear_pointer(&priv->ecmp_track_by_ecmpid, g_hash_table_destroy);

View File

@@ -54,6 +54,9 @@ void nm_netns_shared_ip_release(NMNetnsSharedIPHandle *handle);
/*****************************************************************************/ /*****************************************************************************/
void nm_netns_ip_route_ecmp_register(NMNetns *self, NML3Cfg *l3cfg, const NMPObject *obj); void nm_netns_ip_route_ecmp_register(NMNetns *self, NML3Cfg *l3cfg, const NMPObject *obj);
void nm_netns_ip_route_ecmp_commit(NMNetns *self, NML3Cfg *l3cfg, GPtrArray **routes); void nm_netns_ip_route_ecmp_commit(NMNetns *self,
NML3Cfg *l3cfg,
GPtrArray **routes,
gboolean is_reapply);
#endif /* __NM_NETNS_H__ */ #endif /* __NM_NETNS_H__ */