nm-netns: track ECMP routes

Co-authored-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
This commit is contained in:
Thomas Haller
2022-11-23 08:29:48 +01:00
committed by Fernando Fernandez Mancera
parent af26b19dae
commit 5b5ce42682
4 changed files with 503 additions and 2 deletions

View File

@@ -1143,7 +1143,7 @@ _commit_collect_routes(NML3Cfg *self,
NMP_OBJECT_TYPE_IP_ROUTE(IS_IPv4));
if (!head_entry)
return;
goto loop_done;
c_list_for_each_entry (entry, &head_entry->lst_entries_head, lst_entries) {
const NMPObject *obj = entry->obj;
@@ -1152,6 +1152,14 @@ _commit_collect_routes(NML3Cfg *self,
if (_obj_is_route_nodev(obj))
r = routes_nodev;
else {
nm_assert(NMP_OBJECT_CAST_IP_ROUTE(obj)->ifindex == self->priv.ifindex);
if (IS_IPv4 && NMP_OBJECT_CAST_IP4_ROUTE(obj)->weight > 0) {
/* This route needs to be registered as ECMP route. */
nm_netns_ip_route_ecmp_register(self->priv.netns, self, obj);
continue;
}
if (!_obj_states_sync_filter(self, obj, commit_type))
continue;
r = routes;
@@ -1162,6 +1170,33 @@ _commit_collect_routes(NML3Cfg *self,
g_ptr_array_add(*r, (gpointer) nmp_object_ref(obj));
}
loop_done:
if (IS_IPv4) {
gs_unref_ptrarray GPtrArray *singlehop_routes = NULL;
guint i;
/* NMNetns will merge the routes. The ones that found a merge partner are true multihop
* routes, with potentially a next hop on different interfaces. The routes
* that didn't find a merge partner are returned in "singlehop_routes". */
nm_netns_ip_route_ecmp_commit(self->priv.netns, self, &singlehop_routes);
if (singlehop_routes) {
for (i = 0; i < singlehop_routes->len; i++) {
const NMPObject *obj = singlehop_routes->pdata[i];
if (!_obj_states_sync_filter(self, obj, commit_type))
continue;
if (!*routes)
*routes =
g_ptr_array_new_with_free_func((GDestroyNotify) nm_dedup_multi_obj_unref);
g_ptr_array_add(*routes, (gpointer) nmp_object_ref(obj));
}
}
}
}
static void
@@ -5122,6 +5157,7 @@ nm_l3cfg_init(NML3Cfg *self)
c_list_init(&self->priv.p->blocked_lst_head_6);
c_list_init(&self->internal_netns.signal_pending_lst);
c_list_init(&self->internal_netns.ecmp_track_ifindex_lst_head);
self->priv.p->obj_state_hash = g_hash_table_new_full(nmp_object_indirect_id_hash,
nmp_object_indirect_id_equal,
@@ -5166,6 +5202,7 @@ finalize(GObject *object)
gboolean changed;
nm_assert(c_list_is_empty(&self->internal_netns.signal_pending_lst));
nm_assert(c_list_is_empty(&self->internal_netns.ecmp_track_ifindex_lst_head));
nm_assert(!self->priv.p->ipconfig_4);
nm_assert(!self->priv.p->ipconfig_6);

View File

@@ -217,6 +217,7 @@ struct _NML3Cfg {
struct {
guint32 signal_pending_obj_type_flags;
CList signal_pending_lst;
CList ecmp_track_ifindex_lst_head;
} internal_netns;
};

View File

@@ -16,6 +16,7 @@
#include "libnm-platform/nm-platform.h"
#include "libnm-platform/nmp-netns.h"
#include "libnm-platform/nmp-global-tracker.h"
#include "libnm-std-aux/c-list-util.h"
/*****************************************************************************/
@@ -28,6 +29,9 @@ typedef struct {
NMPGlobalTracker *global_tracker;
GHashTable *l3cfgs;
GHashTable *shared_ips;
GHashTable *ecmp_routes;
GHashTable *ecmp_track_by_obj;
GHashTable *ecmp_track_by_ecmpid;
CList l3cfg_signal_pending_lst_head;
GSource *signal_pending_idle_source;
} NMNetnsPrivate;
@@ -67,6 +71,154 @@ NM_DEFINE_SINGLETON_GETTER(NMNetns, nm_netns_get, NM_TYPE_NETNS);
/*****************************************************************************/
void _netns_ip_route_ecmp_update_mh(NMNetns *self,
const GPtrArray *mhrts_del,
const GPtrArray *mhrts_add);
/*****************************************************************************/
#define nm_assert_l3cfg(self, l3cfg) \
G_STMT_START \
{ \
NMNetns *_self = (self); \
NML3Cfg *_l3cfg = (l3cfg); \
\
nm_assert(NM_IS_NETNS(self)); \
nm_assert(NM_IS_L3CFG(_l3cfg)); \
if (NM_MORE_ASSERTS > 5) \
nm_assert(_l3cfg == nm_netns_l3cfg_get(_self, nm_l3cfg_get_ifindex(_l3cfg))); \
} \
G_STMT_END
/*****************************************************************************/
typedef struct {
const NMPObject *representative_obj;
const NMPObject *merged_obj;
CList ecmpid_lst_head;
bool needs_update : 1;
bool already_visited : 1;
} EcmpTrackEcmpid;
typedef struct {
const NMPObject *obj;
NML3Cfg *l3cfg;
EcmpTrackEcmpid *parent_track_ecmpid;
CList ifindex_lst;
CList ecmpid_lst;
/* Calling nm_netns_ip_route_ecmp_register() will ensure that the tracked
* entry is non-dirty. This can be used to remove stale entries. */
bool dirty : 1;
} EcmpTrackObj;
static int
_ecmp_track_sort_lst_cmp(const CList *a, const CList *b, const void *user_data)
{
EcmpTrackObj *track_obj_a = c_list_entry(a, EcmpTrackObj, ecmpid_lst);
EcmpTrackObj *track_obj_b = c_list_entry(b, EcmpTrackObj, ecmpid_lst);
const NMPlatformIP4Route *route_a = NMP_OBJECT_CAST_IP4_ROUTE(track_obj_a->obj);
const NMPlatformIP4Route *route_b = NMP_OBJECT_CAST_IP4_ROUTE(track_obj_b->obj);
nm_assert(route_a->ifindex > 0);
nm_assert(route_a->n_nexthops <= 1);
nm_assert(route_b->ifindex > 0);
nm_assert(route_b->n_nexthops <= 1);
NM_CMP_FIELD(route_a, route_b, ifindex);
NM_CMP_FIELD(route_b, route_a, weight);
NM_CMP_DIRECT(htonl(route_a->gateway), htonl(route_b->gateway));
return nm_assert_unreachable_val(
nm_platform_ip4_route_cmp(route_a, route_b, NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID));
}
static gboolean
_ecmp_track_init_merged_obj(EcmpTrackEcmpid *track_ecmpid, const NMPObject **out_obj_del)
{
EcmpTrackObj *track_obj;
nm_auto_nmpobj const NMPObject *obj_new = NULL;
gsize n_nexthops;
gsize i;
nm_assert(track_ecmpid);
nm_assert(!c_list_is_empty(&track_ecmpid->ecmpid_lst_head));
nm_assert(track_ecmpid->representative_obj
== c_list_first_entry(&track_ecmpid->ecmpid_lst_head, EcmpTrackObj, ecmpid_lst)->obj);
nm_assert(out_obj_del && !*out_obj_del);
if (!track_ecmpid->needs_update) {
/* Already up to date. Nothing to do. */
return FALSE;
}
track_ecmpid->needs_update = FALSE;
n_nexthops = c_list_length(&track_ecmpid->ecmpid_lst_head);
if (n_nexthops == 1) {
/* There is only a single entry. There is nothing to merge, just set
* the first entry. */
obj_new = nmp_object_ref(track_ecmpid->representative_obj);
goto out;
}
/* We want that the nexthop list is deterministic. We thus sort the list and update
* the representative_obj. */
c_list_sort(&track_ecmpid->ecmpid_lst_head, _ecmp_track_sort_lst_cmp, NULL);
nmp_object_ref_set(
&track_ecmpid->representative_obj,
c_list_first_entry(&track_ecmpid->ecmpid_lst_head, EcmpTrackObj, ecmpid_lst)->obj);
obj_new = nmp_object_clone(track_ecmpid->representative_obj, FALSE);
nm_assert(obj_new->ip4_route.n_nexthops <= 1);
nm_assert(!obj_new->_ip4_route.extra_nexthops);
/* Note that there actually cannot be duplicate (ifindex,gateway,weight) tuples, because
* NML3Cfg uses NM_PLATFORM_IP_ROUTE_CMP_TYPE_ID to track the routes, and track_ecmpid
* groups them further by NM_PLATFORM_IP_ROUTE_CMP_TYPE_ECMP_ID. The comparison for
* ECMP_ID is a strict superset of ID, hence there are no dupliated.
*
* Also, kernel wouldn't care if there were duplicate nexthops anyway.
*
* This means, it's gonna be simple. We sorted the single-hop routes by next-hop,
* now just create a plain list of the nexthops (no check for duplciates, etc). */
((NMPObject *) obj_new)->ip4_route.n_nexthops = n_nexthops;
((NMPObject *) obj_new)->_ip4_route.extra_nexthops =
g_new(NMPlatformIP4RtNextHop, n_nexthops - 1u);
i = 0;
c_list_for_each_entry (track_obj, &track_ecmpid->ecmpid_lst_head, ecmpid_lst) {
if (i > 0) {
const NMPlatformIP4Route *r = NMP_OBJECT_CAST_IP4_ROUTE(track_obj->obj);
NMPlatformIP4RtNextHop *nh = (gpointer) &obj_new->_ip4_route.extra_nexthops[i - 1];
*nh = (NMPlatformIP4RtNextHop){
.ifindex = r->ifindex,
.gateway = r->gateway,
.weight = r->weight,
};
}
i++;
}
out:
nm_assert(obj_new);
if (nmp_object_equal(track_ecmpid->merged_obj, obj_new))
return FALSE;
if (track_ecmpid->merged_obj)
*out_obj_del = g_steal_pointer(&track_ecmpid->merged_obj);
track_ecmpid->merged_obj = g_steal_pointer(&obj_new);
return TRUE;
}
/*****************************************************************************/
NMPNetns *
nm_netns_get_platform_netns(NMNetns *self)
{
@@ -93,6 +245,51 @@ nm_netns_get_multi_idx(NMNetns *self)
/*****************************************************************************/
static guint
_ecmp_routes_by_ecmpid_hash(gconstpointer ptr)
{
const NMPObject *const *p_obj = ptr;
return nm_platform_ip4_route_hash(NMP_OBJECT_CAST_IP4_ROUTE(*p_obj),
NM_PLATFORM_IP_ROUTE_CMP_TYPE_ECMP_ID);
}
static int
_ecmp_routes_by_ecmpid_equal(gconstpointer ptr_a, gconstpointer ptr_b)
{
const NMPObject *const *p_obj_a = ptr_a;
const NMPObject *const *p_obj_b = ptr_b;
return nm_platform_ip4_route_cmp(NMP_OBJECT_CAST_IP4_ROUTE(*p_obj_a),
NMP_OBJECT_CAST_IP4_ROUTE(*p_obj_b),
NM_PLATFORM_IP_ROUTE_CMP_TYPE_ECMP_ID)
== 0;
}
static void
_ecmp_routes_by_ecmpid_free(gpointer ptr)
{
EcmpTrackEcmpid *track_ecmpid = ptr;
c_list_unlink_stale(&track_ecmpid->ecmpid_lst_head);
nmp_object_unref(track_ecmpid->representative_obj);
nmp_object_unref(track_ecmpid->merged_obj);
nm_g_slice_free(track_ecmpid);
}
static void
_ecmp_routes_by_obj_free(gpointer ptr)
{
EcmpTrackObj *track_obj = ptr;
c_list_unlink_stale(&track_obj->ifindex_lst);
c_list_unlink_stale(&track_obj->ecmpid_lst);
nmp_object_unref(track_obj->obj);
nm_g_slice_free(track_obj);
}
/*****************************************************************************/
static NML3Cfg *
_l3cfg_hashed_to_l3cfg(gpointer ptr)
{
@@ -219,7 +416,7 @@ _platform_signal_cb(NMPlatform *platform,
NMNetnsPrivate *priv = NM_NETNS_GET_PRIVATE(self);
const NMPObjectType obj_type = obj_type_i;
const NMPlatformSignalChangeType change_type = change_type_i;
NML3Cfg *l3cfg = NULL;
NML3Cfg *l3cfg;
if (ifindex <= 0) {
/* platform signal callback could be triggered by nodev routes, skip them */
@@ -349,6 +546,250 @@ nm_netns_shared_ip_release(NMNetnsSharedIPHandle *handle)
/*****************************************************************************/
void
nm_netns_ip_route_ecmp_register(NMNetns *self, NML3Cfg *l3cfg, const NMPObject *obj)
{
NMNetnsPrivate *priv;
EcmpTrackObj *track_obj;
const NMPlatformIP4Route *route;
char sbuf[NM_UTILS_TO_STRING_BUFFER_SIZE];
nm_assert_l3cfg(self, l3cfg);
route = NMP_OBJECT_CAST_IP4_ROUTE(obj);
nm_assert(route->ifindex > 0);
nm_assert(route->ifindex == nm_l3cfg_get_ifindex(l3cfg));
nm_assert(route->n_nexthops <= 1);
priv = NM_NETNS_GET_PRIVATE(self);
track_obj = g_hash_table_lookup(priv->ecmp_track_by_obj, &obj);
if (NM_MORE_ASSERTS > 10) {
EcmpTrackObj *track_obj2;
gboolean found = FALSE;
c_list_for_each_entry (track_obj2,
&l3cfg->internal_netns.ecmp_track_ifindex_lst_head,
ifindex_lst) {
if (track_obj2->obj == obj) {
found = TRUE;
break;
}
}
nm_assert((!!track_obj) == found);
}
if (!track_obj) {
EcmpTrackEcmpid *track_ecmpid;
track_ecmpid = g_hash_table_lookup(priv->ecmp_track_by_ecmpid, &obj);
if (!track_ecmpid) {
track_ecmpid = g_slice_new(EcmpTrackEcmpid);
*track_ecmpid = (EcmpTrackEcmpid){
.representative_obj = nmp_object_ref(obj),
.merged_obj = NULL,
.ecmpid_lst_head = C_LIST_INIT(track_ecmpid->ecmpid_lst_head),
.needs_update = TRUE,
};
g_hash_table_add(priv->ecmp_track_by_ecmpid, track_ecmpid);
} else
track_ecmpid->needs_update = TRUE;
track_obj = g_slice_new(EcmpTrackObj);
*track_obj = (EcmpTrackObj){
.obj = nmp_object_ref(obj),
.l3cfg = l3cfg,
.parent_track_ecmpid = track_ecmpid,
.dirty = FALSE,
};
g_hash_table_add(priv->ecmp_track_by_obj, track_obj);
c_list_link_tail(&l3cfg->internal_netns.ecmp_track_ifindex_lst_head,
&track_obj->ifindex_lst);
c_list_link_tail(&track_ecmpid->ecmpid_lst_head, &track_obj->ecmpid_lst);
_LOGT(
"ecmp-route: track %s",
nmp_object_to_string(track_obj->obj, NMP_OBJECT_TO_STRING_PUBLIC, sbuf, sizeof(sbuf)));
} else
track_obj->dirty = FALSE;
}
void
nm_netns_ip_route_ecmp_commit(NMNetns *self, NML3Cfg *l3cfg, GPtrArray **out_singlehop_routes)
{
NMNetnsPrivate *priv = NM_NETNS_GET_PRIVATE(self);
EcmpTrackObj *track_obj;
EcmpTrackObj *track_obj_safe;
EcmpTrackEcmpid *track_ecmpid;
const NMPObject *route_obj;
const NMPlatformIP4Route *route;
char sbuf[NM_UTILS_TO_STRING_BUFFER_SIZE];
gs_unref_ptrarray GPtrArray *mhrts_del = NULL;
gs_unref_ptrarray GPtrArray *mhrts_add = NULL;
nm_assert_l3cfg(self, l3cfg);
/* First, delete all dirty entries, and mark the survivors as dirty, so that on the
* next update they must be touched again. */
c_list_for_each_entry_safe (track_obj,
track_obj_safe,
&l3cfg->internal_netns.ecmp_track_ifindex_lst_head,
ifindex_lst) {
track_ecmpid = track_obj->parent_track_ecmpid;
track_ecmpid->already_visited = FALSE;
if (!track_obj->dirty) {
/* This one is still in used. Keep it, but mark dirty, so that on the
* next update cycle, it needs to be touched again or will be deleted. */
track_obj->dirty = TRUE;
continue;
}
/* This entry can be dropped. */
if (!g_hash_table_remove(priv->ecmp_track_by_obj, &track_obj->obj))
nm_assert_not_reached();
if (c_list_is_empty(&track_ecmpid->ecmpid_lst_head)) {
if (track_ecmpid->merged_obj) {
if (NMP_OBJECT_CAST_IP4_ROUTE(track_ecmpid->merged_obj)->n_nexthops > 1) {
if (!mhrts_del)
mhrts_del =
g_ptr_array_new_with_free_func((GDestroyNotify) nmp_object_unref);
g_ptr_array_add(mhrts_del,
(gpointer) g_steal_pointer(&track_ecmpid->merged_obj));
} else {
if (track_obj->l3cfg != l3cfg) {
nm_l3cfg_commit_on_idle_schedule(track_obj->l3cfg,
NM_L3_CFG_COMMIT_TYPE_AUTO);
}
}
}
g_hash_table_remove(priv->ecmp_track_by_ecmpid, track_ecmpid);
continue;
}
/* We need to update the representative obj. */
nmp_object_ref_set(
&track_ecmpid->representative_obj,
c_list_first_entry(&track_ecmpid->ecmpid_lst_head, EcmpTrackObj, ecmpid_lst)->obj);
track_ecmpid->needs_update = TRUE;
}
/* Now, we need to iterate again over all objects, and regenerate the merged_obj. */
c_list_for_each_entry (track_obj,
&l3cfg->internal_netns.ecmp_track_ifindex_lst_head,
ifindex_lst) {
nm_auto_nmpobj const NMPObject *obj_del = NULL;
gboolean changed;
track_ecmpid = track_obj->parent_track_ecmpid;
if (track_ecmpid->already_visited) {
/* We already visited this ecmpid in the same loop. We can skip, otherwise
* we might add the same route twice. */
continue;
}
track_ecmpid->already_visited = TRUE;
changed = _ecmp_track_init_merged_obj(track_obj->parent_track_ecmpid, &obj_del);
nm_assert(!obj_del || changed);
route_obj = track_obj->parent_track_ecmpid->merged_obj;
route = NMP_OBJECT_CAST_IP4_ROUTE(route_obj);
if (obj_del) {
if (NMP_OBJECT_CAST_IP4_ROUTE(obj_del)->n_nexthops > 1) {
if (!mhrts_del)
mhrts_del = g_ptr_array_new_with_free_func((GDestroyNotify) nmp_object_unref);
g_ptr_array_add(mhrts_del, (gpointer) g_steal_pointer(&obj_del));
} else {
if (track_obj->l3cfg != l3cfg) {
nm_l3cfg_commit_on_idle_schedule(track_obj->l3cfg, NM_L3_CFG_COMMIT_TYPE_AUTO);
}
}
}
if (route->n_nexthops <= 1) {
/* This is a single hop route. Return it to the caller. */
if (!*out_singlehop_routes) {
/* Note that the returned array does not own a reference. This
* function has only one caller, and for that caller, it's just
* fine that the result is not additionally kept alive. */
*out_singlehop_routes =
g_ptr_array_new_with_free_func((GDestroyNotify) nmp_object_unref);
}
g_ptr_array_add(*out_singlehop_routes, (gpointer) nmp_object_ref(route_obj));
if (changed) {
_LOGT("ecmp-route: single-hop %s",
nmp_object_to_string(route_obj,
NMP_OBJECT_TO_STRING_PUBLIC,
sbuf,
sizeof(sbuf)));
}
continue;
}
if (changed) {
_LOGT("ecmp-route: multi-hop %s",
nmp_object_to_string(route_obj, NMP_OBJECT_TO_STRING_PUBLIC, sbuf, sizeof(sbuf)));
if (!mhrts_add) {
/* mhrts_add doesn't own the pointers. It relies on them being alive long enough. */
mhrts_add = g_ptr_array_new();
}
g_ptr_array_add(mhrts_add, (gpointer) route_obj);
}
}
_netns_ip_route_ecmp_update_mh(self, mhrts_del, mhrts_add);
}
void
_netns_ip_route_ecmp_update_mh(NMNetns *self,
const GPtrArray *mhrts_del,
const GPtrArray *mhrts_add)
{
NMNetnsPrivate *priv = NM_NETNS_GET_PRIVATE(self);
guint i;
if (mhrts_del) {
for (i = 0; i < mhrts_del->len; i++) {
const NMPObject *obj = mhrts_del->pdata[i];
if (!g_hash_table_remove(priv->ecmp_routes, obj))
nm_assert_not_reached();
nm_platform_object_delete(priv->platform, obj);
}
}
if (mhrts_add) {
for (i = 0; i < mhrts_add->len; i++) {
const NMPObject *obj = mhrts_add->pdata[i];
nm_auto_nmpobj const NMPObject *obj_old = NULL;
gpointer unused;
if (g_hash_table_steal_extended(priv->ecmp_routes,
obj,
(gpointer *) &obj_old,
&unused)) {
if (obj != obj_old)
nm_platform_object_delete(priv->platform, obj_old);
}
if (!g_hash_table_add(priv->ecmp_routes, (gpointer) nmp_object_ref(obj)))
nm_assert_not_reached();
nm_platform_ip_route_add(priv->platform, NMP_NLM_FLAG_APPEND, obj);
}
}
}
/*****************************************************************************/
static void
set_property(GObject *object, guint prop_id, const GValue *value, GParamSpec *pspec)
{
@@ -378,6 +819,19 @@ nm_netns_init(NMNetns *self)
priv->_self_signal_user_data = self;
c_list_init(&priv->l3cfg_signal_pending_lst_head);
priv->ecmp_routes = g_hash_table_new_full((GHashFunc) nmp_object_id_hash,
(GEqualFunc) nmp_object_id_equal,
(GDestroyNotify) nmp_object_unref,
NULL);
G_STATIC_ASSERT_EXPR(G_STRUCT_OFFSET(EcmpTrackObj, obj) == 0);
priv->ecmp_track_by_obj =
g_hash_table_new_full(nm_pdirect_hash, nm_pdirect_equal, _ecmp_routes_by_obj_free, NULL);
priv->ecmp_track_by_ecmpid = g_hash_table_new_full(_ecmp_routes_by_ecmpid_hash,
_ecmp_routes_by_ecmpid_equal,
_ecmp_routes_by_ecmpid_free,
NULL);
}
static void
@@ -457,6 +911,10 @@ dispose(GObject *object)
nm_assert(c_list_is_empty(&priv->l3cfg_signal_pending_lst_head));
nm_assert(!priv->shared_ips);
nm_clear_pointer(&priv->ecmp_routes, g_hash_table_destroy);
nm_clear_pointer(&priv->ecmp_track_by_obj, g_hash_table_destroy);
nm_clear_pointer(&priv->ecmp_track_by_ecmpid, g_hash_table_destroy);
nm_clear_g_source_inst(&priv->signal_pending_idle_source);
if (priv->platform)

View File

@@ -51,4 +51,9 @@ NMNetnsSharedIPHandle *nm_netns_shared_ip_reserve(NMNetns *self);
void nm_netns_shared_ip_release(NMNetnsSharedIPHandle *handle);
/*****************************************************************************/
void nm_netns_ip_route_ecmp_register(NMNetns *self, NML3Cfg *l3cfg, const NMPObject *obj);
void nm_netns_ip_route_ecmp_commit(NMNetns *self, NML3Cfg *l3cfg, GPtrArray **routes);
#endif /* __NM_NETNS_H__ */