diff --git a/.travis.yml b/.travis.yml
index de6ff770a..1149c8b83 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -48,7 +48,7 @@ addons:
coverity_scan:
project:
name: NetworkManager/NetworkManager
- build_command_prepend: sh autogen.sh --with-systemd-logind=no --enable-more-warnings=no --disable-ovs
+ build_command_prepend: sh autogen.sh --with-systemd-logind=no --enable-more-warnings=no --disable-ovs --without-ebpf
build_command: make -j4
branch_pattern: .*coverity.*
@@ -114,6 +114,7 @@ script:
-D ifcfg_rh=false \
-D ibft=true \
-D ifupdown=true \
+ -D ebpf=false \
&&
ninja -C build &&
ninja -C build test
@@ -136,6 +137,7 @@ script:
--enable-more-warnings=no \
--enable-tests=yes \
--with-crypto=$CRYPTO \
+ --without-ebpf \
\
--with-libnm-glib=yes \
--with-iwd=yes \
diff --git a/Makefile.am b/Makefile.am
index 5b0feefa4..afa6cfc85 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1347,20 +1347,50 @@ shared_libcsiphash_la_SOURCES = \
###############################################################################
+noinst_LTLIBRARIES += shared/libcrbtree.la
+
+shared_libcrbtree_la_CFLAGS = $(AM_CFLAGS) -std=c11
+
+shared_libcrbtree_la_CPPFLAGS = \
+ $(CODE_COVERAGE_CFLAGS) \
+ $(SANITIZER_LIB_CFLAGS) \
+ $(NULL)
+
+shared_libcrbtree_la_SOURCES = \
+ shared/c-rbtree/src/c-rbtree.c \
+ shared/c-rbtree/src/c-rbtree.h \
+ shared/c-rbtree/src/c-rbtree-private.h
+
+###############################################################################
+
noinst_LTLIBRARIES += shared/libnacd.la
-shared_libnacd_la_CFLAGS = -std=gnu99
+shared_libnacd_la_CFLAGS = $(AM_CFLAGS) -std=c11 -Wno-pointer-arith -Wno-vla
+shared_libnacd_la_LIBADD = shared/libcrbtree.la
shared_libnacd_la_CPPFLAGS = \
+ -D_GNU_SOURCE \
+ -DSO_ATTACH_BPF=50 \
$(CODE_COVERAGE_CFLAGS) \
$(SANITIZER_LIB_CFLAGS) \
-I$(srcdir)/shared/c-list/src \
-I$(srcdir)/shared/c-siphash/src \
+ -I$(srcdir)/shared/c-rbtree/src \
$(NULL)
shared_libnacd_la_SOURCES = \
shared/n-acd/src/n-acd.c \
- shared/n-acd/src/n-acd.h
+ shared/n-acd/src/n-acd.h \
+ shared/n-acd/src/n-acd-private.h \
+ shared/n-acd/src/n-acd-probe.c \
+ shared/n-acd/src/util/timer.c \
+ shared/n-acd/src/util/timer.h
+
+if WITH_EBPF
+shared_libnacd_la_SOURCES += shared/n-acd/src/n-acd-bpf.c
+else
+shared_libnacd_la_SOURCES += shared/n-acd/src/n-acd-bpf-fallback.c
+endif
EXTRA_DIST += shared/c-list/src/c-list.h
diff --git a/configure.ac b/configure.ac
index b3fc8b249..a8b0083e5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -518,6 +518,15 @@ case $with_suspend_resume in
;;
esac
+# eBPF support: --with-ebpf accepts only "yes" or "no" (default "yes"); any other value aborts configure, and "yes" enables the WITH_EBPF conditional.
+AC_ARG_WITH(ebpf,
+ AS_HELP_STRING([--with-ebpf=yes|no], [Build with eBPF support (default: yes)]),
+ [], [with_ebpf=yes])
+if test "$with_ebpf" != "yes" -a "$with_ebpf" != "no"; then
+ AC_MSG_ERROR(--with-ebpf must be one of [yes, no])
+fi
+AM_CONDITIONAL(WITH_EBPF, test "${with_ebpf}" = "yes")
+
# SELinux support
AC_ARG_WITH(selinux,
AS_HELP_STRING([--with-selinux=yes|no|auto], [Build with SELinux (default: auto)]),
@@ -1043,7 +1052,7 @@ else
more_logging_default=no
fi
-NM_COMPILER_WARNINGS(CFLAGS, ${more_warnings_default})
+NM_COMPILER_WARNINGS(AM_CFLAGS, ${more_warnings_default})
NM_COMPILER_FLAG(LIBSYSTEMD_NM_CFLAGS, "-Wno-gnu-variable-sized-type-not-at-end")
AC_SUBST(LIBSYSTEMD_NM_CFLAGS)
@@ -1051,7 +1060,7 @@ AC_SUBST(LIBSYSTEMD_NM_CFLAGS)
CC_CHECK_FLAGS_APPEND([with_cflags], [CFLAGS], [\
-fno-strict-aliasing \
])
-CFLAGS="$CFLAGS $with_cflags"
+AM_CFLAGS="$AM_CFLAGS $with_cflags"
AC_ARG_ENABLE(more-asserts,
AS_HELP_STRING([--enable-more-asserts],
@@ -1248,6 +1257,7 @@ fi
AM_CONDITIONAL(BUILD_DOCS, test "$build_docs" = "yes")
AM_CONDITIONAL(HAVE_DOCS, test "$build_docs" = "yes" -o "$use_pregen_docs" = "yes")
+AC_SUBST(AM_CFLAGS)
AC_CONFIG_FILES([
Makefile
@@ -1352,4 +1362,5 @@ echo " JSON validation for libnm: $enable_json_validation"
echo " crypto: $with_crypto (have-gnutls: $have_crypto_gnutls, have-nss: $have_crypto_nss)"
echo " sanitizers: $sanitizers"
echo " Mozilla Public Suffix List: $with_libpsl"
+echo " eBPF: $with_ebpf"
echo
diff --git a/m4/compiler_options.m4 b/m4/compiler_options.m4
index 0683e44e3..1f283db69 100644
--- a/m4/compiler_options.m4
+++ b/m4/compiler_options.m4
@@ -31,7 +31,7 @@ AC_DEFUN([NM_COMPILER_FLAG], [
dnl Check whether a particular warning is not emitted with code provided,
dnl append an option to disable the warning to a specified variable if the check fails.
-dnl NM_COMPILER_WARNING([ENV-VAR], [C-SNIPPET], [WARNING]])
+dnl NM_COMPILER_WARNING([ENV-VAR], [WARNING], [C-SNIPPET])
AC_DEFUN([NM_COMPILER_WARNING], [
_NM_COMPILER_FLAG([-W$2], [$3], [eval "AS_TR_SH([$1])='$$1 -W$2'"], [eval "AS_TR_SH([$1])='$$1 -Wno-$2'"])
])
@@ -47,10 +47,10 @@ if test "$GCC" = "yes" -a "$set_more_warnings" != "no"; then
dnl This is enabled in clang by default, makes little sense,
dnl and causes the build to abort with -Werror.
- CFLAGS_SAVED="$$1"
- eval "AS_TR_SH([$1])='$$1 -Qunused-arguments'"
- AC_COMPILE_IFELSE([AC_LANG_SOURCE([])], [], eval "AS_TR_SH([$1])='$CFLAGS_SAVED'")
- unset CFLAGS_SAVED
+ CFLAGS_SAVED="$CFLAGS"
+ CFLAGS="$CFLAGS -Qunused-arguments"
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE([])], eval "AS_TR_SH([$1])='$$1 -Qunused-arguments'", [])
+ CFLAGS="$CFLAGS_SAVED"
dnl clang only warns about unknown warnings, unless
dnl called with "-Werror=unknown-warning-option"
diff --git a/meson_options.txt b/meson_options.txt
index 7f06d53a7..ee205601c 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -41,6 +41,7 @@ option('libnm_glib', type: 'boolean', value: false, description: 'build legacy l
option('nmcli', type: 'boolean', value: true, description: 'Build nmcli')
option('nmtui', type: 'boolean', value: true, description: 'Build nmtui')
option('bluez5_dun', type: 'boolean', value: false, description: 'enable Bluez5 DUN support')
+option('ebpf', type: 'boolean', value: true, description: 'Enable or disable eBPF support')
# configuration plugins
option('config_plugins_default', type: 'string', value: '', description: 'Default configuration option for main.plugins setting, used as fallback if the configuration option is unset')
diff --git a/shared/c-rbtree/.cherryci/ci-test b/shared/c-rbtree/.cherryci/ci-test
new file mode 100755
index 000000000..78b0423f6
--- /dev/null
+++ b/shared/c-rbtree/.cherryci/ci-test
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+set -e
+
+rm -Rf "./ci-build"
+mkdir "./ci-build"
+cd "./ci-build"
+
+${CHERRY_LIB_MESONSETUP} . "${CHERRY_LIB_SRCDIR}"
+${CHERRY_LIB_NINJABUILD}
+CRBTREE_TEST_PTRACE=1 ${CHERRY_LIB_MESONTEST}
+(( ! CHERRY_LIB_VALGRIND )) || ${CHERRY_LIB_MESONTEST} "--wrapper=${CHERRY_LIB_VALGRINDWRAP}"
diff --git a/shared/c-rbtree/.editorconfig b/shared/c-rbtree/.editorconfig
new file mode 100644
index 000000000..b10bb4f3f
--- /dev/null
+++ b/shared/c-rbtree/.editorconfig
@@ -0,0 +1,11 @@
+root = true
+
+[*]
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+charset = utf-8
+
+[*.{c,h}]
+indent_style = space
+indent_size = 8
diff --git a/shared/c-rbtree/.travis.yml b/shared/c-rbtree/.travis.yml
new file mode 100644
index 000000000..99a7bb946
--- /dev/null
+++ b/shared/c-rbtree/.travis.yml
@@ -0,0 +1,21 @@
+os: linux
+dist: trusty
+language: c
+
+services:
+ - docker
+
+before_install:
+ - curl -O -L "https://raw.githubusercontent.com/cherry-pick/cherry-images/v1/scripts/vmrun"
+ - curl -O -L "https://raw.githubusercontent.com/cherry-pick/cherry-ci/v1/scripts/cherryci"
+ - chmod +x "./vmrun" "./cherryci"
+
+jobs:
+ include:
+ - stage: test
+ script:
+ - ./vmrun -- ../src/cherryci -d ../src/.cherryci -s c-util -m
+ - script:
+ - ./vmrun -T armv7hl -- ../src/cherryci -d ../src/.cherryci -s c-util
+ - script:
+ - ./vmrun -T i686 -- ../src/cherryci -d ../src/.cherryci -s c-util
diff --git a/shared/c-rbtree/AUTHORS b/shared/c-rbtree/AUTHORS
new file mode 100644
index 000000000..980d60233
--- /dev/null
+++ b/shared/c-rbtree/AUTHORS
@@ -0,0 +1,37 @@
+LICENSE:
+ This project is dual-licensed under both the Apache License, Version
+ 2.0, and the GNU Lesser General Public License, Version 2.1+.
+
+AUTHORS-ASL:
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+AUTHORS-LGPL:
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program; If not, see <http://www.gnu.org/licenses/>.
+
+COPYRIGHT: (ordered alphabetically)
+ Copyright (C) 2015-2018 Red Hat, Inc.
+
+AUTHORS: (ordered alphabetically)
+ David Herrmann
+ Tom Gundersen
diff --git a/shared/c-rbtree/AUTHORS-ASL b/shared/c-rbtree/AUTHORS-ASL
new file mode 100644
index 000000000..5d501a728
--- /dev/null
+++ b/shared/c-rbtree/AUTHORS-ASL
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+Copyright {yyyy} {name of copyright owner}
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/shared/c-rbtree/AUTHORS-LGPL b/shared/c-rbtree/AUTHORS-LGPL
new file mode 100644
index 000000000..4362b4915
--- /dev/null
+++ b/shared/c-rbtree/AUTHORS-LGPL
@@ -0,0 +1,502 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded. In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the library's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+  <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
diff --git a/shared/c-rbtree/LICENSE b/shared/c-rbtree/LICENSE
new file mode 120000
index 000000000..da24c5e4a
--- /dev/null
+++ b/shared/c-rbtree/LICENSE
@@ -0,0 +1 @@
+AUTHORS-ASL
\ No newline at end of file
diff --git a/shared/c-rbtree/NEWS b/shared/c-rbtree/NEWS
new file mode 100644
index 000000000..59c03c7ee
--- /dev/null
+++ b/shared/c-rbtree/NEWS
@@ -0,0 +1,40 @@
+c-rbtree - Intrusive Red-Black Tree Collection
+
+CHANGES WITH 3:
+
+ * Add more helpers. Add both a collection of iterators and helpers
+ for initializing a tree and checking if a tree is empty, without
+ explicitly accessing the data structure.
+
+ Contributions from: David Herrmann
+
+ - Berlin, 2017-08-13
+
+CHANGES WITH 2:
+
+ * Relicense as ASL-2.0 to make c-rbtree useful for more projects. All
+ code is now fully available under the ASL-2.0. Nothing is covered by
+ the LGPL, anymore.
+
+ * Switch build-system from Autotools to Meson. This simplifies the code
+ base significantly. The Meson Build System is now used by many other
+ projects, including GStreamer, Weston, and several Gnome packages.
+ See http://mesonbuild.com/ for more information.
+
+ Contributions from: David Herrmann
+
+ - Berlin, 2016-12-14
+
+CHANGES WITH 1:
+
+ * Initial release of c-rbtree.
+
+ * This project provides an RB-Tree API that is fully implemented in
+ ISO-C11 and has no external dependencies. Furthermore, tree
+ traversal, memory allocations, and key comparisons are completely
+ controlled by the API user. The implementation only provides the
+ RB-Tree specific rebalancing and coloring.
+
+ Contributions from: David Herrmann, Kay Sievers, Tom Gundersen
+
+ - Berlin, 2016-08-31
diff --git a/shared/c-rbtree/README b/shared/c-rbtree/README
new file mode 100644
index 000000000..069e15c65
--- /dev/null
+++ b/shared/c-rbtree/README
@@ -0,0 +1,52 @@
+c-rbtree - Intrusive Red-Black Tree Collection
+
+ABOUT:
+ The c-rbtree project implements an intrusive collection based on
+ red-black trees in ISO-C11. Its API guarantees the user full control
+ over its data-structures, and rather limits itself to just the
+ tree-specific rebalancing and coloring operations.
+
+ For API documentation, see the c-rbtree.h header file, as well as the
+ docbook comments for each function.
+
+DETAILS:
+ https://c-util.github.io/c-rbtree
+
+BUG REPORTS:
+ https://github.com/c-util/c-rbtree/issues
+
+GIT:
+ git@github.com:c-util/c-rbtree.git
+ https://github.com/c-util/c-rbtree.git
+
+GITWEB:
+ https://github.com/c-util/c-rbtree
+
+LICENSE:
+ Apache Software License 2.0
+ Lesser General Public License 2.1+
+ See AUTHORS for details.
+
+REQUIREMENTS:
+ The requirements for c-rbtree are:
+
+ libc (e.g., glibc >= 2.16)
+
+ At build-time, the following software is required:
+
+ meson >= 0.41
+ pkg-config >= 0.29
+
+INSTALL:
+ The meson build-system is used for this project. Consult upstream
+ documentation for detailed help. In most situations the following
+ commands are sufficient to build and install from source:
+
+ $ mkdir build
+ $ cd build
+ $ meson setup ..
+ $ ninja
+ $ meson test
+ # ninja install
+
+ No custom configuration options are available.
diff --git a/shared/c-rbtree/meson.build b/shared/c-rbtree/meson.build
new file mode 100644
index 000000000..ce57651e4
--- /dev/null
+++ b/shared/c-rbtree/meson.build
@@ -0,0 +1,15 @@
+project(
+ 'c-rbtree',
+ 'c',
+ version: '3',
+ license: 'Apache',
+ default_options: [
+ 'c_std=c11' # the sources are written against ISO C11
+ ],
+)
+project_description = 'Intrusive Red-Black Tree Collection'
+
+add_project_arguments('-D_GNU_SOURCE', language: 'c') # define _GNU_SOURCE for every C compile in this project
+mod_pkgconfig = import('pkgconfig') # NOTE(review): not used in this file; presumably consumed by src/meson.build
+
+subdir('src')
diff --git a/shared/c-rbtree/src/c-rbtree-private.h b/shared/c-rbtree/src/c-rbtree-private.h
new file mode 100644
index 000000000..25b9ba01c
--- /dev/null
+++ b/shared/c-rbtree/src/c-rbtree-private.h
@@ -0,0 +1,40 @@
+#pragma once
+
+/*
+ * Private definitions
+ * This file contains private definitions for the RB-Tree implementation, but
+ * which are used by our test-suite.
+ */
+
+#include <stddef.h> /* NULL, size_t */
+#include "c-rbtree.h"
+
+/*
+ * Macros
+ */
+
+#define _public_ __attribute__((__visibility__("default")))
+
+/*
+ * Nodes
+ */
+
+static inline void *c_rbnode_raw(CRBNode *n) {
+ return (void *)(n->__parent_and_flags & ~C_RBNODE_FLAG_MASK); /* stored word with the flag bits cleared, as an untyped pointer */
+}
+
+static inline unsigned long c_rbnode_flags(CRBNode *n) {
+ return n->__parent_and_flags & C_RBNODE_FLAG_MASK; /* only the bits selected by C_RBNODE_FLAG_MASK */
+}
+
+static inline _Bool c_rbnode_is_red(CRBNode *n) {
+ return c_rbnode_flags(n) & C_RBNODE_RED; /* true iff C_RBNODE_RED is set in the node's flags */
+}
+
+static inline _Bool c_rbnode_is_black(CRBNode *n) {
+ return !(c_rbnode_flags(n) & C_RBNODE_RED); /* true iff C_RBNODE_RED is clear in the node's flags */
+}
+
+static inline _Bool c_rbnode_is_root(CRBNode *n) {
+ return c_rbnode_flags(n) & C_RBNODE_ROOT; /* true iff C_RBNODE_ROOT is set in the node's flags */
+}
diff --git a/shared/c-rbtree/src/c-rbtree.c b/shared/c-rbtree/src/c-rbtree.c
new file mode 100644
index 000000000..f58db849b
--- /dev/null
+++ b/shared/c-rbtree/src/c-rbtree.c
@@ -0,0 +1,1118 @@
+/*
+ * RB-Tree Implementation
+ * This implements the insertion/removal of elements in RB-Trees. You're highly
+ * recommended to have an RB-Tree documentation at hand when reading this. Both
+ * insertion and removal can be split into a handful of situations that can
+ * occur. Those situations are enumerated as "Case 1" to "Case n" here, and
+ * follow closely the cases described in most RB-Tree documentations. This file
+ * does not explain why it is enough to handle just those cases, nor does it
+ * provide a proof of correctness. Dig out your algorithm 101 handbook if
+ * you're interested.
+ *
+ * This implementation is *not* straightforward. Usually, a handful of
+ * rotation, reparent, swap and link helpers can be used to implement the
+ * rebalance operations. However, those often perform unnecessary writes.
+ * Therefore, this implementation hard-codes all the operations. You're highly
+ * recommended to look at the two basic helpers before reading the code:
+ * c_rbnode_swap_child()
+ * c_rbnode_set_parent_and_flags()
+ * Those are the only helpers used, hence, you should really know what they do
+ * before digging into the code.
+ *
+ * For high-level documentation of the API, see the header file and docbook
+ * comments.
+ */
+
+#include <assert.h> /* assert(), static_assert */
+#include <stdalign.h> /* alignof */
+#include <stddef.h> /* max_align_t, NULL */
+
+#include "c-rbtree-private.h"
+#include "c-rbtree.h"
+
+/*
+ * We use alignas(8) to enforce 64bit alignment of structure fields. This is
+ * according to ISO-C11, so we rely on the compiler to implement this. However,
+ * at the same time we don't want to exceed native malloc() alignment on target
+ * platforms. Hence, we also verify against max_align_t.
+ */
+static_assert(alignof(CRBNode) <= alignof(max_align_t), "Invalid RBNode alignment");
+static_assert(alignof(CRBNode) >= 8, "Invalid CRBNode alignment");
+static_assert(alignof(CRBTree) <= alignof(max_align_t), "Invalid RBTree alignment");
+static_assert(alignof(CRBTree) >= 8, "Invalid CRBTree alignment");
+
+/**
+ * c_rbnode_leftmost() - return leftmost child
+ * @n: current node, or NULL
+ *
+ * This returns the leftmost child of @n. If @n is NULL, this will return NULL.
+ * In all other cases, this function returns a valid pointer. That is, if @n
+ * does not have any left children, this returns @n.
+ *
+ * Worst case runtime (n: number of elements in tree): O(log(n))
+ *
+ * Return: Pointer to leftmost child, or NULL.
+ */
+_public_ CRBNode *c_rbnode_leftmost(CRBNode *n) {
+        /* Descend along left links; a NULL @n is handed back unchanged. */
+        for (; n && n->left; n = n->left)
+                ;
+        return n;
+}
+
+/**
+ * c_rbnode_rightmost() - return rightmost child
+ * @n: current node, or NULL
+ *
+ * This returns the rightmost child of @n. If @n is NULL, this will return
+ * NULL. In all other cases, this function returns a valid pointer. That is, if
+ * @n does not have any right children, this returns @n.
+ *
+ * Worst case runtime (n: number of elements in tree): O(log(n))
+ *
+ * Return: Pointer to rightmost child, or NULL.
+ */
+_public_ CRBNode *c_rbnode_rightmost(CRBNode *n) {
+        /* Descend along right links; a NULL @n is handed back unchanged. */
+        for (; n && n->right; n = n->right)
+                ;
+        return n;
+}
+
+/**
+ * c_rbnode_leftdeepest() - return left-deepest child
+ * @n: current node, or NULL
+ *
+ * This returns the left-deepest child of @n. If @n is NULL, this will return
+ * NULL. In all other cases, this function returns a valid pointer. That is, if
+ * @n does not have any children, this returns @n.
+ *
+ * The left-deepest child is defined as the deepest child without any left
+ * (grand-...)siblings.
+ *
+ * Worst case runtime (n: number of elements in tree): O(log(n))
+ *
+ * Return: Pointer to left-deepest child, or NULL.
+ */
+_public_ CRBNode *c_rbnode_leftdeepest(CRBNode *n) {
+        CRBNode *child;
+
+        if (!n)
+                return NULL;
+        /*
+         * Step down one level at a time, taking the left child when it
+         * exists and the right child otherwise, until a leaf is reached.
+         */
+        while ((child = n->left ? n->left : n->right))
+                n = child;
+        return n;
+}
+
+/**
+ * c_rbnode_rightdeepest() - return right-deepest child
+ * @n: current node, or NULL
+ *
+ * This returns the right-deepest child of @n. If @n is NULL, this will return
+ * NULL. In all other cases, this function returns a valid pointer. That is, if
+ * @n does not have any children, this returns @n.
+ *
+ * The right-deepest child is defined as the deepest child without any right
+ * (grand-...)siblings.
+ *
+ * Worst case runtime (n: number of elements in tree): O(log(n))
+ *
+ * Return: Pointer to right-deepest child, or NULL.
+ */
+_public_ CRBNode *c_rbnode_rightdeepest(CRBNode *n) {
+        CRBNode *child;
+
+        if (!n)
+                return NULL;
+        /*
+         * Step down one level at a time, taking the right child when it
+         * exists and the left child otherwise, until a leaf is reached.
+         */
+        while ((child = n->right ? n->right : n->left))
+                n = child;
+        return n;
+}
+
+/**
+ * c_rbnode_next() - return next node
+ * @n: current node, or NULL
+ *
+ * An RB-Tree always defines a linear order of its elements. This function
+ * returns the logically next node to @n. If @n is NULL, the last node or
+ * unlinked, this returns NULL.
+ *
+ * Worst case runtime (n: number of elements in tree): O(log(n))
+ *
+ * Return: Pointer to next node, or NULL.
+ */
+_public_ CRBNode *c_rbnode_next(CRBNode *n) {
+        CRBNode *up;
+
+        if (!c_rbnode_is_linked(n))
+                return NULL;
+        if (n->right)
+                return c_rbnode_leftmost(n->right);
+
+        /* Climb while we are a right child; the parent we stop at is next. */
+        for (up = c_rbnode_parent(n); up && up->right == n; up = c_rbnode_parent(n))
+                n = up;
+        return up;
+}
+
+/**
+ * c_rbnode_prev() - return previous node
+ * @n: current node, or NULL
+ *
+ * An RB-Tree always defines a linear order of its elements. This function
+ * returns the logically previous node to @n. If @n is NULL, the first node or
+ * unlinked, this returns NULL.
+ *
+ * Worst case runtime (n: number of elements in tree): O(log(n))
+ *
+ * Return: Pointer to previous node, or NULL.
+ */
+_public_ CRBNode *c_rbnode_prev(CRBNode *n) {
+        CRBNode *up;
+
+        if (!c_rbnode_is_linked(n))
+                return NULL;
+        if (n->left)
+                return c_rbnode_rightmost(n->left);
+
+        /* Climb while we are a left child; the parent we stop at is previous. */
+        for (up = c_rbnode_parent(n); up && up->left == n; up = c_rbnode_parent(n))
+                n = up;
+        return up;
+}
+
+/**
+ * c_rbnode_next_postorder() - return next node in post-order
+ * @n: current node, or NULL
+ *
+ * This returns the next node to @n, based on a left-to-right post-order
+ * traversal. If @n is NULL, the root node, or unlinked, this returns NULL.
+ *
+ * This implements a left-to-right post-order traversal: First visit the left
+ * child of a node, then the right, and lastly the node itself. Children are
+ * traversed recursively.
+ *
+ * This function can be used to implement a left-to-right post-order traversal:
+ *
+ * for (n = c_rbtree_first_postorder(t); n; n = c_rbnode_next_postorder(n))
+ * visit(n);
+ *
+ * Worst case runtime (n: number of elements in tree): O(log(n))
+ *
+ * Return: Pointer to next node, or NULL.
+ */
+_public_ CRBNode *c_rbnode_next_postorder(CRBNode *n) {
+        CRBNode *parent;
+
+        if (!c_rbnode_is_linked(n))
+                return NULL;
+
+        parent = c_rbnode_parent(n);
+        if (!parent || parent->left != n || !parent->right)
+                return parent;
+        /* Left child with a right sibling: that subtree is visited first. */
+        return c_rbnode_leftdeepest(parent->right);
+}
+
+/**
+ * c_rbnode_prev_postorder() - return previous node in post-order
+ * @n: current node, or NULL
+ *
+ * This returns the previous node to @n, based on a left-to-right post-order
+ * traversal. That is, it is the inverse operation to c_rbnode_next_postorder().
+ * If @n is NULL, the left-deepest node, or unlinked, this returns NULL.
+ *
+ * This function returns the logical previous node in a directed post-order
+ * traversal. That is, it effectively does a pre-order traversal (since a
+ * reverse post-order traversal is a pre-order traversal). This function does
+ * NOT do a right-to-left post-order traversal! In other words, the following
+ * invariant is guaranteed, if c_rbnode_next_postorder(n) is non-NULL:
+ *
+ * n == c_rbnode_prev_postorder(c_rbnode_next_postorder(n))
+ *
+ * This function can be used to implement a right-to-left pre-order traversal,
+ * using the fact that a reverse post-order traversal is also a valid pre-order
+ * traversal:
+ *
+ * for (n = c_rbtree_last_postorder(t); n; n = c_rbnode_prev_postorder(n))
+ * visit(n);
+ *
+ * This would effectively perform a right-to-left pre-order traversal: first
+ * visit a parent, then its right child, then its left child. Both children are
+ * traversed recursively.
+ *
+ * Worst case runtime (n: number of elements in tree): O(log(n))
+ *
+ * Return: Pointer to previous node in post-order, or NULL.
+ */
+_public_ CRBNode *c_rbnode_prev_postorder(CRBNode *n) {
+        CRBNode *up;
+
+        if (!c_rbnode_is_linked(n))
+                return NULL;
+
+        /* Pre-order step: prefer the right child, then the left one. */
+        if (n->right || n->left)
+                return n->right ? n->right : n->left;
+
+        /* Leaf: climb until an ancestor has a left child we did not come from. */
+        for (up = c_rbnode_parent(n); up; n = up, up = c_rbnode_parent(n)) {
+                if (up->left && up->left != n)
+                        return up->left;
+        }
+
+        return NULL;
+}
+
+/**
+ * c_rbtree_first() - return first node
+ * @t: tree to operate on, must not be NULL (asserted)
+ *
+ * An RB-Tree always defines a linear order of its elements. This function
+ * returns the leftmost node of @t (its logically first), or NULL if empty.
+ *
+ * Worst case runtime (n: number of elements in tree): O(log(n))
+ *
+ * Return: Pointer to first node, or NULL.
+ */
+_public_ CRBNode *c_rbtree_first(CRBTree *t) {
+ assert(t);
+ return c_rbnode_leftmost(t->root);
+}
+
+/**
+ * c_rbtree_last() - return last node
+ * @t: tree to operate on, must not be NULL (asserted)
+ *
+ * An RB-Tree always defines a linear order of its elements. This function
+ * returns the rightmost node of @t (its logically last), or NULL if empty.
+ *
+ * Worst case runtime (n: number of elements in tree): O(log(n))
+ *
+ * Return: Pointer to last node, or NULL.
+ */
+_public_ CRBNode *c_rbtree_last(CRBTree *t) {
+ assert(t);
+ return c_rbnode_rightmost(t->root);
+}
+
+/**
+ * c_rbtree_first_postorder() - return first node in post-order
+ * @t: tree to operate on, must not be NULL (asserted)
+ *
+ * This returns the first node of a left-to-right post-order traversal. That
+ * is, it returns the left-deepest leaf. If the tree is empty, this returns
+ * NULL.
+ *
+ * This can also be interpreted as the last node of a right-to-left pre-order
+ * traversal.
+ *
+ * Worst case runtime (n: number of elements in tree): O(log(n))
+ *
+ * Return: Pointer to first node in post-order, or NULL.
+ */
+_public_ CRBNode *c_rbtree_first_postorder(CRBTree *t) {
+ assert(t);
+ return c_rbnode_leftdeepest(t->root);
+}
+
+/**
+ * c_rbtree_last_postorder() - return last node in post-order
+ * @t: tree to operate on, must not be NULL (asserted)
+ *
+ * This returns the last node of a left-to-right post-order traversal. That is,
+ * it always returns the root node, or NULL if the tree is empty.
+ *
+ * This can also be interpreted as the first node of a right-to-left pre-order
+ * traversal.
+ *
+ * Fixed runtime (n: number of elements in tree): O(1)
+ *
+ * Return: Pointer to last node in post-order, or NULL.
+ */
+_public_ CRBNode *c_rbtree_last_postorder(CRBTree *t) {
+ assert(t);
+ return t->root;
+}
+
+static inline void c_rbtree_store(CRBNode **ptr, CRBNode *addr) {
+        /*
+         * Every STORE to a @left, @right or root slot is a volatile access.
+         * The intent is that parallel, lockless lookups see those stores in
+         * the correct order and never run into a temporary loop on rotation.
+         * Callers must still synchronize via seqlocks, rcu, or similar.
+         *
+         * The qualifier has to apply to the slot that is written, hence
+         * `CRBNode *volatile *`. A cast to `volatile CRBNode **` qualifies
+         * only the pointed-to node and makes this a plain store.
+         */
+        *(CRBNode *volatile *)ptr = addr;
+}
+
+/*
+ * Set the parent and the flags of node @n in one go. Treat this as a plain
+ * assignment of the 'parent' and 'flags' fields; no other magic is applied.
+ * Both fields share a single word of backing memory, so @p and @flags are
+ * OR-ed together and the previous contents of that word are overwritten.
+ */
+static inline void c_rbnode_set_parent_and_flags(CRBNode *n, CRBNode *p, unsigned long flags) {
+ n->__parent_and_flags = (unsigned long)p | flags;
+}
+
+/*
+ * Nodes in the tree do not separately store a pointer to the tree root. That is,
+ * there is no way to access the tree-root in O(1) given an arbitrary node.
+ * Fortunately, this is usually not required. The only situation where this is
+ * needed is when rotating the root-node itself.
+ *
+ * In case of the root node, c_rbnode_parent() returns NULL. We use this fact
+ * to re-use the parent-pointer storage of the root node to point to the
+ * CRBTree root. This way, we can rotate the root-node (or add/remove it)
+ * without requiring a separate tree-root pointer.
+ *
+ * However, to keep the tree-modification functions simple, we hide this detail
+ * whenever possible. This means, c_rbnode_parent() will continue to return
+ * NULL, and tree modifications will boldly reset the pointer to NULL on
+ * rotation. Hence, the only way to retain this pointer is to call
+ * c_rbnode_pop_root() on a possible root-node before rotating. This returns
+ * NULL if the node in question is not the root node. Otherwise, it returns the
+ * tree-root, and clears the pointer/flag from the node in question. This way,
+ * you can perform tree operations as usual. Afterwards, use
+ * c_rbnode_push_root() to restore the root-pointer on any possible new root.
+ */
+static inline CRBTree *c_rbnode_pop_root(CRBNode *n) {
+ CRBTree *t = NULL;
+
+ if (c_rbnode_is_root(n)) {
+ t = c_rbnode_raw(n);
+ n->__parent_and_flags = c_rbnode_flags(n) & ~C_RBNODE_ROOT;
+ }
+
+ return t;
+}
+
+/* counter-part to c_rbnode_pop_root(): make @n (may be NULL) the root of @t; no-op if @t is NULL */
+static inline CRBTree *c_rbnode_push_root(CRBNode *n, CRBTree *t) {
+ if (t) {
+ if (n)
+ n->__parent_and_flags = (unsigned long)t
+ | c_rbnode_flags(n)
+ | C_RBNODE_ROOT;
+ c_rbtree_store(&t->root, n);
+ }
+
+ return NULL; /* always NULL, whether or not anything was stored */
+}
+
+/*
+ * Redirect the child slot of @old's parent so that it points to @new. Use
+ * this when @new takes over the position of @old in the tree, to update
+ * the left/right pointer of the parent. This is only a partial swap: no
+ * 'parent' pointer is touched, neither on @old nor on @new, and the children
+ * of both nodes are left alone.
+ *
+ * The sole purpose of this function is to shortcut left/right conditionals
+ * like this:
+ *
+ * if (old == old->parent->left)
+ * old->parent->left = new;
+ * else
+ * old->parent->right = new;
+ *
+ * If c_rbnode_parent(@old) is NULL (the root node), this does nothing. The
+ * caller must employ c_rbnode_pop_root() and c_rbnode_push_root() for that.
+ */
+static inline void c_rbnode_swap_child(CRBNode *old, CRBNode *new) {
+        CRBNode *parent = c_rbnode_parent(old);
+        CRBNode **slot;
+
+        if (!parent)
+                return;
+
+        slot = (parent->left == old) ? &parent->left : &parent->right;
+        c_rbtree_store(slot, new);
+}
+
+/**
+ * c_rbtree_move() - move tree
+ * @to: destination tree
+ * @from: source tree
+ *
+ * This imports the entire tree from @from into @to. @to must be empty! @from
+ * will be empty afterwards. If @from is empty, nothing is changed.
+ *
+ * Note that this operates in O(1) time. Only the root-entry is updated to
+ * point to the new tree-root.
+ */
+_public_ void c_rbtree_move(CRBTree *to, CRBTree *from) {
+ CRBTree *t;
+
+ assert(!to->root);
+
+ if (from->root) {
+ t = c_rbnode_pop_root(from->root); /* detach the root node from @from */
+ assert(t == from);
+
+ to->root = from->root;
+ from->root = NULL;
+
+ c_rbnode_push_root(to->root, to); /* re-attach the root node to @to */
+ }
+}
+
+static inline void c_rbtree_paint_terminal(CRBNode *n) {
+ CRBNode *p, *g, *gg, *x; /* parent, grandparent, great-grandparent, scratch */
+ CRBTree *t; /* tree, if @g is the root node; NULL otherwise */
+
+ /*
+ * Case 4:
+ * This path assumes @n is red, @p is red, but the uncle is unset or
+ * black. This implies @g exists and is black.
+ *
+ * This case requires up to 2 rotations to restore the tree invariants.
+ * That is, it runs in O(1) time and fully restores the RB-Tree
+ * invariants, all at the cost of performing at most 2 rotations.
+ */
+
+ p = c_rbnode_parent(n);
+ g = c_rbnode_parent(p);
+ gg = c_rbnode_parent(g);
+
+ assert(c_rbnode_is_red(p));
+ assert(c_rbnode_is_black(g));
+ assert(p == g->left || !g->left || c_rbnode_is_black(g->left));
+ assert(p == g->right || !g->right || c_rbnode_is_black(g->right));
+
+ if (p == g->left) {
+ if (n == p->right) {
+ /*
+ * We're the right red child of a red parent, which is
+ * a left child. Rotate on parent and consider us to be
+ * the old parent and the old parent to be us, making us
+ * the left child instead of the right child so we can
+ * handle it the same as below. Rotating two red nodes
+ * changes none of the invariants.
+ */
+ x = n->left;
+ c_rbtree_store(&p->right, x);
+ c_rbtree_store(&n->left, p);
+ if (x)
+ c_rbnode_set_parent_and_flags(x, p, c_rbnode_flags(x));
+ c_rbnode_set_parent_and_flags(p, n, c_rbnode_flags(p));
+ p = n;
+ }
+
+ /* 'n' is invalid from here on! */
+
+ /*
+ * We're the red left child of a red parent, black grandparent
+ * and uncle. Rotate parent on grandparent and switch their
+ * colors, making the parent black and the grandparent red. The
+ * root of this subtree was changed from the grandparent to the
+ * parent, but the color remained black, so the number of black
+ * nodes on each path stays the same. However, we got rid of
+ * the double red path as we are still the (red) child of the
+ * parent, which has now turned black. Note that had we been
+ * the right child, rather than the left child, we would now be
+ * the left child of the old grandparent, and we would still
+ * have a double red path. As the new grandparent remains
+ * black, we're done.
+ */
+ x = p->right;
+ t = c_rbnode_pop_root(g);
+ c_rbtree_store(&g->left, x);
+ c_rbtree_store(&p->right, g);
+ c_rbnode_swap_child(g, p);
+ if (x)
+ c_rbnode_set_parent_and_flags(x, g, c_rbnode_flags(x) & ~C_RBNODE_RED);
+ c_rbnode_set_parent_and_flags(p, gg, c_rbnode_flags(p) & ~C_RBNODE_RED);
+ c_rbnode_set_parent_and_flags(g, p, c_rbnode_flags(g) | C_RBNODE_RED);
+ c_rbnode_push_root(p, t);
+ } else /* if (p == g->right) */ { /* same as above, but mirrored */
+ if (n == p->left) { /* left child of a right parent: rotate right on parent first */
+ x = n->right;
+ c_rbtree_store(&p->left, n->right);
+ c_rbtree_store(&n->right, p);
+ if (x)
+ c_rbnode_set_parent_and_flags(x, p, c_rbnode_flags(x));
+ c_rbnode_set_parent_and_flags(p, n, c_rbnode_flags(p));
+ p = n;
+ }
+
+ x = p->left; /* rotate left on grandparent; @p turns black, @g turns red */
+ t = c_rbnode_pop_root(g);
+ c_rbtree_store(&g->right, x);
+ c_rbtree_store(&p->left, g);
+ c_rbnode_swap_child(g, p);
+ if (x)
+ c_rbnode_set_parent_and_flags(x, g, c_rbnode_flags(x) & ~C_RBNODE_RED);
+ c_rbnode_set_parent_and_flags(p, gg, c_rbnode_flags(p) & ~C_RBNODE_RED);
+ c_rbnode_set_parent_and_flags(g, p, c_rbnode_flags(g) | C_RBNODE_RED);
+ c_rbnode_push_root(p, t);
+ }
+}
+
+static inline CRBNode *c_rbtree_paint_path(CRBNode *n) { /* returns the node to rotate at, or NULL if done */
+ CRBNode *p, *g, *u; /* parent, grandparent and uncle of @n */
+
+ for (;;) {
+ p = c_rbnode_parent(n);
+ if (!p) {
+ /*
+ * Case 1:
+ * We reached the root. Mark it black and be done. As
+ * all leaf-paths share the root, the ratio of black
+ * nodes on each path stays the same.
+ */
+ c_rbnode_set_parent_and_flags(n, c_rbnode_raw(n), c_rbnode_flags(n) & ~C_RBNODE_RED);
+ return NULL;
+ } else if (c_rbnode_is_black(p)) {
+ /*
+ * Case 2:
+ * The parent is already black. As our node is red, we
+ * did not change the number of black nodes on any
+ * path, nor do we have multiple consecutive red nodes.
+ * There is nothing to be done.
+ */
+ return NULL;
+ }
+
+ g = c_rbnode_parent(p);
+ u = (p == g->left) ? g->right : g->left;
+ if (!u || !c_rbnode_is_red(u)) {
+ /*
+ * Case 4:
+ * The parent is red, but the uncle is unset or black.
+ * By rotating the parent above the uncle, we distribute
+ * the red nodes and thus restore the tree invariants.
+ * No recursive fixup will be needed afterwards. Hence,
+ * just let the caller know about @n and make them do
+ * the rotations.
+ */
+ return n;
+ }
+
+ /*
+ * Case 3:
+ * Parent and uncle are both red, and grandparent is black.
+ * Repaint parent and uncle black, the grandparent red and
+ * recurse into the grandparent. Note that this is the only
+ * recursive case. That is, this step restores the tree
+ * invariants for the sub-tree below @p (including @n), but
+ * needs to continue the re-coloring two levels up.
+ */
+ c_rbnode_set_parent_and_flags(p, g, c_rbnode_flags(p) & ~C_RBNODE_RED);
+ c_rbnode_set_parent_and_flags(u, g, c_rbnode_flags(u) & ~C_RBNODE_RED);
+ c_rbnode_set_parent_and_flags(g, c_rbnode_raw(g), c_rbnode_flags(g) | C_RBNODE_RED);
+ n = g; /* continue with the (now red) grandparent */
+ }
+}
+
+static inline void c_rbtree_paint(CRBNode *n) {
+ /*
+ * When a new node is inserted into an RB-Tree, we always link it as a
+ * tail-node and paint it red. This way, the node will not violate the
+ * rb-tree invariants regarding the number of black nodes on all paths.
+ *
+ * However, a red node must never have another bordering red-node (ie.,
+ * child or parent). Since the node is newly linked, it does not have
+ * any children. Therefore, all we need to do is fix the path upwards
+ * through all parents until we hit a black parent or can otherwise fix
+ * the coloring.
+ *
+ * This function first walks up the path from @n towards the tree root
+ * (done in c_rbtree_paint_path()). This recolors its parent/uncle, if
+ * possible, until it hits a sub-tree that cannot be fixed via
+ * re-coloring. After c_rbtree_paint_path() returns, there are two
+ * possible outcomes:
+ *
+ * 1) @n is NULL, in which case the tree invariants were
+ * restored by mere recoloring. Nothing is to be done.
+ *
+ * 2) @n is non-NULL, and points to the original node or a red
+ * ancestor of it. In this case we need to restore the tree
+ * invariants via up to two rotations. This will
+ * be done by c_rbtree_paint_terminal().
+ *
+ * As a summary, this function runs O(log(n)) re-coloring operations in
+ * the worst case, followed by O(1) rotations as final restoration. The
+ * amortized cost, however, is O(1), since re-coloring only recurses
+ * upwards if it hits a red uncle (which can only happen if a previous
+ * operation terminated its operation on that layer).
+ * While amortized painting of inserted nodes is O(1), finding the
+ * correct spot to link the node (before painting it) still requires a
+ * search in the binary tree in O(log(n)).
+ */
+ n = c_rbtree_paint_path(n);
+ if (n)
+ c_rbtree_paint_terminal(n);
+}
+
+/**
+ * c_rbnode_link() - link node into tree
+ * @p: parent node to link under
+ * @l: left/right slot of @p to link at
+ * @n: node to add
+ *
+ * This links @n into a tree underneath another node. The caller must provide
+ * the exact spot where to link the node. That is, the caller must traverse the
+ * tree based on their search order. Once they hit a leaf where to insert the
+ * node, call this function to link it and rebalance the tree.
+ *
+ * For this to work, the caller must provide a pointer to the parent node. If
+ * the tree might be empty, you must resort to c_rbtree_add().
+ *
+ * In most cases you are better off using c_rbtree_add(). See there for details
+ * how tree-insertion works.
+ */
+_public_ void c_rbnode_link(CRBNode *p, CRBNode **l, CRBNode *n) {
+ assert(p);
+ assert(l);
+ assert(n);
+ assert(l == &p->left || l == &p->right);
+
+ c_rbnode_set_parent_and_flags(n, p, C_RBNODE_RED); /* new nodes start out red */
+ c_rbtree_store(&n->left, NULL);
+ c_rbtree_store(&n->right, NULL);
+ c_rbtree_store(l, n); /* publish @n only after its own fields are set */
+
+ c_rbtree_paint(n);
+}
+
+/**
+ * c_rbtree_add() - add node to tree
+ * @t: tree to operate on
+ * @p: parent node to link under, or NULL
+ * @l: left/right slot of @p (or root) to link at
+ * @n: node to add
+ *
+ * This links @n into the tree given as @t. The caller must provide the exact
+ * spot where to link the node. That is, the caller must traverse the tree
+ * based on their search order. Once they hit a leaf where to insert the node,
+ * call this function to link it and rebalance the tree.
+ *
+ * A typical insertion would look like this (@t is your tree, @n is your node):
+ *
+ * CRBNode **i, *p;
+ *
+ * i = &t->root;
+ * p = NULL;
+ * while (*i) {
+ * p = *i;
+ * if (compare(n, *i) < 0)
+ * i = &(*i)->left;
+ * else
+ * i = &(*i)->right;
+ * }
+ *
+ * c_rbtree_add(t, p, i, n);
+ *
+ * Once the node is linked into the tree, a simple lookup on the same tree can
+ * be coded like this:
+ *
+ * CRBNode *i;
+ *
+ * i = t->root;
+ * while (i) {
+ * int v = compare(n, i);
+ * if (v < 0)
+ * i = i->left;
+ * else if (v > 0)
+ * i = i->right;
+ * else
+ * break;
+ * }
+ *
+ * When you add nodes to a tree, the memory contents of the node do not matter.
+ * That is, there is no need to initialize the node via c_rbnode_init().
+ * However, if you relink nodes multiple times during their lifetime, it is
+ * usually very convenient to use c_rbnode_init() and c_rbnode_unlink() (rather
+ * than c_rbnode_unlink_stale()). In those cases, you should validate that a
+ * node is unlinked before you call c_rbtree_add().
+ */
+_public_ void c_rbtree_add(CRBTree *t, CRBNode *p, CRBNode **l, CRBNode *n) {
+ assert(t);
+ assert(l);
+ assert(n);
+ assert(!p || l == &p->left || l == &p->right);
+ assert(p || l == &t->root);
+
+ c_rbnode_set_parent_and_flags(n, p, C_RBNODE_RED); /* new nodes start out red */
+ c_rbtree_store(&n->left, NULL);
+ c_rbtree_store(&n->right, NULL);
+
+ if (p)
+ c_rbtree_store(l, n);
+ else
+ c_rbnode_push_root(n, t); /* no parent: @n becomes the root of @t */
+
+ c_rbtree_paint(n);
+}
+
+static inline void c_rbnode_rebalance_terminal(CRBNode *p, CRBNode *previous) {
+ CRBNode *s, *x, *y, *g; /* sibling of @previous, scratch nodes, parent of @p */
+ CRBTree *t; /* tree, if @p is the root node; NULL otherwise */
+
+ if (previous == p->left) {
+ s = p->right;
+ if (c_rbnode_is_red(s)) {
+ /*
+ * Case 2:
+ * We have a red node as sibling. Rotate it onto our
+ * side so we can later on turn it black. This way, we
+ * gain the additional black node in our path.
+ */
+ t = c_rbnode_pop_root(p);
+ g = c_rbnode_parent(p);
+ x = s->left;
+ c_rbtree_store(&p->right, x);
+ c_rbtree_store(&s->left, p);
+ c_rbnode_swap_child(p, s);
+ c_rbnode_set_parent_and_flags(x, p, c_rbnode_flags(x) & ~C_RBNODE_RED);
+ c_rbnode_set_parent_and_flags(s, g, c_rbnode_flags(s) & ~C_RBNODE_RED);
+ c_rbnode_set_parent_and_flags(p, s, c_rbnode_flags(p) | C_RBNODE_RED);
+ c_rbnode_push_root(s, t);
+ s = x;
+ }
+
+ x = s->right;
+ if (!x || c_rbnode_is_black(x)) {
+ y = s->left;
+ if (!y || c_rbnode_is_black(y)) {
+ /*
+ * Case 3+4:
+ * Our sibling is black and has only black
+ * children. Flip it red and turn parent black.
+ * This way we gained a black node in our path.
+ * Note that the parent must be red, otherwise
+ * it must have been handled by our caller.
+ */
+ assert(c_rbnode_is_red(p));
+ c_rbnode_set_parent_and_flags(s, p, c_rbnode_flags(s) | C_RBNODE_RED);
+ c_rbnode_set_parent_and_flags(p, c_rbnode_parent(p), c_rbnode_flags(p) & ~C_RBNODE_RED);
+ return;
+ }
+
+ /*
+ * Case 5:
+ * Left child of our sibling is red, right one is black.
+ * Rotate right on the sibling so the right child of our
+ * new sibling is now red, and we can fall through to case 6.
+ */
+ x = y->right;
+ c_rbtree_store(&s->left, y->right);
+ c_rbtree_store(&y->right, s);
+ c_rbtree_store(&p->right, y);
+ if (x)
+ c_rbnode_set_parent_and_flags(x, s, c_rbnode_flags(x) & ~C_RBNODE_RED);
+ x = s;
+ s = y;
+ }
+
+ /*
+ * Case 6:
+ * The right child of our sibling is red. Rotate left and flip
+ * colors, which gains us an additional black node in our path,
+ * that was previously on our sibling.
+ */
+ t = c_rbnode_pop_root(p);
+ g = c_rbnode_parent(p);
+ y = s->left;
+ c_rbtree_store(&p->right, y);
+ c_rbtree_store(&s->left, p);
+ c_rbnode_swap_child(p, s);
+ c_rbnode_set_parent_and_flags(x, s, c_rbnode_flags(x) & ~C_RBNODE_RED);
+ if (y)
+ c_rbnode_set_parent_and_flags(y, p, c_rbnode_flags(y));
+ c_rbnode_set_parent_and_flags(s, g, c_rbnode_flags(p));
+ c_rbnode_set_parent_and_flags(p, s, c_rbnode_flags(p) & ~C_RBNODE_RED);
+ c_rbnode_push_root(s, t);
+ } else /* if (previous == p->right) */ { /* same as above, but mirrored */
+ s = p->left;
+ if (c_rbnode_is_red(s)) { /* Case 2 (mirrored) */
+ t = c_rbnode_pop_root(p);
+ g = c_rbnode_parent(p);
+ x = s->right;
+ c_rbtree_store(&p->left, x);
+ c_rbtree_store(&s->right, p);
+ c_rbnode_swap_child(p, s);
+ c_rbnode_set_parent_and_flags(x, p, c_rbnode_flags(x) & ~C_RBNODE_RED);
+ c_rbnode_set_parent_and_flags(s, g, c_rbnode_flags(s) & ~C_RBNODE_RED);
+ c_rbnode_set_parent_and_flags(p, s, c_rbnode_flags(p) | C_RBNODE_RED);
+ c_rbnode_push_root(s, t);
+ s = x;
+ }
+
+ x = s->left;
+ if (!x || c_rbnode_is_black(x)) {
+ y = s->right;
+ if (!y || c_rbnode_is_black(y)) { /* Case 3+4 (mirrored) */
+ assert(c_rbnode_is_red(p));
+ c_rbnode_set_parent_and_flags(s, p, c_rbnode_flags(s) | C_RBNODE_RED);
+ c_rbnode_set_parent_and_flags(p, c_rbnode_parent(p), c_rbnode_flags(p) & ~C_RBNODE_RED);
+ return;
+ }
+
+ x = y->left; /* Case 5 (mirrored): rotate left on the sibling */
+ c_rbtree_store(&s->right, y->left);
+ c_rbtree_store(&y->left, s);
+ c_rbtree_store(&p->left, y);
+ if (x)
+ c_rbnode_set_parent_and_flags(x, s, c_rbnode_flags(x) & ~C_RBNODE_RED);
+ x = s;
+ s = y;
+ }
+
+ t = c_rbnode_pop_root(p); /* Case 6 (mirrored): rotate right on the parent */
+ g = c_rbnode_parent(p);
+ y = s->right;
+ c_rbtree_store(&p->left, y);
+ c_rbtree_store(&s->right, p);
+ c_rbnode_swap_child(p, s);
+ c_rbnode_set_parent_and_flags(x, s, c_rbnode_flags(x) & ~C_RBNODE_RED);
+ if (y)
+ c_rbnode_set_parent_and_flags(y, p, c_rbnode_flags(y));
+ c_rbnode_set_parent_and_flags(s, g, c_rbnode_flags(p));
+ c_rbnode_set_parent_and_flags(p, s, c_rbnode_flags(p) & ~C_RBNODE_RED);
+ c_rbnode_push_root(s, t);
+ }
+}
+
+static inline CRBNode *c_rbnode_rebalance_path(CRBNode *p, CRBNode **previous) {
+ CRBNode *s, *nl, *nr; /* sibling of *previous under @p, and the sibling's left/right child */
+
+ while (p) {
+ s = (*previous == p->left) ? p->right : p->left;
+ nl = s->left;
+ nr = s->right;
+
+ /*
+ * If the sibling under @p is black and exclusively has black
+ * children itself (i.e., nephews/nieces in @nl/@nr), then we
+ * can easily re-color to fix this sub-tree, and continue one
+ * layer up. However, if that's not the case, we have tree
+ * rotations at our hands to move one of the black nodes into
+ * our path, then turning the red node black to fully restore
+ * the RB-Tree invariants again. This fixup will be done by the
+ * caller, so we just let them know where to do that.
+ */
+ if (c_rbnode_is_red(s) ||
+ (nl && c_rbnode_is_red(nl)) ||
+ (nr && c_rbnode_is_red(nr)))
+ return p;
+
+ /*
+ * Case 3+4:
+ * Sibling is black, and all nephews/nieces are black. Flip
+ * sibling red. This way the sibling lost a black node in its
+ * path, thus getting even with our path. However, paths not
+ * going through @p haven't been fixed up, hence we proceed
+ * recursively one layer up.
+ * Before we continue one layer up, there are two possible
+ * terminations: If the parent is red, we can turn it black.
+ * This terminates the rebalancing, since the entire point of
+ * rebalancing is that everything below @p has one black node
+ * less than everything else. Lastly, if there is no layer
+ * above, we hit the tree root and nothing is left to be done.
+ */
+ c_rbnode_set_parent_and_flags(s, p, c_rbnode_flags(s) | C_RBNODE_RED);
+ if (c_rbnode_is_red(p)) {
+ c_rbnode_set_parent_and_flags(p, c_rbnode_parent(p), c_rbnode_flags(p) & ~C_RBNODE_RED);
+ return NULL;
+ }
+
+ *previous = p; /* remember which side we came from for the next layer (and the caller) */
+ p = c_rbnode_parent(p);
+ }
+
+ return NULL; /* walked past the root; nothing left to fix */
+}
+
+static inline void c_rbnode_rebalance(CRBNode *n) {
+ CRBNode *previous = NULL; /* the removed child's slot in @n holds NULL by now */
+
+ /*
+ * Rebalance a tree after a node was removed. This function must be
+ * called on the parent of the leaf that was removed. It will first
+ * perform a recursive re-coloring on the parents of @n, until it
+ * either hits the tree-root, or a condition where a tree-rotation is
+ * needed to restore the RB-Tree invariants. A NULL @n is a no-op.
+ */
+
+ n = c_rbnode_rebalance_path(n, &previous);
+ if (n)
+ c_rbnode_rebalance_terminal(n, previous);
+}
+
+/**
+ * c_rbnode_unlink_stale() - remove node from tree
+ * @n: node to remove
+ *
+ * This removes the given node from its tree. Once unlinked, the tree is
+ * rebalanced.
+ *
+ * This does *NOT* reset @n to being unlinked. If you need this, use
+ * c_rbnode_unlink().
+ */
+_public_ void c_rbnode_unlink_stale(CRBNode *n) {
+ CRBTree *t;
+
+ assert(n);
+ assert(c_rbnode_is_linked(n));
+
+ /*
+ * There are three distinct cases during node removal of a tree:
+ * * The node has no children, in which case it can simply be removed.
+ * * The node has exactly one child, in which case the child displaces
+ * its parent.
+ * * The node has two children, in which case there is guaranteed to
+ * be a successor to the node (successor being the node ordered
+ * directly after it). This successor is the leftmost descendant of
+ * the node's right child, so it cannot have a left child of its own.
+ * Therefore, we can simply swap the node with its successor (including
+ * color) and remove the node from its new place, which will be one of
+ * the first two cases.
+ *
+ * Whenever the node we removed was black, we have to rebalance the
+ * tree. Note that this affects the actual node we _remove_, not @n (in
+ * case we swap it).
+ */
+
+ if (!n->left && !n->right) {
+ /*
+ * Case 1.0
+ * The node has no children, it is a leaf-node and we
+ * can simply unlink it. If it was also black, we have
+ * to rebalance.
+ */
+ t = c_rbnode_pop_root(n);
+ c_rbnode_swap_child(n, NULL);
+ c_rbnode_push_root(NULL, t); /* if @n was the root, the tree is empty now */
+
+ if (c_rbnode_is_black(n))
+ c_rbnode_rebalance(c_rbnode_parent(n));
+ } else if (!n->left && n->right) {
+ /*
+ * Case 1.1:
+ * The node has exactly one child, and it is on the
+ * right. The child *must* be red (otherwise, the right
+ * path has more black nodes than the non-existing left
+ * path), and the node to be removed must hence be
+ * black. We simply replace the node with its child,
+ * turning the red child black, and thus no rebalancing
+ * is required.
+ */
+ t = c_rbnode_pop_root(n);
+ c_rbnode_swap_child(n, n->right);
+ c_rbnode_set_parent_and_flags(n->right, c_rbnode_parent(n), c_rbnode_flags(n->right) & ~C_RBNODE_RED);
+ c_rbnode_push_root(n->right, t);
+ } else if (n->left && !n->right) {
+ /*
+ * Case 1.2:
+ * The node has exactly one child, and it is on the left. Treat
+ * it as mirrored case of Case 1.1 (i.e., replace the node by
+ * its child).
+ */
+ t = c_rbnode_pop_root(n);
+ c_rbnode_swap_child(n, n->left);
+ c_rbnode_set_parent_and_flags(n->left, c_rbnode_parent(n), c_rbnode_flags(n->left) & ~C_RBNODE_RED);
+ c_rbnode_push_root(n->left, t);
+ } else /* if (n->left && n->right) */ {
+ CRBNode *s, *p, *c, *next = NULL;
+
+ /* Cache possible tree-root during tree-rotations. */
+ t = c_rbnode_pop_root(n);
+
+ /*
+ * Case 1.3:
+ * We are dealing with a full interior node with a child on
+ * both sides. We want to find its successor and swap it,
+ * then remove the node similar to Case 1. For performance
+ * reasons we don't perform the full swap, but skip links
+ * that are about to be removed, anyway.
+ *
+ * First locate the successor, remember its child and the
+ * parent the original node should have been linked on,
+ * before being removed. Then link up both the successor's
+ * new children and old child.
+ *
+ * s: successor
+ * p: parent
+ * c: right (and only potential) child of successor
+ * next: next node to rebalance on
+ */
+ s = n->right;
+ if (!s->left) {
+ /*
+ * The immediate right child is the successor,
+ * the successor's right child remains linked
+ * as before.
+ */
+ p = s;
+ c = s->right;
+ } else {
+ s = c_rbnode_leftmost(s);
+ p = c_rbnode_parent(s);
+ c = s->right;
+
+ /*
+ * The new parent pointer of the successor's
+ * child is set below.
+ */
+ c_rbtree_store(&p->left, c);
+
+ c_rbtree_store(&s->right, n->right);
+ c_rbnode_set_parent_and_flags(n->right, s, c_rbnode_flags(n->right));
+ }
+
+ /*
+ * In both the above cases, the successor's left child
+ * needs to be replaced with the left child of the node
+ * that is being removed.
+ */
+ c_rbtree_store(&s->left, n->left);
+ c_rbnode_set_parent_and_flags(n->left, s, c_rbnode_flags(n->left));
+
+ /*
+ * As in cases 1.1 and 1.0 above, if successor was a
+ * black leaf, we need to rebalance the tree, otherwise
+ * it must have a red child, so simply recolor that black
+ * and continue. Note that @next must be stored here, as
+ * the original color of the successor is forgotten below.
+ */
+ if (c)
+ c_rbnode_set_parent_and_flags(c, p, c_rbnode_flags(c) & ~C_RBNODE_RED);
+ else
+ next = c_rbnode_is_black(s) ? p : NULL;
+
+ /*
+ * Update the successor, to inherit the parent and color
+ * from the node being removed.
+ */
+ if (c_rbnode_is_red(n))
+ c_rbnode_set_parent_and_flags(s, c_rbnode_parent(n), c_rbnode_flags(s) | C_RBNODE_RED);
+ else
+ c_rbnode_set_parent_and_flags(s, c_rbnode_parent(n), c_rbnode_flags(s) & ~C_RBNODE_RED);
+
+ /*
+ * Update the parent of the node being removed. Note that this
+ * needs to happen after the parent of the successor is set
+ * above, as that call would clear the root pointer, if set.
+ */
+ c_rbnode_swap_child(n, s);
+
+ /* Possibly restore saved tree-root. */
+ c_rbnode_push_root(s, t);
+
+ if (next)
+ c_rbnode_rebalance(next);
+ }
+}
diff --git a/shared/c-rbtree/src/c-rbtree.h b/shared/c-rbtree/src/c-rbtree.h
new file mode 100644
index 000000000..cb33fcf7a
--- /dev/null
+++ b/shared/c-rbtree/src/c-rbtree.h
@@ -0,0 +1,430 @@
+#pragma once
+
+/**
+ * Standalone Red-Black-Tree Implementation in Standard ISO-C11
+ *
+ * This library provides an RB-Tree API, that is fully implemented in ISO-C11
+ * and has no external dependencies. Furthermore, tree traversal, memory
+ * allocations, and key comparisons are completely controlled by the API user.
+ * The implementation only provides the RB-Tree specific rebalancing and
+ * coloring.
+ *
+ * A tree is represented by the "CRBTree" structure. It contains a *single*
+ * field, which is a pointer to the root node. If NULL, the tree is empty. If
+ * non-NULL, there is at least a single element in the tree.
+ *
+ * Each node of the tree is represented by the "CRBNode" structure. It has
+ * three fields. The @left and @right members can be accessed by the API user
+ * directly to traverse the tree. The third member is a combination of the
+ * parent pointer and a set of flags.
+ * API users are required to embed the CRBNode object into their own objects
+ * and then use offsetof() (i.e., container_of() and friends) to turn CRBNode
+ * pointers into pointers to their own structure.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <assert.h>
+#include <stdalign.h>
+#include <stddef.h>
+
+typedef struct CRBNode CRBNode;
+typedef struct CRBTree CRBTree;
+
+/* implementation detail */
+#define C_RBNODE_RED (0x1UL)
+#define C_RBNODE_ROOT (0x2UL)
+#define C_RBNODE_UNUSED3 (0x4UL)
+#define C_RBNODE_FLAG_MASK (0x7UL)
+
+/**
+ * struct CRBNode - Node of a Red-Black Tree
+ * @__parent_and_flags: internal state
+ * @left: left child, or NULL
+ * @right: right child, or NULL
+ *
+ * Each node in an RB-Tree must embed a CRBNode object. This object contains
+ * pointers to its left and right child, which can be freely accessed by the
+ * API user at any time. They are NULL, if the node does not have a left/right
+ * child.
+ *
+ * The @__parent_and_flags field must never be accessed directly. It encodes
+ * the pointer to the parent node, and the color of the node. Use the accessor
+ * functions instead.
+ *
+ * There is no reason to initialize a CRBNode object before linking it.
+ * However, if you need a boolean state that tells you whether the node is
+ * linked or not, you should initialize the node via c_rbnode_init() or
+ * C_RBNODE_INIT.
+ */
+struct CRBNode {
+ alignas(8) unsigned long __parent_and_flags; /* 8-byte alignment keeps the C_RBNODE_FLAG_MASK bits of node addresses zero */
+ CRBNode *left;
+ CRBNode *right;
+};
+
+#define C_RBNODE_INIT(_var) { .__parent_and_flags = (unsigned long)&(_var) }
+
+CRBNode *c_rbnode_leftmost(CRBNode *n);
+CRBNode *c_rbnode_rightmost(CRBNode *n);
+CRBNode *c_rbnode_leftdeepest(CRBNode *n);
+CRBNode *c_rbnode_rightdeepest(CRBNode *n);
+CRBNode *c_rbnode_next(CRBNode *n);
+CRBNode *c_rbnode_prev(CRBNode *n);
+CRBNode *c_rbnode_next_postorder(CRBNode *n);
+CRBNode *c_rbnode_prev_postorder(CRBNode *n);
+
+void c_rbnode_link(CRBNode *p, CRBNode **l, CRBNode *n);
+void c_rbnode_unlink_stale(CRBNode *n);
+
+/**
+ * struct CRBTree - Red-Black Tree
+ * @root: pointer to the root node, or NULL
+ *
+ * Each Red-Black Tree is rooted in a CRBTree object. This object contains a
+ * pointer to the root node of the tree. The API user is free to access the
+ * @root member at any time, and use it to traverse the tree.
+ *
+ * To initialize an RB-Tree, set it to NULL / all zero.
+ */
+struct CRBTree {
+ alignas(8) CRBNode *root; /* same alignment as CRBNode, so flag bits can be OR'ed into a tree address */
+};
+
+#define C_RBTREE_INIT {}
+
+CRBNode *c_rbtree_first(CRBTree *t);
+CRBNode *c_rbtree_last(CRBTree *t);
+CRBNode *c_rbtree_first_postorder(CRBTree *t);
+CRBNode *c_rbtree_last_postorder(CRBTree *t);
+
+void c_rbtree_move(CRBTree *to, CRBTree *from);
+void c_rbtree_add(CRBTree *t, CRBNode *p, CRBNode **l, CRBNode *n);
+
+/**
+ * c_rbnode_init() - mark a node as unlinked
+ * @n: node to operate on
+ *
+ * This marks the node @n as unlinked. The node will be set to a valid state
+ * that can never happen if the node is linked in a tree. Furthermore, this
+ * state is fully known to the implementation, and as such handled gracefully
+ * in all cases.
+ *
+ * You are *NOT* required to call this on your node. c_rbtree_add() can handle
+ * uninitialized nodes just fine. However, calling this allows to use
+ * c_rbnode_is_linked() to check for the state of a node. Furthermore,
+ * iterators and accessors can be called on initialized (yet unlinked) nodes.
+ *
+ * Use the C_RBNODE_INIT macro if you want to initialize static variables.
+ */
+static inline void c_rbnode_init(CRBNode *n) {
+ *n = (CRBNode)C_RBNODE_INIT(*n); /* parent := @n itself, no flags, no children */
+}
+
+/**
+ * c_rbnode_entry() - get parent container of tree node
+ * @_what: tree node, or NULL
+ * @_t: type of parent container
+ * @_m: member name of tree node in @_t
+ *
+ * If the tree node @_what is embedded into a surrounding structure, this will
+ * turn the tree node pointer @_what into a pointer to the parent container
+ * (using offsetof(3), or sometimes called container_of(3)).
+ *
+ * If @_what is NULL, this will also return NULL (relies on the GNU `?:`).
+ *
+ * Return: Pointer to parent container, or NULL.
+ */
+#define c_rbnode_entry(_what, _t, _m) \
+ ((_t *)(void *)(((unsigned long)(void *)(_what) ?: \
+ offsetof(_t, _m)) - offsetof(_t, _m)))
+
+/**
+ * c_rbnode_parent() - return parent pointer
+ * @n: node to access
+ *
+ * This returns a pointer to the parent of the given node @n. If @n is flagged
+ * as root node, it has no parent and NULL is returned. If @n is initialized
+ * but not linked, @n itself is returned.
+ *
+ * You should not call this on unlinked or uninitialized nodes! If you do, you
+ * better know its semantics.
+ *
+ * Return: Pointer to parent.
+ */
+static inline CRBNode *c_rbnode_parent(CRBNode *n) {
+        if (n->__parent_and_flags & C_RBNODE_ROOT)
+                return NULL;
+        return (void *)(n->__parent_and_flags & ~C_RBNODE_FLAG_MASK);
+}
+
+/**
+ * c_rbnode_is_linked() - check whether a node is linked
+ * @n: node to check, or NULL
+ *
+ * This checks whether the passed node is linked. If you pass NULL, or if the
+ * node is not linked into a tree, this will return false. Otherwise, this
+ * returns true.
+ *
+ * Note that you must have either linked the node or initialized it, before
+ * calling this function. Never call this function on uninitialized nodes.
+ * Furthermore, removing a node via c_rbnode_unlink_stale() does *NOT* mark the
+ * node as unlinked. You have to call c_rbnode_init() yourself after removal, or
+ * use the c_rbnode_unlink() helper.
+ *
+ * Return: true if the node is linked, false if not.
+ */
+static inline _Bool c_rbnode_is_linked(CRBNode *n) {
+ return n && c_rbnode_parent(n) != n; /* initialized-but-unlinked nodes are their own parent */
+}
+
+/**
+ * c_rbnode_unlink() - safely remove node from tree and reinitialize it
+ * @n: node to remove, or NULL
+ *
+ * This behaves like c_rbnode_unlink_stale(), with two additions that make it
+ * more convenient to use in many cases:
+ * - if @n is unlinked or NULL, this is a no-op
+ * - @n is reinitialized via c_rbnode_init() after being removed
+ */
+static inline void c_rbnode_unlink(CRBNode *n) {
+        if (!c_rbnode_is_linked(n))
+                return;
+        c_rbnode_unlink_stale(n);
+        c_rbnode_init(n);
+}
+
+/**
+ * c_rbtree_init() - initialize a new RB-Tree
+ * @t: tree to operate on
+ *
+ * This initializes a new, empty RB-Tree. An RB-Tree must be initialized before
+ * any other functions are called on it. Alternatively, you can zero its memory
+ * or assign C_RBTREE_INIT.
+ */
+static inline void c_rbtree_init(CRBTree *t) {
+ *t = (CRBTree)C_RBTREE_INIT; /* empty tree: @root becomes NULL */
+}
+
+/**
+ * c_rbtree_is_empty() - check whether an RB-tree is empty
+ * @t: tree to operate on
+ *
+ * A tree is empty if, and only if, its @root pointer is NULL.
+ *
+ * Return: True if tree is empty, false otherwise.
+ */
+static inline _Bool c_rbtree_is_empty(CRBTree *t) {
+        return t->root == NULL;
+}
+
+/**
+ * CRBCompareFunc - compare a node to a key
+ * @t: tree where the node is linked to
+ * @k: key to compare
+ * @n: node to compare
+ *
+ * If you use the tree-traversal helpers (which are optional), you need to
+ * provide this callback so they can compare nodes in a tree to the key you
+ * look for.
+ *
+ * The tree @t is provided as optional context to this callback. The key you
+ * look for is provided as @k, the current node that should be compared to is
+ * provided as @n. This function should work like strcmp(), that is, return <0
+ * if @k orders before @n, 0 if both compare equal, and >0 if it orders after
+ * @n.
+ */
+typedef int (*CRBCompareFunc) (CRBTree *t, void *k, CRBNode *n);
+
+/**
+ * c_rbtree_find_node() - look up a node by key
+ * @t:          tree to search
+ * @f:          comparison callback
+ * @k:          key to look for
+ *
+ * Descends from the root of @t, asking @f at each node how @k orders relative
+ * to it: a negative result continues in the left subtree, a positive one in
+ * the right subtree, and 0 ends the search with that node. See CRBCompareFunc
+ * for the callback contract.
+ *
+ * Only the first match met on the way down is reported. If the tree holds
+ * several nodes that compare equal to @k, which of them is returned is
+ * unspecified; write your own lookup if that matters.
+ *
+ * Return: The matching node, or NULL if none was found.
+ */
+static inline CRBNode *c_rbtree_find_node(CRBTree *t, CRBCompareFunc f, const void *k) {
+        CRBNode *cur;
+        int order;
+
+        assert(t);
+        assert(f);
+
+        for (cur = t->root; cur; cur = order < 0 ? cur->left : cur->right) {
+                order = f(t, (void *)k, cur);
+                if (order == 0)
+                        return cur;
+        }
+
+        return NULL;
+}
+
+/**
+ * c_rbtree_find_entry() - find entry
+ * @_t: tree to search through
+ * @_f: comparison function
+ * @_k: key to search for
+ * @_s: type of the structure that embeds the nodes
+ * @_m: name of the node-member in type @_s
+ *
+ * This is very similar to c_rbtree_find_node(), but instead of returning a
+ * pointer to the CRBNode, it returns a pointer to the surrounding object. This
+ * object must embed the CRBNode object. The type of the surrounding object
+ * must be given as @_s, and the name of the embedded CRBNode member as @_m.
+ *
+ * See c_rbtree_find_node() and c_rbnode_entry() for more details.
+ *
+ * Return: Pointer to found entry, NULL if not found.
+ */
+#define c_rbtree_find_entry(_t, _f, _k, _s, _m) \
+ c_rbnode_entry(c_rbtree_find_node((_t), (_f), (_k)), _s, _m)
+
+/**
+ * c_rbtree_find_slot() - locate the insertion point for a key
+ * @t:          tree to search
+ * @f:          comparison callback
+ * @k:          key that is about to be inserted
+ * @p:          output: parent of the slot, or the conflicting node
+ *
+ * Walks @t the same way c_rbtree_find_node() does, but keeps track of the
+ * link (the root pointer, or a left/right member) that was followed last.
+ * When the walk runs off the tree, that link is the empty slot where a node
+ * with key @k belongs; it is returned, and the node owning it is stored in
+ * @p (NULL if the slot is the root of an empty tree). Both values are in the
+ * form c_rbtree_add() expects.
+ *
+ * If a node comparing equal to @k is met, no slot is returned. Instead, the
+ * function yields NULL and leaves that node in @p.
+ *
+ * Duplicate keys are not supported by this helper; trees that need them
+ * require a custom insertion routine.
+ *
+ * Return: Address of the empty slot, or NULL if @k is already present.
+ */
+static inline CRBNode **c_rbtree_find_slot(CRBTree *t, CRBCompareFunc f, const void *k, CRBNode **p) {
+        CRBNode **slot, *cur;
+
+        assert(t);
+        assert(f);
+        assert(p);
+
+        *p = NULL;
+        for (slot = &t->root; (cur = *slot); ) {
+                int order = f(t, (void *)k, cur);
+
+                /* the visited node is reported both as parent and as conflict */
+                *p = cur;
+                if (order == 0)
+                        return NULL;
+
+                slot = order < 0 ? &cur->left : &cur->right;
+        }
+
+        return slot;
+}
+
+/**
+ * c_rbtree_for_each*() - iterators
+ *
+ * The c_rbtree_for_each*() macros provide simple for-loop wrappers to iterate
+ * an RB-Tree. They come in a set of flavours:
+ *
+ * - "entry": This combines c_rbnode_entry() with the loop iterator, so the
+ * iterator always has the type of the surrounding object, rather
+ * than CRBNode.
+ *
+ * - "safe": The loop iterator always keeps track of the next element to
+ * visit. This means, you can safely modify the current element,
+ * while retaining loop-integrity.
+ * You still must not touch any other entry of the tree. Otherwise,
+ * the loop-iterator will be corrupted. Also remember to only
+ * modify the tree in a way compatible with your iterator-order.
+ * That is, if you use in-order iteration (default), you can unlink
+ * your current object, including re-balancing the tree. However,
+ * if you use post-order, you must not trigger a tree rebalance
+ * operation, since it is not an invariant of post-order iteration.
+ *
+ * - "postorder": Rather than the default in-order iteration, this iterates
+ * the tree in post-order.
+ *
+ * - "unlink": This unlinks the current element from the tree before the loop
+ * code is run. Note that the tree is not rebalanced. That is,
+ * you must never break out of the loop. If you do so, the tree
+ * is corrupted.
+ */
+
+/*
+ * Plain in-order loop: @_iter starts at c_rbtree_first(@_tree) and is advanced
+ * with c_rbnode_next() until it is NULL. The step expression reads @_iter
+ * again after the loop body has run.
+ */
+#define c_rbtree_for_each(_iter, _tree) \
+ for (_iter = c_rbtree_first(_tree); \
+ _iter; \
+ _iter = c_rbnode_next(_iter))
+
+/*
+ * In-order loop over the objects embedding the nodes: every CRBNode is
+ * converted through c_rbnode_entry(), using the type of *@_iter and the
+ * member name @_m. The step expression dereferences @_iter (&_iter->_m).
+ */
+#define c_rbtree_for_each_entry(_iter, _tree, _m) \
+ for (_iter = c_rbnode_entry(c_rbtree_first(_tree), __typeof__(*_iter), _m); \
+ _iter; \
+ _iter = c_rbnode_entry(c_rbnode_next(&_iter->_m), __typeof__(*_iter), _m))
+
+/*
+ * In-order loop that fetches the successor into @_safe before the body runs;
+ * the step expression only reads @_safe, never @_iter.
+ * NOTE(review): c_rbnode_next() is invoked with NULL here when the tree is
+ * empty and when the walk ends, so it presumably accepts NULL - confirm in
+ * c-rbtree.c.
+ */
+#define c_rbtree_for_each_safe(_iter, _safe, _tree) \
+ for (_iter = c_rbtree_first(_tree), _safe = c_rbnode_next(_iter); \
+ _iter; \
+ _iter = _safe, _safe = c_rbnode_next(_safe))
+
+/*
+ * Entry flavour of the safe in-order loop. @_safe holds the entry following
+ * @_iter and is computed before the body runs. The explicit NULL checks make
+ * sure &_iter->_m / &_safe->_m are never formed from a NULL entry pointer.
+ */
+#define c_rbtree_for_each_entry_safe(_iter, _safe, _tree, _m) \
+ for (_iter = c_rbnode_entry(c_rbtree_first(_tree), __typeof__(*_iter), _m), \
+ _safe = _iter ? c_rbnode_entry(c_rbnode_next(&_iter->_m), __typeof__(*_iter), _m) : NULL; \
+ _iter; \
+ _iter = _safe, \
+ _safe = _safe ? c_rbnode_entry(c_rbnode_next(&_safe->_m), __typeof__(*_iter), _m) : NULL)
+
+/*
+ * Post-order loop: @_iter starts at c_rbtree_first_postorder(@_tree) and is
+ * advanced with c_rbnode_next_postorder() until it is NULL.
+ *
+ * The last line of the definition deliberately carries no line continuation,
+ * so the macro ends here regardless of what follows it in the file.
+ */
+#define c_rbtree_for_each_postorder(_iter, _tree) \
+        for (_iter = c_rbtree_first_postorder(_tree); \
+             _iter; \
+             _iter = c_rbnode_next_postorder(_iter))
+
+/*
+ * Post-order loop over the objects embedding the nodes; nodes are converted
+ * through c_rbnode_entry() with the type of *@_iter and the member name @_m.
+ */
+#define c_rbtree_for_each_entry_postorder(_iter, _tree, _m) \
+ for (_iter = c_rbnode_entry(c_rbtree_first_postorder(_tree), __typeof__(*_iter), _m); \
+ _iter; \
+ _iter = c_rbnode_entry(c_rbnode_next_postorder(&_iter->_m), __typeof__(*_iter), _m))
+
+/*
+ * Post-order loop that fetches the successor into @_safe before the body
+ * runs; the step expression only reads @_safe, never @_iter.
+ * NOTE(review): c_rbnode_next_postorder() is invoked with NULL here on an
+ * empty tree and at the end of the walk - confirm it accepts NULL.
+ */
+#define c_rbtree_for_each_safe_postorder(_iter, _safe, _tree) \
+ for (_iter = c_rbtree_first_postorder(_tree), _safe = c_rbnode_next_postorder(_iter); \
+ _iter; \
+ _iter = _safe, _safe = c_rbnode_next_postorder(_safe))
+
+/*
+ * Entry flavour of the safe post-order loop. @_safe holds the entry following
+ * @_iter in post-order and is computed before the body runs; the NULL checks
+ * avoid forming &_iter->_m / &_safe->_m from a NULL entry pointer.
+ */
+#define c_rbtree_for_each_entry_safe_postorder(_iter, _safe, _tree, _m) \
+ for (_iter = c_rbnode_entry(c_rbtree_first_postorder(_tree), __typeof__(*_iter), _m), \
+ _safe = _iter ? c_rbnode_entry(c_rbnode_next_postorder(&_iter->_m), __typeof__(*_iter), _m) : NULL; \
+ _iter; \
+ _iter = _safe, \
+ _safe = _safe ? c_rbnode_entry(c_rbnode_next_postorder(&_safe->_m), __typeof__(*_iter), _m) : NULL)
+
+/*
+ * Post-order loop that tears the tree down while walking it. The loop
+ * condition does the work:
+ *  - while @_iter is non-NULL, the node is overwritten with C_RBNODE_INIT
+ *    before the body runs, so the body sees it in the initialized state;
+ *  - once @_iter is NULL, the root of @_tree is set to NULL and the loop ends.
+ * The root is only cleared when the loop runs to completion.
+ *
+ * The last line of the definition deliberately carries no line continuation,
+ * so the macro ends here regardless of what follows it in the file.
+ */
+#define c_rbtree_for_each_safe_postorder_unlink(_iter, _safe, _tree) \
+        for (_iter = c_rbtree_first_postorder(_tree), _safe = c_rbnode_next_postorder(_iter); \
+             _iter ? ((*_iter = (CRBNode)C_RBNODE_INIT(*_iter)), 1) : (((_tree)->root = NULL), 0); \
+             _iter = _safe, _safe = c_rbnode_next_postorder(_safe))
+
+/*
+ * Entry flavour of the unlinking post-order loop. While @_iter is non-NULL,
+ * its member @_m is overwritten with C_RBNODE_INIT before the body runs; once
+ * @_iter is NULL, the root of @_tree is set to NULL and the loop ends. The
+ * root is only cleared when the loop runs to completion.
+ */
+#define c_rbtree_for_each_entry_safe_postorder_unlink(_iter, _safe, _tree, _m) \
+ for (_iter = c_rbnode_entry(c_rbtree_first_postorder(_tree), __typeof__(*_iter), _m), \
+ _safe = _iter ? c_rbnode_entry(c_rbnode_next_postorder(&_iter->_m), __typeof__(*_iter), _m) : NULL; \
+ _iter ? ((_iter->_m = (CRBNode)C_RBNODE_INIT(_iter->_m)), 1) : (((_tree)->root = NULL), 0); \
+ _iter = _safe, \
+ _safe = _safe ? c_rbnode_entry(c_rbnode_next_postorder(&_safe->_m), __typeof__(*_iter), _m) : NULL)
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/shared/c-rbtree/src/libcrbtree.sym b/shared/c-rbtree/src/libcrbtree.sym
new file mode 100644
index 000000000..e7b801b81
--- /dev/null
+++ b/shared/c-rbtree/src/libcrbtree.sym
@@ -0,0 +1,21 @@
+/*
+ * Version node for the shared library: the symbols listed under "global" are
+ * exported, everything else is matched by the "local" wildcard.
+ */
+LIBCRBTREE_3 {
+global:
+ c_rbnode_leftmost;
+ c_rbnode_rightmost;
+ c_rbnode_leftdeepest;
+ c_rbnode_rightdeepest;
+ c_rbnode_next;
+ c_rbnode_prev;
+ c_rbnode_next_postorder;
+ c_rbnode_prev_postorder;
+ c_rbnode_link;
+ c_rbnode_unlink_stale;
+ c_rbtree_first;
+ c_rbtree_last;
+ c_rbtree_first_postorder;
+ c_rbtree_last_postorder;
+ c_rbtree_add;
+ c_rbtree_move;
+local:
+ *;
+};
diff --git a/shared/c-rbtree/src/meson.build b/shared/c-rbtree/src/meson.build
new file mode 100644
index 000000000..47ccf63aa
--- /dev/null
+++ b/shared/c-rbtree/src/meson.build
@@ -0,0 +1,69 @@
+#
+# target: libcrbtree.so
+#
+
+# Absolute path of the linker version script; used below both as a link
+# dependency and as the argument of --version-script.
+libcrbtree_symfile = join_paths(meson.current_source_dir(), 'libcrbtree.sym')
+
+# Static, position-independent build of the sources with hidden default
+# visibility. Its objects are reused for the shared library below.
+libcrbtree_private = static_library(
+ 'crbtree-private',
+ [
+ 'c-rbtree.c',
+ ],
+ c_args: [
+ '-fvisibility=hidden',
+ '-fno-common',
+ ],
+ pic: true,
+)
+
+# Shared library assembled from the objects of the private static library.
+# It is only installed when this project is not built as a subproject, and
+# it is linked with the version script above.
+libcrbtree_shared = shared_library(
+ 'crbtree',
+ objects: libcrbtree_private.extract_all_objects(),
+ install: not meson.is_subproject(),
+ soversion: 0,
+ link_depends: libcrbtree_symfile,
+ link_args: [
+ '-Wl,--no-undefined',
+ '-Wl,--version-script=@0@'.format(libcrbtree_symfile),
+ ],
+)
+
+# Dependency object for in-tree users: adds this directory to the include
+# path and links against the private static library (not the shared one).
+libcrbtree_dep = declare_dependency(
+ include_directories: include_directories('.'),
+ link_with: libcrbtree_private,
+ version: meson.project_version(),
+)
+
+# Public header and pkg-config file are only installed for standalone builds.
+if not meson.is_subproject()
+ install_headers('c-rbtree.h')
+
+ mod_pkgconfig.generate(
+ libraries: libcrbtree_shared,
+ version: meson.project_version(),
+ name: 'libcrbtree',
+ filebase: 'libcrbtree',
+ description: project_description,
+ )
+endif
+
+#
+# target: test-*
+#
+
+# test-api is the only test linked against the shared library; all other
+# tests use libcrbtree_dep and therefore the private static library.
+test_api = executable('test-api', ['test-api.c'], link_with: libcrbtree_shared)
+test('API Symbol Visibility', test_api)
+
+test_basic = executable('test-basic', ['test-basic.c'], dependencies: libcrbtree_dep)
+test('Basic API Behavior', test_basic)
+
+test_map = executable('test-map', ['test-map.c'], dependencies: libcrbtree_dep)
+test('Generic Map', test_map)
+
+test_misc = executable('test-misc', ['test-misc.c'], dependencies: libcrbtree_dep)
+test('Miscellaneous', test_misc)
+
+test_parallel = executable('test-parallel', ['test-parallel.c'], dependencies: libcrbtree_dep)
+test('Lockless Parallel Readers', test_parallel)
+
+test_posix = executable('test-posix', ['test-posix.c'], dependencies: libcrbtree_dep)
+test('Posix tsearch(3p) Comparison', test_posix)
diff --git a/shared/c-rbtree/src/test-api.c b/shared/c-rbtree/src/test-api.c
new file mode 100644
index 000000000..55c37af6c
--- /dev/null
+++ b/shared/c-rbtree/src/test-api.c
@@ -0,0 +1,108 @@
+/*
+ * Tests for Public API
+ * This test, unlike the others, is linked against the real, distributed,
+ * shared library. Its sole purpose is to test for symbol availability.
+ */
+
+#undef NDEBUG
+#include
+#include
+#include
+#include
+
+#include "c-rbtree.h"
+
+/* Minimal object embedding a CRBNode; serves as entry type for the *_entry macros. */
+typedef struct TestNode {
+ CRBNode rb;
+} TestNode;
+
+/*
+ * Reference each public function and iterator macro at least once, on trivial
+ * input. The asserted results are secondary; per the file header, the purpose
+ * is to verify that the symbols are available when linking.
+ */
+static void test_api(void) {
+ CRBTree t = C_RBTREE_INIT, t2 = C_RBTREE_INIT;
+ CRBNode *i, *is, n = C_RBNODE_INIT(n), m = C_RBNODE_INIT(m);
+ TestNode *ie, *ies;
+
+ assert(c_rbtree_is_empty(&t));
+ assert(!c_rbnode_is_linked(&n));
+ assert(!c_rbnode_entry(NULL, TestNode, rb));
+
+ /* init, is_linked, add, link, {unlink{,_stale}} */
+
+ c_rbtree_add(&t, NULL, &t.root, &n);
+ assert(c_rbnode_is_linked(&n));
+
+ c_rbnode_link(&n, &n.left, &m);
+ assert(c_rbnode_is_linked(&m));
+
+ c_rbnode_unlink(&m);
+ assert(!c_rbnode_is_linked(&m));
+
+ /*
+ * NOTE(review): @n has not been unlinked since the first c_rbtree_add()
+ * above, so it is added a second time here - presumably harmless for a
+ * single-node tree; confirm.
+ */
+ c_rbtree_add(&t, NULL, &t.root, &n);
+ assert(c_rbnode_is_linked(&n));
+
+ c_rbnode_link(&n, &n.left, &m);
+ assert(c_rbnode_is_linked(&m));
+
+ c_rbnode_unlink_stale(&m);
+ assert(c_rbnode_is_linked(&m)); /* @m wasn't touched */
+
+ /* reset both nodes and the tree by hand; nothing is linked from here on */
+ c_rbnode_init(&n);
+ assert(!c_rbnode_is_linked(&n));
+
+ c_rbnode_init(&m);
+ assert(!c_rbnode_is_linked(&m));
+
+ c_rbtree_init(&t);
+ assert(c_rbtree_is_empty(&t));
+
+ /* move */
+
+ c_rbtree_move(&t2, &t);
+
+ /* first, last, leftmost, rightmost, next, prev */
+
+ assert(!c_rbtree_first(&t));
+ assert(!c_rbtree_last(&t));
+ assert(&n == c_rbnode_leftmost(&n));
+ assert(&n == c_rbnode_rightmost(&n));
+ assert(!c_rbnode_next(&n));
+ assert(!c_rbnode_prev(&n));
+
+ /* postorder traversal */
+
+ assert(!c_rbtree_first_postorder(&t));
+ assert(!c_rbtree_last_postorder(&t));
+ assert(&n == c_rbnode_leftdeepest(&n));
+ assert(&n == c_rbnode_rightdeepest(&n));
+ assert(!c_rbnode_next_postorder(&n));
+ assert(!c_rbnode_prev_postorder(&n));
+
+ /* iterators */
+
+ /* @t is empty, so none of the loop bodies below is expected to run */
+ c_rbtree_for_each(i, &t)
+ assert(!i);
+ c_rbtree_for_each_safe(i, is, &t)
+ assert(!i);
+ c_rbtree_for_each_entry(ie, &t, rb)
+ assert(!ie);
+ c_rbtree_for_each_entry_safe(ie, ies, &t, rb)
+ assert(!ie);
+
+ c_rbtree_for_each_postorder(i, &t)
+ assert(!i);
+ c_rbtree_for_each_safe_postorder(i, is, &t)
+ assert(!i);
+ c_rbtree_for_each_entry_postorder(ie, &t, rb)
+ assert(!ie);
+ c_rbtree_for_each_entry_safe_postorder(ie, ies, &t, rb)
+ assert(!ie);
+
+ c_rbtree_for_each_safe_postorder_unlink(i, is, &t)
+ assert(!i);
+ c_rbtree_for_each_entry_safe_postorder_unlink(ie, ies, &t, rb)
+ assert(!ie);
+}
+
+/* Entry point: run the API test once; command-line arguments are ignored. */
+int main(int argc, char **argv) {
+        (void)argc;
+        (void)argv;
+
+        test_api();
+
+        return 0;
+}
diff --git a/shared/c-rbtree/src/test-basic.c b/shared/c-rbtree/src/test-basic.c
new file mode 100644
index 000000000..534a10966
--- /dev/null
+++ b/shared/c-rbtree/src/test-basic.c
@@ -0,0 +1,239 @@
+/*
+ * Tests for Basic Tree Operations
+ * This test does some basic tree operations and verifies their correctness. It
+ * validates the RB-Tree invariants after each operation, to guarantee the
+ * stability of the tree.
+ *
+ * For testing purposes, we use the memory address of a node as its key, and
+ * order nodes in ascending order.
+ */
+
+#undef NDEBUG
+#include
+#include
+#include
+#include
+#include
+
+#include "c-rbtree.h"
+#include "c-rbtree-private.h"
+
+/*
+ * Walk @t in order and assert the red-black invariants listed below, the
+ * parent/child link consistency, and ascending node addresses (the address is
+ * the key in this test).
+ *
+ * Return: number of nodes visited.
+ */
+static size_t validate(CRBTree *t) {
+ unsigned int i_black, n_black;
+ CRBNode *n, *p, *o;
+ size_t count = 0;
+
+ assert(t);
+ assert(!t->root || c_rbnode_is_black(t->root));
+
+ /* traverse to left-most child, count black nodes */
+ i_black = 0;
+ n = t->root;
+ while (n && n->left) {
+ if (c_rbnode_is_black(n))
+ ++i_black;
+ n = n->left;
+ }
+ /* reference value that every childless node is compared against below */
+ n_black = i_black;
+
+ /*
+ * Traverse tree and verify correctness:
+ * 1) A node is either red or black
+ * 2) The root is black
+ * 3) All leaves are black
+ * 4) Every red node must have two black child nodes
+ * 5) Every path to a leaf contains the same number of black nodes
+ *
+ * Note that NULL nodes are considered black, which is why we don't
+ * check for 3).
+ */
+ o = NULL;
+ while (n) {
+ ++count;
+
+ /* verify natural order */
+ assert(n > o);
+ o = n;
+
+ /* verify consistency */
+ assert(!n->right || c_rbnode_parent(n->right) == n);
+ assert(!n->left || c_rbnode_parent(n->left) == n);
+
+ /* verify 2) */
+ if (!c_rbnode_parent(n))
+ assert(c_rbnode_is_black(n));
+
+ if (c_rbnode_is_red(n)) {
+ /* verify 4) */
+ assert(!n->left || c_rbnode_is_black(n->left));
+ assert(!n->right || c_rbnode_is_black(n->right));
+ } else {
+ /* verify 1) */
+ assert(c_rbnode_is_black(n));
+ }
+
+ /* verify 5) */
+ if (!n->left && !n->right)
+ assert(i_black == n_black);
+
+ /* get next node */
+ if (n->right) {
+ /* descend: count every black node that is entered */
+ n = n->right;
+ if (c_rbnode_is_black(n))
+ ++i_black;
+
+ while (n->left) {
+ n = n->left;
+ if (c_rbnode_is_black(n))
+ ++i_black;
+ }
+ } else {
+ /* ascend: un-count every black node that is left behind */
+ while ((p = c_rbnode_parent(n)) && n == p->right) {
+ n = p;
+ if (c_rbnode_is_black(p->right))
+ --i_black;
+ }
+
+ n = p;
+ if (p && c_rbnode_is_black(p->left))
+ --i_black;
+ }
+ }
+
+ return count;
+}
+
+/*
+ * Link the unlinked node @n into @t, using the node address as key. Smaller
+ * addresses go to the left, larger ones to the right; meeting @n itself in
+ * the tree trips an assertion.
+ */
+static void insert(CRBTree *t, CRBNode *n) {
+        CRBNode **slot, *parent = NULL;
+
+        assert(t);
+        assert(n);
+        assert(!c_rbnode_is_linked(n));
+
+        for (slot = &t->root; *slot; ) {
+                parent = *slot;
+                if (n < parent) {
+                        slot = &parent->left;
+                } else {
+                        assert(n > parent);
+                        slot = &parent->right;
+                }
+        }
+
+        c_rbtree_add(t, parent, slot, n);
+}
+
+/*
+ * Permute the first @n_memb pointers of @nodes in place: each position is
+ * swapped once with a position picked via rand() from the whole range.
+ */
+static void shuffle(CRBNode **nodes, size_t n_memb) {
+        unsigned int pos;
+
+        for (pos = 0; pos < n_memb; ++pos) {
+                unsigned int other = rand() % n_memb;
+                CRBNode *held = nodes[other];
+
+                nodes[other] = nodes[pos];
+                nodes[pos] = held;
+        }
+}
+
+/*
+ * Insert and remove 512 heap-allocated nodes in shuffled orders, calling
+ * validate() after every single insertion and removal and checking the node
+ * count it reports.
+ */
+static void test_shuffle(void) {
+ CRBNode *nodes[512];
+ CRBTree t = {};
+ unsigned int i, j;
+ size_t n;
+
+ /* allocate and initialize all nodes */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i) {
+ nodes[i] = malloc(sizeof(*nodes[i]));
+ assert(nodes[i]);
+ c_rbnode_init(nodes[i]);
+ }
+
+ /* shuffle nodes and validate *empty* tree */
+ shuffle(nodes, sizeof(nodes) / sizeof(*nodes));
+ n = validate(&t);
+ assert(n == 0);
+
+ /* add all nodes and validate after each insertion */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i) {
+ insert(&t, nodes[i]);
+ n = validate(&t);
+ assert(n == i + 1);
+ }
+
+ /* shuffle nodes again */
+ shuffle(nodes, sizeof(nodes) / sizeof(*nodes));
+
+ /* remove all nodes (in different order) and validate on each round */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i) {
+ c_rbnode_unlink(nodes[i]);
+ n = validate(&t);
+ assert(n == sizeof(nodes) / sizeof(*nodes) - i - 1);
+ }
+
+ /* shuffle nodes and validate *empty* tree again */
+ shuffle(nodes, sizeof(nodes) / sizeof(*nodes));
+ n = validate(&t);
+ assert(n == 0);
+
+ /* add all nodes again */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i) {
+ insert(&t, nodes[i]);
+ n = validate(&t);
+ assert(n == i + 1);
+ }
+
+ /* 4 times, remove half of the nodes and add them again */
+ for (j = 0; j < 4; ++j) {
+ /* shuffle nodes again */
+ shuffle(nodes, sizeof(nodes) / sizeof(*nodes));
+
+ /* remove half of the nodes */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes) / 2; ++i) {
+ c_rbnode_unlink(nodes[i]);
+ n = validate(&t);
+ assert(n == sizeof(nodes) / sizeof(*nodes) - i - 1);
+ }
+
+ /* shuffle the removed half */
+ shuffle(nodes, sizeof(nodes) / sizeof(*nodes) / 2);
+
+ /* add the removed half again */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes) / 2; ++i) {
+ insert(&t, nodes[i]);
+ n = validate(&t);
+ assert(n == sizeof(nodes) / sizeof(*nodes) / 2 + i + 1);
+ }
+ }
+
+ /* shuffle nodes again */
+ shuffle(nodes, sizeof(nodes) / sizeof(*nodes));
+
+ /* remove all */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i) {
+ c_rbnode_unlink(nodes[i]);
+ n = validate(&t);
+ assert(n == sizeof(nodes) / sizeof(*nodes) - i - 1);
+ }
+
+ /* free nodes again */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i)
+ free(nodes[i]);
+}
+
+/*
+ * Entry point: seed rand() with a constant so runs are reproducible, then
+ * execute the shuffle test four times. As the generator state carries over,
+ * every round works on different orders.
+ */
+int main(int argc, char **argv) {
+        unsigned int round = 0;
+
+        srand(0xdeadbeef);
+
+        while (round < 4) {
+                test_shuffle();
+                ++round;
+        }
+
+        return 0;
+}
diff --git a/shared/c-rbtree/src/test-map.c b/shared/c-rbtree/src/test-map.c
new file mode 100644
index 000000000..3601ee495
--- /dev/null
+++ b/shared/c-rbtree/src/test-map.c
@@ -0,0 +1,277 @@
+/*
+ * RB-Tree based Map
+ * This implements a basic Map between integer keys and objects. It uses the
+ * lookup and insertion helpers, rather than open-coding it.
+ */
+
+#undef NDEBUG
+#include
+#include
+#include
+#include
+#include
+
+#include "c-rbtree.h"
+#include "c-rbtree-private.h"
+
+/* Map entry used by this test. */
+typedef struct {
+ unsigned long key; /* lookup key; compared by test_compare() */
+ unsigned int marker; /* visit flag toggled by the traversal checks in test_map() */
+ CRBNode rb; /* tree linkage */
+} Node;
+
+/* Get the Node embedding @_rb by subtracting offsetof(Node, rb); no NULL check is done. */
+#define node_from_rb(_rb) ((Node *)((char *)(_rb) - offsetof(Node, rb)))
+
+/*
+ * CRBCompareFunc for the map: @k carries the integer key by value (cast to a
+ * pointer), @n is the rb-member of a Node. Returns -1, 0 or 1 depending on
+ * whether the key is smaller than, equal to, or greater than the node's key.
+ */
+static int test_compare(CRBTree *t, void *k, CRBNode *n) {
+        const unsigned long wanted = (unsigned long)k;
+        const unsigned long have = node_from_rb(n)->key;
+
+        if (wanted < have)
+                return -1;
+        if (wanted > have)
+                return 1;
+
+        return 0;
+}
+
+/*
+ * Permute the first @n_memb pointers of @nodes in place: each position is
+ * swapped once with a position picked via rand() from the whole range.
+ */
+static void shuffle(Node **nodes, size_t n_memb) {
+        unsigned int pos, pick;
+        Node *tmp;
+
+        pos = 0;
+        while (pos < n_memb) {
+                pick = rand() % n_memb;
+
+                tmp = nodes[pick];
+                nodes[pick] = nodes[pos];
+                nodes[pos] = tmp;
+
+                ++pos;
+        }
+}
+
+/*
+ * Build a map of 2048 nodes keyed 0..2047 through the find_slot/add helpers,
+ * check every traversal flavour (direct calls and iterator macros) using the
+ * per-node marker as a visit flag, and then empty and refill the tree once
+ * for each removal style.
+ */
+static void test_map(void) {
+ CRBNode **slot, *p, *safe_p;
+ CRBTree t = {};
+ Node *n, *safe_n, *nodes[2048];
+ unsigned long i, v;
+
+ /* allocate and initialize all nodes */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i) {
+ nodes[i] = malloc(sizeof(*nodes[i]));
+ assert(nodes[i]);
+ nodes[i]->key = i;
+ nodes[i]->marker = 0;
+ c_rbnode_init(&nodes[i]->rb);
+ }
+
+ /* shuffle nodes */
+ shuffle(nodes, sizeof(nodes) / sizeof(*nodes));
+
+ /* add all nodes, and verify that each node is linked */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i) {
+ assert(!c_rbnode_is_linked(&nodes[i]->rb));
+ assert(!c_rbtree_find_entry(&t, test_compare, (void *)nodes[i]->key, Node, rb));
+
+ slot = c_rbtree_find_slot(&t, test_compare, (void *)nodes[i]->key, &p);
+ assert(slot);
+ c_rbtree_add(&t, p, slot, &nodes[i]->rb);
+
+ assert(c_rbnode_is_linked(&nodes[i]->rb));
+ assert(nodes[i] == c_rbtree_find_entry(&t, test_compare, (void *)nodes[i]->key, Node, rb));
+ }
+
+ /* verify in-order traversal works */
+ i = 0;
+ v = 0;
+ for (p = c_rbtree_first(&t); p; p = c_rbnode_next(p)) {
+ ++i;
+ assert(!node_from_rb(p)->marker);
+ node_from_rb(p)->marker = 1;
+
+ assert(v <= node_from_rb(p)->key);
+ v = node_from_rb(p)->key;
+
+ assert(!c_rbnode_next(p) || p == c_rbnode_prev(c_rbnode_next(p)));
+ }
+ assert(i == sizeof(nodes) / sizeof(*nodes));
+
+ /* verify reverse in-order traversal works */
+ i = 0;
+ /* -1 converts to the largest unsigned long, an upper bound for any key */
+ v = -1;
+ for (p = c_rbtree_last(&t); p; p = c_rbnode_prev(p)) {
+ ++i;
+ assert(node_from_rb(p)->marker);
+ node_from_rb(p)->marker = 0;
+
+ assert(v >= node_from_rb(p)->key);
+ v = node_from_rb(p)->key;
+ }
+ assert(i == sizeof(nodes) / sizeof(*nodes));
+
+ /* verify post-order traversal works */
+ i = 0;
+ for (p = c_rbtree_first_postorder(&t); p; p = c_rbnode_next_postorder(p)) {
+ ++i;
+ assert(!node_from_rb(p)->marker);
+ assert(!c_rbnode_parent(p) || !node_from_rb(c_rbnode_parent(p))->marker);
+ assert(!p->left || node_from_rb(p->left)->marker);
+ assert(!p->right || node_from_rb(p->right)->marker);
+ node_from_rb(p)->marker = 1;
+
+ assert(!c_rbnode_next_postorder(p) || p == c_rbnode_prev_postorder(c_rbnode_next_postorder(p)));
+ }
+ assert(i == sizeof(nodes) / sizeof(*nodes));
+
+ /* verify pre-order (inverse post-order) traversal works */
+ i = 0;
+ for (p = c_rbtree_last_postorder(&t); p; p = c_rbnode_prev_postorder(p)) {
+ ++i;
+ assert(node_from_rb(p)->marker);
+ assert(!c_rbnode_parent(p) || !node_from_rb(c_rbnode_parent(p))->marker);
+ assert(!p->left || node_from_rb(p->left)->marker);
+ assert(!p->right || node_from_rb(p->right)->marker);
+ node_from_rb(p)->marker = 0;
+ }
+ assert(i == sizeof(nodes) / sizeof(*nodes));
+
+ /* verify in-order traversal works via helper */
+ i = 0;
+ v = 0;
+ c_rbtree_for_each(p, &t) {
+ ++i;
+ assert(!node_from_rb(p)->marker);
+ node_from_rb(p)->marker = 1;
+
+ assert(v <= node_from_rb(p)->key);
+ v = node_from_rb(p)->key;
+
+ assert(!c_rbnode_next(p) || p == c_rbnode_prev(c_rbnode_next(p)));
+ }
+ assert(i == sizeof(nodes) / sizeof(*nodes));
+
+ /* verify in-order traversal works via entry-helper */
+ i = 0;
+ v = 0;
+ c_rbtree_for_each_entry(n, &t, rb) {
+ ++i;
+ assert(n->marker);
+ n->marker = 0;
+
+ assert(v <= n->key);
+ v = n->key;
+ }
+ assert(i == sizeof(nodes) / sizeof(*nodes));
+
+ /* verify post-order traversal works via helper */
+ i = 0;
+ c_rbtree_for_each_postorder(p, &t) {
+ ++i;
+ assert(!node_from_rb(p)->marker);
+ assert(!c_rbnode_parent(p) || !node_from_rb(c_rbnode_parent(p))->marker);
+ assert(!p->left || node_from_rb(p->left)->marker);
+ assert(!p->right || node_from_rb(p->right)->marker);
+ node_from_rb(p)->marker = 1;
+
+ assert(!c_rbnode_next_postorder(p) || p == c_rbnode_prev_postorder(c_rbnode_next_postorder(p)));
+ }
+ assert(i == sizeof(nodes) / sizeof(*nodes));
+
+ /* verify post-order traversal works via entry-helper */
+ i = 0;
+ c_rbtree_for_each_entry_postorder(n, &t, rb) {
+ ++i;
+ assert(n->marker);
+ assert(!c_rbnode_parent(&n->rb) || node_from_rb(c_rbnode_parent(&n->rb))->marker);
+ assert(!n->rb.left || !node_from_rb(n->rb.left)->marker);
+ assert(!n->rb.right || !node_from_rb(n->rb.right)->marker);
+ n->marker = 0;
+ }
+ assert(i == sizeof(nodes) / sizeof(*nodes));
+
+ /* shuffle nodes again */
+ shuffle(nodes, sizeof(nodes) / sizeof(*nodes));
+
+ /* remove all nodes (in different order) */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i) {
+ assert(c_rbnode_is_linked(&nodes[i]->rb));
+ assert(nodes[i] == c_rbtree_find_entry(&t, test_compare, (void *)nodes[i]->key, Node, rb));
+
+ c_rbnode_unlink(&nodes[i]->rb);
+
+ assert(!c_rbnode_is_linked(&nodes[i]->rb));
+ assert(!c_rbtree_find_entry(&t, test_compare, (void *)nodes[i]->key, Node, rb));
+ }
+ assert(c_rbtree_is_empty(&t));
+
+ /* add all nodes again */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i) {
+ slot = c_rbtree_find_slot(&t, test_compare, (void *)nodes[i]->key, &p);
+ assert(slot);
+ c_rbtree_add(&t, p, slot, &nodes[i]->rb);
+ }
+
+ /* remove all nodes via helper */
+ i = 0;
+ c_rbtree_for_each_safe(p, safe_p, &t) {
+ ++i;
+ c_rbnode_unlink(p);
+ }
+ assert(i == sizeof(nodes) / sizeof(*nodes));
+ assert(c_rbtree_is_empty(&t));
+
+ /* add all nodes again */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i) {
+ slot = c_rbtree_find_slot(&t, test_compare, (void *)nodes[i]->key, &p);
+ assert(slot);
+ c_rbtree_add(&t, p, slot, &nodes[i]->rb);
+ }
+
+ /* remove all nodes via entry-helper */
+ i = 0;
+ c_rbtree_for_each_entry_safe(n, safe_n, &t, rb) {
+ ++i;
+ c_rbnode_unlink(&n->rb);
+ }
+ assert(i == sizeof(nodes) / sizeof(*nodes));
+ assert(c_rbtree_is_empty(&t));
+
+ /* add all nodes again */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i) {
+ slot = c_rbtree_find_slot(&t, test_compare, (void *)nodes[i]->key, &p);
+ assert(slot);
+ c_rbtree_add(&t, p, slot, &nodes[i]->rb);
+ }
+
+ /* remove all nodes via unlink-helper */
+ i = 0;
+ c_rbtree_for_each_safe_postorder_unlink(p, safe_p, &t) {
+ ++i;
+ assert(!c_rbnode_is_linked(p));
+ }
+ assert(i == sizeof(nodes) / sizeof(*nodes));
+ assert(c_rbtree_is_empty(&t));
+
+ /* add all nodes again */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i) {
+ slot = c_rbtree_find_slot(&t, test_compare, (void *)nodes[i]->key, &p);
+ assert(slot);
+ c_rbtree_add(&t, p, slot, &nodes[i]->rb);
+ }
+
+ /* remove all nodes via entry-unlink-helper */
+ i = 0;
+ c_rbtree_for_each_entry_safe_postorder_unlink(n, safe_n, &t, rb) {
+ ++i;
+ assert(!c_rbnode_is_linked(&n->rb));
+ }
+ assert(i == sizeof(nodes) / sizeof(*nodes));
+ assert(c_rbtree_is_empty(&t));
+
+ /* free nodes again */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i) {
+ assert(!nodes[i]->marker);
+ free(nodes[i]);
+ }
+
+ assert(c_rbtree_is_empty(&t));
+}
+
+/* Entry point: seed rand() with a constant for reproducible runs, then run the map test. */
+int main(int argc, char **argv) {
+        (void)argc;
+        (void)argv;
+
+        srand(0xdeadbeef);
+        test_map();
+
+        return 0;
+}
diff --git a/shared/c-rbtree/src/test-misc.c b/shared/c-rbtree/src/test-misc.c
new file mode 100644
index 000000000..e5b3289c3
--- /dev/null
+++ b/shared/c-rbtree/src/test-misc.c
@@ -0,0 +1,66 @@
+/*
+ * Tests for Miscellaneous Tree Operations
+ * This test contains all of the minor tests that did not fit anywhere else.
+ */
+
+#undef NDEBUG
+#include
+#include
+#include
+#include
+
+#include "c-rbtree.h"
+#include "c-rbtree-private.h"
+
+/*
+ * Link the unlinked node @n into @t with its own address as key: lower
+ * addresses descend to the left, higher ones to the right. Running into @n
+ * itself while descending trips an assertion.
+ */
+static void insert(CRBTree *t, CRBNode *n) {
+        CRBNode **link, *above;
+
+        assert(t);
+        assert(n);
+        assert(!c_rbnode_is_linked(n));
+
+        above = NULL;
+        link = &t->root;
+        while ((*link) != NULL) {
+                above = *link;
+                if (n < above) {
+                        link = &above->left;
+                        continue;
+                }
+
+                assert(n > above);
+                link = &above->right;
+        }
+
+        c_rbtree_add(t, above, link, n);
+}
+
+/*
+ * Check c_rbtree_move(): fill one tree with 128 stack nodes, move it into a
+ * second, empty tree, and verify which of the two is empty before the move,
+ * after the move, and after all nodes have been unlinked again.
+ */
+static void test_move(void) {
+        CRBTree t1 = C_RBTREE_INIT, t2 = C_RBTREE_INIT;
+        CRBNode n[128];
+        const unsigned int n_nodes = sizeof(n) / sizeof(*n);
+        unsigned int idx;
+
+        /* populate @t1 from freshly initialized nodes; @t2 stays empty */
+        for (idx = 0; idx < n_nodes; ++idx) {
+                n[idx] = (CRBNode)C_RBNODE_INIT(n[idx]);
+                insert(&t1, &n[idx]);
+        }
+
+        assert(!c_rbtree_is_empty(&t1));
+        assert(c_rbtree_is_empty(&t2));
+
+        c_rbtree_move(&t2, &t1);
+
+        /* the roles must have swapped */
+        assert(c_rbtree_is_empty(&t1));
+        assert(!c_rbtree_is_empty(&t2));
+
+        /* drain @t2 by repeatedly unlinking whatever node is the root */
+        while (!c_rbtree_is_empty(&t2))
+                c_rbnode_unlink(t2.root);
+
+        assert(c_rbtree_is_empty(&t1));
+        assert(c_rbtree_is_empty(&t2));
+}
+
+/* Entry point: run the miscellaneous tests; command-line arguments are ignored. */
+int main(int argc, char **argv) {
+        (void)argc;
+        (void)argv;
+
+        test_move();
+        return 0;
+}
diff --git a/shared/c-rbtree/src/test-parallel.c b/shared/c-rbtree/src/test-parallel.c
new file mode 100644
index 000000000..4513d9ece
--- /dev/null
+++ b/shared/c-rbtree/src/test-parallel.c
@@ -0,0 +1,384 @@
+/*
+ * Tests Lockless Tree Lookups
+ * The RB-Tree implementation supports lockless tree lookups on shared
+ * data-structures. While it does not guarantee correct results (you might skip
+ * entire sub-trees), it does guarantee valid behavior (the traversal is
+ * guaranteed to end and produce some valid result).
+ * This test uses ptrace to run tree operations step-by-step in a separate
+ * process, and after each instruction verify the pseudo-validity of the tree.
+ * This means, a tree must only have valid left/right pointers (or NULL), and
+ * must not contain any loops in those pointers.
+ *
+ * This test runs two processes with a shared context and tree. It runs them in
+ * this order:
+ *
+ *         |       PARENT       |   CHILD   |
+ *         +--------------------+-----------+
+ *         ~                    ~           ~
+ *          test_parent_start
+ *                               test_child1
+ *          test_parent_middle
+ *                               test_child2
+ *          test_parent_end
+ *         ~                    ~           ~
+ *         +--------------------+-----------+
+ *
+ * Additionally, on each TRAP of CHILD, the parent runs test_parent_step(). The
+ * ptrace infrastructure generates a TRAP after each instruction, so this test
+ * is very CPU aggressive in the parent.
+ */
+
+#undef NDEBUG
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "c-rbtree.h"
+#include "c-rbtree-private.h"
+
+typedef struct {
+ CRBNode rb; /* tree linkage; c_rbnode_entry() maps it back to the TestNode */
+ bool visited; /* marker set while the node is on the parent's traversal stack */
+} TestNode;
+
+typedef struct {
+ size_t mapsize; /* size in bytes of the shared mapping */
+ char *map; /* base of the MAP_SHARED region that holds everything below */
+ CRBTree *tree; /* tree under test, placed at the start of @map */
+ TestNode *node_mem; /* backing storage for the @n_nodes entries */
+ CRBNode **nodes; /* pointers into @node_mem, kept in shuffled order */
+ CRBNode **cache; /* scratch array, used by the parent as traversal stack */
+ size_t n_nodes; /* number of entries in @node_mem, @nodes and @cache */
+} TestContext;
+
+/* assertion for the traced child: a failure raises SIGKILL, which (unlike an abort) avoids a ptrace signal-stop */
+#define child_assert(_expr) ((void)(!!(_expr) ? 1 : (raise(SIGKILL), 0)))
+
+static int compare(CRBTree *t, void *k, CRBNode *n) {
+        return (int)((const char *)n - (const char *)k); /* order by address; the key is a node pointer itself */
+}
+
+static void shuffle(CRBNode **nodes, size_t n_memb) {
+        /* Permute @nodes in place: swap each slot with a rand()-chosen one. */
+        unsigned int pos = 0, pick;
+        CRBNode *held;
+        while (pos < n_memb) {
+                pick = rand() % n_memb;
+                held = nodes[pos];
+                nodes[pos++] = nodes[pick];
+                nodes[pick] = held;
+        }
+}
+
+static void toggle_visit(CRBNode *n, bool set) {
+ c_rbnode_entry(n, TestNode, rb)->visited = set; /* write the marker of the TestNode that embeds @n */
+}
+
+static bool fetch_visit(CRBNode *n) {
+ return c_rbnode_entry(n, TestNode, rb)->visited; /* read the marker of the TestNode that embeds @n */
+}
+
+static void test_child1(TestContext *ctx) {
+        /* Child phase 1: link every node into the shared tree, in @ctx->nodes order. */
+        CRBNode *parent, **slot, **cur, **end = ctx->nodes + ctx->n_nodes;
+
+        for (cur = ctx->nodes; cur != end; ++cur) {
+                child_assert(!c_rbnode_is_linked(*cur));
+                slot = c_rbtree_find_slot(ctx->tree, compare, *cur, &parent);
+                c_rbtree_add(ctx->tree, parent, slot, *cur);
+        }
+}
+
+static void test_child2(TestContext *ctx) {
+        /* Child phase 2: unlink every node again, in @ctx->nodes order. */
+        CRBNode **cur, **end = ctx->nodes + ctx->n_nodes;
+        for (cur = ctx->nodes; cur != end; ++cur) {
+                child_assert(c_rbnode_is_linked(*cur));
+                c_rbnode_unlink(*cur);
+        }
+}
+
+static void test_parent_start(TestContext *ctx) {
+        const size_t count = 32;
+        size_t idx;
+
+        /*
+         * Lay out one anonymous MAP_SHARED region, so parent and child see the
+         * very same memory after fork():
+         *
+         *   [ CRBTree | count * TestNode | count * CRBNode* | count * CRBNode* ]
+         *     tree      node_mem           nodes              cache
+         *
+         * @nodes is the shuffled access map into @node_mem, @cache is scratch.
+         */
+        ctx->n_nodes = count;
+        ctx->mapsize = sizeof(CRBTree)
+                       + count * sizeof(TestNode)
+                       + 2 * count * sizeof(CRBNode*);
+
+        ctx->map = mmap(NULL, ctx->mapsize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+        assert(ctx->map != MAP_FAILED);
+
+        ctx->tree = (void *)ctx->map;
+        ctx->node_mem = (void *)(ctx->tree + 1);
+        ctx->nodes = (void *)(ctx->node_mem + count);
+        ctx->cache = (void *)(ctx->nodes + count);
+
+        /* register every node in the access map and initialize it as unlinked */
+        for (idx = 0; idx < count; ++idx) {
+                ctx->nodes[idx] = &ctx->node_mem[idx].rb;
+                c_rbnode_init(ctx->nodes[idx]);
+        }
+
+        shuffle(ctx->nodes, count);
+}
+
+static void test_parent_middle(TestContext *ctx) {
+        size_t i;
+        /* Runs in the parent between both child phases: re-randomize the order used for removal. */
+        shuffle(ctx->nodes, ctx->n_nodes);
+        /* parent context, so use assert(): child_assert() would SIGKILL the test runner without any diagnostic */
+        for (i = 0; i < ctx->n_nodes; ++i)
+                assert(c_rbnode_is_linked(ctx->nodes[i]));
+}
+
+static void test_parent_end(TestContext *ctx) {
+        /* Final parent-side check: every node is unlinked again; then drop the mapping. */
+        CRBNode **cur, **end = ctx->nodes + ctx->n_nodes;
+        int rc;
+
+        for (cur = ctx->nodes; cur != end; ++cur)
+                assert(!c_rbnode_is_linked(*cur));
+        rc = munmap(ctx->map, ctx->mapsize);
+        assert(rc >= 0);
+}
+
+static void test_parent_step(TestContext *ctx) {
+ size_t i, i_level;
+ CRBNode *n, *p;
+ /* Parent-side check, run on every SIGTRAP stop of the child: walk whatever is reachable from the root. */
+ n = ctx->tree->root;
+ i_level = 0;
+ /* @ctx->cache is the stack of ancestors of @n (each flagged as visited), @i_level is its depth */
+ while (n) {
+ /* verify that we haven't visited @n, yet */
+ assert(!fetch_visit(n));
+ /* only nodes on the ancestor stack are flagged, so hitting a flagged node means a pointer loop */
+ /* verify @n is a valid node */
+ for (i = 0; i < ctx->n_nodes; ++i)
+ if (n == ctx->nodes[i])
+ break;
+ assert(i < ctx->n_nodes);
+ /* if the search above ran to the end, @n is not one of our nodes, i.e. a stray pointer */
+ /* pre-order traversal and marker for cycle detection */
+ if (n->left) {
+ toggle_visit(n, true);
+ ctx->cache[i_level++] = n;
+ n = n->left;
+ } else if (n->right) {
+ toggle_visit(n, true);
+ ctx->cache[i_level++] = n;
+ n = n->right;
+ } else { /* leaf: backtrack to the nearest ancestor whose right child is still pending */
+ while (i_level > 0) {
+ p = ctx->cache[i_level - 1];
+ if (p->right && n != p->right) { /* we came up from the left side; descend right, @p stays on the stack */
+ n = p->right;
+ break;
+ }
+ --i_level; /* @p is done: pop it and clear its marker */
+ n = p;
+ toggle_visit(n, false);
+ }
+ if (i_level == 0) /* stack drained: the whole tree was walked */
+ break;
+ }
+ }
+}
+
+static int test_parallel_child(TestContext *ctx) {
+        /*
+         * Body of the forked child. Returns the exit code the parent expects:
+         * 0xef after a complete run, 0xdf if tracing is not permitted.
+         */
+
+        /*
+         * Ask to be traced by the parent. EPERM means we either are traced
+         * already or lack the privilege to use ptrace; report that via the
+         * 0xdf exit code.
+         */
+        if (ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) {
+                if (errno == EPERM)
+                        return 0xdf;
+                child_assert(0);
+        }
+
+        /* stop #1 (SIGUSR1): tell the parent we are ready */
+        child_assert(raise(SIGUSR1) >= 0);
+
+        /* phase 1: insert all nodes */
+        test_child1(ctx);
+
+        /* stop #2 (SIGURG): let the parent re-shuffle the node order */
+        child_assert(raise(SIGURG) >= 0);
+
+        /* phase 2: remove all nodes */
+        test_child2(ctx);
+
+        /* stop #3 (SIGUSR2): announce the end of the run */
+        child_assert(raise(SIGUSR2) >= 0);
+
+        /* well-known exit code, verified by the parent */
+        return 0xef;
+}
+
+static int test_parallel(void) {
+ TestContext ctx = {};
+ int r, pid, status;
+ uint64_t n_instr, n_event;
+ /* Runs one traced child to completion; returns 0 on success, or 77 where the comments below say the test is skipped. */
+ /* create shared area for tree verification */
+ test_parent_start(&ctx);
+
+ /* run child */
+ pid = fork();
+ assert(pid >= 0);
+ if (pid == 0) {
+ r = test_parallel_child(&ctx);
+ _exit(r);
+ }
+
+ /*
+ * After setup, the child immediately enters TRACE-operation and raises
+ * SIGUSR1. Once continued, the child performs the pre-configured tree
+ * operations. When done, it raises SIGUSR2, and then exits.
+ *
+ * Here in the parent we catch all trace-stops of the child via waitpid
+ * until we get no more such stop-events. Based on the stop-event we
+ * get, we verify child-state, STEP it, or perform other state tracking.
+ * We repeat this as long as we catch trace-stops from the child.
+ */
+ n_instr = 0; /* number of SIGTRAP stops seen, i.e. single-steps verified */
+ n_event = 0; /* bitmask: 0x1 SIGUSR1 seen, 0x2 SIGURG seen, 0x4 SIGUSR2 seen */
+ for (r = waitpid(pid, &status, 0);
+ r == pid && WIFSTOPPED(status);
+ r = waitpid(pid, &status, 0)) {
+ /* note: @r is reused for ptrace() results below; the loop increment re-assigns it from waitpid() */
+ switch (WSTOPSIG(status)) {
+ case SIGUSR1: /* child is ready: start single-stepping */
+ n_event |= 0x1;
+
+ /* step child */
+ r = ptrace(PTRACE_SINGLESTEP, pid, 0, 0);
+
+ /*
+ * Some architectures (e.g., armv7hl) do not implement
+ * SINGLESTEP, but return EIO. Skip the entire test in
+ * this case.
+ */
+ if (r < 0 && errno == EIO)
+ return 77;
+
+ assert(r >= 0);
+ break;
+
+ case SIGURG: /* child finished inserting: verify and re-shuffle */
+ n_event |= 0x2;
+ test_parent_middle(&ctx);
+
+ /* step child */
+ r = ptrace(PTRACE_SINGLESTEP, pid, 0, 0);
+ assert(r >= 0);
+ break;
+
+ case SIGUSR2: /* child finished removing: verify and release the mapping */
+ n_event |= 0x4;
+ test_parent_end(&ctx);
+
+ /* continue child */
+ r = ptrace(PTRACE_CONT, pid, 0, 0);
+ assert(r >= 0);
+ break;
+
+ case SIGTRAP: /* one step was executed: check the tree as it is right now */
+ ++n_instr;
+ test_parent_step(&ctx);
+
+ /* step repeatedly as long as we get SIGTRAP */
+ r = ptrace(PTRACE_SINGLESTEP, pid, 0, 0);
+ assert(r >= 0);
+ break;
+
+ default: /* any other stop signal is unexpected */
+ assert(0);
+ break;
+ }
+ }
+
+ /* verify our child exited cleanly */
+ assert(r == pid);
+ assert(!!WIFEXITED(status));
+
+ /*
+ * 0xdf is signalled if ptrace is not allowed or we are already
+ * ptraced. In this case we skip the test.
+ *
+ * 0xef is signalled on success.
+ *
+ * In any other case something went wobbly and we should fail hard.
+ */
+ switch (WEXITSTATUS(status)) {
+ case 0xef:
+ break;
+ case 0xdf:
+ return 77;
+ default:
+ assert(0);
+ break;
+ }
+
+ /* verify we hit all child states */
+ assert(n_event & 0x1);
+ assert(n_event & 0x2);
+ assert(n_event & 0x4);
+ assert(n_instr > 0);
+
+ return 0;
+}
+
+int main(int argc, char **argv) {
+        unsigned int round = 0;
+        int rc = 0;
+
+        /* opt-in only: exit with 77 unless CRBTREE_TEST_PTRACE is set */
+        if (getenv("CRBTREE_TEST_PTRACE") == NULL)
+                return 77;
+
+        /* constant seed, so the pseudo-random orders repeat on every run */
+        srand(0xdeadbeef);
+
+        /*
+         * Run four rounds. The PRNG state carries over between rounds, so
+         * each one operates on a different node order. The first non-zero
+         * result stops the loop and becomes the exit code.
+         */
+        while (rc == 0 && round < 4) {
+                rc = test_parallel();
+                ++round;
+        }
+        return rc;
+}
diff --git a/shared/c-rbtree/src/test-posix.c b/shared/c-rbtree/src/test-posix.c
new file mode 100644
index 000000000..213d85fef
--- /dev/null
+++ b/shared/c-rbtree/src/test-posix.c
@@ -0,0 +1,270 @@
+/*
+ * Tests to compare against POSIX RB-Trees
+ * POSIX provides balanced binary trees via the tsearch(3p) API. glibc
+ * implements them as RB-Trees. This file compares the performance of both.
+ *
+ * The semantic differences are:
+ *
+ * o The tsearch(3p) API does memory allocation of node structures itself,
+ * rather than allowing the caller to embed it.
+ *
+ * o The c-rbtree API exposes the tree structure, allowing efficient tree
+ * operations. Furthermore, it allows tree creation/deletion without taking
+ * the expensive insert/remove paths. For instance, imagine you want to
+ * create an rb-tree from a set of objects you have. With c-rbtree you can
+ * do that without a single rotation or tree-restructuring in O(n), while
+ * tsearch(3p) requires O(n log n).
+ *
+ * o The tsearch(3p) API requires one pointer-chase on each node access. This
+ * is inherent to the design as it does not allow embedding the node in the
+ * parent object. This slows down the API considerably.
+ *
+ * o The tsearch(3p) API does not allow multiple entries with the same key.
+ *
+ * o The tsearch(3p) API requires node lookup during removal. This does not
+ * affect the worst-case runtime, but does reduce absolute performance.
+ *
+ * o The tsearch(3p) API does not allow O(1) tests whether a node is linked
+ * or not. It requires a separate state variable per node.
+ *
+ * o The tsearch(3p) API does not allow walking the tree with context. The
+ * only accessor twalk(3p) provides no tree context nor caller context to
+ * the callback function.
+ *
+ * o The glibc implementation of tsearch(3p) uses RB-Trees without parent
+ * pointers. Hence, tree traversal requires back-tracking. Performance is
+ * similar, but it reduces memory consumption (though, at the same time it
+ * stores the key pointer, and allocates the node on the heap, so overall
+ * the memory consumption is higher still).
+ * But the more important issue is, a node itself is not enough context as
+ * tree iterator, but the full depth parent pointers are needed as well.
+ */
+
+#undef NDEBUG
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "c-rbtree.h"
+#include "c-rbtree-private.h"
+
+typedef struct {
+ int key; /* sort key; the tsearch(3p) based tree stores the address of this member */
+ CRBNode rb; /* linkage used when the node sits in a CRBTree */
+} Node;
+
+#define node_from_rb(_rb) ((Node *)((char *)(_rb) - offsetof(Node, rb))) /* pointer to Node.rb -> enclosing Node */
+#define node_from_key(_key) ((Node *)((char *)(_key) - offsetof(Node, key))) /* pointer to Node.key -> enclosing Node */
+
+static void shuffle(Node **nodes, size_t n_memb) {
+        /* In-place shuffle: exchange every element with a rand()-selected one. */
+        unsigned int cur, other;
+        Node *tmp;
+        for (cur = 0; cur < n_memb; cur++) {
+                other = rand() % n_memb;
+                tmp = nodes[cur];
+                nodes[cur] = nodes[other];
+                nodes[other] = tmp;
+        }
+}
+
+static int compare(CRBTree *t, void *k, CRBNode *n) {
+        /* The lookup key is an int passed through the void pointer. */
+        const int wanted = (int)(unsigned long)k;
+        /* <0, 0 or >0: the key sorts before, equal to or after @n's entry */
+        return wanted - node_from_rb(n)->key;
+}
+
+static uint64_t now(void) {
+        /* Value of the CLOCK_THREAD_CPUTIME_ID clock, converted to nanoseconds. */
+        struct timespec stamp;
+        const int rc = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &stamp);
+
+        assert(rc >= 0);
+        return stamp.tv_nsec + UINT64_C(1000000000) * stamp.tv_sec;
+}
+
+/*
+ * POSIX tsearch(3p) based RB-Tree API
+ *
+ * This implements a small rb-tree API alongside c-rbtree but based on
+ * tsearch(3p) and friends.
+ *
+ * Note that we don't care for OOM here, nor do we implement all the same
+ * features as c-rbtree. This just does basic insertion, removal, and lookup
+ * without any conflict detection.
+ *
+ * This also hard-codes 'Node' as object type that can be stored in the tree.
+ */
+
+typedef struct PosixRBTree PosixRBTree;
+
+struct PosixRBTree {
+ void *root; /* opaque root handle, passed to tsearch(3p), tfind(3p), tdelete(3p) and twalk(3p) */
+};
+
+static int posix_rbtree_compare(const void *a, const void *b) {
+ return *(const int *)a - *(const int *)b; /* both arguments point at int keys; the sign gives their order */
+}
+
+static void posix_rbtree_add(PosixRBTree *t, const Node *node) {
+        /* Insert @node by storing a pointer to its key in the tsearch(3p) tree. */
+        const int *const *const slot = tsearch(&node->key, &t->root, posix_rbtree_compare);
+        /* the returned element holds our key pointer, unless an equal key was already stored */
+        assert(*slot == &node->key);
+}
+
+static void posix_rbtree_remove(PosixRBTree *t, const Node *node) {
+        /* Drop @node's key from the tsearch(3p) tree. */
+        const void *const found = tdelete(&node->key, &t->root, posix_rbtree_compare);
+        /* tdelete() returns NULL if no matching key was present */
+        assert(found);
+}
+
+static Node *posix_rbtree_find(PosixRBTree *t, int key) {
+        int **const hit = tfind(&key, &t->root, posix_rbtree_compare); /* NULL if @key is absent */
+        if (!hit)
+                return NULL;
+        return node_from_key(*hit); /* the stored element is the address of a Node.key */
+}
+
+static void posix_rbtree_visit(const void *n, const VISIT o, const int depth) {
+        /* twalk() callback: assert ascending key order. twalk() offers no context, */
+        /* so the previous key is kept in a static that is reset at the root. */
+        static int last_key;
+        int key;
+
+        if (depth == 0 && (o == preorder || o == leaf))
+                last_key = 0;
+
+        /* only act on the "postorder" visit of a node, or on a leaf */
+        if (o != postorder && o != leaf)
+                return;
+
+        key = node_from_key(*(int **)n)->key;
+        assert(last_key <= key);
+        last_key = key;
+}
+
+static void posix_rbtree_traverse(PosixRBTree *t) {
+ twalk(t->root, posix_rbtree_visit); /* the callback asserts that keys never decrease */
+}
+
+/*
+ * Comparison between c-rbtree and tsearch(3p)
+ *
+ * Based on the tsearch(3p) API above, this now implements some comparisons
+ * between c-rbtree and the POSIX API.
+ *
+ * The semantic differences are explained above. This does mostly performance
+ * comparisons.
+ */
+
+static void test_posix(void) {
+ uint64_t ts, ts_c1, ts_c2, ts_c3, ts_c4; /* c-rbtree timings: insertion, traversal, lookup, removal */
+ uint64_t ts_p1, ts_p2, ts_p3, ts_p4; /* tsearch(3p) timings, same four phases */
+ PosixRBTree pt = {};
+ CRBNode **slot, *p;
+ CRBTree t = {};
+ Node *nodes[2048];
+ unsigned long i;
+ int v;
+ /* Runs the same four phases on both tree flavours, times each with now(), and prints a table to stderr. */
+ /* allocate and initialize all nodes */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i) {
+ nodes[i] = malloc(sizeof(*nodes[i]));
+ assert(nodes[i]);
+ nodes[i]->key = i;
+ c_rbnode_init(&nodes[i]->rb);
+ }
+
+ /* shuffle nodes */
+ shuffle(nodes, sizeof(nodes) / sizeof(*nodes));
+
+ /* add all nodes, and verify that each node is linked */
+ ts = now();
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i) {
+ slot = c_rbtree_find_slot(&t, compare, (void *)(unsigned long)nodes[i]->key, &p);
+ assert(slot);
+ c_rbtree_add(&t, p, slot, &nodes[i]->rb);
+ }
+ ts_c1 = now() - ts;
+
+ ts = now();
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i)
+ posix_rbtree_add(&pt, nodes[i]);
+ ts_p1 = now() - ts;
+
+ /* shuffle nodes again */
+ shuffle(nodes, sizeof(nodes) / sizeof(*nodes));
+
+ /* traverse tree in-order */
+ ts = now();
+ i = 0;
+ v = 0;
+ for (p = c_rbtree_first(&t); p; p = c_rbnode_next(p)) {
+ ++i;
+
+ assert(v <= node_from_rb(p)->key);
+ v = node_from_rb(p)->key;
+ }
+ assert(i == sizeof(nodes) / sizeof(*nodes));
+ ts_c2 = now() - ts;
+
+ ts = now();
+ posix_rbtree_traverse(&pt);
+ ts_p2 = now() - ts;
+
+ /* shuffle nodes again */
+ shuffle(nodes, sizeof(nodes) / sizeof(*nodes));
+ /* the lookups below sit inside assert(); they still run because this file undefines NDEBUG */
+ /* lookup all nodes (in different order) */
+ ts = now();
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i)
+ assert(nodes[i] == c_rbtree_find_entry(&t, compare,
+ (void *)(unsigned long)nodes[i]->key,
+ Node, rb));
+ ts_c3 = now() - ts;
+
+ ts = now();
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i)
+ assert(nodes[i] == posix_rbtree_find(&pt, nodes[i]->key));
+ ts_p3 = now() - ts;
+
+ /* shuffle nodes again */
+ shuffle(nodes, sizeof(nodes) / sizeof(*nodes));
+
+ /* remove all nodes (in different order) */
+ ts = now();
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i)
+ c_rbnode_unlink(&nodes[i]->rb);
+ ts_c4 = now() - ts;
+
+ ts = now();
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i)
+ posix_rbtree_remove(&pt, nodes[i]);
+ ts_p4 = now() - ts;
+
+ /* free nodes again */
+ for (i = 0; i < sizeof(nodes) / sizeof(*nodes); ++i)
+ free(nodes[i]);
+ /* report: one row per implementation, one column per phase, all values in nanoseconds */
+ fprintf(stderr, " insertion traversal lookup removal\n");
+ fprintf(stderr, " c-rbtree: %8"PRIu64"ns %8"PRIu64"ns %8"PRIu64"ns %8"PRIu64"ns\n",
+ ts_c1, ts_c2, ts_c3, ts_c4);
+ fprintf(stderr, "tsearch(3p): %8"PRIu64"ns %8"PRIu64"ns %8"PRIu64"ns %8"PRIu64"ns\n",
+ ts_p1, ts_p2, ts_p3, ts_p4);
+}
+
+int main(int argc, char **argv) {
+        /* Entry point: run the c-rbtree vs. tsearch(3p) comparison once. */
+        /* The seed is constant, so the shuffles are identical on every run. */
+        srand(0xdeadbeef);
+        test_posix();
+        return 0;
+}
diff --git a/shared/meson.build b/shared/meson.build
index e1cf620bf..780c31fef 100644
--- a/shared/meson.build
+++ b/shared/meson.build
@@ -10,14 +10,51 @@ shared_c_siphash_dep = declare_dependency(
link_with: shared_c_siphash,
)
+shared_c_rbtree = static_library( # static helper library built from the bundled c-rbtree sources
+ 'c-rbtree',
+ c_args: '-std=c11',
+ sources: files('c-rbtree/src/c-rbtree.c',
+ 'c-rbtree/src/c-rbtree.h',
+ 'c-rbtree/src/c-rbtree-private.h'),
+)
+
+shared_c_rbtree_dep = declare_dependency(
+ include_directories: shared_inc,
+ link_with: shared_c_rbtree,
+)
+
+# n-acd source selection: the BPF implementation, or its fallback file when the 'ebpf' option is off
+if get_option('ebpf')
+ shared_n_acd_bpf_files = files('n-acd/src/n-acd-bpf.c')
+else
+ shared_n_acd_bpf_files = files('n-acd/src/n-acd-bpf-fallback.c')
+endif
+
shared_n_acd = static_library(
'n-acd',
- sources: 'n-acd/src/n-acd.c',
+ sources: files('n-acd/src/n-acd.c',
+ 'n-acd/src/n-acd.h',
+ 'n-acd/src/n-acd-private.h',
+ 'n-acd/src/n-acd-probe.c',
+ 'n-acd/src/util/timer.c',
+ 'n-acd/src/util/timer.h')
+ + shared_n_acd_bpf_files,
+ c_args: [
+ '-D_GNU_SOURCE',
+ '-DSO_ATTACH_BPF=50',
+ '-std=c11',
+ '-Wno-pointer-arith',
+ '-Wno-vla',
+ ],
include_directories: [
include_directories('c-siphash/src'),
include_directories('c-list/src'),
+ include_directories('c-rbtree/src'),
+ ],
+ dependencies: [
+ shared_c_siphash_dep,
+ shared_c_rbtree_dep
],
- dependencies: shared_c_siphash_dep,
)
shared_n_acd_dep = declare_dependency(
diff --git a/shared/n-acd/.cherryci/ci-test b/shared/n-acd/.cherryci/ci-test
new file mode 100755
index 000000000..71f345703
--- /dev/null
+++ b/shared/n-acd/.cherryci/ci-test
@@ -0,0 +1,12 @@
+#!/bin/bash
+# CI driver: set up, build and test the project in a freshly created ./ci-build directory.
+set -e
+
+rm -Rf "./ci-build"
+mkdir "./ci-build"
+cd "./ci-build"
+
+${CHERRY_LIB_MESONSETUP} . "${CHERRY_LIB_SRCDIR}" ${N_ACD_CONF}
+${CHERRY_LIB_NINJABUILD}
+sudo ${CHERRY_LIB_MESONTEST}
+# no valgrind tests, since bpf(2) is not supported by it
diff --git a/shared/n-acd/.cherryci/matrix b/shared/n-acd/.cherryci/matrix
new file mode 100755
index 000000000..0b5da37c7
--- /dev/null
+++ b/shared/n-acd/.cherryci/matrix
@@ -0,0 +1,5 @@
+#!/bin/bash
+# Appends one entry to the CI matrix: the default configuration plus -Debpf=false, exported as N_ACD_CONF.
+set -e
+
+CHERRY_MATRIX+=("export N_ACD_CONF=-Debpf=false ${CHERRY_LIB_M_DEFAULT[*]}")
diff --git a/shared/n-acd/.editorconfig b/shared/n-acd/.editorconfig
index b41176962..b10bb4f3f 100644
--- a/shared/n-acd/.editorconfig
+++ b/shared/n-acd/.editorconfig
@@ -1,16 +1,11 @@
-# http://EditorConfig.org
-
-# top-most EditorConfig file
root = true
-# Unix-style newlines with a newline ending every file, utf-8 charset
[*]
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
charset = utf-8
-# match config files, set indent to spaces with width of eight
[*.{c,h}]
indent_style = space
indent_size = 8
diff --git a/shared/n-acd/.gitmodules b/shared/n-acd/.gitmodules
index ec8b866d2..d73d05a26 100644
--- a/shared/n-acd/.gitmodules
+++ b/shared/n-acd/.gitmodules
@@ -4,3 +4,6 @@
[submodule "subprojects/c-siphash"]
path = subprojects/c-siphash
url = https://github.com/c-util/c-siphash.git
+[submodule "subprojects/c-rbtree"]
+ path = subprojects/c-rbtree
+ url = https://github.com/c-util/c-rbtree.git
diff --git a/shared/n-acd/.travis.yml b/shared/n-acd/.travis.yml
index ed0bcf38c..99a7bb946 100644
--- a/shared/n-acd/.travis.yml
+++ b/shared/n-acd/.travis.yml
@@ -1,18 +1,21 @@
-dist: trusty
-sudo: required
os: linux
+dist: trusty
language: c
-compiler:
- - gcc
- - clang
-install:
- - curl -L "https://github.com/ninja-build/ninja/releases/download/v1.7.2/ninja-linux.zip" -o "ninja-linux.zip"
- - sudo unzip "ninja-linux.zip" -d "/usr/local/bin"
- - sudo chmod 755 "/usr/local/bin/ninja"
- - pip3 install meson
+services:
+ - docker
-script:
- - meson "build"
- - ninja -C "build"
- - sudo MESON_TESTTHREADS=64 ninja -C "build" test
+before_install:
+ - curl -O -L "https://raw.githubusercontent.com/cherry-pick/cherry-images/v1/scripts/vmrun"
+ - curl -O -L "https://raw.githubusercontent.com/cherry-pick/cherry-ci/v1/scripts/cherryci"
+ - chmod +x "./vmrun" "./cherryci"
+
+jobs:
+ include:
+ - stage: test
+ script:
+ - ./vmrun -- ../src/cherryci -d ../src/.cherryci -s c-util -m
+ - script:
+ - ./vmrun -T armv7hl -- ../src/cherryci -d ../src/.cherryci -s c-util
+ - script:
+ - ./vmrun -T i686 -- ../src/cherryci -d ../src/.cherryci -s c-util
diff --git a/shared/n-acd/AUTHORS b/shared/n-acd/AUTHORS
new file mode 100644
index 000000000..89ee27d23
--- /dev/null
+++ b/shared/n-acd/AUTHORS
@@ -0,0 +1,39 @@
+LICENSE:
+ This project is dual-licensed under both the Apache License, Version
+ 2.0, and the GNU Lesser General Public License, Version 2.1+.
+
+AUTHORS-ASL:
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+AUTHORS-LGPL:
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program; If not, see <http://www.gnu.org/licenses/>.
+
+COPYRIGHT: (ordered alphabetically)
+ Copyright (C) 2015-2018 Red Hat, Inc.
+
+AUTHORS: (ordered alphabetically)
+ Beniamino Galvani
+ David Herrmann
+ Thomas Haller
+ Tom Gundersen
diff --git a/shared/n-acd/AUTHORS-ASL b/shared/n-acd/AUTHORS-ASL
new file mode 100644
index 000000000..5d501a728
--- /dev/null
+++ b/shared/n-acd/AUTHORS-ASL
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+Copyright {yyyy} {name of copyright owner}
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/shared/n-acd/AUTHORS-LGPL b/shared/n-acd/AUTHORS-LGPL
new file mode 100644
index 000000000..4362b4915
--- /dev/null
+++ b/shared/n-acd/AUTHORS-LGPL
@@ -0,0 +1,502 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded. In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
diff --git a/shared/n-acd/COPYING b/shared/n-acd/COPYING
deleted file mode 100644
index 81c0566b8..000000000
--- a/shared/n-acd/COPYING
+++ /dev/null
@@ -1,19 +0,0 @@
-LICENSE:
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-
-COPYRIGHT: (ordered alphabetically)
- Copyright (C) 2015-2017 Red Hat, Inc.
-
-AUTHORS: (ordered alphabetically)
- David Herrmann
- Tom Gundersen
diff --git a/shared/n-acd/LICENSE b/shared/n-acd/LICENSE
deleted file mode 100644
index 5d501a728..000000000
--- a/shared/n-acd/LICENSE
+++ /dev/null
@@ -1,201 +0,0 @@
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
-TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
-2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
-3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
-4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
-5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
-6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
-7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
-8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
-9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
-END OF TERMS AND CONDITIONS
-
-APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "{}"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
-Copyright {yyyy} {name of copyright owner}
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
diff --git a/shared/n-acd/LICENSE b/shared/n-acd/LICENSE
new file mode 120000
index 000000000..da24c5e4a
--- /dev/null
+++ b/shared/n-acd/LICENSE
@@ -0,0 +1 @@
+AUTHORS-ASL
\ No newline at end of file
diff --git a/shared/n-acd/NEWS b/shared/n-acd/NEWS
new file mode 100644
index 000000000..bb06abbc5
--- /dev/null
+++ b/shared/n-acd/NEWS
@@ -0,0 +1,21 @@
+n-acd - IPv4 Address Conflict Detection
+
+CHANGES WITH 1:
+
+ * Initial release of n-acd. This project implements the IPv4 Address
+ Conflict Detection standard as defined in RFC-5227. The state machine
+ is implemented in a shared library and provides a stable ISO-C11 API.
+ The implementation is linux-only and relies heavily on the API
+ behavior of recent linux kernel releases.
+
+ * Compared to the pre-releases, this release supports many parallel
+ probes on a single n-acd context. This reduces the number of
+ allocated network resources to O(1), based on the number of running
+ parallel probes.
+
+ * The n-acd project is now dual-licensed: ASL-2.0 and LGPL-2.1+
+
+ Contributions from: Beniamino Galvani, David Herrmann, Thomas Haller,
+ Tom Gundersen
+
+ - Tübingen, 2018-08-08
diff --git a/shared/n-acd/README b/shared/n-acd/README
index 4077cba05..b88d31c71 100644
--- a/shared/n-acd/README
+++ b/shared/n-acd/README
@@ -8,7 +8,7 @@ ABOUT:
kernel releases.
DETAILS:
- https://github.com/nettools/n-acd/wiki
+ https://nettools.github.io/n-acd
BUG REPORTS:
https://github.com/nettools/n-acd/issues
@@ -20,14 +20,18 @@ GIT:
GITWEB:
https://github.com/nettools/n-acd
+MAILINGLIST:
+ https://groups.google.com/forum/#!forum/nettools-devel
+
LICENSE:
- Apache Software License 2.0 (LICENSE)
- See COPYING for details.
+ Apache Software License 2.0
+ Lesser General Public License 2.1+
+ See AUTHORS for details.
REQUIREMENTS:
The requirements for n-acd are:
- Linux kernel >= 3.0
+ Linux kernel >= 3.19
libc (e.g., glibc >= 2.16)
At build-time, the following software is required:
@@ -36,15 +40,15 @@ REQUIREMENTS:
pkg-config >= 0.29
INSTALL:
- The meson build-system is used for n-acd. Contact upstream
+ The meson build-system is used for this project. Consult upstream
documentation for detailed help. In most situations the following
- commands are sufficient to build and install n-acd from source:
+ commands are sufficient to build and install from source:
$ mkdir build
$ cd build
- $ meson setup . ..
+ $ meson setup ..
$ ninja
- $ ninja test
+ $ meson test
# ninja install
- No custom configuration options are available.
+ For custom configuration options see meson_options.txt.
diff --git a/shared/n-acd/meson.build b/shared/n-acd/meson.build
index da923c288..a05164c04 100644
--- a/shared/n-acd/meson.build
+++ b/shared/n-acd/meson.build
@@ -1,19 +1,25 @@
-project('n-acd',
+project(
+ 'n-acd',
'c',
version: '1',
license: 'Apache',
default_options: [
- 'buildtype=release',
'c_std=c11',
- ])
+ ],
+)
+project_description = 'IPv4 Address Conflict Detection'
add_project_arguments('-D_GNU_SOURCE', language: 'c')
mod_pkgconfig = import('pkgconfig')
sub_clist = subproject('c-list')
+sub_crbtree = subproject('c-rbtree')
sub_csiphash = subproject('c-siphash')
dep_clist = sub_clist.get_variable('libclist_dep')
+dep_crbtree = sub_crbtree.get_variable('libcrbtree_dep')
dep_csiphash = sub_csiphash.get_variable('libcsiphash_dep')
+use_ebpf = get_option('ebpf')
+
subdir('src')
diff --git a/shared/n-acd/meson_options.txt b/shared/n-acd/meson_options.txt
new file mode 100644
index 000000000..b024ee1d4
--- /dev/null
+++ b/shared/n-acd/meson_options.txt
@@ -0,0 +1 @@
+option('ebpf', type: 'boolean', value: true, description: 'Enable eBPF packet filtering')
diff --git a/shared/n-acd/src/libnacd.sym b/shared/n-acd/src/libnacd.sym
index c9bd48753..f85e13acf 100644
--- a/shared/n-acd/src/libnacd.sym
+++ b/shared/n-acd/src/libnacd.sym
@@ -1,13 +1,28 @@
-LIBNACD_1 {
+LIBNACD_2 {
global:
+ n_acd_config_new;
+ n_acd_config_free;
+ n_acd_config_set_ifindex;
+ n_acd_config_set_transport;
+ n_acd_config_set_mac;
+
+ n_acd_probe_config_new;
+ n_acd_probe_config_free;
+ n_acd_probe_config_set_ip;
+ n_acd_probe_config_set_timeout;
+
n_acd_new;
- n_acd_free;
+ n_acd_ref;
+ n_acd_unref;
n_acd_get_fd;
n_acd_dispatch;
n_acd_pop_event;
- n_acd_start;
- n_acd_stop;
- n_acd_announce;
+ n_acd_probe;
+
+ n_acd_probe_free;
+ n_acd_probe_set_userdata;
+ n_acd_probe_get_userdata;
+ n_acd_probe_announce;
local:
*;
};
diff --git a/shared/n-acd/src/meson.build b/shared/n-acd/src/meson.build
index ba09d1323..0a405f9c4 100644
--- a/shared/n-acd/src/meson.build
+++ b/shared/n-acd/src/meson.build
@@ -1,76 +1,94 @@
#
# target: libnacd.so
-# We build both, a static and a shared library. We want our tests to get access
-# to internals, so we link them statically.
#
-libnacd_private = static_library('nacd-private',
- ['n-acd.c'],
- c_args: [
- '-fvisibility=hidden',
- '-fno-common'
- ],
- dependencies: [
- dep_clist,
- dep_csiphash,
- ],
- pic: true)
-install_headers('n-acd.h')
libnacd_symfile = join_paths(meson.current_source_dir(), 'libnacd.sym')
-libnacd_shared = shared_library('nacd',
- dependencies: dep_csiphash,
- objects: libnacd_private.extract_all_objects(),
- install: true,
- soversion: 0,
- link_depends: libnacd_symfile,
- link_args: [
- '-Wl,--no-undefined',
- '-Wl,--version-script=@0@'.format(libnacd_symfile)
- ])
-mod_pkgconfig.generate(libraries: libnacd_shared,
- version: meson.project_version(),
- name: 'libnacd',
- filebase: 'libnacd',
- description: 'IPv4 Address Conflict Detection')
-#
-# target: test-api
-# The test-api program explicitly links against the shared library, since it
-# tests for symbol visibility.
-#
+libnacd_deps = [
+ dep_clist,
+ dep_crbtree,
+ dep_csiphash,
+]
-test_api = executable('test-api',
- ['test-api.c'],
- link_with: libnacd_shared)
-test('API Symbol Visibility', test_api)
+libnacd_sources = [
+ 'n-acd.c',
+ 'n-acd-probe.c',
+ 'util/timer.c',
+]
+
+if use_ebpf
+ libnacd_sources += [
+ 'n-acd-bpf.c',
+ ]
+else
+ libnacd_sources += [
+ 'n-acd-bpf-fallback.c',
+ ]
+endif
+
+libnacd_private = static_library(
+ 'nacd-private',
+ libnacd_sources,
+ c_args: [
+ '-fvisibility=hidden',
+ '-fno-common'
+ ],
+ dependencies: libnacd_deps,
+ pic: true,
+)
+
+libnacd_shared = shared_library(
+ 'nacd',
+ objects: libnacd_private.extract_all_objects(),
+ dependencies: libnacd_deps,
+ install: not meson.is_subproject(),
+ soversion: 0,
+ link_depends: libnacd_symfile,
+ link_args: [
+ '-Wl,--no-undefined',
+ '-Wl,--version-script=@0@'.format(libnacd_symfile)
+ ],
+)
+
+libnacd_dep = declare_dependency(
+ include_directories: include_directories('.'),
+ link_with: libnacd_private,
+ dependencies: libnacd_deps,
+ version: meson.project_version(),
+)
+
+if not meson.is_subproject()
+ install_headers('n-acd.h')
+
+ mod_pkgconfig.generate(
+ libraries: libnacd_shared,
+ version: meson.project_version(),
+ name: 'libnacd',
+ filebase: 'libnacd',
+ description: project_description,
+ )
+endif
#
# target: test-*
-# All other tests are listed here. They link against the static library, so
-# they can access internals for verification.
#
-test_basic = executable('test-basic',
- ['test-basic.c'],
- link_with: libnacd_private)
-test('Basic API Behavior', test_basic)
+test_api = executable('test-api', ['test-api.c'], link_with: libnacd_shared)
+test('API Symbol Visibility', test_api)
-test_loopback = executable('test-loopback',
- ['test-loopback.c'],
- link_with: libnacd_private)
+if use_ebpf
+ test_bpf = executable('test-bpf', ['test-bpf.c'], dependencies: libnacd_dep)
+ test('eBPF socket filtering', test_bpf)
+endif
+
+test_loopback = executable('test-loopback', ['test-loopback.c'], dependencies: libnacd_dep)
test('Echo Suppression via Loopback', test_loopback)
-test_twice = executable('test-twice',
- ['test-twice.c'],
- link_with: libnacd_private)
-test('Two ACD in Parallel', test_twice)
+test_timer = executable('test-timer', ['util/test-timer.c'], dependencies: libnacd_dep)
+test('Timer helper', test_timer)
-test_unplug = executable('test-unplug',
- ['test-unplug.c'],
- link_with: libnacd_private)
-test('Async Interface Hotplug', test_unplug)
+#test_unplug = executable('test-unplug', ['test-unplug.c'], dependencies: libnacd_dep)
+#test('Async Interface Hotplug', test_unplug)
-test_unused = executable('test-unsed',
- ['test-unused.c'],
- link_with: libnacd_private)
-test('Unconflicted ACD', test_unused)
+test_veth = executable('test-veth', ['test-veth.c'], dependencies: libnacd_dep)
+test('Parallel ACD instances', test_veth)
diff --git a/shared/n-acd/src/n-acd-bpf-fallback.c b/shared/n-acd/src/n-acd-bpf-fallback.c
new file mode 100644
index 000000000..5e6bdd067
--- /dev/null
+++ b/shared/n-acd/src/n-acd-bpf-fallback.c
@@ -0,0 +1,29 @@
+/*
+ * A noop implementation of eBPF filter for IPv4 Address Conflict Detection
+ *
+ * This is a collection of dummy functions that have no effect, but allow
+ * n-acd to compile without eBPF support.
+ *
+ * See n-acd-bpf.c for documentation.
+ */
+
+#include
+#include "n-acd-private.h"
+
+int n_acd_bpf_map_create(int *mapfdp, size_t max_entries) {
+ *mapfdp = -1;
+ return 0;
+}
+
+int n_acd_bpf_map_add(int mapfd, struct in_addr *addrp) {
+ return 0;
+}
+
+int n_acd_bpf_map_remove(int mapfd, struct in_addr *addrp) {
+ return 0;
+}
+
+int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *macp) {
+ *progfdp = -1;
+ return 0;
+}
diff --git a/shared/n-acd/src/n-acd-bpf.c b/shared/n-acd/src/n-acd-bpf.c
new file mode 100644
index 000000000..771a28eeb
--- /dev/null
+++ b/shared/n-acd/src/n-acd-bpf.c
@@ -0,0 +1,316 @@
+/*
+ * eBPF filter for IPv4 Address Conflict Detection
+ *
+ * An eBPF map and an eBPF program are provided. The map contains all the
+ * addresses address conflict detection is performed on, and the program
+ * filters out all packets except exactly the packets relevant to the ACD
+ * protocol on the addresses currently in the map.
+ *
+ * Note that userspace still has to filter the incoming packets, as filters
+ * are applied when packets are queued on the socket, not when userspace
+ * receives them. It is therefore possible to receive packets about addresses
+ * that have already been removed.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "n-acd-private.h"
+
+#define BPF_LD_ABS(SIZE, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM, \
+ })
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0, \
+ })
+
+#define BPF_LD_MAP_FD(DST, MAP_FD) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = BPF_PSEUDO_MAP_FD, \
+ .off = 0, \
+ .imm = (__u32) (MAP_FD), \
+ }), \
+ ((struct bpf_insn) { \
+ .code = 0, /* zero is reserved opcode */ \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((__u64) (MAP_FD)) >> 32, \
+ })
+
+#define BPF_ALU_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0, \
+ })
+
+#define BPF_ALU_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM, \
+ })
+
+#define BPF_MOV_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0, \
+ })
+
+#define BPF_MOV_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM, \
+ })
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0, \
+ })
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0, \
+ })
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM, \
+ })
+
+#define BPF_EMIT_CALL(FUNC) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_CALL, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = FUNC, \
+ })
+
+#define BPF_EXIT_INSN() \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_EXIT, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0, \
+ })
+
+static int n_acd_syscall_bpf(int cmd, union bpf_attr *attr, unsigned int size) {
+ return (int)syscall(__NR_bpf, cmd, attr, size);
+}
+
+int n_acd_bpf_map_create(int *mapfdp, size_t max_entries) {
+ union bpf_attr attr;
+ int mapfd;
+
+ memset(&attr, 0, sizeof(attr));
+ attr = (union bpf_attr){
+ .map_type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(uint32_t),
+ .value_size = sizeof(uint8_t), /* values are never used, but must be set */
+ .max_entries = max_entries,
+ };
+
+ mapfd = n_acd_syscall_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ if (mapfd < 0)
+ return -errno;
+
+ *mapfdp = mapfd;
+ return 0;
+}
+
+int n_acd_bpf_map_add(int mapfd, struct in_addr *addrp) {
+ union bpf_attr attr;
+ uint32_t addr = be32toh(addrp->s_addr);
+ uint8_t _dummy = 0;
+ int r;
+
+ memset(&attr, 0, sizeof(attr));
+ attr = (union bpf_attr){
+ .map_fd = mapfd,
+ .key = (uint64_t)(unsigned long)&addr,
+ .value = (uint64_t)(unsigned long)&_dummy,
+ .flags = BPF_NOEXIST,
+ };
+
+ r = n_acd_syscall_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int n_acd_bpf_map_remove(int mapfd, struct in_addr *addrp) {
+ uint32_t addr = be32toh(addrp->s_addr);
+ union bpf_attr attr;
+ int r;
+
+ memset(&attr, 0, sizeof(attr));
+ attr = (union bpf_attr){
+ .map_fd = mapfd,
+ .key = (uint64_t)(unsigned long)&addr,
+ };
+
+ r = n_acd_syscall_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *macp) {
+ const union {
+ uint8_t u8[6];
+ uint16_t u16[3];
+ uint32_t u32[1];
+ } mac = {
+ .u8 = {
+ macp->ether_addr_octet[0],
+ macp->ether_addr_octet[1],
+ macp->ether_addr_octet[2],
+ macp->ether_addr_octet[3],
+ macp->ether_addr_octet[4],
+ macp->ether_addr_octet[5],
+ },
+ };
+ struct bpf_insn prog[] = {
+ /* for using BPF_LD_ABS r6 must point to the skb, currently in r1 */
+ BPF_MOV_REG(6, 1), /* r6 = r1 */
+
+ /* drop the packet if it is too short */
+ BPF_LDX_MEM(BPF_W, 0, 6, offsetof(struct __sk_buff, len)), /* r0 = skb->len */
+ BPF_JMP_IMM(BPF_JGE, 0, sizeof(struct ether_arp), 2), /* if (r0 >= sizeof(ether_arp)) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+
+ /* drop the packet if the header is not as expected */
+ BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_hrd)), /* r0 = header type */
+ BPF_JMP_IMM(BPF_JEQ, 0, ARPHRD_ETHER, 2), /* if (r0 == ethernet) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+
+ BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_pro)), /* r0 = protocol */
+ BPF_JMP_IMM(BPF_JEQ, 0, ETHERTYPE_IP, 2), /* if (r0 == IP) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+
+ BPF_LD_ABS(BPF_B, offsetof(struct ether_arp, arp_hln)), /* r0 = hw addr length */
+ BPF_JMP_IMM(BPF_JEQ, 0, sizeof(struct ether_addr), 2), /* if (r0 == sizeof(ether_addr)) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+
+ BPF_LD_ABS(BPF_B, offsetof(struct ether_arp, arp_pln)), /* r0 = protocol addr length */
+ BPF_JMP_IMM(BPF_JEQ, 0, sizeof(struct in_addr), 2), /* if (r0 == sizeof(in_addr)) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+
+ /* drop packets from our own mac address */
+ BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_sha)), /* r0 = first four bytes of packet mac address */
+ BPF_JMP_IMM(BPF_JNE, 0, be32toh(mac.u32[0]), 4), /* if (r0 != first four bytes of our mac address) skip 4 */
+ BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_sha) + 4), /* r0 = last two bytes of packet mac address */
+ BPF_JMP_IMM(BPF_JNE, 0, be16toh(mac.u16[2]), 2), /* if (r0 != last two bytes of our mac address) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+
+ /*
+ * We listen for two kinds of packets:
+ * Conflicts)
+ * These are requests or replies with the sender address not set to INADDR_ANY. The
+ * conflicted address is the sender address, remember this in r7.
+ * Probes)
+ * These are requests with the sender address set to INADDR_ANY. The probed address
+ * is the target address, remember this in r7.
+ * Any other packets are dropped.
+ */
+ BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_spa)), /* r0 = sender ip address */
+ BPF_JMP_IMM(BPF_JEQ, 0, 0, 7), /* if (r0 == 0) skip 7 */
+ BPF_MOV_REG(7, 0), /* r7 = r0 */
+ BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_op)), /* r0 = operation */
+ BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REQUEST, 3), /* if (r0 == request) skip 3 */
+ BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REPLY, 2), /* if (r0 == reply) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+ BPF_JMP_IMM(BPF_JA, 0, 0, 6), /* skip 6 */
+ BPF_LD_ABS(BPF_W, offsetof(struct ether_arp, arp_tpa)), /* r0 = target ip address */
+ BPF_MOV_REG(7, 0), /* r7 = r0 */
+ BPF_LD_ABS(BPF_H, offsetof(struct ether_arp, arp_op)), /* r0 = operation */
+ BPF_JMP_IMM(BPF_JEQ, 0, ARPOP_REQUEST, 2), /* if (r0 == request) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+
+ /* check if the probe or conflict is for an address we are monitoring */
+ BPF_STX_MEM(BPF_W, 10, 7, -4), /* *(uint32_t*)fp - 4 = r7 */
+ BPF_MOV_REG(2, 10), /* r2 = fp */
+ BPF_ALU_IMM(BPF_ADD, 2, -4), /* r2 -= 4 */
+ BPF_LD_MAP_FD(1, mapfd), /* r1 = mapfd */
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), /* r0 = map_lookup_elem(r1, r2) */
+ BPF_JMP_IMM(BPF_JNE, 0, 0, 2), /* if (r0 != NULL) skip 2 */
+ BPF_MOV_IMM(0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(), /* return */
+
+ /* return exactly the packet length*/
+ BPF_MOV_IMM(0, sizeof(struct ether_arp)), /* r0 = sizeof(struct ether_arp) */
+ BPF_EXIT_INSN(), /* return */
+ };
+ union bpf_attr attr;
+ int progfd;
+
+ memset(&attr, 0, sizeof(attr));
+ attr = (union bpf_attr){
+ .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+ .insns = (uint64_t)(unsigned long)prog,
+ .insn_cnt = sizeof(prog) / sizeof(*prog),
+ .license = (uint64_t)(unsigned long)"ASL",
+ };
+
+ progfd = n_acd_syscall_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (progfd < 0)
+ return -errno;
+
+ *progfdp = progfd;
+ return 0;
+}
diff --git a/shared/n-acd/src/n-acd-private.h b/shared/n-acd/src/n-acd-private.h
new file mode 100644
index 000000000..3f2079123
--- /dev/null
+++ b/shared/n-acd/src/n-acd-private.h
@@ -0,0 +1,172 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "util/timer.h"
+#include "n-acd.h"
+
+typedef struct NAcdEventNode NAcdEventNode;
+
+#define _cleanup_(_x) __attribute__((__cleanup__(_x)))
+#define _public_ __attribute__((__visibility__("default")))
+
+/* This augments the error-codes with internal ones that are never exposed. */
+enum {
+ _N_ACD_INTERNAL = _N_ACD_E_N,
+
+ N_ACD_E_DROPPED,
+};
+
+enum {
+ N_ACD_PROBE_STATE_PROBING,
+ N_ACD_PROBE_STATE_CONFIGURING,
+ N_ACD_PROBE_STATE_ANNOUNCING,
+ N_ACD_PROBE_STATE_FAILED,
+};
+
+struct NAcdConfig {
+ int ifindex;
+ unsigned int transport;
+ uint8_t mac[ETH_ALEN];
+ size_t n_mac;
+};
+
+#define N_ACD_CONFIG_NULL(_x) { \
+ .transport = _N_ACD_TRANSPORT_N, \
+ }
+
+struct NAcdProbeConfig {
+ struct in_addr ip;
+ uint64_t timeout_msecs;
+};
+
+#define N_ACD_PROBE_CONFIG_NULL(_x) { \
+ .timeout_msecs = N_ACD_TIMEOUT_RFC5227, \
+ }
+
+struct NAcdEventNode {
+ CList acd_link;
+ CList probe_link;
+ NAcdEvent event;
+ uint8_t sender[ETH_ALEN];
+ bool is_public : 1;
+};
+
+#define N_ACD_EVENT_NODE_NULL(_x) { \
+ .acd_link = C_LIST_INIT((_x).acd_link), \
+ .probe_link = C_LIST_INIT((_x).probe_link), \
+ }
+
+struct NAcd {
+ unsigned long n_refs;
+ unsigned int seed;
+ int fd_epoll;
+ int fd_socket;
+ CRBTree ip_tree;
+ CList event_list;
+ Timer timer;
+
+ /* BPF map */
+ int fd_bpf_map;
+ size_t n_bpf_map;
+ size_t max_bpf_map;
+
+ /* configuration */
+ int ifindex;
+ uint8_t mac[ETH_ALEN];
+
+ /* flags */
+ bool preempted : 1;
+};
+
+#define N_ACD_NULL(_x) { \
+ .n_refs = 1, \
+ .fd_epoll = -1, \
+ .fd_socket = -1, \
+ .ip_tree = C_RBTREE_INIT, \
+ .event_list = C_LIST_INIT((_x).event_list), \
+ .timer = TIMER_NULL((_x).timer), \
+ .fd_bpf_map = -1, \
+ }
+
+struct NAcdProbe {
+ NAcd *acd;
+ CRBNode ip_node;
+ CList event_list;
+ Timeout timeout;
+
+ /* configuration */
+ struct in_addr ip;
+ uint64_t timeout_multiplier;
+ void *userdata;
+
+ /* state */
+ unsigned int state;
+ unsigned int n_iteration;
+ unsigned int defend;
+ uint64_t last_defend;
+};
+
+#define N_ACD_PROBE_NULL(_x) { \
+ .ip_node = C_RBNODE_INIT((_x).ip_node), \
+ .event_list = C_LIST_INIT((_x).event_list), \
+ .timeout = TIMEOUT_INIT((_x).timeout), \
+ .state = N_ACD_PROBE_STATE_PROBING, \
+ .defend = N_ACD_DEFEND_NEVER, \
+ }
+
+/* events */
+
+int n_acd_event_node_new(NAcdEventNode **nodep);
+NAcdEventNode *n_acd_event_node_free(NAcdEventNode *node);
+
+/* contexts */
+
+void n_acd_remember(NAcd *acd, uint64_t now, bool success);
+int n_acd_raise(NAcd *acd, NAcdEventNode **nodep, unsigned int event);
+int n_acd_send(NAcd *acd, const struct in_addr *tpa, const struct in_addr *spa);
+int n_acd_ensure_bpf_map_space(NAcd *acd);
+
+/* probes */
+
+int n_acd_probe_new(NAcdProbe **probep, NAcd *acd, NAcdProbeConfig *config);
+int n_acd_probe_raise(NAcdProbe *probe, NAcdEventNode **nodep, unsigned int event);
+int n_acd_probe_handle_timeout(NAcdProbe *probe);
+int n_acd_probe_handle_packet(NAcdProbe *probe, struct ether_arp *packet, bool hard_conflict);
+
+/* eBPF */
+
+int n_acd_bpf_map_create(int *mapfdp, size_t max_elements);
+int n_acd_bpf_map_add(int mapfd, struct in_addr *addr);
+int n_acd_bpf_map_remove(int mapfd, struct in_addr *addr);
+
+int n_acd_bpf_compile(int *progfdp, int mapfd, struct ether_addr *mac);
+
+/* inline helpers */
+
+static inline int n_acd_errno(void) {
+ /*
+ * Compilers continuously warn about uninitialized variables since they
+ * cannot deduce that `return -errno;` will always be negative. This
+ * small wrapper makes sure compilers figure that out. Use it as
+ * replacement for `errno` read access. Yes, it generates worse code,
+ * but only marginally and only affects slow-paths.
+ */
+ return abs(errno) ? : EIO;
+}
+
+static inline void n_acd_event_node_freep(NAcdEventNode **node) {
+ if (*node)
+ n_acd_event_node_free(*node);
+}
+
+static inline void n_acd_closep(int *fdp) {
+ if (*fdp >= 0)
+ close(*fdp);
+}
diff --git a/shared/n-acd/src/n-acd-probe.c b/shared/n-acd/src/n-acd-probe.c
new file mode 100644
index 000000000..8c233b56a
--- /dev/null
+++ b/shared/n-acd/src/n-acd-probe.c
@@ -0,0 +1,636 @@
+/*
+ * IPv4 Address Conflict Detection
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "n-acd.h"
+#include "n-acd-private.h"
+
+/*
+ * These parameters and timing intervals are specified in RFC-5227. The original
+ * values are:
+ *
+ * PROBE_NUM 3
+ * PROBE_WAIT 1s
+ * PROBE_MIN 1s
+ * PROBE_MAX 3s
+ * ANNOUNCE_NUM 3
+ * ANNOUNCE_WAIT 2s
+ * ANNOUNCE_INTERVAL 2s
+ * MAX_CONFLICTS 10
+ * RATE_LIMIT_INTERVAL 60s
+ * DEFEND_INTERVAL 10s
+ *
+ * If we assume a best-case and worst-case scenario for non-conflicted runs, we
+ * end up with a runtime between 4s and 9s to finish the probe. Then it still
+ * takes a fixed 4s to finish the announcements.
+ *
+ * RFC 5227 section 1.1:
+ * [...] (Note that the values listed here are fixed constants; they are
+ * not intended to be modifiable by implementers, operators, or end users.
+ * These constants are given symbolic names here to facilitate the writing
+ * of future standards that may want to reference this document with
+ * different values for these named constants; however, at the present time
+ * no such future standards exist.) [...]
+ *
+ * Unfortunately, no-one ever stepped up to write a "future standard" to revise
+ * the timings. A 9s timeout for successful link setups is not acceptable today.
+ * Hence, we will just go forward and ignore the proposed values. On both
+ * wired and wireless local links round-trip latencies of below 3ms are common.
+ * We require the caller to set a timeout multiplier, where 1 corresponds to a
+ * total probe time between 0.5 ms and 1.0 ms. On modern networks a multiplier
+ * of about 100 should be a reasonable default. To comply with the RFC select a
+ * multiplier of 9000.
+ */
+#define N_ACD_RFC_PROBE_NUM (3)
+#define N_ACD_RFC_PROBE_WAIT_NSEC (UINT64_C(111111)) /* 1/9 ms */
+#define N_ACD_RFC_PROBE_MIN_NSEC (UINT64_C(111111)) /* 1/9 ms */
+#define N_ACD_RFC_PROBE_MAX_NSEC (UINT64_C(333333)) /* 3/9 ms */
+#define N_ACD_RFC_ANNOUNCE_NUM (3)
+#define N_ACD_RFC_ANNOUNCE_WAIT_NSEC (UINT64_C(222222)) /* 2/9 ms */
+#define N_ACD_RFC_ANNOUNCE_INTERVAL_NSEC (UINT64_C(222222)) /* 2/9 ms */
+#define N_ACD_RFC_MAX_CONFLICTS (10)
+#define N_ACD_RFC_RATE_LIMIT_INTERVAL_NSEC (UINT64_C(60000000000)) /* 60s */
+#define N_ACD_RFC_DEFEND_INTERVAL_NSEC (UINT64_C(10000000000)) /* 10s */
+
+/**
+ * n_acd_probe_config_new() - allocate a probe configuration
+ * @configp:        output argument for the new configuration
+ *
+ * Allocates a configuration object, resets it to N_ACD_PROBE_CONFIG_NULL and
+ * returns it in @configp.
+ *
+ * Return: 0 on success, -ENOMEM if the allocation failed.
+ */
+_public_ int n_acd_probe_config_new(NAcdProbeConfig **configp) {
+        NAcdProbeConfig *fresh;
+
+        fresh = malloc(sizeof(*fresh));
+        if (!fresh)
+                return -ENOMEM;
+
+        *fresh = (NAcdProbeConfig)N_ACD_PROBE_CONFIG_NULL(*fresh);
+        *configp = fresh;
+
+        return 0;
+}
+
+/**
+ * n_acd_probe_config_free() - release a probe configuration
+ * @config:        configuration to release, or NULL
+ *
+ * Passing NULL is a no-op.
+ *
+ * Return: NULL is returned.
+ */
+_public_ NAcdProbeConfig *n_acd_probe_config_free(NAcdProbeConfig *config) {
+        if (config)
+                free(config);
+
+        return NULL;
+}
+
+/**
+ * n_acd_probe_config_set_ip() - set the address of a probe configuration
+ * @config: configuration to modify
+ * @ip: address to store
+ *
+ * Stores @ip in @config. Note that n_acd_probe_new() rejects configurations
+ * whose address is 0.
+ */
+_public_ void n_acd_probe_config_set_ip(NAcdProbeConfig *config, struct in_addr ip) {
+ config->ip = ip;
+}
+
+/**
+ * n_acd_probe_config_set_timeout() - set the timeout of a probe configuration
+ * @config: configuration to modify
+ * @msecs: timeout to store, in milliseconds
+ *
+ * Stores @msecs in @config. n_acd_probe_new() uses the value unmodified as
+ * multiplier for all probe timeouts; a value of 0 makes it skip the probing
+ * phase.
+ */
+_public_ void n_acd_probe_config_set_timeout(NAcdProbeConfig *config, uint64_t msecs) {
+ config->timeout_msecs = msecs;
+}
+
+/*
+ * Arm the timeout of @probe so it fires @n_timeout after the current time of
+ * the context timer, plus a pseudo-random delay in the range [0, @n_jitter)
+ * if @n_jitter is non-zero.
+ *
+ * Both values are 64-bit: callers compute them as products of the timeout
+ * multiplier and the 64-bit *_NSEC constants, and a narrower parameter would
+ * silently truncate the jitter range for large multipliers.
+ */
+static void n_acd_probe_schedule(NAcdProbe *probe, uint64_t n_timeout, uint64_t n_jitter) {
+        uint64_t n_time;
+
+        timer_now(&probe->acd->timer, &n_time);
+        n_time += n_timeout;
+
+        /*
+         * ACD specifies jitter values to reduce packet storms on the local
+         * link. This call accepts the maximum relative jitter value in
+         * nanoseconds as @n_jitter. We then use rand_r(3p) to get a
+         * pseudo-random jitter on top of the real timeout given as @n_timeout.
+         */
+        if (n_jitter) {
+                uint64_t hi, lo;
+
+                /* two separate statements, so the draw order is well-defined */
+                hi = (uint64_t)rand_r(&probe->acd->seed);
+                lo = (uint64_t)rand_r(&probe->acd->seed);
+
+                n_time += ((hi << 32) | lo) % n_jitter;
+        }
+
+        timeout_schedule(&probe->timeout, &probe->acd->timer, n_time);
+}
+
+/* Cancel the timeout of @probe by unscheduling its timeout object. */
+static void n_acd_probe_unschedule(NAcdProbe *probe) {
+ timeout_unschedule(&probe->timeout);
+}
+
+/*
+ * Check whether @probe is linked into the IP tree of its context and no
+ * direct tree neighbour carries the same address.
+ */
+static bool n_acd_probe_is_unique(NAcdProbe *probe) {
+        NAcdProbe *next, *prev;
+
+        /* a probe that is not linked is never reported as unique */
+        if (!c_rbnode_is_linked(&probe->ip_node))
+                return false;
+
+        /* only the direct successor and predecessor are inspected */
+        next = c_rbnode_entry(c_rbnode_next(&probe->ip_node), NAcdProbe, ip_node);
+        prev = c_rbnode_entry(c_rbnode_prev(&probe->ip_node), NAcdProbe, ip_node);
+
+        return !(next && next->ip.s_addr == probe->ip.s_addr) &&
+               !(prev && prev->ip.s_addr == probe->ip.s_addr);
+}
+
+/*
+ * Link @probe into the IP tree of its context and, if no other linked probe
+ * uses the same address, add the address to the BPF map of the context.
+ *
+ * Return: 0 on success, otherwise the error reported by
+ * n_acd_ensure_bpf_map_space() or n_acd_bpf_map_add(). On failure the probe
+ * is left unlinked.
+ */
+static int n_acd_probe_link(NAcdProbe *probe) {
+ int r;
+
+ /*
+ * Make sure the kernel bpf map has space for at least one more
+ * entry.
+ */
+ r = n_acd_ensure_bpf_map_space(probe->acd);
+ if (r)
+ return r;
+
+ /*
+ * Link entry into context, indexed by its IP. Note that we allow
+ * duplicates just fine. It is up to you to decide whether to avoid
+ * duplicates, if you don't want them. Duplicates on the same context
+ * do not conflict with each other, though.
+ */
+ {
+ CRBNode **slot, *parent;
+ NAcdProbe *other;
+
+ slot = &probe->acd->ip_tree.root;
+ parent = NULL;
+ /* smaller addresses descend to the left, equal or bigger ones to the right */
+ while (*slot) {
+ other = c_rbnode_entry(*slot, NAcdProbe, ip_node);
+ parent = *slot;
+ if (probe->ip.s_addr < other->ip.s_addr)
+ slot = &(*slot)->left;
+ else
+ slot = &(*slot)->right;
+ }
+
+ c_rbtree_add(&probe->acd->ip_tree, parent, slot, &probe->ip_node);
+ }
+
+ /*
+ * Add the ip address to the map, if it is not already there.
+ */
+ if (n_acd_probe_is_unique(probe)) {
+ r = n_acd_bpf_map_add(probe->acd->fd_bpf_map, &probe->ip);
+ if (r) {
+ /*
+ * Make sure the IP address is linked in userspace iff
+ * it is linked in the kernel.
+ */
+ c_rbnode_unlink(&probe->ip_node);
+ return r;
+ }
+ /* count of map entries, compared against max_bpf_map when growing the map */
+ ++probe->acd->n_bpf_map;
+ }
+
+ return 0;
+}
+
+/*
+ * Unlink @probe from the IP tree of its context. If it was the only linked
+ * probe for its address, the address is removed from the BPF map first.
+ */
+static void n_acd_probe_unlink(NAcdProbe *probe) {
+        int r;
+
+        /*
+         * If this is the only probe for a given IP, remove the IP from the
+         * kernel BPF map.
+         */
+        if (n_acd_probe_is_unique(probe)) {
+                r = n_acd_bpf_map_remove(probe->acd->fd_bpf_map, &probe->ip);
+                assert(r >= 0);
+                /* @r is only read by the assertion; keep NDEBUG builds warning-free */
+                (void)r;
+                --probe->acd->n_bpf_map;
+        }
+
+        c_rbnode_unlink(&probe->ip_node);
+}
+
+/*
+ * Create a new probe for the address in @config on the context @acd, link it
+ * into the context and schedule its first timeout. The probe takes its own
+ * reference on @acd and is returned in @probep.
+ *
+ * Return: 0 on success, N_ACD_E_INVALID_ARGUMENT if the configured address is
+ * 0, -ENOMEM on allocation failure, or the error reported by
+ * n_acd_probe_link().
+ */
+int n_acd_probe_new(NAcdProbe **probep, NAcd *acd, NAcdProbeConfig *config) {
+ _cleanup_(n_acd_probe_freep) NAcdProbe *probe = NULL;
+ int r;
+
+ if (!config->ip.s_addr)
+ return N_ACD_E_INVALID_ARGUMENT;
+
+ probe = malloc(sizeof(*probe));
+ if (!probe)
+ return -ENOMEM;
+
+ *probe = (NAcdProbe)N_ACD_PROBE_NULL(*probe);
+ probe->acd = n_acd_ref(acd);
+ probe->ip = config->ip;
+
+ /*
+ * We use the provided timeout-length as multiplier for all our
+ * timeouts. The provided timeout defines the maximum length of an
+ * entire probe-interval until the first announcement. Given the
+ * spec-provided parameters, this ends up as:
+ *
+ * PROBE_WAIT + PROBE_MAX + PROBE_MAX + ANNOUNCE_WAIT
+ * = 1s + 3s + 3s + 2s
+ * = 9s
+ *
+ * Hence, the default value for this timeout is 9000ms, which just
+ * ends up matching the spec-provided values.
+ *
+ * What we now semantically do is divide this timeout by 1ns/1000000.
+ * This first turns it into nanoseconds, then strips the unit by
+ * turning it into a multiplier. However, rather than performing the
+ * division here, we multiply all our timeouts by 1000000 statically
+ * at compile time. Therefore, we can use the user-provided timeout as
+ * unmodified multiplier. No conversion necessary.
+ */
+ probe->timeout_multiplier = config->timeout_msecs;
+
+ r = n_acd_probe_link(probe);
+ if (r)
+ return r;
+
+ /*
+ * Now that everything is set up, we have to send the first probe. This
+ * is done after ~PROBE_WAIT seconds, hence we schedule our timer.
+ * In case no timeout-multiplier is set, we pretend we already sent all
+ * probes successfully and schedule the timer so we proceed with the
+ * announcements. We must schedule a fake timer there, since we are not
+ * allowed to advance the state machine outside of n_acd_dispatch().
+ */
+ if (probe->timeout_multiplier) {
+ probe->n_iteration = 0;
+ n_acd_probe_schedule(probe,
+ 0,
+ probe->timeout_multiplier * N_ACD_RFC_PROBE_WAIT_NSEC);
+ } else {
+ probe->n_iteration = N_ACD_RFC_PROBE_NUM;
+ n_acd_probe_schedule(probe, 0, 0);
+ }
+
+ /* hand ownership to the caller and disarm the cleanup handler */
+ *probep = probe;
+ probe = NULL;
+ return 0;
+}
+
+/**
+ * n_acd_probe_free() - free a probe
+ * @probe: probe to free, or NULL
+ *
+ * Frees all events still queued on @probe, cancels its timeout, unlinks it
+ * from its context, drops its reference on the context and releases the
+ * probe itself. Passing NULL is a no-op.
+ *
+ * Return: NULL is returned.
+ */
+_public_ NAcdProbe *n_acd_probe_free(NAcdProbe *probe) {
+ NAcdEventNode *node, *t_node;
+
+ if (!probe)
+ return NULL;
+
+ /* the _safe iterator is required, since freeing a node unlinks it */
+ c_list_for_each_entry_safe(node, t_node, &probe->event_list, probe_link)
+ n_acd_event_node_free(node);
+
+ n_acd_probe_unschedule(probe);
+ n_acd_probe_unlink(probe);
+ probe->acd = n_acd_unref(probe->acd);
+ free(probe);
+
+ return NULL;
+}
+
+/*
+ * Raise @event on the context of @probe and attach @probe to it. The new
+ * event node is queued on the context (via n_acd_raise()) and on the event
+ * list of @probe. If @nodep is non-NULL, the node is returned there so the
+ * caller can fill in further event details.
+ *
+ * Only READY, USED, DEFENDED and CONFLICT events are accepted.
+ *
+ * Return: 0 on success, the error of n_acd_raise(), or -EIO for an
+ * unsupported event type.
+ */
+int n_acd_probe_raise(NAcdProbe *probe, NAcdEventNode **nodep, unsigned int event) {
+ _cleanup_(n_acd_event_node_freep) NAcdEventNode *node = NULL;
+ int r;
+
+ r = n_acd_raise(probe->acd, &node, event);
+ if (r)
+ return r;
+
+ switch (event) {
+ case N_ACD_EVENT_READY:
+ node->event.ready.probe = probe;
+ break;
+ case N_ACD_EVENT_USED:
+ node->event.used.probe = probe;
+ break;
+ case N_ACD_EVENT_DEFENDED:
+ node->event.defended.probe = probe;
+ break;
+ case N_ACD_EVENT_CONFLICT:
+ node->event.conflict.probe = probe;
+ break;
+ default:
+ /* the cleanup handler frees the already queued node again */
+ assert(0);
+ return -EIO;
+ }
+
+ c_list_link_tail(&probe->event_list, &node->probe_link);
+
+ if (nodep)
+ *nodep = node;
+ /* the node is owned by the event lists now; disarm the cleanup handler */
+ node = NULL;
+ return 0;
+}
+
+/*
+ * Advance the state machine of @probe after its timeout fired.
+ *
+ * In state PROBING this sends the next probe, or raises N_ACD_EVENT_READY
+ * once all probes were sent and the final wait elapsed. In state ANNOUNCING
+ * it sends the next announcement. No timeouts are expected in any other
+ * state.
+ *
+ * n_acd_send() reports packets that never reached the network with the
+ * positive code N_ACD_E_DROPPED. Those are not errors here; the packet is
+ * simply retried on the next timeout.
+ *
+ * Return: 0 on success, -EIO if called in an unexpected state, otherwise the
+ * error reported by n_acd_send() or n_acd_probe_raise().
+ */
+int n_acd_probe_handle_timeout(NAcdProbe *probe) {
+        int r;
+
+        switch (probe->state) {
+        case N_ACD_PROBE_STATE_PROBING:
+                /*
+                 * We are still PROBING. We send 3 probes with a random timeout
+                 * scheduled between each. If, after a fixed timeout, we did
+                 * not receive any conflict we consider the probing successful.
+                 */
+                if (probe->n_iteration < N_ACD_RFC_PROBE_NUM) {
+                        /*
+                         * We have not sent all 3 probes, yet. A timer fired,
+                         * so we are ready to send the next probe. If this is
+                         * the third probe, schedule a timer for ANNOUNCE_WAIT
+                         * to give other peers a chance to answer. If this is
+                         * not the third probe, wait between PROBE_MIN and
+                         * PROBE_MAX for the next probe.
+                         */
+
+                        r = n_acd_send(probe->acd, &probe->ip, NULL);
+                        if (r) {
+                                if (r != N_ACD_E_DROPPED)
+                                        return r;
+
+                                /*
+                                 * Packet was dropped, and we know about it. It
+                                 * never reached the network. Reasons are
+                                 * manifold, and n_acd_send() raises events if
+                                 * necessary.
+                                 * From a probe-perspective, we simply pretend
+                                 * we never sent the probe and schedule a
+                                 * timeout for the next probe, effectively
+                                 * doubling a single probe-interval.
+                                 */
+                        } else {
+                                /* Successfully sent, so advance counter. */
+                                ++probe->n_iteration;
+                        }
+
+                        if (probe->n_iteration < N_ACD_RFC_PROBE_NUM)
+                                n_acd_probe_schedule(probe,
+                                                     probe->timeout_multiplier * N_ACD_RFC_PROBE_MIN_NSEC,
+                                                     probe->timeout_multiplier * (N_ACD_RFC_PROBE_MAX_NSEC - N_ACD_RFC_PROBE_MIN_NSEC));
+                        else
+                                n_acd_probe_schedule(probe,
+                                                     probe->timeout_multiplier * N_ACD_RFC_ANNOUNCE_WAIT_NSEC,
+                                                     0);
+                } else {
+                        /*
+                         * All 3 probes succeeded and we waited enough to
+                         * consider this address usable by now. Do not announce
+                         * the address, yet. We must first give the caller a
+                         * chance to configure the address (so they can answer
+                         * ARP requests), before announcing it.
+                         */
+                        r = n_acd_probe_raise(probe, NULL, N_ACD_EVENT_READY);
+                        if (r)
+                                return r;
+
+                        probe->state = N_ACD_PROBE_STATE_CONFIGURING;
+                }
+
+                break;
+
+        case N_ACD_PROBE_STATE_ANNOUNCING:
+                /*
+                 * We are ANNOUNCING, meaning the caller configured the address
+                 * on the interface and is actively using it. We send 3
+                 * announcements out, in a short interval, and then just
+                 * perform passive conflict detection.
+                 * Note that once all 3 announcements are sent, we no longer
+                 * schedule a timer, so this part should not trigger, anymore.
+                 */
+
+                r = n_acd_send(probe->acd, &probe->ip, &probe->ip);
+                if (r) {
+                        if (r != N_ACD_E_DROPPED)
+                                return r;
+
+                        /*
+                         * See above in STATE_PROBING for details. We know the
+                         * packet was never sent, so we simply try again after
+                         * extending the timer.
+                         */
+                } else {
+                        /* Successfully sent, so advance counter. */
+                        ++probe->n_iteration;
+                }
+
+                if (probe->n_iteration < N_ACD_RFC_ANNOUNCE_NUM) {
+                        /*
+                         * Announcements are always scheduled according to the
+                         * time-intervals specified in the spec. We always use
+                         * the RFC5227-mandated multiplier.
+                         * If you reconsider this, note that timeout_multiplier
+                         * might be 0 here.
+                         */
+                        n_acd_probe_schedule(probe,
+                                             N_ACD_TIMEOUT_RFC5227 * N_ACD_RFC_ANNOUNCE_INTERVAL_NSEC,
+                                             0);
+                }
+
+                break;
+
+        case N_ACD_PROBE_STATE_CONFIGURING:
+        case N_ACD_PROBE_STATE_FAILED:
+        default:
+                /*
+                 * There are no timeouts in these states. If we trigger one,
+                 * something is fishy.
+                 */
+                assert(0);
+                return -EIO;
+        }
+
+        return 0;
+}
+
+/*
+ * Handle a conflicting ARP packet that was received for the address of
+ * @probe.
+ *
+ * @packet is the received ARP packet; its sender hardware address is copied
+ * into the raised event. @hard_conflict tells whether the packet is a hard
+ * conflict, as opposed to a soft conflict such as another peer doing ACD.
+ *
+ * In state PROBING any conflict fails the probe with N_ACD_EVENT_USED. In
+ * state CONFIGURING the packet is ignored. In state ANNOUNCING, hard
+ * conflicts are defended or reported according to the defence policy of the
+ * probe. Packets are not expected in any other state.
+ *
+ * n_acd_send() reports packets that never reached the network with the
+ * positive code N_ACD_E_DROPPED; a dropped defence is not a fatal error.
+ *
+ * Return: 0 on success, -EIO if called in an unexpected state, otherwise the
+ * error reported by n_acd_send() or n_acd_probe_raise().
+ */
+int n_acd_probe_handle_packet(NAcdProbe *probe, struct ether_arp *packet, bool hard_conflict) {
+        NAcdEventNode *node;
+        uint64_t now;
+        int r;
+
+        timer_now(&probe->acd->timer, &now);
+
+        switch (probe->state) {
+        case N_ACD_PROBE_STATE_PROBING:
+                /*
+                 * Regardless whether this is a hard or soft conflict, we must
+                 * treat this as a probe failure. That is, notify the caller of
+                 * the conflict and wait for further instructions. We do not
+                 * react to this, until the caller tells us what to do, but we
+                 * do stop sending further probes.
+                 */
+                r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_USED);
+                if (r)
+                        return r;
+
+                node->event.used.sender = node->sender;
+                node->event.used.n_sender = ETH_ALEN;
+                memcpy(node->sender, packet->arp_sha, ETH_ALEN);
+
+                n_acd_probe_unschedule(probe);
+                n_acd_probe_unlink(probe);
+                probe->state = N_ACD_PROBE_STATE_FAILED;
+
+                break;
+
+        case N_ACD_PROBE_STATE_CONFIGURING:
+                /*
+                 * We are waiting for the caller to configure the interface and
+                 * start ANNOUNCING. In this state, we cannot defend the
+                 * address as that would indicate that it is ready to be used,
+                 * and we cannot signal CONFLICT or USED as the caller may
+                 * already have started to use the address (and may have
+                 * configured the engine to always defend it, which means they
+                 * should be able to rely on never losing it after READY).
+                 * Simply drop the event, and rely on the anticipated ANNOUNCE
+                 * to trigger it again.
+                 */
+
+                break;
+
+        case N_ACD_PROBE_STATE_ANNOUNCING: {
+                /*
+                 * We were already instructed to announce the address, which
+                 * means the address is configured and in use. Hence, the
+                 * caller is responsible to serve regular ARP queries. Meaning,
+                 * we can ignore any soft conflicts (other peers doing ACD).
+                 *
+                 * But if we see a hard-conflict, we either defend the address
+                 * according to the caller's instructions, or we report the
+                 * conflict and bail out.
+                 */
+                bool conflict = false, rate_limited = false;
+
+                if (!hard_conflict)
+                        break;
+
+                rate_limited = now < probe->last_defend + N_ACD_RFC_DEFEND_INTERVAL_NSEC;
+
+                switch (probe->defend) {
+                case N_ACD_DEFEND_NEVER:
+                        conflict = true;
+                        break;
+                case N_ACD_DEFEND_ONCE:
+                        if (rate_limited) {
+                                conflict = true;
+                                break;
+                        }
+
+                        /* fallthrough */
+                case N_ACD_DEFEND_ALWAYS:
+                        if (!rate_limited) {
+                                r = n_acd_send(probe->acd, &probe->ip, &probe->ip);
+                                if (r) {
+                                        if (r != N_ACD_E_DROPPED)
+                                                return r;
+
+                                        /*
+                                         * The defence never reached the
+                                         * network. With DEFEND_ONCE this
+                                         * counts as a lost address.
+                                         */
+                                        if (probe->defend == N_ACD_DEFEND_ONCE) {
+                                                conflict = true;
+                                                break;
+                                        }
+                                }
+
+                                /* only a defence that was really sent starts the rate-limit window */
+                                if (r != N_ACD_E_DROPPED)
+                                        probe->last_defend = now;
+                        }
+
+                        r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_DEFENDED);
+                        if (r)
+                                return r;
+
+                        node->event.defended.sender = node->sender;
+                        node->event.defended.n_sender = ETH_ALEN;
+                        memcpy(node->sender, packet->arp_sha, ETH_ALEN);
+
+                        break;
+                }
+
+                if (conflict) {
+                        r = n_acd_probe_raise(probe, &node, N_ACD_EVENT_CONFLICT);
+                        if (r)
+                                return r;
+
+                        node->event.conflict.sender = node->sender;
+                        node->event.conflict.n_sender = ETH_ALEN;
+                        memcpy(node->sender, packet->arp_sha, ETH_ALEN);
+
+                        n_acd_probe_unschedule(probe);
+                        n_acd_probe_unlink(probe);
+                        probe->state = N_ACD_PROBE_STATE_FAILED;
+                }
+
+                break;
+        }
+
+        case N_ACD_PROBE_STATE_FAILED:
+        default:
+                /*
+                 * We are not listening for packets in these states. If we receive one,
+                 * something is fishy.
+                 */
+                assert(0);
+                return -EIO;
+        }
+
+        return 0;
+}
+
+/**
+ * n_acd_probe_set_userdata() - attach a caller-defined pointer to a probe
+ * @probe: probe to modify
+ * @userdata: pointer to store
+ *
+ * Stores @userdata on @probe, replacing any previously stored pointer. It can
+ * be read back with n_acd_probe_get_userdata().
+ */
+_public_ void n_acd_probe_set_userdata(NAcdProbe *probe, void *userdata) {
+ probe->userdata = userdata;
+}
+
+/**
+ * n_acd_probe_get_userdata() - read the caller-defined pointer of a probe
+ * @probe: probe to query
+ * @userdatap: output argument for the stored pointer
+ *
+ * Writes the pointer currently stored on @probe to @userdatap.
+ */
+_public_ void n_acd_probe_get_userdata(NAcdProbe *probe, void **userdatap) {
+ *userdatap = probe->userdata;
+}
+
+/**
+ * n_acd_probe_announce() - announce the configured IP address
+ * @probe: probe object
+ * @defend: defence policy
+ *
+ * Announce the IP address on the local link, and start defending it according
+ * to the given policy, which must be one of N_ACD_DEFEND_ONCE,
+ * N_ACD_DEFEND_NEVER, or N_ACD_DEFEND_ALWAYS.
+ *
+ * This must be called in response to an N_ACD_EVENT_READY event, and only
+ * after the given address has been configured on the given network interface.
+ *
+ * Return: 0 on success, N_ACD_E_INVALID_ARGUMENT in case the defence policy
+ * is invalid, negative error code on failure.
+ */
+_public_ int n_acd_probe_announce(NAcdProbe *probe, unsigned int defend) {
+        /* reject anything outside the known set of defence policies */
+        if (defend >= _N_ACD_DEFEND_N)
+                return N_ACD_E_INVALID_ARGUMENT;
+
+        probe->n_iteration = 0;
+        probe->defend = defend;
+        probe->state = N_ACD_PROBE_STATE_ANNOUNCING;
+
+        /*
+         * The state-machine must only be advanced from within
+         * n_acd_dispatch(). Hence, arm an immediate fake-timeout rather than
+         * sending the first announcement right here.
+         */
+        n_acd_probe_schedule(probe, 0, 0);
+
+        return 0;
+}
diff --git a/shared/n-acd/src/n-acd.c b/shared/n-acd/src/n-acd.c
index 9164f9589..def56a215 100644
--- a/shared/n-acd/src/n-acd.c
+++ b/shared/n-acd/src/n-acd.c
@@ -1,188 +1,38 @@
/*
* IPv4 Address Conflict Detection
- *
- * This implements the main n-acd API. It is built around an epoll-fd to
- * encapsulate a timerfd+socket. The n-acd context has quite straightforward
- * lifetime rules. The parameters must be set when the engine is started, and
- * they can only be changed by stopping and restartding the engine. The engine
- * is started on demand and stopped when no longer needed.
- * During the entire lifetime the context can be dispatched. That is, the
- * dispatcher does not have to be aware of the context state. After each call
- * to dispatch(), the caller must pop all pending events until -EAGAIN is
- * returned.
- *
- * If a conflict is detected, the ACD engine reports to the caller and stops
- * the engine. The caller can now modify parameters and restart the engine, if
- * required.
*/
#include
#include
+#include
#include
#include
#include
+#include
#include
-#include
-#include
#include
-#include
#include
#include
-#include
-#include
#include
#include
#include
#include
#include
-#include
#include
#include
#include "n-acd.h"
-
-#define _public_ __attribute__((__visibility__("default")))
-
-/*
- * These parameters and timing intervals specified in RFC-5227. The original
- * values are:
- *
- * PROBE_NUM 3
- * PROBE_WAIT 1s
- * PROBE_MIN 1s
- * PROBE_MAX 3s
- * ANNOUNCE_NUM 3
- * ANNOUNCE_WAIT 2s
- * ANNOUNCE_INTERVAL 2s
- * MAX_CONFLICTS 10
- * RATE_LIMIT_INTERVAL 60s
- * DEFEND_INTERVAL 10s
- *
- * If we assume a best-case and worst-case scenario for non-conflicted runs, we
- * end up with a runtime between 4s and 9s to finish the probe. Then it still
- * takes a fixed 4s to finish the announcements.
- *
- * RFC 5227 section 1.1:
- * [...] (Note that the values listed here are fixed constants; they are
- * not intended to be modifiable by implementers, operators, or end users.
- * These constants are given symbolic names here to facilitate the writing
- * of future standards that may want to reference this document with
- * different values for these named constants; however, at the present time
- * no such future standards exist.) [...]
- *
- * Unfortunately, no-one ever stepped up to write a "future standard" to revise
- * the timings. A 9s timeout for successful link setups is not acceptable today.
- * Hence, we will just go forward and ignore the proposed values. On both
- * wired and wireless local links round-trip latencies of below 3ms are common,
- * while latencies above 10ms are rarely seen. We require the caller to set a
- * timeout multiplier, where 1 corresponds to a total probe time of 0.5 ms and
- * 1.0 ms. On modern networks a multiplier of about 100 should be a reasonable
- * default. To comply with the RFC select a multiplier of 9000.
- */
-#define N_ACD_RFC_PROBE_NUM (3)
-#define N_ACD_RFC_PROBE_WAIT_USEC (UINT64_C(111)) /* 111us */
-#define N_ACD_RFC_PROBE_MIN_USEC (UINT64_C(111)) /* 111us */
-#define N_ACD_RFC_PROBE_MAX_USEC (UINT64_C(333)) /* 333us */
-#define N_ACD_RFC_ANNOUNCE_NUM (3)
-#define N_ACD_RFC_ANNOUNCE_WAIT_USEC (UINT64_C(222)) /* 222us */
-#define N_ACD_RFC_ANNOUNCE_INTERVAL_USEC (UINT64_C(222)) /* 222us */
-#define N_ACD_RFC_MAX_CONFLICTS (10)
-#define N_ACD_RFC_RATE_LIMIT_INTERVAL_USEC (UINT64_C(60000000)) /* 60s */
-#define N_ACD_RFC_DEFEND_INTERVAL_USEC (UINT64_C(10000000)) /* 10s */
-
-/*
- * Fake ENETDOWN error-code. We use this as replacement for known EFOOBAR error
- * codes. It is explicitly chosen to be outside the known error-code range.
- * Whenever we are deep down in a call-stack and notice a ENETDOWN error, we
- * return this instead. It is caught by the top-level dispatcher and then
- * properly handled.
- * This avoids gracefully handling ENETDOWN in call-stacks, but then continuing
- * with some work in the callers without noticing the soft failure.
- */
-#define N_ACD_E_DOWN (INT_MAX)
-
-#define TIME_INFINITY ((uint64_t) -1)
+#include "n-acd-private.h"
enum {
N_ACD_EPOLL_TIMER,
N_ACD_EPOLL_SOCKET,
};
-enum {
- N_ACD_STATE_INIT,
- N_ACD_STATE_PROBING,
- N_ACD_STATE_CONFIGURING,
- N_ACD_STATE_ANNOUNCING,
-};
-
-typedef struct NAcdEventNode {
- NAcdEvent event;
- uint8_t sender[ETH_ALEN];
- CList link;
-} NAcdEventNode;
-
-struct NAcd {
- /* context */
- unsigned int seed;
- int fd_epoll;
- int fd_timer;
-
- /* configuration */
- NAcdConfig config;
- uint8_t mac[ETH_ALEN];
- uint64_t timeout_multiplier;
-
- /* runtime */
- int fd_socket;
- unsigned int state;
- unsigned int n_iteration;
- unsigned int n_conflicts;
- unsigned int defend;
- uint64_t last_defend;
- uint64_t last_conflict;
-
- /* pending events */
- CList events;
- NAcdEventNode *current;
-};
-
-static int n_acd_errno(void) {
- /*
- * Compilers continuously warn about uninitialized variables since they
- * cannot deduce that `return -errno;` will always be negative. This
- * small wrapper makes sure compilers figure that out. Use it as
- * replacement for `errno` read access. Yes, it generates worse code,
- * but only marginally and only affects slow-paths.
- */
- return abs(errno) ? : EIO;
-}
-
-static int n_acd_event_node_new(NAcdEventNode **nodep, unsigned int event) {
- NAcdEventNode *node;
-
- node = calloc(1, sizeof(*node));
- if (!node)
- return -ENOMEM;
-
- node->event.event = event;
- node->link = (CList)C_LIST_INIT(node->link);
-
- *nodep = node;
-
- return 0;
-}
-
-static NAcdEventNode *n_acd_event_node_free(NAcdEventNode *node) {
- if (!node)
- return NULL;
-
- c_list_unlink(&node->link);
- free(node);
-
- return NULL;
-}
-
static int n_acd_get_random(unsigned int *random) {
- uint8_t hash_seed[] = { 0x3a, 0x0c, 0xa6, 0xdd, 0x44, 0xef, 0x5f, 0x7a, 0x5e, 0xd7, 0x25, 0x37, 0xbf, 0x4e, 0x80, 0xa1 };
+ uint8_t hash_seed[] = {
+ 0x3a, 0x0c, 0xa6, 0xdd, 0x44, 0xef, 0x5f, 0x7a,
+ 0x5e, 0xd7, 0x25, 0x37, 0xbf, 0x4e, 0x80, 0xa1,
+ };
CSipHash hash = C_SIPHASH_NULL;
struct timespec ts;
const uint8_t *p;
@@ -203,7 +53,7 @@ static int n_acd_get_random(unsigned int *random) {
if (p)
c_siphash_append(&hash, p, 16);
- r = clock_gettime(CLOCK_BOOTTIME, &ts);
+ r = clock_gettime(CLOCK_MONOTONIC, &ts);
if (r < 0)
return -n_acd_errno();
@@ -214,12 +64,243 @@ static int n_acd_get_random(unsigned int *random) {
return 0;
}
-static void n_acd_reset(NAcd *acd) {
- acd->state = N_ACD_STATE_INIT;
- acd->defend = N_ACD_DEFEND_NEVER;
- acd->n_iteration = 0;
- acd->last_defend = 0;
- timerfd_settime(acd->fd_timer, 0, &(struct itimerspec){}, NULL);
+/*
+ * Create a non-blocking, close-on-exec packet socket for ARP traffic on the
+ * interface given in @config and return it in @fdp. If @fd_bpf_prog is
+ * non-negative, it is attached to the socket as BPF program before the socket
+ * is bound.
+ *
+ * Return: 0 on success, negative error code on failure. On failure the
+ * socket is closed again and @fdp is left untouched.
+ */
+static int n_acd_socket_new(int *fdp, int fd_bpf_prog, NAcdConfig *config) {
+        const struct sockaddr_ll address = {
+                .sll_family = AF_PACKET,
+                .sll_protocol = htobe16(ETH_P_ARP),
+                .sll_ifindex = config->ifindex,
+                .sll_halen = ETH_ALEN,
+                .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+        };
+        int r, s = -1;
+
+        s = socket(PF_PACKET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
+        if (s < 0) {
+                r = -n_acd_errno();
+                goto error;
+        }
+
+        if (fd_bpf_prog >= 0) {
+                r = setsockopt(s, SOL_SOCKET, SO_ATTACH_BPF, &fd_bpf_prog, sizeof(fd_bpf_prog));
+                if (r < 0) {
+                        /* must go through the error path, or the socket leaks */
+                        r = -n_acd_errno();
+                        goto error;
+                }
+        }
+
+        r = bind(s, (struct sockaddr *)&address, sizeof(address));
+        if (r < 0) {
+                r = -n_acd_errno();
+                goto error;
+        }
+
+        *fdp = s;
+        s = -1;
+        return 0;
+
+error:
+        /* @r was captured before close(), which may overwrite errno */
+        if (s >= 0)
+                close(s);
+        return r;
+}
+
+/**
+ * n_acd_config_new() - allocate a context configuration
+ * @configp:        output argument for the new configuration
+ *
+ * Allocates a configuration object, resets it to N_ACD_CONFIG_NULL and
+ * returns it in @configp.
+ *
+ * Return: 0 on success, -ENOMEM if the allocation failed.
+ */
+_public_ int n_acd_config_new(NAcdConfig **configp) {
+        NAcdConfig *fresh;
+
+        fresh = malloc(sizeof(*fresh));
+        if (!fresh)
+                return -ENOMEM;
+
+        *fresh = (NAcdConfig)N_ACD_CONFIG_NULL(*fresh);
+        *configp = fresh;
+
+        return 0;
+}
+
+/**
+ * n_acd_config_free() - release a context configuration
+ * @config:        configuration to release, or NULL
+ *
+ * Passing NULL is a no-op.
+ *
+ * Return: NULL is returned.
+ */
+_public_ NAcdConfig *n_acd_config_free(NAcdConfig *config) {
+        if (config)
+                free(config);
+
+        return NULL;
+}
+
+/**
+ * n_acd_config_set_ifindex() - set the interface index of a configuration
+ * @config: configuration to modify
+ * @ifindex: interface index to store
+ *
+ * Stores @ifindex in @config. Note that n_acd_new() rejects configurations
+ * whose interface index is not positive.
+ */
+_public_ void n_acd_config_set_ifindex(NAcdConfig *config, int ifindex) {
+ config->ifindex = ifindex;
+}
+
+/**
+ * n_acd_config_set_transport() - set the transport of a configuration
+ * @config: configuration to modify
+ * @transport: transport to store
+ *
+ * Stores @transport in @config. Note that n_acd_new() only accepts
+ * N_ACD_TRANSPORT_ETHERNET.
+ */
+_public_ void n_acd_config_set_transport(NAcdConfig *config, unsigned int transport) {
+ config->transport = transport;
+}
+
+/**
+ * n_acd_config_set_mac() - set the hardware address of a configuration
+ * @config: configuration to modify
+ * @mac: buffer holding the hardware address
+ * @n_mac: length of @mac in bytes
+ *
+ * Records @n_mac as given, but copies at most ETH_ALEN bytes of @mac into
+ * @config. Note that n_acd_new() rejects configurations whose recorded
+ * length is not ETH_ALEN.
+ */
+_public_ void n_acd_config_set_mac(NAcdConfig *config, const uint8_t *mac, size_t n_mac) {
+ config->n_mac = n_mac;
+ /* clamp the copy, so an oversized @n_mac cannot overflow the buffer */
+ memcpy(config->mac, mac, n_mac > ETH_ALEN ? ETH_ALEN : n_mac);
+}
+
+/*
+ * Allocate an event node, reset it to N_ACD_EVENT_NODE_NULL and return it in
+ * @nodep.
+ *
+ * Return: 0 on success, -ENOMEM if the allocation failed.
+ */
+int n_acd_event_node_new(NAcdEventNode **nodep) {
+        NAcdEventNode *fresh = malloc(sizeof(*fresh));
+
+        if (!fresh)
+                return -ENOMEM;
+
+        *fresh = (NAcdEventNode)N_ACD_EVENT_NODE_NULL(*fresh);
+        *nodep = fresh;
+
+        return 0;
+}
+
+/*
+ * Unlink @node from the probe and context event lists and release it.
+ * Passing NULL is a no-op.
+ *
+ * Return: NULL is returned.
+ */
+NAcdEventNode *n_acd_event_node_free(NAcdEventNode *node) {
+        if (node) {
+                c_list_unlink(&node->probe_link);
+                c_list_unlink(&node->acd_link);
+                free(node);
+        }
+
+        return NULL;
+}
+
+/*
+ * Make sure the BPF map of @acd can take at least one more entry.
+ *
+ * If the map is full, a new map of twice the size is created and filled with
+ * the addresses of all probes linked into the IP tree. A program compiled
+ * against the new map is attached to the socket of the context, and the old
+ * map is closed only after that.
+ *
+ * Return: 0 on success, otherwise the error of the failing step. On failure
+ * the old map stays in place.
+ */
+int n_acd_ensure_bpf_map_space(NAcd *acd) {
+ NAcdProbe *probe;
+ _cleanup_(n_acd_closep) int fd_map = -1, fd_prog = -1;
+ size_t max_map;
+ int r;
+
+ /* still room left, nothing to do */
+ if (acd->n_bpf_map < acd->max_bpf_map)
+ return 0;
+
+ max_map = 2 * acd->max_bpf_map;
+
+ r = n_acd_bpf_map_create(&fd_map, max_map);
+ if (r)
+ return r;
+
+ c_rbtree_for_each_entry(probe, &acd->ip_tree, ip_node) {
+ r = n_acd_bpf_map_add(fd_map, &probe->ip);
+ if (r)
+ return r;
+ }
+
+ r = n_acd_bpf_compile(&fd_prog, fd_map, (struct ether_addr*) acd->mac);
+ if (r)
+ return r;
+
+ /* a negative fd_prog means no program was produced, so nothing is attached */
+ if (fd_prog >= 0) {
+ r = setsockopt(acd->fd_socket, SOL_SOCKET, SO_ATTACH_BPF, &fd_prog, sizeof(fd_prog));
+ if (r)
+ return -n_acd_errno();
+ }
+
+ /* switch over to the new map; resetting fd_map disarms its cleanup handler */
+ if (acd->fd_bpf_map >= 0)
+ close(acd->fd_bpf_map);
+ acd->fd_bpf_map = fd_map;
+ fd_map = -1;
+ acd->max_bpf_map = max_map;
+ return 0;
+}
+
+/**
+ * n_acd_new() - create a new ACD context
+ * @acdp: output argument for context
+ * @config: configuration parameters
+ *
+ * Create a new ACD context and return it in @acdp. The interface index and
+ * the hardware address are copied from @config.
+ *
+ * Return: 0 on success, N_ACD_E_INVALID_ARGUMENT if @config is not a valid
+ * ethernet configuration, or a negative error code on failure.
+ */
+_public_ int n_acd_new(NAcd **acdp, NAcdConfig *config) {
+ _cleanup_(n_acd_unrefp) NAcd *acd = NULL;
+ _cleanup_(n_acd_closep) int fd_bpf_prog = -1;
+ int r;
+
+ /* only ethernet with a 6-byte, non-broadcast hardware address is accepted */
+ if (config->ifindex <= 0 ||
+ config->transport != N_ACD_TRANSPORT_ETHERNET ||
+ config->n_mac != ETH_ALEN ||
+ !memcmp(config->mac, (uint8_t[ETH_ALEN]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, ETH_ALEN))
+ return N_ACD_E_INVALID_ARGUMENT;
+
+ acd = malloc(sizeof(*acd));
+ if (!acd)
+ return -ENOMEM;
+
+ *acd = (NAcd)N_ACD_NULL(*acd);
+ acd->ifindex = config->ifindex;
+ memcpy(acd->mac, config->mac, ETH_ALEN);
+
+ r = n_acd_get_random(&acd->seed);
+ if (r)
+ return r;
+
+ acd->fd_epoll = epoll_create1(EPOLL_CLOEXEC);
+ if (acd->fd_epoll < 0)
+ return -n_acd_errno();
+
+ r = timer_init(&acd->timer);
+ if (r < 0)
+ return r;
+
+ /* initial map size; n_acd_ensure_bpf_map_space() doubles it when full */
+ acd->max_bpf_map = 8;
+
+ r = n_acd_bpf_map_create(&acd->fd_bpf_map, acd->max_bpf_map);
+ if (r)
+ return r;
+
+ r = n_acd_bpf_compile(&fd_bpf_prog, acd->fd_bpf_map, (struct ether_addr*) acd->mac);
+ if (r)
+ return r;
+
+ r = n_acd_socket_new(&acd->fd_socket, fd_bpf_prog, config);
+ if (r)
+ return r;
+
+ r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, acd->timer.fd,
+ &(struct epoll_event){
+ .events = EPOLLIN,
+ .data.u32 = N_ACD_EPOLL_TIMER,
+ });
+ if (r < 0)
+ return -n_acd_errno();
+
+ r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, acd->fd_socket,
+ &(struct epoll_event){
+ .events = EPOLLIN,
+ .data.u32 = N_ACD_EPOLL_SOCKET,
+ });
+ if (r < 0)
+ return -n_acd_errno();
+
+ /* hand ownership to the caller and disarm the cleanup handler */
+ *acdp = acd;
+ acd = NULL;
+ return 0;
+}
+
+static void n_acd_free(NAcd *acd) {
+ NAcdEventNode *node, *t_node;
+
+ if (!acd)
+ return;
+
+ c_list_for_each_entry_safe(node, t_node, &acd->event_list, acd_link)
+ n_acd_event_node_free(node);
+
+ assert(c_rbtree_is_empty(&acd->ip_tree));
if (acd->fd_socket >= 0) {
assert(acd->fd_epoll >= 0);
@@ -227,101 +308,16 @@ static void n_acd_reset(NAcd *acd) {
close(acd->fd_socket);
acd->fd_socket = -1;
}
-}
-/**
- * n_acd_new() - create a new ACD context
- * @acdp: output argument for context
- *
- * Create a new ACD context and return it in @acdp.
- *
- * Return: 0 on success, or a negative error code on failure.
- */
-_public_ int n_acd_new(NAcd **acdp) {
- NAcd *acd;
- int r;
-
- acd = calloc(1, sizeof(*acd));
- if (!acd)
- return -ENOMEM;
-
- acd->fd_epoll = -1;
- acd->fd_timer = -1;
- acd->fd_socket = -1;
- acd->state = N_ACD_STATE_INIT;
- acd->defend = N_ACD_DEFEND_NEVER;
- acd->events = (CList)C_LIST_INIT(acd->events);
- acd->last_conflict = TIME_INFINITY;
-
- r = n_acd_get_random(&acd->seed);
- if (r < 0)
- return r;
-
- acd->fd_epoll = epoll_create1(EPOLL_CLOEXEC);
- if (acd->fd_epoll < 0) {
- r = -n_acd_errno();
- goto error;
+ if (acd->fd_bpf_map >= 0) {
+ close(acd->fd_bpf_map);
+ acd->fd_bpf_map = -1;
}
- acd->fd_timer = timerfd_create(CLOCK_BOOTTIME, TFD_CLOEXEC | TFD_NONBLOCK);
- if (acd->fd_timer < 0 && errno == EINVAL) {
- /*
- * Fall back to CLOCK_MONOTONIC when CLOCK_BOOTTIME is
- * not available (kernel < 3.15).
- */
- acd->fd_timer = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
- }
- if (acd->fd_timer < 0) {
- r = -n_acd_errno();
- goto error;
- }
-
- r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, acd->fd_timer,
- &(struct epoll_event){
- .events = EPOLLIN,
- .data.u32 = N_ACD_EPOLL_TIMER,
- });
- if (r < 0) {
- r = -n_acd_errno();
- goto error;
- }
-
- *acdp = acd;
- return 0;
-
-error:
- n_acd_free(acd);
- return r;
-}
-
-/**
- * n_acd_free() - free an ACD context
- *
- * Frees all resources held by the context. This may be called at any time,
- * but doing so invalidates all data owned by the context.
- *
- * Return: NULL.
- */
-_public_ void n_acd_free(NAcd *acd) {
- NAcdEventNode *node;
-
- if (!acd)
- return;
-
- n_acd_reset(acd);
-
- acd->current = n_acd_event_node_free(acd->current);
-
- while ((node = c_list_first_entry(&acd->events, NAcdEventNode, link)))
- n_acd_event_node_free(node);
-
- assert(acd->fd_socket < 0);
-
- if (acd->fd_timer >= 0) {
+ if (acd->timer.fd >= 0) {
assert(acd->fd_epoll >= 0);
- epoll_ctl(acd->fd_epoll, EPOLL_CTL_DEL, acd->fd_timer, NULL);
- close(acd->fd_timer);
- acd->fd_timer = -1;
+ epoll_ctl(acd->fd_epoll, EPOLL_CTL_DEL, acd->timer.fd, NULL);
+ timer_deinit(&acd->timer);
}
if (acd->fd_epoll >= 0) {
@@ -332,271 +328,164 @@ _public_ void n_acd_free(NAcd *acd) {
free(acd);
}
+/**
+ * n_acd_ref() - acquire a reference to an ACD context (NULL is passed through)
+ */
+_public_ NAcd *n_acd_ref(NAcd *acd) {
+ if (acd)
+ ++acd->n_refs;
+ return acd;
+}
+
+/**
+ * n_acd_unref() - drop a reference to an ACD context, freeing it with the last one
+ */
+_public_ NAcd *n_acd_unref(NAcd *acd) {
+ if (acd && !--acd->n_refs)
+ n_acd_free(acd);
+ return NULL;
+}
+
+int n_acd_raise(NAcd *acd, NAcdEventNode **nodep, unsigned int event) {
+ NAcdEventNode *node;
+ int r;
+
+ r = n_acd_event_node_new(&node);
+ if (r)
+ return r;
+
+ node->event.event = event;
+ c_list_link_tail(&acd->event_list, &node->acd_link);
+
+ if (nodep)
+ *nodep = node;
+ return 0;
+}
+
+int n_acd_send(NAcd *acd, const struct in_addr *tpa, const struct in_addr *spa) {
+ struct sockaddr_ll address = {
+ .sll_family = AF_PACKET,
+ .sll_protocol = htobe16(ETH_P_ARP),
+ .sll_ifindex = acd->ifindex,
+ .sll_halen = ETH_ALEN,
+ .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ };
+ struct ether_arp arp = {
+ .ea_hdr = {
+ .ar_hrd = htobe16(ARPHRD_ETHER),
+ .ar_pro = htobe16(ETHERTYPE_IP),
+ .ar_hln = sizeof(acd->mac),
+ .ar_pln = sizeof(uint32_t),
+ .ar_op = htobe16(ARPOP_REQUEST),
+ },
+ };
+ ssize_t l;
+ int r;
+
+ memcpy(arp.arp_sha, acd->mac, sizeof(acd->mac));
+ memcpy(arp.arp_tpa, &tpa->s_addr, sizeof(uint32_t));
+
+ if (spa)
+ memcpy(arp.arp_spa, &spa->s_addr, sizeof(spa->s_addr));
+
+ l = sendto(acd->fd_socket,
+ &arp,
+ sizeof(arp),
+ MSG_NOSIGNAL,
+ (struct sockaddr *)&address,
+ sizeof(address));
+ if (l < 0) {
+ if (errno == EAGAIN || errno == ENOBUFS) {
+ /*
+ * We never maintain outgoing queues. We rely on the
+ * network device to do that for us. In case the queues
+ * are full, or the kernel refuses to queue the packet
+ * for other reasons, we must tell our caller that the
+ * packet was dropped.
+ */
+ return N_ACD_E_DROPPED;
+ } else if (errno == ENETDOWN || errno == ENXIO) {
+ /*
+ * These errors happen if the network device went down
+ * or was actually removed. We always propagate this as
+ * event, so the user can react accordingly (similarly
+ * to the recvmmsg(2) handler). In case the user does
+ * not immediately react, we also tell our caller that
+ * the packet was dropped, so we don't erroneously
+ * treat this as success.
+ */
+
+ r = n_acd_raise(acd, NULL, N_ACD_EVENT_DOWN);
+ if (r)
+ return r;
+
+ return N_ACD_E_DROPPED;
+ }
+
+ /*
+ * Random network error. We treat this as fatal and propagate
+ * the error, so it is noticed and can be investigated.
+ */
+ return -n_acd_errno();
+ } else if (l != (ssize_t)sizeof(arp)) {
+ /*
+ * Ugh, the kernel modified the packet. This is unexpected. We
+ * consider the packet lost.
+ */
+ return N_ACD_E_DROPPED;
+ }
+
+ return 0;
+}
+
/**
* n_acd_get_fd() - get pollable file descriptor
* @acd: ACD context
* @fdp: output argument for file descriptor
*
- * Returns a file descriptor in @fdp. This filedescriptor can be polled by
+ * Returns a file descriptor in @fdp. This file descriptor can be polled by
* the caller to indicate when the ACD context can be dispatched.
*/
_public_ void n_acd_get_fd(NAcd *acd, int *fdp) {
*fdp = acd->fd_epoll;
}
-static int n_acd_push_event(NAcd *acd, unsigned int event, uint16_t *operation, uint8_t (*sender)[6], uint8_t (*target)[4]) {
- NAcdEventNode *node;
- int r;
-
- r = n_acd_event_node_new(&node, event);
- if (r < 0)
- return r;
-
- switch (event) {
- case N_ACD_EVENT_USED:
- node->event.used.operation = be16toh(*operation);
- memcpy(node->sender, sender, sizeof(node->sender));
- node->event.used.sender = node->sender;
- node->event.used.n_sender = sizeof(node->sender);
- memcpy(&node->event.used.target, target, sizeof(node->event.used.target));
- break;
- case N_ACD_EVENT_CONFLICT:
- node->event.conflict.operation = be16toh(*operation);
- memcpy(node->sender, sender, sizeof(node->sender));
- node->event.used.sender = node->sender;
- node->event.used.n_sender = sizeof(node->sender);
- memcpy(&node->event.conflict.target, target, sizeof(node->event.conflict.target));
- break;
- case N_ACD_EVENT_DEFENDED:
- node->event.defended.operation = be16toh(*operation);
- memcpy(node->sender, sender, sizeof(node->sender));
- node->event.used.sender = node->sender;
- node->event.used.n_sender = sizeof(node->sender);
- memcpy(&node->event.defended.target, target, sizeof(node->event.defended.target));
- break;
- case N_ACD_EVENT_READY:
- case N_ACD_EVENT_DOWN:
- break;
- default:
- assert(0);
- }
-
- c_list_link_tail(&acd->events, &node->link);
-
- return 0;
-}
-
-static int n_acd_now(uint64_t *nowp) {
- struct timespec ts;
- int r;
-
- r = clock_gettime(CLOCK_BOOTTIME, &ts);
- if (r < 0)
- return -n_acd_errno();
-
- *nowp = ts.tv_sec * UINT64_C(1000000) + ts.tv_nsec / UINT64_C(1000);
- return 0;
-}
-
-static int n_acd_schedule(NAcd *acd, uint64_t u_timeout, unsigned int u_jitter) {
- uint64_t u_next = u_timeout;
- int r;
-
- /*
- * ACD specifies jitter values to reduce packet storms on the local
- * link. This call accepts the maximum relative jitter value in
- * microseconds as @u_jitter. We then use rand_r(3p) to get a
- * pseudo-random jitter on top of the real timeout given as @u_timeout.
- * Note that rand_r() is fine for this. Before you try to improve the
- * RNG, you better spend some time securing ARP.
- */
- if (u_jitter)
- u_next += rand_r(&acd->seed) % u_jitter;
-
- /*
- * Setting .it_value to 0 in timerfd_settime() disarms the timer. Avoid
- * this and always schedule at least 1us. Otherwise, we'd have to
- * recursively call into the time-out handler, which we really want to
- * avoid. No reason to optimize performance here.
- */
- if (!u_next)
- u_next = 1;
-
- r = timerfd_settime(acd->fd_timer, 0,
- &(struct itimerspec){ .it_value = {
- .tv_sec = u_next / UINT64_C(1000000),
- .tv_nsec = u_next % UINT64_C(1000000) * UINT64_C(1000),
- } }, NULL);
- if (r < 0)
- return -n_acd_errno();
-
- return 0;
-}
-
-static int n_acd_send(NAcd *acd, const struct in_addr *spa) {
- struct sockaddr_ll address = {
- .sll_family = AF_PACKET,
- .sll_protocol = htobe16(ETH_P_ARP),
- .sll_ifindex = acd->config.ifindex,
- .sll_halen = ETH_ALEN,
- .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
- };
- struct ether_arp arp = {
- .ea_hdr.ar_hrd = htobe16(ARPHRD_ETHER),
- .ea_hdr.ar_pro = htobe16(ETHERTYPE_IP),
- .ea_hdr.ar_hln = sizeof(acd->mac),
- .ea_hdr.ar_pln = sizeof(uint32_t),
- .ea_hdr.ar_op = htobe16(ARPOP_REQUEST),
- };
- ssize_t l;
-
- memcpy(arp.arp_sha, acd->mac, sizeof(acd->mac));
- memcpy(arp.arp_tpa, &acd->config.ip.s_addr, sizeof(uint32_t));
-
- if (spa)
- memcpy(arp.arp_spa, &spa->s_addr, sizeof(spa->s_addr));
-
- l = sendto(acd->fd_socket, &arp, sizeof(arp), MSG_NOSIGNAL, (struct sockaddr *)&address, sizeof(address));
- if (l == (ssize_t)sizeof(arp)) {
- /* Packet was properly sent. */
- return 0;
- } else if (l >= 0) {
- /*
- * Ugh. The packet was truncated. This should not happen, but
- * lets just pretend the packet was dropped.
- */
- return 0;
- } else if (errno == EAGAIN || errno == ENOBUFS) {
- /*
- * In case the output buffer is full, the packet is silently
- * dropped. This is just as if the physical layer happened to
- * drop the packet. We are not on a reliable medium, so no
- * reason to pretend we are.
- */
- return 0;
- } else if (errno == ENETDOWN || errno == ENXIO) {
- /*
- * We get ENETDOWN if the network-device goes down or is
- * removed. ENXIO might happen on async send-operations if the
- * network-device was unplugged and thus the kernel is no
- * longer aware of it.
- * In any case, we do not allow proceeding with this socket. We
- * stop the engine and notify the user gracefully.
- */
- return -N_ACD_E_DOWN;
- }
-
- return -n_acd_errno();
-}
-
-static void n_acd_remember_conflict(NAcd *acd, uint64_t now) {
- if (++acd->n_conflicts >= N_ACD_RFC_MAX_CONFLICTS) {
- acd->n_conflicts = N_ACD_RFC_MAX_CONFLICTS;
- acd->last_conflict = now;
- }
-}
-
static int n_acd_handle_timeout(NAcd *acd) {
+ NAcdProbe *probe;
+ uint64_t now;
int r;
- switch (acd->state) {
- case N_ACD_STATE_PROBING:
- /*
- * We are still PROBING. We send 3 probes with a random timeout
- * scheduled between each. If, after a fixed timeout, we did
- * not receive any conflict we consider the probing successful.
- */
- if (acd->n_iteration >= N_ACD_RFC_PROBE_NUM) {
- /*
- * All 3 probes succeeded and we waited enough to
- * consider this address usable by now. Do not announce
- * the address, yet. We must first give the caller a
- * chance to configure the address (so they can answer
- * ARP requests), before announcing it. But our
- * callbacks are not necessarily synchronous (we want
- * to allow IPC there), so just notify the caller and
- * wait for further instructions, thus effectively
- * increasing the probe-wait.
- */
- r = n_acd_push_event(acd, N_ACD_EVENT_READY, NULL, NULL, NULL);
- if (r)
- return r;
+ /*
+ * Read the current time once, and handle all timeouts that triggered
+ * before the current time. Rereading the current time in each loop
+ * might risk creating a live-lock, and the fact that we read the
+ * time after reading the timer guarantees that the timeout which
+ * woke us up is handled.
+ *
+ * When there are no more timeouts to handle at the given time, we
+ * rearm the timer to potentially wake us up again in the future.
+ */
+ timer_now(&acd->timer, &now);
- acd->state = N_ACD_STATE_CONFIGURING;
- } else {
- /*
- * We have not sent all 3 probes, yet. A timer fired,
- * so we are ready to send the next probe. If this is
- * the third probe, schedule a timer for ANNOUNCE_WAIT
- * to give other peers a chance to answer. If this is
- * not the third probe, wait between PROBE_MIN and
- * PROBE_MAX for the next probe.
- */
+ for (;;) {
+ Timeout *timeout;
- r = n_acd_send(acd, NULL);
- /*
- * During probe we must respect the total timeout and so
- * we ignore errors caused by a down interface.
- */
- if (r < 0 && r != -N_ACD_E_DOWN)
- return r;
-
- if (++acd->n_iteration >= N_ACD_RFC_PROBE_NUM)
- r = n_acd_schedule(acd, acd->timeout_multiplier * N_ACD_RFC_ANNOUNCE_WAIT_USEC, 0);
- else
- r = n_acd_schedule(acd, acd->timeout_multiplier * N_ACD_RFC_PROBE_MIN_USEC,
- acd->timeout_multiplier * (N_ACD_RFC_PROBE_MAX_USEC - N_ACD_RFC_PROBE_MIN_USEC));
- if (r < 0)
- return r;
- }
-
- break;
-
- case N_ACD_STATE_ANNOUNCING:
- /*
- * We are ANNOUNCING, meaning the caller configured the address
- * on the interface and is actively using it. We send 3
- * announcements out, in a short interval, and then just
- * perform passive conflict detection.
- * Note that once all 3 announcements are sent, we no longer
- * schedule a timer, so this part should not trigger, anymore.
- */
-
- r = n_acd_send(acd, &acd->config.ip);
+ r = timer_pop_timeout(&acd->timer, now, &timeout);
if (r < 0) {
- if (r != -N_ACD_E_DOWN)
- return r;
+ return r;
+ } else if (!timeout) {
/*
- * We want to send all the 3 announcements even if the
- * interface goes temporarily down. Therefore, if send()
- * fails, don't increment the iteration and try again.
+ * There are no more timeouts pending before @now. Rearm
+ * the timer to fire again at the next timeout.
*/
- } else
- acd->n_iteration++;
-
- if (acd->n_iteration < N_ACD_RFC_ANNOUNCE_NUM) {
- /*
- * Announcements are always scheduled according to the
- * time-intervals specified in the spec. We always use
- * the RFC5227-mandated multiplier.
- * If you reconsider this, note that timeout_multiplier
- * might be 0 here.
- */
- r = n_acd_schedule(acd, N_ACD_TIMEOUT_RFC5227 * N_ACD_RFC_ANNOUNCE_INTERVAL_USEC, 0);
- if (r < 0)
- return r;
+ timer_rearm(&acd->timer);
+ break;
}
- break;
-
- case N_ACD_STATE_INIT:
- case N_ACD_STATE_CONFIGURING:
- default:
- /*
- * There are no timeouts in these states. If we trigger one,
- * something is fishy. Let the caller deal with this.
- */
- return -EIO;
+ probe = (void *)timeout - offsetof(NAcdProbe, timeout);
+ r = n_acd_probe_handle_timeout(probe);
+ if (r)
+ return r;
}
return 0;
@@ -604,136 +493,94 @@ static int n_acd_handle_timeout(NAcd *acd) {
static int n_acd_handle_packet(NAcd *acd, struct ether_arp *packet) {
bool hard_conflict;
- uint64_t now;
+ NAcdProbe *probe;
+ uint32_t addr;
+ CRBNode *node;
int r;
/*
- * Via BPF we discard any non-conflict packets. There are only 2 types
- * that can pass: A conflict on the Sender Protocol Address, or a
- * conflict on the Target Protocol Address.
+ * We are interested in 2 kinds of ARP messages:
*
- * The former we call a hard-conflict. It implies that the sender uses
- * the address already. We must always catch this and in some way react
- * to it. Any kind, REQUEST or REPLY must be caught (though it is
- * unlikely that we ever catch REPLIES since they tend to be unicasts).
+ * 1) Someone who is *NOT* us sends *ANY* ARP message with our IP
+ * address as sender. This is never good, because it implies an
+ * address conflict.
+ * We call this a hard-conflict.
*
- * However, in case the Target Protocol Address matches, we just know
- * that somebody is looking for the address. Hence, we must also check
- * that the packet is an ARP-Probe (Sender Protocol Address is 0). If
- * it is, it means someone else does ACD on our address. We call this a
- * soft conflict.
+ * 2) Someone who is *NOT* us sends an ARP REQUEST without any sender
+ * IP, but our IP as target. This implies someone else performs an
+ * ARP Probe with our address. This also implies a conflict, but
+ * one that can be resolved by responding to the probe.
+ * We call this a soft-conflict.
+ *
+ * We are never interested in any other ARP message. The kernel already
+ * deals with everything else, hence, we can silently ignore those.
+ *
+ * Now, we simply check whether a sender-address is set. This allows us
+ * to distinguish both cases. We then check further conditions, so we
+ * can bail out early if neither is the case.
+ *
+ * Lastly, we perform a lookup in our probe-set to check whether the
+ * address actually matches, so we can let these probes dispatch the
+ * message. Note that we allow duplicate probes, so we need to dispatch
+ * each matching probe, not just one.
*/
- if (!memcmp(packet->arp_spa, (uint8_t[4]){ }, sizeof(packet->arp_spa)) &&
- !memcmp(packet->arp_tpa, &acd->config.ip.s_addr, sizeof(packet->arp_tpa)) &&
- packet->ea_hdr.ar_op == htobe16(ARPOP_REQUEST)) {
- hard_conflict = false;
- } else if (!memcmp(packet->arp_spa, &acd->config.ip.s_addr, sizeof(packet->arp_spa))) {
+
+ if (memcmp(packet->arp_spa, (uint8_t[4]){ }, sizeof(packet->arp_spa))) {
+ memcpy(&addr, packet->arp_spa, sizeof(addr));
hard_conflict = true;
+ } else if (packet->ea_hdr.ar_op == htobe16(ARPOP_REQUEST)) {
+ memcpy(&addr, packet->arp_tpa, sizeof(addr));
+ hard_conflict = false;
} else {
/*
- * Ignore anything that is specific enough to match the BPF
- * filter, but is none of the conflicts described above.
- */
- return 0;
- }
-
- r = n_acd_now(&now);
- if (r < 0)
- return r;
-
- switch (acd->state) {
- case N_ACD_STATE_PROBING:
- /*
- * Regardless whether this is a hard or soft conflict, we must
- * treat this as a probe failure. That is, notify the caller of
- * the conflict and wait for further instructions. We do not
- * react to this, until the caller tells us what to do. But we
- * immediately stop the engine, since there is no point in
- * continuing the probing.
- */
- n_acd_remember_conflict(acd, now);
- n_acd_reset(acd);
- r = n_acd_push_event(acd, N_ACD_EVENT_USED, &packet->ea_hdr.ar_op, &packet->arp_sha, &packet->arp_tpa);
- if (r)
- return r;
-
- break;
-
- case N_ACD_STATE_CONFIGURING:
- /*
- * We are waiting for the caller to configure the interface and
- * start ANNOUNCING. In this state, we cannot defend the address
- * as that would indicate that it is ready to be used, and we
- * cannot signal CONFLICT or USED as the caller may already have
- * started to use the address (and may have configured the engine
- * to always defend it, which means they should be able to rely on
- * never losing it after READY). Simply drop the event, and rely
- * on the anticipated ANNOUNCE to trigger it again.
- */
-
- break;
-
- case N_ACD_STATE_ANNOUNCING:
- /*
- * We were already instructed to announce the address, which
- * means the address is configured and in use. Hence, the
- * caller is responsible to serve regular ARP queries. Meaning,
- * we can ignore any soft conflicts (other peers doing ACD).
- *
- * But if we see a hard-conflict, we either defend the address
- * according to the caller's instructions, or we report the
- * conflict and bail out.
- */
-
- if (!hard_conflict)
- break;
-
- if (acd->defend == N_ACD_DEFEND_NEVER) {
- n_acd_remember_conflict(acd, now);
- n_acd_reset(acd);
- r = n_acd_push_event(acd, N_ACD_EVENT_CONFLICT, &packet->ea_hdr.ar_op, &packet->arp_sha, &packet->arp_tpa);
- if (r)
- return r;
- } else {
- if (now > acd->last_defend + N_ACD_RFC_DEFEND_INTERVAL_USEC) {
- r = n_acd_send(acd, &acd->config.ip);
- if (r < 0)
- return r;
-
- acd->last_defend = now;
- r = n_acd_push_event(acd, N_ACD_EVENT_DEFENDED, &packet->ea_hdr.ar_op, &packet->arp_sha, &packet->arp_tpa);
- if (r)
- return r;
- } else if (acd->defend == N_ACD_DEFEND_ONCE) {
- n_acd_remember_conflict(acd, now);
- n_acd_reset(acd);
- r = n_acd_push_event(acd, N_ACD_EVENT_CONFLICT, &packet->ea_hdr.ar_op, &packet->arp_sha, &packet->arp_tpa);
- if (r)
- return r;
- } else {
- r = n_acd_push_event(acd, N_ACD_EVENT_DEFENDED, &packet->ea_hdr.ar_op, &packet->arp_sha, &packet->arp_tpa);
- if (r)
- return r;
- }
- }
-
- break;
-
- case N_ACD_STATE_INIT:
- default:
- /*
- * The socket should not be dispatched in those states, since
- * it is neither allocated nor added to epoll. Fail hard if we
- * trigger this somehow.
+ * The BPF filter will not let through any other packet.
*/
return -EIO;
}
+ /* Find top-most node that matches @addr. */
+ node = acd->ip_tree.root;
+ while (node) {
+ probe = c_rbnode_entry(node, NAcdProbe, ip_node);
+ if (addr < probe->ip.s_addr)
+ node = node->left;
+ else if (addr > probe->ip.s_addr)
+ node = node->right;
+ else
+ break;
+ }
+
+ /*
+ * If the address is unknown, we drop the packet. This might happen if
+ * the kernel queued the packet and passed the BPF filter, but we
+ * modified the set before dequeuing the message.
+ */
+ if (!node)
+ return 0;
+
+ /* Forward to left-most child that still matches @addr. */
+ while (node->left && addr == c_rbnode_entry(node->left,
+ NAcdProbe,
+ ip_node)->ip.s_addr)
+ node = node->left;
+
+ /* Iterate all matching entries in-order. */
+ do {
+ probe = c_rbnode_entry(node, NAcdProbe, ip_node);
+
+ r = n_acd_probe_handle_packet(probe, packet, hard_conflict);
+ if (r)
+ return r;
+
+ node = c_rbnode_next(node);
+ } while (node && addr == c_rbnode_entry(node,
+ NAcdProbe,
+ ip_node)->ip.s_addr);
+
return 0;
}
static int n_acd_dispatch_timer(NAcd *acd, struct epoll_event *event) {
- uint64_t v;
int r;
if (event->events & (EPOLLHUP | EPOLLERR)) {
@@ -746,97 +593,113 @@ static int n_acd_dispatch_timer(NAcd *acd, struct epoll_event *event) {
}
if (event->events & EPOLLIN) {
- for (unsigned int i = 0; i < 128; ++i) {
- r = read(acd->fd_timer, &v, sizeof(v));
- if (r == sizeof(v)) {
- /*
- * We successfully read a timer-value. Handle it and
- * return. We do NOT fall-through to EPOLLHUP handling,
- * as we always must drain buffers first.
- */
- return n_acd_handle_timeout(acd);
- } else if (r >= 0) {
- /*
- * Kernel guarantees 8-byte reads; fail hard if it
- * suddenly starts doing weird shit. No clue what to do
- * with those values, anyway.
- */
- return -EIO;
- } else if (errno == EAGAIN) {
- /*
- * No more pending events.
- */
- return 0;
- } else {
- /*
- * Something failed. We use CLOCK_BOOTTIME, so
- * ECANCELED cannot happen. Hence, there is no error
- * that we could gracefully handle. Fail hard and let
- * the caller deal with it.
- */
- return -n_acd_errno();
- }
- }
+ r = timer_read(&acd->timer);
+ if (r <= 0)
+ return r;
- return N_ACD_E_PREEMPTED;
+ assert(r == TIMER_E_TRIGGERED);
+
+ /*
+ * A timer triggered, handle all pending timeouts at a given
+ * point in time. There can only be a finite number of pending
+ * timeouts, any new ones will be in the future, so not handled
+ * now, but guaranteed to wake us up again when they do trigger.
+ */
+ r = n_acd_handle_timeout(acd);
+ if (r)
+ return r;
}
return 0;
}
-static int n_acd_dispatch_socket(NAcd *acd, struct epoll_event *event) {
- struct ether_arp packet;
- ssize_t l;
+static bool n_acd_packet_is_valid(NAcd *acd, void *packet, size_t n_packet) {
+ struct ether_arp *arp;
- for (unsigned int i = 0; i < 128; ++i) {
- /*
- * Regardless whether EPOLLIN is set in @event->events, we always
- * invoke recv(2). This is a safety-net for sockets, which always fetch
- * queued errors on all syscalls. That means, if anything failed on the
- * socket, we will be notified via recv(2). This simplifies the code
- * and avoid magic EPOLLIN/ERR/HUP juggling.
- *
- * Note that we must use recv(2) over read(2), since the latter cannot
- * deal with empty packets properly.
- *
- * We explicitly skip passing MSG_TRUNC here. We *WANT*
- * overlong packets to be retrieved and truncated. Ethernet
- * frames might not have byte-granular lengths. Real hardware
- * does add trailing padding/garbage, so we must discard this
- * here.
- */
- l = recv(acd->fd_socket, &packet, sizeof(packet), 0);
- if (l == (ssize_t)sizeof(packet)) {
+ /*
+ * When the eBPF filter is in place, this function always returns true. Doing
+ * the checks here as well lets the eBPF filter be an optional optimization,
+ * which is necessary on older kernels.
+ *
+ * See comments in n-acd-bpf.c for details.
+ */
+
+ if (n_packet != sizeof(*arp))
+ return false;
+
+ arp = packet;
+
+ if (arp->arp_hrd != htobe16(ARPHRD_ETHER))
+ return false;
+
+ if (arp->arp_pro != htobe16(ETHERTYPE_IP))
+ return false;
+
+ if (arp->arp_hln != sizeof(struct ether_addr))
+ return false;
+
+ if (arp->arp_pln != sizeof(struct in_addr))
+ return false;
+
+ if (!memcmp(arp->arp_sha, acd->mac, sizeof(struct ether_addr)))
+ return false;
+
+ if (memcmp(arp->arp_spa, &((struct in_addr) { INADDR_ANY }), sizeof(struct in_addr))) {
+ if (arp->arp_op != htobe16(ARPOP_REQUEST) && arp->arp_op != htobe16(ARPOP_REPLY))
+ return false;
+ } else if (arp->arp_op != htobe16(ARPOP_REQUEST)) {
+ return false;
+ }
+
+ return true;
+}
+
+static int n_acd_dispatch_socket(NAcd *acd, struct epoll_event *event) {
+ const size_t n_batch = 8;
+ struct mmsghdr msgs[n_batch];
+ struct iovec iovecs[n_batch];
+ struct ether_arp data[n_batch];
+ size_t i;
+ int r, n;
+
+ for (i = 0; i < n_batch; ++i) {
+ iovecs[i].iov_base = data + i;
+ iovecs[i].iov_len = sizeof(data[i]);
+ msgs[i].msg_hdr = (struct msghdr){
+ .msg_iov = iovecs + i,
+ .msg_iovlen = 1,
+ };
+ }
+
+ /*
+ * We always directly call into recvmmsg(2), regardless which EPOLL*
+ * event is signalled. On sockets, the recv(2)-family of syscalls does
+ * a suitable job of handling all possible scenarios and telling us
+ * about it. Hence, lets take the easy route and always ask the kernel
+ * about the current state.
+ */
+ n = recvmmsg(acd->fd_socket, msgs, n_batch, 0, NULL);
+ if (n < 0) {
+ if (errno == ENETDOWN) {
/*
- * We read a full ARP packet. We never fall-through to EPOLLHUP
- * handling, as we always must drain buffers first.
+ * We get ENETDOWN if the network-device goes down or
+ * is removed. This error is temporary and only queued
+ * once. Subsequent reads will simply return EAGAIN
+ * until the device is up again and has data queued.
+ * Usually, the caller should tear down all probes when
+ * an interface goes down, but we leave it up to the
+ * caller to decide what to do. We propagate the code
+ * and continue.
*/
- return n_acd_handle_packet(acd, &packet);
- } else if (l >= 0) {
- /*
- * The BPF filter discards short packets, so error out
- * if something slips through for any reason. Don't silently
- * ignore it, since we explicitly want to know if something
- * went fishy.
- */
- return -EIO;
- } else if (errno == ENETDOWN || errno == ENXIO) {
- /*
- * The network device went down or was removed. Ignore
- * such errors and let the pending probe time out.
- * Subsequent reads will simply return EAGAIN until the
- * device is up again and has data queued.
- */
- return 0;
+ return n_acd_raise(acd, NULL, N_ACD_EVENT_DOWN);
} else if (errno == EAGAIN) {
/*
- * We cannot read data from the socket (we got EAGAIN). As a safety net
- * check for EPOLLHUP/ERR. Those cannot be disabled with epoll, so we
- * must make sure to not busy-loop by ignoring them. Note that we know
- * recv(2) on sockets to return an error if either of these epoll-flags
- * is set. Hence, if we did not handle it above, we have no other way
- * but treating those flags as fatal errors and returning them to the
- * caller.
+ * There is no more data queued and we did not get
+ * preempted. Everything is good to go.
+ * As a safety-net against busy-looping, we do check
+ * for HUP/ERR. Neither should be set, since they imply
+ * error-dequeue behavior on all socket calls. Lets
+ * fail hard if we trigger it, so we can investigate.
*/
if (event->events & (EPOLLHUP | EPOLLERR))
return -EIO;
@@ -844,35 +707,63 @@ static int n_acd_dispatch_socket(NAcd *acd, struct epoll_event *event) {
return 0;
} else {
/*
- * Cannot dispatch the packet. This might be due to OOM, HUP,
- * or something else. We cannot handle it gracefully so forward
- * to the caller.
+ * Something went wrong. Propagate the error-code, so
+ * this can be investigated.
*/
return -n_acd_errno();
}
+ } else if (n >= (ssize_t)n_batch) {
+ /*
+ * If all buffers were filled with data, we cannot be sure that
+ * there is nothing left to read. But to avoid starvation, we
+ * cannot loop on this condition. Instead, we mark the context
+ * as preempted so the caller can call us again.
+ * Note that in level-triggered event-loops this condition can
+ * be neglected, but in edge-triggered event-loops it is
+ * crucial to forward this information.
+ *
+ * On the other hand, there are several conditions where the
+ * kernel might return fewer batches than requested, but was
+ * still preempted. However, all of those cases require the
+ * preemption to have triggered a wakeup *after* we entered
+ * recvmmsg(). Hence, even if we did not recognize the
+ * preemption, an edge must have triggered and as such we will
+ * handle the event on the next turn.
+ */
+ acd->preempted = true;
}
- return N_ACD_E_PREEMPTED;
+ for (i = 0; (ssize_t)i < n; ++i) {
+ if (!n_acd_packet_is_valid(acd, data + i, msgs[i].msg_len))
+ continue;
+ /*
+ * Handle the packet. Bail out if something went wrong. Note
+ * that these must be fatal errors, since we discard all other
+ * packets that follow.
+ */
+ r = n_acd_handle_packet(acd, data + i);
+ if (r)
+ return r;
+ }
+
+ return 0;
}
/**
- * n_acd_dispatch() - dispatch ACD context
- * @acd: ACD context
- *
- * Return: 0 on successful dispatch of all pending events, N_ACD_E_PREEMPT in
- * case there are more still more events to be dispatched, or a
- * negative error code on failure.
+ * n_acd_dispatch() - dispatch pending timer and socket events of an ACD context
*/
_public_ int n_acd_dispatch(NAcd *acd) {
struct epoll_event events[2];
int n, i, r = 0;
- bool preempted = false;
n = epoll_wait(acd->fd_epoll, events, sizeof(events) / sizeof(*events), 0);
if (n < 0) {
+ /* Linux never returns EINTR if `timeout == 0'. */
return -n_acd_errno();
}
+ acd->preempted = false;
+
for (i = 0; i < n; ++i) {
switch (events[i].data.u32) {
case N_ACD_EPOLL_TIMER:
@@ -882,35 +773,16 @@ _public_ int n_acd_dispatch(NAcd *acd) {
r = n_acd_dispatch_socket(acd, events + i);
break;
default:
+ assert(0);
r = 0;
break;
}
- if (r == N_ACD_E_PREEMPTED)
- preempted = true;
- else if (r != 0)
- break;
- }
-
- if (r == -N_ACD_E_DOWN) {
- /*
- * N_ACD_E_DOWN is synthesized whenever we notice
- * ENETDOWN-related errors on the network interface. This
- * allows bailing out of deep call-paths and then handling the
- * error gracefully here.
- */
- n_acd_reset(acd);
- r = n_acd_push_event(acd, N_ACD_EVENT_DOWN, NULL, NULL, NULL);
if (r)
return r;
-
- return 0;
}
- if (preempted)
- return N_ACD_E_PREEMPTED;
- else
- return r;
+ return acd->preempted ? N_ACD_E_PREEMPTED : 0;
}
/**
@@ -920,343 +792,75 @@ _public_ int n_acd_dispatch(NAcd *acd) {
*
* Returns a pointer to the next pending event. The event is still owend by
* the context, and is only valid until the next call to n_acd_pop_event()
- * or until the context is freed.
+ * or until the owning object is freed (either the ACD context or the indicated
+ * probe object).
+ *
+ * An event either originates on the ACD context, or one of the configured
+ * probes. If the event-type has a 'probe' pointer, it originated on the
+ * indicated probe (which is *never* NULL), otherwise it originated on the
+ * context.
+ *
+ * Users must call this function repeatedly until either an error is returned,
+ * or the event-pointer is NULL. Wakeups on the epoll-fd are only guaranteed
+ * for each batch of events. Hence, it is the caller's responsibility to drain
+ * the event-queue somehow after each call to n_acd_dispatch(). Note that
+ * events can only be added by n_acd_dispatch(), hence, you cannot live-lock
+ * when draining the event queue.
*
* The possible events are:
- * * N_ACD_EVENT_READY: The configured IP address was probed successfully
+ * * N_ACD_EVENT_READY: A configured IP address was probed successfully
* and is ready to be used. Once configured on the
* interface, the caller must call n_acd_announce()
* to announce and start defending the address.
- * No further events may be received before
- * n_acd_announce() has been called.
* * N_ACD_EVENT_USED: Someone is already using the IP address being
- * probed. The engine was stopped, and the caller
- * may restart it to try again.
- * * N_ACD_EVENT_DEFENDED: A conflict was detected for the announced IP
+ * probed. The probe is put into stopped state and
+ * should be freed by the caller.
+ * * N_ACD_EVENT_DEFENDED: A conflict was detected for an announced IP
* address, and the engine attempted to defend it.
* This is purely informational, and no action is
* required by the caller.
- * * N_ACD_EVENT_CONFLICT: A conflict was detected for the announced IP
- * address, and the engine was not able to defend
+ * * N_ACD_EVENT_CONFLICT: A conflict was detected for an announced IP
+ * address, and the probe was not able to defend
* it (according to the configured policy). The
- * engine has stoppde, the caller must stop using
- * the address immediately, and may restart the
- * engine to retry.
- * * N_ACD_EVENT_DOWN: A network error was detected. The engine was
- * stopped and it is the responsibility of the
- * caller to restart it once the network may be
- * functional again.
+ * probe halted, the caller must stop using
+ * the address immediately, and should free the probe.
+ * * N_ACD_EVENT_DOWN: The specified network interface was put down. The
+ * user is recommended to free *ALL* probes and
+ * recreate them as soon as the interface is up again.
+ * Note that this event is purely informational. The
+ * probes will continue running, but all packets will
+ * be blackholed, and no network packets are received,
+ * until the network is back up again. Hence, from an
+ * operational perspective, the legitimacy of the ACD
+ * probes is lost and the user better re-probes all
+ * addresses.
*
- * Returns: 0 on success, N_ACD_E_STOPPED if there are no more events and
- * the engine has been stopped, N_ACD_E_DONE if there are no more
- * events, but the engine is still running, or a negative error
- * code on failure.
+ * Returns: 0 on success, negative error code on failure. The popped event is
+ * returned in @eventp. If no event is pending, NULL is placed in
+ * @eventp and 0 is returned. If an error is returned, @eventp is left
+ * untouched.
*/
_public_ int n_acd_pop_event(NAcd *acd, NAcdEvent **eventp) {
- acd->current = n_acd_event_node_free(acd->current);
+ NAcdEventNode *node, *t_node;
- if (c_list_is_empty(&acd->events)) {
- if (acd->state == N_ACD_STATE_INIT)
- return N_ACD_E_STOPPED;
- else
- return N_ACD_E_DONE;
- }
-
- acd->current = c_list_first_entry(&acd->events, NAcdEventNode, link);
- c_list_unlink(&acd->current->link);
-
- if (eventp)
- *eventp = &acd->current->event;
-
- return 0;
-}
-
-static int n_acd_bind_socket(NAcd *acd, int s) {
- /*
- * Due to strict aliasing, we cannot get uint32_t/uint16_t pointers to
- * acd->config.mac, so provide a union accessor.
- */
- const union {
- uint8_t u8[6];
- uint16_t u16[3];
- uint32_t u32[1];
- } mac = {
- .u8 = {
- acd->mac[0],
- acd->mac[1],
- acd->mac[2],
- acd->mac[3],
- acd->mac[4],
- acd->mac[5],
- },
- };
- struct sock_filter filter[] = {
- /*
- * Basic ARP header validation. Make sure the packet-length,
- * wire type, protocol type, and address lengths are correct.
- */
- BPF_STMT(BPF_LD + BPF_W + BPF_LEN, 0), /* A <- packet length */
- BPF_JUMP(BPF_JMP + BPF_JGE + BPF_K, sizeof(struct ether_arp), 1, 0), /* #packet >= #arp-packet ? */
- BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_hrd)), /* A <- header */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPHRD_ETHER, 1, 0), /* header == ethernet ? */
- BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_pro)), /* A <- protocol */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 1, 0), /* protocol == IP ? */
- BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
- BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_hln)), /* A <- hardware address length */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, sizeof(struct ether_addr), 1, 0), /* length == sizeof(ether_addr)? */
- BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
- BPF_STMT(BPF_LD + BPF_B + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_pln)), /* A <- protocol address length */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, sizeof(struct in_addr), 1, 0), /* length == sizeof(in_addr) ? */
- BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, ea_hdr.ar_op)), /* A <- operation */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPOP_REQUEST, 2, 0), /* protocol == request ? */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ARPOP_REPLY, 1, 0), /* protocol == reply ? */
- BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
-
- /*
- * Sender hardware address must be different from ours. Note
- * that BPF runs in big-endian mode, but assumes immediates are
- * given in native-endian. This might look weird on 6-byte mac
- * addresses, but is needed to revert the BPF magic.
- */
- BPF_STMT(BPF_LD + BPF_IMM, be32toh(mac.u32[0])), /* A <- 4 bytes of client's MAC */
- BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
- BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct ether_arp, arp_sha)), /* A <- 4 bytes of SHA */
- BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* A xor X */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 6), /* A == 0 ? */
- BPF_STMT(BPF_LD + BPF_IMM, be16toh(mac.u16[2])), /* A <- remainder of client's MAC */
- BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, offsetof(struct ether_arp, arp_sha) + 4), /* A <- remainder of SHA */
- BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* A xor X */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), /* A == 0 ? */
- BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
-
- /*
- * Sender protocol address or target protocol address must be
- * equal to the one we care about. Again, immediates must be
- * given in native-endian.
- */
- BPF_STMT(BPF_LD + BPF_IMM, be32toh(acd->config.ip.s_addr)), /* A <- clients IP */
- BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
- BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct ether_arp, arp_spa)), /* A <- SPA */
- BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* X xor A */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), /* A == 0 ? */
- BPF_STMT(BPF_RET + BPF_K, 65535), /* return all */
- BPF_STMT(BPF_LD + BPF_IMM, be32toh(acd->config.ip.s_addr)), /* A <- clients IP */
- BPF_STMT(BPF_MISC + BPF_TAX, 0), /* X <- A */
- BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct ether_arp, arp_tpa)), /* A <- TPA */
- BPF_STMT(BPF_ALU + BPF_XOR + BPF_X, 0), /* X xor A */
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0, 0, 1), /* A == 0 ? */
- BPF_STMT(BPF_RET + BPF_K, 65535), /* return all */
- BPF_STMT(BPF_RET + BPF_K, 0), /* ignore */
- };
- const struct sock_fprog fprog = {
- .len = sizeof(filter) / sizeof(*filter),
- .filter = filter,
- };
- const struct sockaddr_ll address = {
- .sll_family = AF_PACKET,
- .sll_protocol = htobe16(ETH_P_ARP),
- .sll_ifindex = acd->config.ifindex,
- .sll_halen = ETH_ALEN,
- .sll_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
- };
- int r;
-
- /*
- * Install a packet filter that matches on the ARP header and
- * addresses, to reduce the number of wake-ups to a minimum.
- */
- r = setsockopt(s, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
- if (r < 0)
- return -n_acd_errno();
-
- /*
- * Bind the packet-socket to ETH_P_ARP and the specified network
- * interface.
- */
- r = bind(s, (struct sockaddr *)&address, sizeof(address));
- if (r < 0)
- return -n_acd_errno();
-
- return 0;
-}
-
-static int n_acd_setup_socket(NAcd *acd) {
- int r, s;
-
- s = socket(PF_PACKET, SOCK_DGRAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
- if (s < 0)
- return -n_acd_errno();
-
- r = n_acd_bind_socket(acd, s);
- if (r < 0)
- goto error;
-
- r = epoll_ctl(acd->fd_epoll, EPOLL_CTL_ADD, s,
- &(struct epoll_event){
- .events = EPOLLIN,
- .data.u32 = N_ACD_EPOLL_SOCKET,
- });
- if (r < 0) {
- r = -n_acd_errno();
- goto error;
- }
-
- acd->fd_socket = s;
- return 0;
-
-error:
- close(s);
- return r;
-}
-
-/**
- * n_acd_start() - start the ACD engine
- * @acd: ACD context
- * @config: description of interface and desired IP address
- *
- * Start probing the given address on the given interface.
- *
- * The engine must not already be running, and there must not be
- * any pending events.
- *
- * Returns: 0 on success, N_ACD_E_INVALID_ARGUMENT in case the configuration
- * was invalid, N_ACD_E_BUSY if the engine is running or there are
- * pending events, or a negative error code on failure.
- */
-_public_ int n_acd_start(NAcd *acd, NAcdConfig *config) {
- uint64_t now, delay;
- int r;
-
- if (config->ifindex <= 0 ||
- config->transport != N_ACD_TRANSPORT_ETHERNET ||
- config->n_mac != ETH_ALEN ||
- !memcmp(config->mac, (uint8_t[ETH_ALEN]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, ETH_ALEN) ||
- !config->ip.s_addr)
- return N_ACD_E_INVALID_ARGUMENT;
-
- if (acd->state != N_ACD_STATE_INIT || !c_list_is_empty(&acd->events))
- return N_ACD_E_BUSY;
-
- acd->config = *config;
- memcpy(acd->mac, config->mac, config->n_mac);
- acd->config.mac = acd->mac;
- acd->timeout_multiplier = config->timeout_msec;
-
- r = n_acd_setup_socket(acd);
- if (r < 0)
- goto error;
-
- if (acd->timeout_multiplier) {
- delay = 0;
- acd->n_iteration = 0;
-
- if (acd->last_conflict != TIME_INFINITY) {
- r = n_acd_now(&now);
- if (r < 0)
- goto error;
-
- if (now < acd->last_conflict + N_ACD_RFC_RATE_LIMIT_INTERVAL_USEC)
- delay = acd->last_conflict + N_ACD_RFC_RATE_LIMIT_INTERVAL_USEC - now;
+ c_list_for_each_entry_safe(node, t_node, &acd->event_list, acd_link) {
+ if (node->is_public) {
+ n_acd_event_node_free(node);
+ continue;
}
- r = n_acd_schedule(acd, delay, acd->timeout_multiplier * N_ACD_RFC_PROBE_WAIT_USEC);
- if (r < 0)
- goto error;
- } else {
- /*
- * A zero timeout means we drop the probing alltogether, and behave as if
- * the last probe succeeded immediately.
- */
- acd->n_iteration = N_ACD_RFC_PROBE_NUM;
-
- r = n_acd_schedule(acd, 0, 0);
- if (r < 0)
- goto error;
+ node->is_public = true;
+ *eventp = &node->event;
+ return 0;
}
- acd->state = N_ACD_STATE_PROBING;
- acd->defend = N_ACD_DEFEND_NEVER;
- acd->last_defend = 0;
- return 0;
-
-error:
- n_acd_reset(acd);
- return r;
-}
-
-/**
- * n_acd_stop() - stop the ACD engine
- * @acd: ACD context
- *
- * Stop the engine. No new events may be triggered, but pending events are not
- * flushed. Before calling n_acd_start() again all pending events must be popped.
- *
- * Return: 0 on success, negative error code on failure.
- */
-_public_ int n_acd_stop(NAcd *acd) {
- n_acd_reset(acd);
+ *eventp = NULL;
return 0;
}
/**
- * n_acd_announce() - announce the configured IP address
- * @acd: ACD context
- * @defend: defence policy
- *
- * Announce the IP address on the local link, and start defending it according
- * to the given policy, which mut be one of N_ACD_DEFEND_ONCE,
- * N_ACD_DEFEND_NEVER, or N_ACD_DEFEND_ALWAYS.
- *
- * This must be called after the engine in response to an N_ACD_EVENT_READY
- * event, and only after the given address has been configured on the given
- * interface.
- *
- * Return: 0 on success, N_ACD_E_INVALID_ARGUMENT in case the defence policy
- * is invalid, N_ACD_E_BUSY if this is not in response to a
- * N_ACD_EVENT_READY event, or a negative error code on failure.
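+ * n_acd_probe() - create a probe on an ACD context
+ * @acd: ACD context to create the probe on
+ * @probep: output argument for the new probe
+ * @config: probe configuration
+ *
+ * This is a thin public wrapper around n_acd_probe_new().
+ *
+ * Return: the result of n_acd_probe_new(), passed through unchanged
+ * (0 on success).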
*/
-_public_ int n_acd_announce(NAcd *acd, unsigned int defend) {
- uint64_t now;
- int r;
-
- if (defend >= _N_ACD_DEFEND_N)
- return N_ACD_E_INVALID_ARGUMENT;
- if (acd->state != N_ACD_STATE_CONFIGURING)
- return N_ACD_E_BUSY;
-
- /*
- * Sending announcements means we finished probing and use the address
- * now. We therefore reset the conflict counter in case we adhered to
- * the rate-limit. Since probing is properly delayed, a well-behaving
- * client will always reset the conflict counter here. However, if you
- * force-use an address regardless of conflicts, then this will not
- * trigger and the conflict counter stays untouched.
- */
- if (acd->last_conflict != TIME_INFINITY) {
- r = n_acd_now(&now);
- if (r < 0)
- return r;
-
- if (now >= acd->last_conflict + N_ACD_RFC_RATE_LIMIT_INTERVAL_USEC)
- acd->n_conflicts = 0;
- }
-
- /*
- * Instead of sending the first announcement here, we schedule an idle
- * timer. This avoids possibly recursing into the user callback. We
- * should never trigger callbacks from arbitrary stacks, but always
- * restrict them to the dispatcher.
- */
- r = n_acd_schedule(acd, 0, 0);
- if (r < 0)
- return r;
-
- acd->state = N_ACD_STATE_ANNOUNCING;
- acd->defend = defend;
- acd->n_iteration = 0;
- return 0;
+_public_ int n_acd_probe(NAcd *acd, NAcdProbe **probep, NAcdProbeConfig *config) {
+ return n_acd_probe_new(probep, acd, config);
}
diff --git a/shared/n-acd/src/n-acd.h b/shared/n-acd/src/n-acd.h
index 75646243d..74b0aacb5 100644
--- a/shared/n-acd/src/n-acd.h
+++ b/shared/n-acd/src/n-acd.h
@@ -15,44 +15,23 @@ extern "C" {
#include
#include
+typedef struct NAcd NAcd;
+typedef struct NAcdConfig NAcdConfig;
+typedef struct NAcdEvent NAcdEvent;
+typedef struct NAcdProbe NAcdProbe;
+typedef struct NAcdProbeConfig NAcdProbeConfig;
+
#define N_ACD_TIMEOUT_RFC5227 (UINT64_C(9000))
enum {
_N_ACD_E_SUCCESS,
- N_ACD_E_DONE,
- N_ACD_E_STOPPED,
N_ACD_E_PREEMPTED,
-
N_ACD_E_INVALID_ARGUMENT,
- N_ACD_E_BUSY,
+
+ _N_ACD_E_N,
};
-typedef struct NAcd NAcd;
-
-typedef struct NAcdConfig {
- int ifindex;
- unsigned int transport;
- const uint8_t *mac;
- size_t n_mac;
- struct in_addr ip;
- uint64_t timeout_msec;
-} NAcdConfig;
-
-typedef struct NAcdEvent {
- unsigned int event;
- union {
- struct {
- } ready, down;
- struct {
- uint16_t operation;
- uint8_t *sender;
- size_t n_sender;
- struct in_addr target;
- } used, defended, conflict;
- };
-} NAcdEvent;
-
enum {
N_ACD_TRANSPORT_ETHERNET,
_N_ACD_TRANSPORT_N,
@@ -74,21 +53,94 @@ enum {
_N_ACD_DEFEND_N,
};
-int n_acd_new(NAcd **acdp);
-void n_acd_free(NAcd *acd);
+struct NAcdEvent {
+ unsigned int event;
+ union {
+ struct {
+ NAcdProbe *probe;
+ } ready;
+ struct {
+ } down;
+ struct {
+ NAcdProbe *probe;
+ uint8_t *sender;
+ size_t n_sender;
+ } used, defended, conflict;
+ };
+};
+
+/* configs */
+
+int n_acd_config_new(NAcdConfig **configp);
+NAcdConfig *n_acd_config_free(NAcdConfig *config);
+
+void n_acd_config_set_ifindex(NAcdConfig *config, int ifindex);
+void n_acd_config_set_transport(NAcdConfig *config, unsigned int transport);
+void n_acd_config_set_mac(NAcdConfig *config, const uint8_t *mac, size_t n_mac);
+
+int n_acd_probe_config_new(NAcdProbeConfig **configp);
+NAcdProbeConfig *n_acd_probe_config_free(NAcdProbeConfig *config);
+
+void n_acd_probe_config_set_ip(NAcdProbeConfig *config, struct in_addr ip);
+void n_acd_probe_config_set_timeout(NAcdProbeConfig *config, uint64_t msecs);
+
+/* contexts */
+
+int n_acd_new(NAcd **acdp, NAcdConfig *config);
+NAcd *n_acd_ref(NAcd *acd);
+NAcd *n_acd_unref(NAcd *acd);
void n_acd_get_fd(NAcd *acd, int *fdp);
-
int n_acd_dispatch(NAcd *acd);
int n_acd_pop_event(NAcd *acd, NAcdEvent **eventp);
-int n_acd_announce(NAcd *acd, unsigned int defend);
-int n_acd_start(NAcd *acd, NAcdConfig *config);
-int n_acd_stop(NAcd *acd);
+int n_acd_probe(NAcd *acd, NAcdProbe **probep, NAcdProbeConfig *config);
-static inline void n_acd_freep(NAcd **acd) {
+/* probes */
+
+NAcdProbe *n_acd_probe_free(NAcdProbe *probe);
+
+void n_acd_probe_set_userdata(NAcdProbe *probe, void *userdata);
+void n_acd_probe_get_userdata(NAcdProbe *probe, void **userdatap);
+
+int n_acd_probe_announce(NAcdProbe *probe, unsigned int defend);
+
+/* inline helpers */
+
+static inline void n_acd_config_freep(NAcdConfig **config) {
+ if (*config)
+ n_acd_config_free(*config);
+}
+
+static inline void n_acd_config_freev(NAcdConfig *config) {
+ n_acd_config_free(config);
+}
+
+static inline void n_acd_probe_config_freep(NAcdProbeConfig **config) {
+ if (*config)
+ n_acd_probe_config_free(*config);
+}
+
+static inline void n_acd_probe_config_freev(NAcdProbeConfig *config) {
+ n_acd_probe_config_free(config);
+}
+
+static inline void n_acd_unrefp(NAcd **acd) {
if (*acd)
- n_acd_free(*acd);
+ n_acd_unref(*acd);
+}
+
+static inline void n_acd_unrefv(NAcd *acd) {
+ n_acd_unref(acd);
+}
+
+static inline void n_acd_probe_freep(NAcdProbe **probe) {
+ if (*probe)
+ n_acd_probe_free(*probe);
+}
+
+static inline void n_acd_probe_freev(NAcdProbe *probe) {
+ n_acd_probe_free(probe);
}
#ifdef __cplusplus
diff --git a/shared/n-acd/src/test-api.c b/shared/n-acd/src/test-api.c
index 697181aba..e16de48b7 100644
--- a/shared/n-acd/src/test-api.c
+++ b/shared/n-acd/src/test-api.c
@@ -7,67 +7,90 @@
#include
#include "test.h"
-static void test_api_constants(void) {
- assert(N_ACD_DEFEND_NEVER != _N_ACD_DEFEND_N);
- assert(N_ACD_DEFEND_ONCE != _N_ACD_DEFEND_N);
- assert(N_ACD_DEFEND_ALWAYS != _N_ACD_DEFEND_N);
+static void test_api(void) {
+ NAcdConfig *config = NULL;
+ NAcd *acd = NULL;
+ int r;
+
+ assert(N_ACD_E_PREEMPTED);
+ assert(N_ACD_E_INVALID_ARGUMENT);
+
+ assert(N_ACD_TRANSPORT_ETHERNET != _N_ACD_TRANSPORT_N);
assert(N_ACD_EVENT_READY != _N_ACD_EVENT_N);
assert(N_ACD_EVENT_USED != _N_ACD_EVENT_N);
assert(N_ACD_EVENT_DEFENDED != _N_ACD_EVENT_N);
assert(N_ACD_EVENT_CONFLICT != _N_ACD_EVENT_N);
assert(N_ACD_EVENT_DOWN != _N_ACD_EVENT_N);
-}
-static void test_api_management(void) {
- NAcd *acd = NULL;
- int r;
+ assert(N_ACD_DEFEND_NEVER != _N_ACD_DEFEND_N);
+ assert(N_ACD_DEFEND_ONCE != _N_ACD_DEFEND_N);
+ assert(N_ACD_DEFEND_ALWAYS != _N_ACD_DEFEND_N);
- /* new/free/freep */
+ n_acd_config_freep(&config);
- n_acd_freep(&acd);
-
- r = n_acd_new(&acd);
+ r = n_acd_config_new(&config);
assert(!r);
- n_acd_free(acd);
-}
+ n_acd_config_set_ifindex(config, 1);
+ n_acd_config_set_transport(config, N_ACD_TRANSPORT_ETHERNET);
+ n_acd_config_set_mac(config, (uint8_t[6]){ }, 6);
-static void test_api_runtime(void) {
- NAcdConfig config = {
- .ifindex = 1,
- .transport = N_ACD_TRANSPORT_ETHERNET,
- .mac = (uint8_t[]){ 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54 },
- .n_mac = ETH_ALEN,
- .ip = { htobe32((127 << 24) | (1 << 0)) },
- .timeout_msec = 100,
- };
- NAcd *acd;
- int r;
+ {
+ NAcdEvent *event;
+ int fd;
- /* get_fd/dispatch/pop_event/start/stop/announce */
+ n_acd_unrefp(&acd);
+ n_acd_ref(NULL);
- r = n_acd_new(&acd);
- assert(!r);
+ r = n_acd_new(&acd, config);
+ assert(!r);
- n_acd_get_fd(acd, &r);
- assert(r >= 0);
- r = n_acd_dispatch(acd);
- assert(!r);
- r = n_acd_pop_event(acd, NULL);
- assert(r == N_ACD_E_STOPPED);
- r = n_acd_start(acd, &config);
- assert(!r);
- r = n_acd_start(acd, &config);
- assert(r == N_ACD_E_BUSY);
- r = n_acd_pop_event(acd, NULL);
- assert(r == N_ACD_E_DONE);
- r = n_acd_stop(acd);
- assert(!r);
- r = n_acd_announce(acd, N_ACD_DEFEND_NEVER);
- assert(r == N_ACD_E_BUSY);
+ n_acd_get_fd(acd, &fd);
+ n_acd_dispatch(acd);
+ n_acd_pop_event(acd, &event);
- n_acd_free(acd);
+ {
+ NAcdProbeConfig *c = NULL;
+
+ n_acd_probe_config_freep(&c);
+
+ r = n_acd_probe_config_new(&c);
+ assert(!r);
+
+ n_acd_probe_config_set_ip(c, (struct in_addr){ 1 });
+ n_acd_probe_config_set_timeout(c, N_ACD_TIMEOUT_RFC5227);
+
+ {
+ NAcdProbe *probe = NULL;
+ void *userdata;
+
+ r = n_acd_probe(acd, &probe, c);
+ assert(!r);
+
+ n_acd_probe_get_userdata(probe, &userdata);
+ assert(userdata == NULL);
+ n_acd_probe_set_userdata(probe, acd);
+ n_acd_probe_get_userdata(probe, &userdata);
+ assert(userdata == acd);
+
+ r = n_acd_probe_announce(probe, N_ACD_DEFEND_ONCE);
+ assert(!r);
+
+ n_acd_probe_free(probe);
+ n_acd_probe_freev(NULL);
+ }
+
+ n_acd_probe_config_free(c);
+ n_acd_probe_config_freev(NULL);
+ }
+
+ n_acd_unref(acd);
+ n_acd_unrefv(NULL);
+ }
+
+ n_acd_config_free(config);
+ n_acd_config_freev(NULL);
}
int main(int argc, char **argv) {
@@ -77,8 +100,6 @@ int main(int argc, char **argv) {
if (r)
return r;
- test_api_constants();
- test_api_management();
- test_api_runtime();
+ test_api();
return 0;
}
diff --git a/shared/n-acd/src/test-basic.c b/shared/n-acd/src/test-basic.c
deleted file mode 100644
index fa85cb054..000000000
--- a/shared/n-acd/src/test-basic.c
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * Basic Tests
- */
-
-#include
-#include
-#include
-#include
-#include "n-acd.h"
-
-int main(int argc, char **argv) {
- return 0;
-}
diff --git a/shared/n-acd/src/test-bpf.c b/shared/n-acd/src/test-bpf.c
new file mode 100644
index 000000000..aa8b20ec3
--- /dev/null
+++ b/shared/n-acd/src/test-bpf.c
@@ -0,0 +1,228 @@
+/*
+ * eBPF socket filter tests
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "n-acd.h"
+#include "n-acd-private.h"
+#include "test.h"
+
+#define ETHER_ARP_PACKET_INIT(_op, _mac, _sip, _tip) { \
+ .ea_hdr = { \
+ .ar_hrd = htobe16(ARPHRD_ETHER), \
+ .ar_pro = htobe16(ETHERTYPE_IP), \
+ .ar_hln = 6, \
+ .ar_pln = 4, \
+ .ar_op = htobe16(_op), \
+ }, \
+ .arp_sha[0] = (_mac)->ether_addr_octet[0], \
+ .arp_sha[1] = (_mac)->ether_addr_octet[1], \
+ .arp_sha[2] = (_mac)->ether_addr_octet[2], \
+ .arp_sha[3] = (_mac)->ether_addr_octet[3], \
+ .arp_sha[4] = (_mac)->ether_addr_octet[4], \
+ .arp_sha[5] = (_mac)->ether_addr_octet[5], \
+ .arp_spa[0] = (be32toh((_sip)->s_addr) >> 24) & 0xff, \
+ .arp_spa[1] = (be32toh((_sip)->s_addr) >> 16) & 0xff, \
+ .arp_spa[2] = (be32toh((_sip)->s_addr) >> 8) & 0xff, \
+ .arp_spa[3] = be32toh((_sip)->s_addr) & 0xff, \
+ .arp_tpa[0] = (be32toh((_tip)->s_addr) >> 24) & 0xff, \
+ .arp_tpa[1] = (be32toh((_tip)->s_addr) >> 16) & 0xff, \
+ .arp_tpa[2] = (be32toh((_tip)->s_addr) >> 8) & 0xff, \
+ .arp_tpa[3] = be32toh((_tip)->s_addr) & 0xff, \
+ }
+
+/*
+ * Walk a single address through the BPF map helpers and check the error
+ * codes reported for a missing entry (-ENOENT) and a duplicate entry
+ * (-EEXIST).
+ */
+static void test_map(void) {
+        struct in_addr ip = { 1 };
+        int map_fd = -1;
+        int rc;
+
+        rc = n_acd_bpf_map_create(&map_fd, 8);
+        assert(rc >= 0);
+        assert(map_fd >= 0);
+
+        /* nothing was added yet, so there is nothing to remove */
+        rc = n_acd_bpf_map_remove(map_fd, &ip);
+        assert(rc == -ENOENT);
+
+        /* the first insertion succeeds ... */
+        rc = n_acd_bpf_map_add(map_fd, &ip);
+        assert(rc >= 0);
+
+        /* ... and inserting the same address again is rejected */
+        rc = n_acd_bpf_map_add(map_fd, &ip);
+        assert(rc == -EEXIST);
+
+        /* the entry can be removed exactly once */
+        rc = n_acd_bpf_map_remove(map_fd, &ip);
+        assert(rc >= 0);
+
+        rc = n_acd_bpf_map_remove(map_fd, &ip);
+        assert(rc == -ENOENT);
+
+        close(map_fd);
+}
+
+/*
+ * Send @packet on @out_fd and require that a full ARP packet can be read
+ * back from @in_fd.
+ */
+static void verify_success(struct ether_arp *packet, int out_fd, int in_fd) {
+        uint8_t received[sizeof(struct ether_arp)];
+        int n;
+
+        n = send(out_fd, packet, sizeof(*packet), 0);
+        assert(n == sizeof(*packet));
+
+        n = recv(in_fd, received, sizeof(received), 0);
+        assert(n == sizeof(received));
+}
+
+/*
+ * Send @packet on @out_fd and require that nothing can be read from @in_fd
+ * afterwards: recv() must fail with EAGAIN.
+ */
+static void verify_failure(struct ether_arp *packet, int out_fd, int in_fd) {
+        uint8_t received[sizeof(struct ether_arp)];
+        int n;
+
+        /* the send itself must still go through completely */
+        n = send(out_fd, packet, sizeof(*packet), 0);
+        assert(n == sizeof(*packet));
+
+        n = recv(in_fd, received, sizeof(received), 0);
+        assert(n < 0);
+        assert(errno == EAGAIN);
+}
+
+/*
+ * Run the compiled eBPF filter against a series of ARP packets.
+ *
+ * The program is attached to one end of a non-blocking AF_UNIX seqpacket
+ * socketpair. Every case sends a packet into the other end and checks
+ * whether it is delivered (accepted) or whether recv() fails with EAGAIN
+ * (dropped). The cases run in a fixed order because they share the sockets.
+ */
+static void test_filter(void) {
+        struct ether_addr own_mac = { { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 } };
+        struct ether_addr peer_mac = { { 0x01, 0x02, 0x03, 0x04, 0x05, 0x07 } };
+        struct in_addr ip_none = { 0 };
+        struct in_addr ip_tracked = { 1 };
+        struct in_addr ip_other = { 2 };
+        uint8_t raw[sizeof(struct ether_arp) + 1];
+        struct ether_arp *arp = (struct ether_arp *)raw;
+        int sk[2];
+        int map_fd = -1, prog_fd = -1;
+        int rc;
+
+        rc = n_acd_bpf_map_create(&map_fd, 1);
+        assert(rc >= 0);
+
+        /* the filter is compiled for own_mac */
+        rc = n_acd_bpf_compile(&prog_fd, map_fd, &own_mac);
+        assert(rc >= 0);
+        assert(prog_fd >= 0);
+
+        rc = socketpair(AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, sk);
+        assert(rc >= 0);
+
+        /* sk[1] is the filtered receiver, sk[0] the sender */
+        rc = setsockopt(sk[1], SOL_SOCKET, SO_ATTACH_BPF, &prog_fd,
+                        sizeof(prog_fd));
+        assert(rc >= 0);
+
+        /* ip_tracked is the only address present in the map */
+        rc = n_acd_bpf_map_add(map_fd, &ip_tracked);
+        assert(rc >= 0);
+
+        /* accepted: request sent from the tracked address */
+        *arp = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &peer_mac, &ip_tracked, &ip_other);
+        verify_success(arp, sk[0], sk[1]);
+
+        /* accepted: same, but as a reply */
+        *arp = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REPLY, &peer_mac, &ip_tracked, &ip_other);
+        verify_success(arp, sk[0], sk[1]);
+
+        /* accepted: request without sender address, targeting the tracked address */
+        *arp = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &peer_mac, &ip_none, &ip_tracked);
+        verify_success(arp, sk[0], sk[1]);
+
+        /* dropped: wrong hardware type */
+        *arp = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &peer_mac, &ip_tracked, &ip_other);
+        ++arp->arp_hrd;
+        verify_failure(arp, sk[0], sk[1]);
+
+        /* dropped: wrong protocol type */
+        *arp = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &peer_mac, &ip_tracked, &ip_other);
+        ++arp->arp_pro;
+        verify_failure(arp, sk[0], sk[1]);
+
+        /* dropped: wrong hardware address length */
+        *arp = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &peer_mac, &ip_tracked, &ip_other);
+        ++arp->arp_hln;
+        verify_failure(arp, sk[0], sk[1]);
+
+        /* dropped: wrong protocol address length */
+        *arp = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &peer_mac, &ip_tracked, &ip_other);
+        ++arp->arp_pln;
+        verify_failure(arp, sk[0], sk[1]);
+
+        /*
+         * dropped: operation is neither request nor reply
+         *
+         * NOTE(review): arp_hln is bumped here as well, exactly like in the
+         * hardware-address-length case above, so the opcode is not tested
+         * in isolation. This looks like a copy-paste leftover - confirm
+         * against the filter program before removing it.
+         */
+        *arp = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_NAK, &peer_mac, &ip_tracked, &ip_other);
+        ++arp->arp_hln;
+        verify_failure(arp, sk[0], sk[1]);
+
+        /* dropped: sender hardware address is the one the filter was compiled for */
+        *arp = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &own_mac, &ip_tracked, &ip_other);
+        verify_failure(arp, sk[0], sk[1]);
+
+        /* dropped: sender set, but neither sender nor target is tracked */
+        *arp = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &peer_mac, &ip_other, &ip_other);
+        verify_failure(arp, sk[0], sk[1]);
+
+        /* dropped: no sender, and the target is not tracked */
+        *arp = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &peer_mac, &ip_none, &ip_other);
+        verify_failure(arp, sk[0], sk[1]);
+
+        /* dropped: no sender and tracked target, but a reply rather than a request */
+        *arp = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REPLY, &peer_mac, &ip_none, &ip_tracked);
+        verify_failure(arp, sk[0], sk[1]);
+
+        /* one byte too long: delivered, but cut down to the size of an ARP packet */
+        *arp = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &peer_mac, &ip_tracked, &ip_other);
+        rc = send(sk[0], raw, sizeof(*arp) + 1, 0);
+        assert(rc == sizeof(*arp) + 1);
+
+        rc = recv(sk[1], raw, sizeof(raw), 0);
+        assert(rc == sizeof(*arp));
+
+        /* one byte too short: dropped */
+        *arp = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &peer_mac, &ip_tracked, &ip_other);
+        rc = send(sk[0], raw, sizeof(*arp) - 1, 0);
+        assert(rc == sizeof(*arp) - 1);
+
+        rc = recv(sk[1], raw, sizeof(raw), 0);
+        assert(rc < 0);
+        assert(errno == EAGAIN);
+
+        /*
+         * Queue the same packet once before and once after the address is
+         * removed from the map. Only the first copy may arrive, which shows
+         * that the map is consulted at send() time and not at recv() time.
+         */
+        *arp = (struct ether_arp)ETHER_ARP_PACKET_INIT(ARPOP_REQUEST, &peer_mac, &ip_tracked, &ip_other);
+        rc = send(sk[0], raw, sizeof(*arp), 0);
+        assert(rc == sizeof(*arp));
+
+        rc = n_acd_bpf_map_remove(map_fd, &ip_tracked);
+        assert(rc >= 0);
+
+        rc = send(sk[0], raw, sizeof(*arp), 0);
+        assert(rc == sizeof(*arp));
+
+        rc = recv(sk[1], raw, sizeof(raw), 0);
+        assert(rc == sizeof(*arp));
+
+        rc = recv(sk[1], raw, sizeof(raw), 0);
+        assert(rc < 0);
+        assert(errno == EAGAIN);
+
+        close(sk[0]);
+        close(sk[1]);
+        close(prog_fd);
+        close(map_fd);
+}
+
+/*
+ * Test entry point: run the shared test setup, then the map and filter
+ * tests. A non-zero result of test_setup() is returned as exit code
+ * without running any test.
+ */
+int main(int argc, char **argv) {
+        const int setup = test_setup();
+
+        if (setup != 0)
+                return setup;
+
+        test_map();
+        test_filter();
+
+        return 0;
+}
diff --git a/shared/n-acd/src/test-loopback.c b/shared/n-acd/src/test-loopback.c
index 98195c93a..5c01d65b6 100644
--- a/shared/n-acd/src/test-loopback.c
+++ b/shared/n-acd/src/test-loopback.c
@@ -9,44 +9,62 @@
#include "test.h"
static void test_loopback(int ifindex, uint8_t *mac, size_t n_mac) {
- NAcdConfig config = {
- .ifindex = ifindex,
- .transport = N_ACD_TRANSPORT_ETHERNET,
- .mac = mac,
- .n_mac = n_mac,
- .ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) },
- .timeout_msec = 100,
- };
- struct pollfd pfds;
+ NAcdConfig *config;
NAcd *acd;
+ struct pollfd pfds;
int r, fd;
- r = n_acd_new(&acd);
+ r = n_acd_config_new(&config);
assert(!r);
- n_acd_get_fd(acd, &fd);
- r = n_acd_start(acd, &config);
+ n_acd_config_set_ifindex(config, ifindex);
+ n_acd_config_set_transport(config, N_ACD_TRANSPORT_ETHERNET);
+ n_acd_config_set_mac(config, mac, n_mac);
+
+ r = n_acd_new(&acd, config);
assert(!r);
- for (;;) {
- NAcdEvent *event;
- pfds = (struct pollfd){ .fd = fd, .events = POLLIN };
- r = poll(&pfds, 1, -1);
- assert(r >= 0);
+ n_acd_config_free(config);
- r = n_acd_dispatch(acd);
+ {
+ NAcdProbeConfig *probe_config;
+ NAcdProbe *probe;
+ struct in_addr ip = { htobe32((192 << 24) | (168 << 16) | (1 << 0)) };
+
+ r = n_acd_probe_config_new(&probe_config);
assert(!r);
- r = n_acd_pop_event(acd, &event);
- if (!r) {
- assert(event->event == N_ACD_EVENT_READY);
- break;
- } else {
- assert(r == N_ACD_E_DONE);
+ n_acd_probe_config_set_ip(probe_config, ip);
+ n_acd_probe_config_set_timeout(probe_config, 100);
+
+ r = n_acd_probe(acd, &probe, probe_config);
+ assert(!r);
+
+ n_acd_probe_config_free(probe_config);
+
+ n_acd_get_fd(acd, &fd);
+
+ for (;;) {
+ NAcdEvent *event;
+ pfds = (struct pollfd){ .fd = fd, .events = POLLIN };
+ r = poll(&pfds, 1, -1);
+ assert(r >= 0);
+
+ r = n_acd_dispatch(acd);
+ assert(!r);
+
+ r = n_acd_pop_event(acd, &event);
+ assert(!r);
+ if (event) {
+ assert(event->event == N_ACD_EVENT_READY);
+ break;
+ }
}
+
+ n_acd_probe_free(probe);
}
- n_acd_free(acd);
+ n_acd_unref(acd);
}
int main(int argc, char **argv) {
@@ -57,9 +75,7 @@ int main(int argc, char **argv) {
if (r)
return r;
- r = system("ip link set lo up");
- assert(r == 0);
- test_if_query("lo", &ifindex, &mac);
+ test_loopback_up(&ifindex, &mac);
test_loopback(ifindex, mac.ether_addr_octet, sizeof(mac.ether_addr_octet));
return 0;
diff --git a/shared/n-acd/src/test-veth.c b/shared/n-acd/src/test-veth.c
new file mode 100644
index 000000000..64724f5e9
--- /dev/null
+++ b/shared/n-acd/src/test-veth.c
@@ -0,0 +1,238 @@
+/*
+ * Test on a veth link
+ *
+ * This essentially mimics a real network with two peers.
+ *
+ * Run one ACD context on each end of the tunnel. On one end probe for N
+ * addresses, on the other end pre-configure N/3 of the same addresses and probe
+ * for another N/3 of the addresses.
+ *
+ * Verify that in the case of simultaneous probes of the same address at most one
+ * succeeds, in the case of probing for a configured address it always fails, and
+ * probing for a non-existent address always succeeds.
+ *
+ * Make sure to keep N fairly high as the protocol is probabilistic, and we also
+ * want to verify that resizing the internal maps works correctly.
+ */
+
+#include
+#include "test.h"
+
+#define TEST_ACD_N_PROBES (9)
+
+typedef enum {
+ TEST_ACD_STATE_UNKNOWN,
+ TEST_ACD_STATE_USED,
+ TEST_ACD_STATE_READY,
+} TestAcdState;
+
+static void test_veth(int ifindex1, uint8_t *mac1, size_t n_mac1,
+ int ifindex2, uint8_t *mac2, size_t n_mac2) {
+ NAcdConfig *config;
+ NAcd *acd1, *acd2;
+ NAcdProbe *probes1[TEST_ACD_N_PROBES];
+ NAcdProbe *probes2[TEST_ACD_N_PROBES];
+ unsigned long state1, state2;
+ size_t n_running = 0;
+ int r;
+
+ r = n_acd_config_new(&config);
+ assert(!r);
+
+ n_acd_config_set_transport(config, N_ACD_TRANSPORT_ETHERNET);
+
+ n_acd_config_set_ifindex(config, ifindex1);
+ n_acd_config_set_mac(config, mac1, n_mac1);
+ r = n_acd_new(&acd1, config);
+ assert(!r);
+
+ n_acd_config_set_ifindex(config, ifindex2);
+ n_acd_config_set_mac(config, mac2, n_mac2);
+ r = n_acd_new(&acd2, config);
+ assert(!r);
+
+ n_acd_config_free(config);
+
+ {
+ NAcdProbeConfig *probe_config;
+
+ r = n_acd_probe_config_new(&probe_config);
+ assert(!r);
+ n_acd_probe_config_set_timeout(probe_config, 64);
+
+ assert(TEST_ACD_N_PROBES <= 10 << 24);
+
+ for (size_t i = 0; i < TEST_ACD_N_PROBES; ++i) {
+ struct in_addr ip = { htobe32((10 << 24) | i) };
+
+ n_acd_probe_config_set_ip(probe_config, ip);
+
+ switch (i % 3) {
+ case 0:
+ /*
+ * Probe on one side, and leave the address
+ * unset on the other. The probe must succeed.
+ */
+
+ break;
+ case 1:
+ /*
+ * Preconfigure the address on one side, and
+ * probe on the other. The probe must fail.
+ */
+ test_add_child_ip(&ip);
+
+ break;
+ case 2:
+ /*
+ * Probe both sides for the same address, at
+ * most one may succeed.
+ */
+ r = n_acd_probe(acd2, &probes2[i], probe_config);
+ assert(!r);
+
+ ++n_running;
+
+ break;
+ }
+
+ r = n_acd_probe(acd1, &probes1[i], probe_config);
+ assert(!r);
+
+ ++n_running;
+ }
+
+ n_acd_probe_config_free(probe_config);
+
+ while (n_running > 0) {
+ NAcdEvent *event;
+ struct pollfd pfds[2] = {
+ { .events = POLLIN },
+ { .events = POLLIN },
+ };
+
+ n_acd_get_fd(acd1, &pfds[0].fd);
+ n_acd_get_fd(acd2, &pfds[1].fd);
+
+ r = poll(pfds, 2, -1);
+ assert(r >= 0);
+
+ if (pfds[0].revents & POLLIN) {
+ r = n_acd_dispatch(acd1);
+ assert(!r || r == N_ACD_E_PREEMPTED);
+
+ for (;;) {
+ r = n_acd_pop_event(acd1, &event);
+ assert(!r);
+ if (event) {
+ switch (event->event) {
+ case N_ACD_EVENT_READY:
+ n_acd_probe_get_userdata(event->ready.probe, (void**)&state1);
+ assert(state1 == TEST_ACD_STATE_UNKNOWN);
+ state1 = TEST_ACD_STATE_READY;
+ n_acd_probe_set_userdata(event->ready.probe, (void*)state1);
+
+ break;
+ case N_ACD_EVENT_USED:
+ n_acd_probe_get_userdata(event->used.probe, (void**)&state1);
+ assert(state1 == TEST_ACD_STATE_UNKNOWN);
+ state1 = TEST_ACD_STATE_USED;
+ n_acd_probe_set_userdata(event->used.probe, (void*)state1);
+
+ break;
+ default:
+ assert(0);
+ }
+
+ --n_running;
+ } else {
+ break;
+ }
+ }
+ }
+
+ if (pfds[1].revents & POLLIN) {
+ r = n_acd_dispatch(acd2);
+ assert(!r || r == N_ACD_E_PREEMPTED);
+
+ for (;;) {
+ r = n_acd_pop_event(acd2, &event);
+ assert(!r);
+ if (event) {
+ switch (event->event) {
+ case N_ACD_EVENT_READY:
+ n_acd_probe_get_userdata(event->ready.probe, (void**)&state2);
+ assert(state2 == TEST_ACD_STATE_UNKNOWN);
+ state2 = TEST_ACD_STATE_READY;
+ n_acd_probe_set_userdata(event->ready.probe, (void*)state2);
+
+ break;
+ case N_ACD_EVENT_USED:
+ n_acd_probe_get_userdata(event->used.probe, (void**)&state2);
+ assert(state2 == TEST_ACD_STATE_UNKNOWN);
+ state2 = TEST_ACD_STATE_USED;
+ n_acd_probe_set_userdata(event->used.probe, (void*)state2);
+
+ break;
+ default:
+ assert(0);
+ }
+
+ --n_running;
+ } else {
+ break;
+ }
+ }
+ }
+ }
+
+ for (size_t i = 0; i < TEST_ACD_N_PROBES; ++i) {
+ struct in_addr ip = { htobe32((10 << 24) | i) };
+
+ switch (i % 3) {
+ case 0:
+ n_acd_probe_get_userdata(probes1[i], (void **)&state1);
+ assert(state1 == TEST_ACD_STATE_READY);
+
+ break;
+ case 1:
+ test_del_child_ip(&ip);
+
+ n_acd_probe_get_userdata(probes1[i], (void **)&state1);
+ assert(state1 == TEST_ACD_STATE_USED);
+
+ break;
+ case 2:
+ n_acd_probe_get_userdata(probes1[i], (void **)&state1);
+ n_acd_probe_get_userdata(probes2[i], (void **)&state2);
+ assert(state1 != TEST_ACD_STATE_UNKNOWN);
+ assert(state2 != TEST_ACD_STATE_UNKNOWN);
+ assert(state1 == TEST_ACD_STATE_USED || state2 == TEST_ACD_STATE_USED);
+ n_acd_probe_free(probes2[i]);
+
+ break;
+ }
+ n_acd_probe_free(probes1[i]);
+ }
+ }
+
+ n_acd_unref(acd2);
+ n_acd_unref(acd1);
+}
+
+int main(int argc, char **argv) {
+ struct ether_addr mac1, mac2;
+ int r, ifindex1, ifindex2;
+
+ r = test_setup();
+ if (r)
+ return r;
+
+ test_veth_new(&ifindex1, &mac1, &ifindex2, &mac2);
+ for (unsigned int i = 0; i < 8; ++i) {
+ test_veth(ifindex1, mac1.ether_addr_octet, sizeof(mac1.ether_addr_octet),
+ ifindex2, mac2.ether_addr_octet, sizeof(mac2.ether_addr_octet));
+ }
+
+ return 0;
+}
diff --git a/shared/n-acd/src/test.h b/shared/n-acd/src/test.h
index 92315858b..f2cb801aa 100644
--- a/shared/n-acd/src/test.h
+++ b/shared/n-acd/src/test.h
@@ -11,7 +11,9 @@
#include
#include
#include
+#include
#include
+#include
#include
#include
#include
@@ -22,6 +24,32 @@
#include
#include "n-acd.h"
+static inline void test_add_child_ip(const struct in_addr *addr) {
+ char *p;
+ int r;
+
+ r = asprintf(&p, "ip addr add dev veth1 %s/8", inet_ntoa(*addr));
+ assert(r >= 0);
+
+ r = system(p);
+ assert(r >= 0);
+
+ free(p);
+}
+
+static inline void test_del_child_ip(const struct in_addr *addr) {
+ char *p;
+ int r;
+
+ r = asprintf(&p, "ip addr del dev veth1 %s/8", inet_ntoa(*addr));
+ assert(r >= 0);
+
+ r = system(p);
+ assert(r >= 0);
+
+ free(p);
+}
+
static inline void test_if_query(const char *name, int *indexp, struct ether_addr *macp) {
struct ifreq ifr = {};
size_t l;
@@ -39,7 +67,7 @@ static inline void test_if_query(const char *name, int *indexp, struct ether_add
s = socket(AF_INET, SOCK_DGRAM, 0);
assert(s >= 0);
- strncpy(ifr.ifr_name, name, l);
+ strncpy(ifr.ifr_name, name, l + 1);
r = ioctl(s, SIOCGIFHWADDR, &ifr);
assert(r >= 0);
@@ -84,6 +112,15 @@ static inline void test_veth_new(int *parent_indexp,
test_if_query("veth1", child_indexp, child_macp);
}
+static inline void test_loopback_up(int *indexp, struct ether_addr *macp) {
+ int r;
+
+ r = system("ip link set lo up");
+ assert(r == 0);
+
+ test_if_query("lo", indexp, macp);
+}
+
static inline int test_setup(void) {
int r;
diff --git a/shared/n-acd/src/util/test-timer.c b/shared/n-acd/src/util/test-timer.c
new file mode 100644
index 000000000..9cc3109b6
--- /dev/null
+++ b/shared/n-acd/src/util/test-timer.c
@@ -0,0 +1,176 @@
+/*
+ * Tests for timer utility library
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include "timer.h"
+
+#define N_TIMEOUTS (10000)
+
+static void test_api(void) {
+ Timer timer = TIMER_NULL(timer);
+ Timeout t1 = TIMEOUT_INIT(t1), t2 = TIMEOUT_INIT(t2), *t;
+ int r;
+
+ r = timer_init(&timer);
+ assert(!r);
+
+ timeout_schedule(&t1, &timer, 1);
+ timeout_schedule(&t2, &timer, 2);
+
+ r = timer_pop_timeout(&timer, 10, &t);
+ assert(!r);
+ assert(t == &t1);
+
+ timeout_unschedule(&t2);
+
+ r = timer_pop_timeout(&timer, 10, &t);
+ assert(!r);
+ assert(!t);
+
+ timer_deinit(&timer);
+}
+
+static void test_pop(void) {
+ Timer timer = TIMER_NULL(timer);
+ Timeout timeouts[N_TIMEOUTS] = {};
+ uint64_t times[N_TIMEOUTS] = {};
+ size_t n_timeouts = 0;
+ bool armed;
+ Timeout *t;
+ int r;
+
+ r = timer_init(&timer);
+ assert(!r);
+
+ for(size_t i = 0; i < N_TIMEOUTS; ++i) {
+ timeouts[i] = (Timeout)TIMEOUT_INIT(timeouts[i]);
+ times[i] = rand() % 128 + 1;
+ timeout_schedule(&timeouts[i], &timer, times[i]);
+ }
+
+ armed = true;
+
+ for(size_t i = 0; i <= 128; ++i) {
+ if (armed) {
+ struct pollfd pfd = {
+ .fd = timer.fd,
+ .events = POLLIN,
+ };
+ uint64_t count;
+
+ r = poll(&pfd, 1, -1);
+ assert(r == 1);
+
+ r = read(timer.fd, &count, sizeof(count));
+ assert(r == sizeof(count));
+ assert(count == 1);
+ armed = false;
+ }
+
+ for (;;) {
+ uint64_t current_time;
+
+ r = timer_pop_timeout(&timer, i, &t);
+ assert(!r);
+ if (!t) {
+ timer_rearm(&timer);
+ break;
+ }
+
+ current_time = times[t - timeouts];
+ assert(current_time == i);
+ ++n_timeouts;
+ armed = true;
+ }
+ }
+
+ assert(n_timeouts == N_TIMEOUTS);
+
+ r = timer_pop_timeout(&timer, (uint64_t)-1, &t);
+ assert(!r);
+ assert(!t);
+
+ timer_deinit(&timer);
+}
+
+void test_arm(void) { /* verify that (dis)arming one timerfd leaves a second, independent one untouched */
+ struct itimerspec spec = {
+ .it_value = {
+ .tv_sec = 1000,
+ },
+ };
+ int fd1, fd2, r;
+
+ fd1 = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
+ assert(fd1 >= 0);
+
+ fd2 = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
+ assert(fd2 >= 0); /* check the descriptor that was just created, not fd1 again */
+
+ r = timerfd_settime(fd1, 0, &spec, NULL);
+ assert(r >= 0);
+
+ r = timerfd_settime(fd2, 0, &spec, NULL);
+ assert(r >= 0);
+
+ r = timerfd_gettime(fd1, &spec);
+ assert(r >= 0);
+ assert(spec.it_value.tv_sec);
+
+ r = timerfd_gettime(fd2, &spec);
+ assert(r >= 0);
+ assert(spec.it_value.tv_sec);
+
+ spec = (struct itimerspec){}; /* an all-zero it_value disarms the timer */
+
+ r = timerfd_settime(fd1, 0, &spec, NULL);
+ assert(r >= 0);
+
+ r = timerfd_gettime(fd1, &spec);
+ assert(r >= 0);
+ assert(!spec.it_value.tv_sec);
+ assert(!spec.it_value.tv_nsec);
+
+ r = timerfd_gettime(fd2, &spec);
+ assert(r >= 0);
+ assert(spec.it_value.tv_sec);
+
+ spec = (struct itimerspec){ .it_value = { .tv_nsec = 1, }, };
+
+ r = timerfd_settime(fd1, 0, &spec, NULL);
+ assert(r >= 0);
+
+ r = poll(&(struct pollfd) { .fd = fd1, .events = POLLIN }, 1, -1);
+ assert(r == 1);
+
+ r = timerfd_settime(fd2, 0, &spec, NULL);
+ assert(r >= 0);
+
+ r = poll(&(struct pollfd) { .fd = fd2, .events = POLLIN }, 1, -1);
+ assert(r == 1);
+
+ spec = (struct itimerspec){};
+
+ r = timerfd_settime(fd1, 0, &spec, NULL);
+ assert(r >= 0);
+
+ r = poll(&(struct pollfd) { .fd = fd2, .events = POLLIN }, 1, -1); /* fd2 fired above and was never read, so it must still be readable */
+ assert(r == 1);
+
+ close(fd2);
+ close(fd1);
+}
+
+int main(int argc, char **argv) {
+ test_arm();
+ test_api();
+ test_pop();
+ return 0;
+}
diff --git a/shared/n-acd/src/util/timer.c b/shared/n-acd/src/util/timer.c
new file mode 100644
index 000000000..c995ba400
--- /dev/null
+++ b/shared/n-acd/src/util/timer.c
@@ -0,0 +1,189 @@
+/*
+ * Timer Utility Library
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include "timer.h"
+
+int timer_init(Timer *timer) {
+ clockid_t clock = CLOCK_BOOTTIME;
+ int r;
+
+ r = timerfd_create(clock, TFD_CLOEXEC | TFD_NONBLOCK);
+ if (r < 0 && errno == EINVAL) {
+ clock = CLOCK_MONOTONIC;
+ r = timerfd_create(clock, TFD_CLOEXEC | TFD_NONBLOCK);
+ }
+ if (r < 0)
+ return -errno;
+
+ *timer = (Timer)TIMER_NULL(*timer);
+ timer->fd = r;
+ timer->clock = clock;
+
+ return 0;
+}
+
+void timer_deinit(Timer *timer) {
+ assert(c_rbtree_is_empty(&timer->tree));
+
+ if (timer->fd >= 0) {
+ close(timer->fd);
+ timer->fd = -1;
+ }
+}
+
+void timer_now(Timer *timer, uint64_t *nowp) {
+ struct timespec ts;
+ int r;
+
+ r = clock_gettime(timer->clock, &ts);
+ assert(r >= 0);
+
+ *nowp = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec;
+}
+
+void timer_rearm(Timer *timer) {
+ uint64_t time;
+ Timeout *timeout;
+ int r;
+
+ /*
+ * A timeout value of 0 clears the timer, we should only set that if
+ * no timeout exists in the tree.
+ */
+
+ timeout = c_rbnode_entry(c_rbtree_first(&timer->tree), Timeout, node);
+ assert(!timeout || timeout->timeout);
+
+ time = timeout ? timeout->timeout : 0;
+
+ if (time != timer->scheduled_timeout) {
+ r = timerfd_settime(timer->fd,
+ TFD_TIMER_ABSTIME,
+ &(struct itimerspec){
+ .it_value = {
+ .tv_sec = time / UINT64_C(1000000000),
+ .tv_nsec = time % UINT64_C(1000000000),
+ },
+ },
+ NULL);
+ assert(r >= 0);
+
+ timer->scheduled_timeout = time;
+ }
+}
+
+int timer_read(Timer *timer) {
+ uint64_t v;
+ int r;
+
+ r = read(timer->fd, &v, sizeof(v));
+ if (r < 0) {
+ if (errno == EAGAIN) {
+ /*
+ * No more pending events.
+ */
+ return 0;
+ } else {
+ /*
+ * Something failed. We use CLOCK_BOOTTIME/MONOTONIC,
+ * so ECANCELED cannot happen. Hence, there is no
+ * error that we could gracefully handle. Fail hard
+ * and let the caller deal with it.
+ */
+ return -errno;
+ }
+ } else if (r != sizeof(v) || v == 0) {
+ /*
+ * Kernel guarantees 8-byte reads, and only to return
+ * data if at least one timer triggered; fail hard if
+ * it suddenly starts doing weird shit.
+ */
+ return -EIO;
+ }
+
+ return TIMER_E_TRIGGERED;
+}
+
+
+int timer_pop_timeout(Timer *timer, uint64_t until, Timeout **timeoutp) {
+ Timeout *timeout;
+
+ /*
+ * If the first timeout is scheduled before @until, then unlink
+ * it and return it. Otherwise, return NULL.
+ */
+ timeout = c_rbnode_entry(c_rbtree_first(&timer->tree), Timeout, node);
+ if (timeout && timeout->timeout <= until) {
+ c_rbnode_unlink(&timeout->node);
+ timeout->timeout = 0;
+ *timeoutp = timeout;
+ } else {
+ *timeoutp = NULL;
+ }
+
+ return 0;
+}
+
+void timeout_schedule(Timeout *timeout, Timer *timer, uint64_t time) {
+
+ assert(time);
+
+ /*
+ * In case @timeout was already scheduled, remove it from the
+ * tree. If we are moving it to a new timer, rearm the old one.
+ */
+ if (timeout->timer) {
+ c_rbnode_unlink(&timeout->node);
+ if (timeout->timer != timer)
+ timer_rearm(timeout->timer);
+ }
+ timeout->timer = timer;
+ timeout->timeout = time;
+
+ /*
+ * Now insert it back into the tree in the correct new position.
+ * We allow duplicates in the tree, so this insertion is open-coded.
+ */
+ {
+ Timeout *other;
+ CRBNode **slot, *parent;
+
+ slot = &timer->tree.root;
+ parent = NULL;
+ while (*slot) {
+ other = c_rbnode_entry(*slot, Timeout, node);
+ parent = *slot;
+ if (timeout->timeout < other->timeout)
+ slot = &(*slot)->left;
+ else
+ slot = &(*slot)->right;
+ }
+
+ c_rbtree_add(&timer->tree, parent, slot, &timeout->node);
+ }
+
+ /*
+ * Rearm the timer as we updated the timeout tree.
+ */
+ timer_rearm(timer);
+}
+
+void timeout_unschedule(Timeout *timeout) {
+ Timer *timer = timeout->timer;
+
+ if (!timer)
+ return;
+
+ c_rbnode_unlink(&timeout->node);
+ timeout->timeout = 0;
+ timeout->timer = NULL;
+
+ timer_rearm(timer);
+}
diff --git a/shared/n-acd/src/util/timer.h b/shared/n-acd/src/util/timer.h
new file mode 100644
index 000000000..2acc99e37
--- /dev/null
+++ b/shared/n-acd/src/util/timer.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+typedef struct Timer Timer;
+typedef struct Timeout Timeout;
+
+enum {
+ _TIMER_E_SUCCESS,
+
+ TIMER_E_TRIGGERED,
+
+ _TIMER_E_N,
+};
+
+struct Timer {
+ int fd;
+ clockid_t clock;
+ CRBTree tree;
+ uint64_t scheduled_timeout;
+};
+
+#define TIMER_NULL(_x) { \
+ .fd = -1, \
+ .tree = C_RBTREE_INIT, \
+ }
+
+struct Timeout {
+ Timer *timer;
+ CRBNode node;
+ uint64_t timeout;
+};
+
+#define TIMEOUT_INIT(_x) { \
+ .node = C_RBNODE_INIT((_x).node), \
+ }
+
+int timer_init(Timer *timer);
+void timer_deinit(Timer *timer);
+
+void timer_now(Timer *timer, uint64_t *nowp);
+
+int timer_pop_timeout(Timer *timer, uint64_t until, Timeout **timeoutp); /* *timeoutp is set to NULL if no timeout is due by @until */
+void timer_rearm(Timer *timer);
+int timer_read(Timer *timer);
+
+void timeout_schedule(Timeout *timeout, Timer *timer, uint64_t time);
+void timeout_unschedule(Timeout *timeout);
+
diff --git a/shared/n-acd/subprojects/c-list b/shared/n-acd/subprojects/c-list
index 72c59181d..dda36d30c 160000
--- a/shared/n-acd/subprojects/c-list
+++ b/shared/n-acd/subprojects/c-list
@@ -1 +1 @@
-Subproject commit 72c59181d677a3f50b201d51f190b1bff02d4279
+Subproject commit dda36d30c7d655b4d61358519168fa7ce0e9dae9
diff --git a/shared/n-acd/subprojects/c-rbtree b/shared/n-acd/subprojects/c-rbtree
new file mode 160000
index 000000000..bf627e0c3
--- /dev/null
+++ b/shared/n-acd/subprojects/c-rbtree
@@ -0,0 +1 @@
+Subproject commit bf627e0c32241915108f66ad9738444e4d045b45
diff --git a/shared/n-acd/subprojects/c-siphash b/shared/n-acd/subprojects/c-siphash
index e01ab640d..b24d2e204 160000
--- a/shared/n-acd/subprojects/c-siphash
+++ b/shared/n-acd/subprojects/c-siphash
@@ -1 +1 @@
-Subproject commit e01ab640dcf72dfa6928c94a261bf78cd943d9c3
+Subproject commit b24d2e20489b08bb350d67b82f6fb354d6951a1c
diff --git a/src/devices/nm-acd-manager.c b/src/devices/nm-acd-manager.c
index 035487a33..52e9200a2 100644
--- a/src/devices/nm-acd-manager.c
+++ b/src/devices/nm-acd-manager.c
@@ -39,10 +39,7 @@ typedef enum {
typedef struct {
in_addr_t address;
gboolean duplicate;
- NMAcdManager *manager;
- NAcd *acd;
- GIOChannel *channel;
- guint event_id;
+ NAcdProbe *probe;
} AddressInfo;
enum {
@@ -58,6 +55,9 @@ typedef struct {
State state;
GHashTable *addresses;
guint completed;
+ NAcd *acd;
+ GIOChannel *channel;
+ guint event_id;
} NMAcdManagerPrivate;
struct _NMAcdManager {
@@ -114,29 +114,22 @@ _acd_event_to_string (unsigned int event)
#define acd_event_to_string(event) NM_UTILS_LOOKUP_STR (_acd_event_to_string, event)
static const char *
-_acd_error_to_string (int error)
+acd_error_to_string (int error)
{
if (error < 0)
- return strerror(-error);
+ return g_strerror (-error);
switch (error) {
case _N_ACD_E_SUCCESS:
return "success";
- case N_ACD_E_DONE:
- return "no more events (engine running)";
- case N_ACD_E_STOPPED:
- return "no more events (engine stopped)";
case N_ACD_E_PREEMPTED:
return "preempted";
case N_ACD_E_INVALID_ARGUMENT:
return "invalid argument";
- case N_ACD_E_BUSY:
- return "busy";
}
- return NULL;
-}
-#define acd_error_to_string(error) NM_UTILS_LOOKUP_STR (_acd_error_to_string, error)
+ g_return_val_if_reached (NULL);
+}
/*****************************************************************************/
@@ -164,7 +157,6 @@ nm_acd_manager_add_address (NMAcdManager *self, in_addr_t address)
info = g_slice_new0 (AddressInfo);
info->address = address;
- info->manager = self;
g_hash_table_insert (priv->addresses, GUINT_TO_POINTER (address), info);
@@ -174,115 +166,130 @@ nm_acd_manager_add_address (NMAcdManager *self, in_addr_t address)
static gboolean
acd_event (GIOChannel *source, GIOCondition condition, gpointer data)
{
- AddressInfo *info = data;
- NMAcdManager *self = info->manager;
+ NMAcdManager *self = data;
NMAcdManagerPrivate *priv = NM_ACD_MANAGER_GET_PRIVATE (self);
NAcdEvent *event;
+ AddressInfo *info;
char address_str[INET_ADDRSTRLEN];
gs_free char *hwaddr_str = NULL;
int r;
- if ( n_acd_dispatch (info->acd)
- || n_acd_pop_event (info->acd, &event))
+ if (n_acd_dispatch (priv->acd))
return G_SOURCE_CONTINUE;
- switch (event->event) {
- case N_ACD_EVENT_READY:
- info->duplicate = FALSE;
- if (priv->state == STATE_ANNOUNCING) {
- r = n_acd_announce (info->acd, N_ACD_DEFEND_ONCE);
- if (r) {
- _LOGW ("couldn't announce address %s on interface '%s': %s",
- nm_utils_inet4_ntop (info->address, address_str),
- nm_platform_link_get_name (NM_PLATFORM_GET, priv->ifindex),
- acd_error_to_string (r));
- } else {
- _LOGD ("announcing address %s",
- nm_utils_inet4_ntop (info->address, address_str));
+ while (!n_acd_pop_event (priv->acd, &event) && event) {
+ switch (event->event) {
+ case N_ACD_EVENT_READY:
+ n_acd_probe_get_userdata (event->ready.probe, (void **) &info);
+ info->duplicate = FALSE;
+ if (priv->state == STATE_ANNOUNCING) {
+ /* fake probe ended, start announcing */
+ r = n_acd_probe_announce (info->probe, N_ACD_DEFEND_ONCE);
+ if (r) {
+ _LOGW ("couldn't announce address %s on interface '%s': %s",
+ nm_utils_inet4_ntop (info->address, address_str),
+ nm_platform_link_get_name (NM_PLATFORM_GET, priv->ifindex),
+ acd_error_to_string (r));
+ } else {
+ _LOGD ("announcing address %s",
+ nm_utils_inet4_ntop (info->address, address_str));
+ }
}
+ break;
+ case N_ACD_EVENT_USED:
+ n_acd_probe_get_userdata (event->used.probe, (void **) &info);
+ info->duplicate = TRUE;
+ break;
+ case N_ACD_EVENT_DEFENDED:
+ n_acd_probe_get_userdata (event->defended.probe, (void **) &info);
+ _LOGD ("defended address %s from host %s",
+ nm_utils_inet4_ntop (info->address, address_str),
+ (hwaddr_str = nm_utils_hwaddr_ntoa (event->defended.sender,
+ event->defended.n_sender)));
+ return G_SOURCE_CONTINUE;
+ case N_ACD_EVENT_CONFLICT:
+ n_acd_probe_get_userdata (event->conflict.probe, (void **) &info);
+ _LOGW ("conflict for address %s detected with host %s on interface '%s'",
+ nm_utils_inet4_ntop (info->address, address_str),
+ (hwaddr_str = nm_utils_hwaddr_ntoa (event->conflict.sender,
+ event->conflict.n_sender)),
+ nm_platform_link_get_name (NM_PLATFORM_GET, priv->ifindex));
+ return G_SOURCE_CONTINUE;
+ default:
+ _LOGD ("unhandled event '%s'", acd_event_to_string (event->event));
+ return G_SOURCE_CONTINUE;
}
- break;
- case N_ACD_EVENT_USED:
- info->duplicate = TRUE;
- break;
- case N_ACD_EVENT_DEFENDED:
- _LOGD ("defended address %s from host %s",
- nm_utils_inet4_ntop (info->address, address_str),
- (hwaddr_str = nm_utils_hwaddr_ntoa (event->defended.sender,
- event->defended.n_sender)));
- break;
- case N_ACD_EVENT_CONFLICT:
- _LOGW ("conflict for address %s detected with host %s on interface '%s'",
- nm_utils_inet4_ntop (info->address, address_str),
- (hwaddr_str = nm_utils_hwaddr_ntoa (event->defended.sender,
- event->defended.n_sender)),
- nm_platform_link_get_name (NM_PLATFORM_GET, priv->ifindex));
- break;
- default:
- _LOGD ("event '%s' for address %s",
- acd_event_to_string (event->event),
- nm_utils_inet4_ntop (info->address, address_str));
- return G_SOURCE_CONTINUE;
- }
- if ( priv->state == STATE_PROBING
- && ++priv->completed == g_hash_table_size (priv->addresses)) {
- priv->state = STATE_PROBE_DONE;
- g_signal_emit (self, signals[PROBE_TERMINATED], 0);
+ if ( priv->state == STATE_PROBING
+ && ++priv->completed == g_hash_table_size (priv->addresses)) {
+ priv->state = STATE_PROBE_DONE;
+ g_signal_emit (self, signals[PROBE_TERMINATED], 0);
+ }
}
return G_SOURCE_CONTINUE;
}
static gboolean
-acd_probe_start (NMAcdManager *self,
- AddressInfo *info,
- guint64 timeout)
+acd_probe_add (NMAcdManager *self,
+ AddressInfo *info,
+ guint64 timeout)
{
NMAcdManagerPrivate *priv = NM_ACD_MANAGER_GET_PRIVATE (self);
- NAcdConfig *config;
- int r, fd;
+ NAcdProbeConfig *probe_config;
+ int r;
- r = n_acd_new (&info->acd);
+ r = n_acd_probe_config_new (&probe_config);
if (r) {
- _LOGW ("could not create ACD for %s on interface '%s': %s",
+ _LOGW ("could not create probe config for %s on interface '%s': %s",
nm_utils_inet4_ntop (info->address, NULL),
nm_platform_link_get_name (NM_PLATFORM_GET, priv->ifindex),
acd_error_to_string (r));
return FALSE;
}
- n_acd_get_fd (info->acd, &fd);
- info->channel = g_io_channel_unix_new (fd);
- info->event_id = g_io_add_watch (info->channel, G_IO_IN, acd_event, info);
+ n_acd_probe_config_set_ip (probe_config, (struct in_addr) { info->address });
+ n_acd_probe_config_set_timeout (probe_config, timeout);
- config = &(NAcdConfig) {
- .ifindex = priv->ifindex,
- .mac = priv->hwaddr,
- .n_mac = ETH_ALEN,
- .ip = info->address,
- .timeout_msec = timeout,
- .transport = N_ACD_TRANSPORT_ETHERNET,
- };
-
- r = n_acd_start (info->acd, config);
+ r = n_acd_probe (priv->acd, &info->probe, probe_config);
if (r) {
_LOGW ("could not start probe for %s on interface '%s': %s",
nm_utils_inet4_ntop (info->address, NULL),
nm_platform_link_get_name (NM_PLATFORM_GET, priv->ifindex),
acd_error_to_string (r));
+ n_acd_probe_config_free (probe_config);
return FALSE;
}
- if (timeout) {
- _LOGD ("started probe for %s with timeout %llu",
- nm_utils_inet4_ntop (info->address, NULL),
- (unsigned long long) timeout);
- }
+ n_acd_probe_set_userdata (info->probe, info);
+ n_acd_probe_config_free (probe_config);
return TRUE;
}
+static int
+acd_init (NMAcdManager *self)
+{
+ NMAcdManagerPrivate *priv = NM_ACD_MANAGER_GET_PRIVATE (self);
+ NAcdConfig *config;
+ int r;
+
+ if (priv->acd)
+ return 0;
+
+ r = n_acd_config_new (&config);
+ if (r)
+ return r;
+
+ n_acd_config_set_ifindex (config, priv->ifindex);
+ n_acd_config_set_transport (config, N_ACD_TRANSPORT_ETHERNET);
+ n_acd_config_set_mac (config, priv->hwaddr, ETH_ALEN);
+
+ r = n_acd_new (&priv->acd, config);
+ n_acd_config_free (config);
+ return r;
+}
+
/**
* nm_acd_manager_start_probe:
* @self: a #NMAcdManager
@@ -301,20 +308,33 @@ nm_acd_manager_start_probe (NMAcdManager *self, guint timeout)
GHashTableIter iter;
AddressInfo *info;
gboolean success = FALSE;
+ int fd, r;
g_return_val_if_fail (NM_IS_ACD_MANAGER (self), FALSE);
priv = NM_ACD_MANAGER_GET_PRIVATE (self);
g_return_val_if_fail (priv->state == STATE_INIT, FALSE);
+ r = acd_init (self);
+ if (r) {
+ _LOGW ("couldn't init ACD for probing on interface '%s': %s",
+ nm_platform_link_get_name (NM_PLATFORM_GET, priv->ifindex),
+ acd_error_to_string (r));
+ return FALSE;
+ }
+
priv->completed = 0;
g_hash_table_iter_init (&iter, priv->addresses);
while (g_hash_table_iter_next (&iter, NULL, (gpointer *) &info))
- success |= acd_probe_start (self, info, timeout);
+ success |= acd_probe_add (self, info, timeout);
if (success)
priv->state = STATE_PROBING;
+ n_acd_get_fd (priv->acd, &fd);
+ priv->channel = g_io_channel_unix_new (fd);
+ priv->event_id = g_io_add_watch (priv->channel, G_IO_IN, acd_event, self);
+
return success;
}
@@ -393,26 +413,29 @@ nm_acd_manager_announce_addresses (NMAcdManager *self)
AddressInfo *info;
int r;
+ r = acd_init (self);
+ if (r) {
+ /* No address has been selected yet ('info' is still unset), so only the interface is logged. */
+ _LOGW ("couldn't init ACD for announcing addresses on interface '%s': %s",
+ nm_platform_link_get_name (NM_PLATFORM_GET, priv->ifindex),
+ acd_error_to_string (r));
+ return;
+ }
+
if (priv->state == STATE_INIT) {
/* n-acd can't announce without probing, therefore let's
* start a fake probe with zero timeout and then perform
- * the announce. */
- priv->state = STATE_ANNOUNCING;
+ * the announcement. */
g_hash_table_iter_init (&iter, priv->addresses);
- while (g_hash_table_iter_next (&iter, NULL, (gpointer *) &info)) {
- if (!acd_probe_start (self, info, 0)) {
- _LOGW ("couldn't announce address %s on interface '%s'",
- nm_utils_inet4_ntop (info->address, NULL),
- nm_platform_link_get_name (NM_PLATFORM_GET, priv->ifindex));
- }
- }
- } else if (priv->state == STATE_PROBE_DONE) {
+ while (g_hash_table_iter_next (&iter, NULL, (gpointer *) &info))
+ acd_probe_add (self, info, 0);
priv->state = STATE_ANNOUNCING;
+ } else if (priv->state == STATE_ANNOUNCING) {
g_hash_table_iter_init (&iter, priv->addresses);
while (g_hash_table_iter_next (&iter, NULL, (gpointer *) &info)) {
if (info->duplicate)
continue;
- r = n_acd_announce (info->acd, N_ACD_DEFEND_ONCE);
+ r = n_acd_probe_announce (info->probe, N_ACD_DEFEND_ONCE);
if (r) {
_LOGW ("couldn't announce address %s on interface '%s': %s",
nm_utils_inet4_ntop (info->address, NULL),
@@ -421,8 +444,7 @@ nm_acd_manager_announce_addresses (NMAcdManager *self)
} else
_LOGD ("announcing address %s", nm_utils_inet4_ntop (info->address, NULL));
}
- } else
- nm_assert_not_reached ();
+ }
}
static void
@@ -430,9 +452,7 @@ destroy_address_info (gpointer data)
{
AddressInfo *info = (AddressInfo *) data;
- g_clear_pointer (&info->channel, g_io_channel_unref);
- g_clear_pointer (&info->acd, n_acd_free);
- nm_clear_g_source (&info->event_id);
+ n_acd_probe_free (info->probe);
g_slice_free (AddressInfo, info);
}
@@ -450,11 +470,12 @@ nm_acd_manager_init (NMAcdManager *self)
}
NMAcdManager *
-nm_acd_manager_new (int ifindex, const guint8 *hwaddr, size_t hwaddr_len)
+nm_acd_manager_new (int ifindex, const guint8 *hwaddr, guint hwaddr_len)
{
NMAcdManager *self;
NMAcdManagerPrivate *priv;
+ g_return_val_if_fail (ifindex > 0, NULL);
g_return_val_if_fail (hwaddr, NULL);
g_return_val_if_fail (hwaddr_len == ETH_ALEN, NULL);
@@ -473,6 +494,9 @@ dispose (GObject *object)
NMAcdManagerPrivate *priv = NM_ACD_MANAGER_GET_PRIVATE (self);
g_clear_pointer (&priv->addresses, g_hash_table_destroy);
+ g_clear_pointer (&priv->channel, g_io_channel_unref);
+ nm_clear_g_source (&priv->event_id);
+ nm_clear_pointer (&priv->acd, n_acd_unref);
G_OBJECT_CLASS (nm_acd_manager_parent_class)->dispose (object);
}
diff --git a/src/devices/nm-acd-manager.h b/src/devices/nm-acd-manager.h
index eeede5da7..d698c2f4b 100644
--- a/src/devices/nm-acd-manager.h
+++ b/src/devices/nm-acd-manager.h
@@ -32,7 +32,7 @@ typedef struct _NMAcdManagerClass NMAcdManagerClass;
GType nm_acd_manager_get_type (void);
-NMAcdManager *nm_acd_manager_new (int ifindex, const guint8 *hwaddr, size_t hwaddr_len);
+NMAcdManager *nm_acd_manager_new (int ifindex, const guint8 *hwaddr, guint hwaddr_len);
void nm_acd_manager_destroy (NMAcdManager *self);
gboolean nm_acd_manager_add_address (NMAcdManager *self, in_addr_t address);
gboolean nm_acd_manager_start_probe (NMAcdManager *self, guint timeout);