Compare commits
100 Commits
765eb0bf16
...
882599e180
Author | SHA1 | Date | |
---|---|---|---|
![]() |
882599e180 | ||
![]() |
d29fa0856e | ||
![]() |
d89b3aa097 | ||
![]() |
898f797174 | ||
![]() |
b7ad19347f | ||
![]() |
8126f7a660 | ||
![]() |
e0647ad80c | ||
![]() |
a45a7e9798 | ||
![]() |
8abd06e9fa | ||
![]() |
c000f2aba6 | ||
![]() |
060f24e310 | ||
![]() |
4cd753e65c | ||
![]() |
781164e25b | ||
![]() |
2faf6fcd8b | ||
![]() |
2f40a01944 | ||
![]() |
6d76278c21 | ||
![]() |
5cffb1bf64 | ||
![]() |
508adde342 | ||
![]() |
acca4235c4 | ||
![]() |
163a339214 | ||
![]() |
f19a8f71f9 | ||
![]() |
528a6517f8 | ||
![]() |
e2ea10e246 | ||
![]() |
52d45f1737 | ||
![]() |
f9fe212b1f | ||
![]() |
4e2d36e83f | ||
![]() |
8012f5ff55 | ||
![]() |
ba74b1fea1 | ||
![]() |
01e5611ec3 | ||
![]() |
66a02c9f7c | ||
![]() |
5235c47c79 | ||
![]() |
71d7985188 | ||
![]() |
9b125e7776 | ||
![]() |
2fa91ee391 | ||
![]() |
6bd8283bf9 | ||
![]() |
4e1f850f61 | ||
![]() |
272d1d033c | ||
![]() |
f79c42317f | ||
![]() |
a740e16fd1 | ||
![]() |
e63d281871 | ||
![]() |
8bd57bf25b | ||
![]() |
ec2691a12e | ||
![]() |
299c407501 | ||
![]() |
be0214cca6 | ||
![]() |
69e5393c37 | ||
![]() |
c6c61a9e1a | ||
![]() |
55aff45bc1 | ||
![]() |
9f9b15f949 | ||
![]() |
fbd78b6f3e | ||
![]() |
f62c33d85f | ||
![]() |
8f8eb73482 | ||
![]() |
74c1c5efcf | ||
![]() |
b625ed5fee | ||
![]() |
403a7c14a0 | ||
![]() |
21ee1eb2de | ||
![]() |
1ee2ecade3 | ||
![]() |
054697598f | ||
![]() |
c66f0341d9 | ||
![]() |
e7323e515a | ||
![]() |
dba7f0f5ce | ||
![]() |
92a22fef93 | ||
![]() |
c1140df889 | ||
![]() |
afd9cdc9bb | ||
![]() |
8c2f24a560 | ||
![]() |
bca0fefa32 | ||
![]() |
b74801645c | ||
![]() |
65923ba798 | ||
![]() |
62de6140d9 | ||
![]() |
1544a43863 | ||
![]() |
f301bb18b5 | ||
![]() |
450a6131be | ||
![]() |
54a9d3801b | ||
![]() |
020ff7a40e | ||
![]() |
7e87bd98ac | ||
![]() |
ff57f8ddc6 | ||
![]() |
63db7dcdbf | ||
![]() |
523fbc5af7 | ||
![]() |
4070bac7a4 | ||
![]() |
7290335b14 | ||
![]() |
0c335d751a | ||
![]() |
377b666dc9 | ||
![]() |
e7ac995217 | ||
![]() |
9ecf7fedc5 | ||
![]() |
fba2b544b6 | ||
![]() |
ec26fa013a | ||
![]() |
d949667436 | ||
![]() |
8a83b530fe | ||
![]() |
ec416fdcc4 | ||
![]() |
3f63743a65 | ||
![]() |
f9e8ee0777 | ||
![]() |
c919bbbdd3 | ||
![]() |
bda80ef53f | ||
![]() |
0e36fe1a43 | ||
![]() |
7094b91d10 | ||
![]() |
c80fa6a6bb | ||
![]() |
d2afb4b625 | ||
![]() |
b3aeb004ea | ||
![]() |
7cb2088835 | ||
![]() |
e651197b5c | ||
![]() |
e84a01e94c |
9
Makefile
9
Makefile
@@ -47,7 +47,7 @@ FLAGS += -DDUAL_STACK_SOCKETS=$(DUAL_STACK_SOCKETS)
|
||||
PASST_SRCS = arch.c arp.c checksum.c conf.c dhcp.c dhcpv6.c flow.c fwd.c \
|
||||
icmp.c igmp.c inany.c iov.c ip.c isolation.c lineread.c log.c mld.c \
|
||||
ndp.c netlink.c packet.c passt.c pasta.c pcap.c pif.c tap.c tcp.c \
|
||||
tcp_splice.c udp.c util.c
|
||||
tcp_buf.c tcp_splice.c udp.c util.c
|
||||
QRAP_SRCS = qrap.c
|
||||
SRCS = $(PASST_SRCS) $(QRAP_SRCS)
|
||||
|
||||
@@ -56,7 +56,8 @@ MANPAGES = passt.1 pasta.1 qrap.1
|
||||
PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h fwd.h \
|
||||
flow_table.h icmp.h icmp_flow.h inany.h iov.h ip.h isolation.h \
|
||||
lineread.h log.h ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h \
|
||||
siphash.h tap.h tcp.h tcp_conn.h tcp_splice.h udp.h util.h
|
||||
siphash.h tap.h tcp.h tcp_buf.h tcp_conn.h tcp_internal.h tcp_splice.h \
|
||||
udp.h udp_flow.h util.h
|
||||
HEADERS = $(PASST_HEADERS) seccomp.h
|
||||
|
||||
C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 };
|
||||
@@ -127,7 +128,7 @@ qrap: $(QRAP_SRCS) passt.h
|
||||
$(CC) $(FLAGS) $(CFLAGS) $(CPPFLAGS) $(QRAP_SRCS) -o qrap $(LDFLAGS)
|
||||
|
||||
valgrind: EXTRA_SYSCALLS += rt_sigprocmask rt_sigtimedwait rt_sigaction \
|
||||
getpid gettid kill clock_gettime mmap \
|
||||
rt_sigreturn getpid gettid kill clock_gettime mmap \
|
||||
munmap open unlink gettimeofday futex
|
||||
valgrind: FLAGS += -g -DVALGRIND
|
||||
valgrind: all
|
||||
@@ -192,7 +193,6 @@ docs: README.md
|
||||
# - llvmlibc-restrict-system-libc-headers
|
||||
# TODO: this is Linux-only for the moment, nice to fix eventually
|
||||
#
|
||||
# - bugprone-macro-parentheses
|
||||
# - google-readability-braces-around-statements
|
||||
# - hicpp-braces-around-statements
|
||||
# - readability-braces-around-statements
|
||||
@@ -269,7 +269,6 @@ clang-tidy: $(SRCS) $(HEADERS)
|
||||
-clang-analyzer-valist.Uninitialized,\
|
||||
-cppcoreguidelines-init-variables,\
|
||||
-bugprone-assignment-in-if-condition,\
|
||||
-bugprone-macro-parentheses,\
|
||||
-google-readability-braces-around-statements,\
|
||||
-hicpp-braces-around-statements,\
|
||||
-readability-braces-around-statements,\
|
||||
|
10
arch.c
10
arch.c
@@ -18,6 +18,8 @@
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "log.h"
|
||||
|
||||
/**
|
||||
* arch_avx2_exec() - Switch to AVX2 build if supported
|
||||
* @argv: Arguments from command line
|
||||
@@ -28,10 +30,8 @@ void arch_avx2_exec(char **argv)
|
||||
char exe[PATH_MAX] = { 0 };
|
||||
const char *p;
|
||||
|
||||
if (readlink("/proc/self/exe", exe, PATH_MAX - 1) < 0) {
|
||||
perror("readlink /proc/self/exe");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (readlink("/proc/self/exe", exe, PATH_MAX - 1) < 0)
|
||||
die_perror("Failed to read own /proc/self/exe link");
|
||||
|
||||
p = strstr(exe, ".avx2");
|
||||
if (p && strlen(p) == strlen(".avx2"))
|
||||
@@ -42,7 +42,7 @@ void arch_avx2_exec(char **argv)
|
||||
|
||||
snprintf(new_path, PATH_MAX + sizeof(".avx2"), "%s.avx2", exe);
|
||||
execve(new_path, argv, environ);
|
||||
perror("Can't run AVX2 build, using non-AVX2 version");
|
||||
warn_perror("Can't run AVX2 build, using non-AVX2 version");
|
||||
}
|
||||
}
|
||||
#else
|
||||
|
603
conf.c
603
conf.c
@@ -132,6 +132,11 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg,
|
||||
return;
|
||||
}
|
||||
|
||||
if ((optname == 't' || optname == 'T') && c->no_tcp)
|
||||
die("TCP port forwarding requested but TCP is disabled");
|
||||
if ((optname == 'u' || optname == 'U') && c->no_udp)
|
||||
die("UDP port forwarding requested but UDP is disabled");
|
||||
|
||||
if (!strcmp(optarg, "auto")) {
|
||||
if (fwd->mode)
|
||||
goto mode_conflict;
|
||||
@@ -147,7 +152,7 @@ static void conf_ports(const struct ctx *c, char optname, const char *optarg,
|
||||
if (fwd->mode)
|
||||
goto mode_conflict;
|
||||
|
||||
if (c->mode != MODE_PASST)
|
||||
if (c->mode == MODE_PASTA)
|
||||
die("'all' port forwarding is only allowed for passt");
|
||||
|
||||
fwd->mode = FWD_ALL;
|
||||
@@ -401,9 +406,9 @@ static void get_dns(struct ctx *c)
|
||||
struct fqdn *s = c->dns_search;
|
||||
struct lineread resolvconf;
|
||||
unsigned int added = 0;
|
||||
ssize_t line_len;
|
||||
char *line, *end;
|
||||
const char *p;
|
||||
int line_len;
|
||||
|
||||
dns4_set = !c->ifi4 || !IN4_IS_ADDR_UNSPECIFIED(dns4);
|
||||
dns6_set = !c->ifi6 || !IN6_IS_ADDR_UNSPECIFIED(dns6);
|
||||
@@ -453,7 +458,7 @@ static void get_dns(struct ctx *c)
|
||||
while (s - c->dns_search < ARRAY_SIZE(c->dns_search) - 1
|
||||
/* cppcheck-suppress strtokCalled */
|
||||
&& (p = strtok(NULL, " \t"))) {
|
||||
strncpy(s->n, p, sizeof(c->dns_search[0]));
|
||||
strncpy(s->n, p, sizeof(c->dns_search[0]) - 1);
|
||||
s++;
|
||||
*s->n = 0;
|
||||
}
|
||||
@@ -461,7 +466,7 @@ static void get_dns(struct ctx *c)
|
||||
}
|
||||
|
||||
if (line_len < 0)
|
||||
warn("Error reading /etc/resolv.conf: %s", strerror(errno));
|
||||
warn_perror("Error reading /etc/resolv.conf");
|
||||
close(fd);
|
||||
|
||||
out:
|
||||
@@ -517,9 +522,6 @@ static void conf_netns_opt(char *netns, const char *arg)
|
||||
static void conf_pasta_ns(int *netns_only, char *userns, char *netns,
|
||||
int optind, int argc, char *argv[])
|
||||
{
|
||||
if (*netns_only && *userns)
|
||||
die("Both --userns and --netns-only given");
|
||||
|
||||
if (*netns && optind != argc)
|
||||
die("Both --netns and PID or command given");
|
||||
|
||||
@@ -626,6 +628,7 @@ static unsigned int conf_ip4(unsigned int ifi,
|
||||
int rc = nl_link_get_mac(nl_sock, ifi, mac);
|
||||
if (rc < 0) {
|
||||
char ifname[IFNAMSIZ];
|
||||
|
||||
err("Couldn't discover MAC address for %s: %s",
|
||||
if_indextoname(ifi, ifname), strerror(-rc));
|
||||
return 0;
|
||||
@@ -704,208 +707,204 @@ static unsigned int conf_ip6(unsigned int ifi,
|
||||
}
|
||||
|
||||
/**
|
||||
* print_usage() - Print usage, exit with given status code
|
||||
* usage() - Print usage, exit with given status code
|
||||
* @name: Executable name
|
||||
* @f: Stream to print usage info to
|
||||
* @status: Status code for exit()
|
||||
*/
|
||||
static void print_usage(const char *name, int status)
|
||||
static void usage(const char *name, FILE *f, int status)
|
||||
{
|
||||
if (strstr(name, "pasta")) {
|
||||
info("Usage: %s [OPTION]... [COMMAND] [ARGS]...", name);
|
||||
info(" %s [OPTION]... PID", name);
|
||||
info(" %s [OPTION]... --netns [PATH|NAME]", name);
|
||||
info("");
|
||||
info("Without PID or --netns, run the given command or a");
|
||||
info("default shell in a new network and user namespace, and");
|
||||
info("connect it via pasta.");
|
||||
fprintf(f, "Usage: %s [OPTION]... [COMMAND] [ARGS]...\n", name);
|
||||
fprintf(f, " %s [OPTION]... PID\n", name);
|
||||
fprintf(f, " %s [OPTION]... --netns [PATH|NAME]\n", name);
|
||||
fprintf(f,
|
||||
"\n"
|
||||
"Without PID or --netns, run the given command or a\n"
|
||||
"default shell in a new network and user namespace, and\n"
|
||||
"connect it via pasta.\n");
|
||||
} else {
|
||||
info("Usage: %s [OPTION]...", name);
|
||||
fprintf(f, "Usage: %s [OPTION]...\n", name);
|
||||
}
|
||||
info("");
|
||||
|
||||
|
||||
info( " -d, --debug Be verbose");
|
||||
info( " --trace Be extra verbose, implies --debug");
|
||||
info( " -q, --quiet Don't print informational messages");
|
||||
info( " -f, --foreground Don't run in background");
|
||||
info( " default: run in background if started from a TTY");
|
||||
info( " -e, --stderr Log to stderr too");
|
||||
info( " default: log to system logger only if started from a TTY");
|
||||
info( " -l, --log-file PATH Log (only) to given file");
|
||||
info( " --log-size BYTES Maximum size of log file");
|
||||
info( " default: 1 MiB");
|
||||
info( " --runas UID|UID:GID Run as given UID, GID, which can be");
|
||||
info( " numeric, or login and group names");
|
||||
info( " default: drop to user \"nobody\"");
|
||||
info( " -h, --help Display this help message and exit");
|
||||
info( " --version Show version and exit");
|
||||
fprintf(f,
|
||||
"\n"
|
||||
" -d, --debug Be verbose\n"
|
||||
" --trace Be extra verbose, implies --debug\n"
|
||||
" -q, --quiet Don't print informational messages\n"
|
||||
" -f, --foreground Don't run in background\n"
|
||||
" default: run in background\n"
|
||||
" -l, --log-file PATH Log (only) to given file\n"
|
||||
" --log-size BYTES Maximum size of log file\n"
|
||||
" default: 1 MiB\n"
|
||||
" --runas UID|UID:GID Run as given UID, GID, which can be\n"
|
||||
" numeric, or login and group names\n"
|
||||
" default: drop to user \"nobody\"\n"
|
||||
" -h, --help Display this help message and exit\n"
|
||||
" --version Show version and exit\n");
|
||||
|
||||
if (strstr(name, "pasta")) {
|
||||
info( " -I, --ns-ifname NAME namespace interface name");
|
||||
info( " default: same interface name as external one");
|
||||
fprintf(f,
|
||||
" -I, --ns-ifname NAME namespace interface name\n"
|
||||
" default: same interface name as external one\n");
|
||||
} else {
|
||||
info( " -s, --socket PATH UNIX domain socket path");
|
||||
info( " default: probe free path starting from "
|
||||
UNIX_SOCK_PATH, 1);
|
||||
fprintf(f,
|
||||
" -s, --socket PATH UNIX domain socket path\n"
|
||||
" default: probe free path starting from "
|
||||
UNIX_SOCK_PATH "\n", 1);
|
||||
}
|
||||
|
||||
info( " -F, --fd FD Use FD as pre-opened connected socket");
|
||||
info( " -p, --pcap FILE Log tap-facing traffic to pcap file");
|
||||
info( " -P, --pid FILE Write own PID to the given file");
|
||||
info( " -m, --mtu MTU Assign MTU via DHCP/NDP");
|
||||
info( " a zero value disables assignment");
|
||||
info( " default: 65520: maximum 802.3 MTU minus 802.3 header");
|
||||
info( " length, rounded to 32 bits (IPv4 words)");
|
||||
info( " -a, --address ADDR Assign IPv4 or IPv6 address ADDR");
|
||||
info( " can be specified zero to two times (for IPv4 and IPv6)");
|
||||
info( " default: use addresses from interface with default route");
|
||||
info( " -n, --netmask MASK Assign IPv4 MASK, dot-decimal or bits");
|
||||
info( " default: netmask from matching address on the host");
|
||||
info( " -M, --mac-addr ADDR Use source MAC address ADDR");
|
||||
info( " default: MAC address from interface with default route");
|
||||
info( " -g, --gateway ADDR Pass IPv4 or IPv6 address as gateway");
|
||||
info( " default: gateway from interface with default route");
|
||||
info( " -i, --interface NAME Interface for addresses and routes");
|
||||
info( " default: from --outbound-if4 and --outbound-if6, if any");
|
||||
info( " otherwise interface with first default route");
|
||||
info( " -o, --outbound ADDR Bind to address as outbound source");
|
||||
info( " can be specified zero to two times (for IPv4 and IPv6)");
|
||||
info( " default: use source address from routing tables");
|
||||
info( " --outbound-if4 NAME Bind to outbound interface for IPv4");
|
||||
info( " default: use interface from default route");
|
||||
info( " --outbound-if6 NAME Bind to outbound interface for IPv6");
|
||||
info( " default: use interface from default route");
|
||||
info( " -D, --dns ADDR Use IPv4 or IPv6 address as DNS");
|
||||
info( " can be specified multiple times");
|
||||
info( " a single, empty option disables DNS information");
|
||||
fprintf(f,
|
||||
" -F, --fd FD Use FD as pre-opened connected socket\n"
|
||||
" -p, --pcap FILE Log tap-facing traffic to pcap file\n"
|
||||
" -P, --pid FILE Write own PID to the given file\n"
|
||||
" -m, --mtu MTU Assign MTU via DHCP/NDP\n"
|
||||
" a zero value disables assignment\n"
|
||||
" default: 65520: maximum 802.3 MTU minus 802.3 header\n"
|
||||
" length, rounded to 32 bits (IPv4 words)\n"
|
||||
" -a, --address ADDR Assign IPv4 or IPv6 address ADDR\n"
|
||||
" can be specified zero to two times (for IPv4 and IPv6)\n"
|
||||
" default: use addresses from interface with default route\n"
|
||||
" -n, --netmask MASK Assign IPv4 MASK, dot-decimal or bits\n"
|
||||
" default: netmask from matching address on the host\n"
|
||||
" -M, --mac-addr ADDR Use source MAC address ADDR\n"
|
||||
" default: MAC address from interface with default route\n"
|
||||
" -g, --gateway ADDR Pass IPv4 or IPv6 address as gateway\n"
|
||||
" default: gateway from interface with default route\n"
|
||||
" -i, --interface NAME Interface for addresses and routes\n"
|
||||
" default: from --outbound-if4 and --outbound-if6, if any\n"
|
||||
" otherwise interface with first default route\n"
|
||||
" -o, --outbound ADDR Bind to address as outbound source\n"
|
||||
" can be specified zero to two times (for IPv4 and IPv6)\n"
|
||||
" default: use source address from routing tables\n"
|
||||
" --outbound-if4 NAME Bind to outbound interface for IPv4\n"
|
||||
" default: use interface from default route\n"
|
||||
" --outbound-if6 NAME Bind to outbound interface for IPv6\n"
|
||||
" default: use interface from default route\n"
|
||||
" -D, --dns ADDR Use IPv4 or IPv6 address as DNS\n"
|
||||
" can be specified multiple times\n"
|
||||
" a single, empty option disables DNS information\n");
|
||||
if (strstr(name, "pasta"))
|
||||
info( " default: don't use any addresses");
|
||||
fprintf(f, " default: don't use any addresses\n");
|
||||
else
|
||||
info( " default: use addresses from /etc/resolv.conf");
|
||||
|
||||
info( " -S, --search LIST Space-separated list, search domains");
|
||||
info( " a single, empty option disables the DNS search list");
|
||||
fprintf(f, " default: use addresses from /etc/resolv.conf\n");
|
||||
fprintf(f,
|
||||
" -S, --search LIST Space-separated list, search domains\n"
|
||||
" a single, empty option disables the DNS search list\n");
|
||||
if (strstr(name, "pasta"))
|
||||
info( " default: don't use any search list");
|
||||
fprintf(f, " default: don't use any search list\n");
|
||||
else
|
||||
info( " default: use search list from /etc/resolv.conf");
|
||||
fprintf(f, " default: use search list from /etc/resolv.conf\n");
|
||||
|
||||
if (strstr(name, "pasta"))
|
||||
info(" --dhcp-dns \tPass DNS list via DHCP/DHCPv6/NDP");
|
||||
fprintf(f, " --dhcp-dns \tPass DNS list via DHCP/DHCPv6/NDP\n");
|
||||
else
|
||||
info(" --no-dhcp-dns No DNS list in DHCP/DHCPv6/NDP");
|
||||
fprintf(f, " --no-dhcp-dns No DNS list in DHCP/DHCPv6/NDP\n");
|
||||
|
||||
if (strstr(name, "pasta"))
|
||||
info(" --dhcp-search Pass list via DHCP/DHCPv6/NDP");
|
||||
fprintf(f, " --dhcp-search Pass list via DHCP/DHCPv6/NDP\n");
|
||||
else
|
||||
info(" --no-dhcp-search No list in DHCP/DHCPv6/NDP");
|
||||
fprintf(f, " --no-dhcp-search No list in DHCP/DHCPv6/NDP\n");
|
||||
|
||||
info( " --dns-forward ADDR Forward DNS queries sent to ADDR");
|
||||
info( " can be specified zero to two times (for IPv4 and IPv6)");
|
||||
info( " default: don't forward DNS queries");
|
||||
|
||||
info( " --no-tcp Disable TCP protocol handler");
|
||||
info( " --no-udp Disable UDP protocol handler");
|
||||
info( " --no-icmp Disable ICMP/ICMPv6 protocol handler");
|
||||
info( " --no-dhcp Disable DHCP server");
|
||||
info( " --no-ndp Disable NDP responses");
|
||||
info( " --no-dhcpv6 Disable DHCPv6 server");
|
||||
info( " --no-ra Disable router advertisements");
|
||||
info( " --no-map-gw Don't map gateway address to host");
|
||||
info( " -4, --ipv4-only Enable IPv4 operation only");
|
||||
info( " -6, --ipv6-only Enable IPv6 operation only");
|
||||
fprintf(f,
|
||||
" --dns-forward ADDR Forward DNS queries sent to ADDR\n"
|
||||
" can be specified zero to two times (for IPv4 and IPv6)\n"
|
||||
" default: don't forward DNS queries\n"
|
||||
" --no-tcp Disable TCP protocol handler\n"
|
||||
" --no-udp Disable UDP protocol handler\n"
|
||||
" --no-icmp Disable ICMP/ICMPv6 protocol handler\n"
|
||||
" --no-dhcp Disable DHCP server\n"
|
||||
" --no-ndp Disable NDP responses\n"
|
||||
" --no-dhcpv6 Disable DHCPv6 server\n"
|
||||
" --no-ra Disable router advertisements\n"
|
||||
" --no-map-gw Don't map gateway address to host\n"
|
||||
" -4, --ipv4-only Enable IPv4 operation only\n"
|
||||
" -6, --ipv6-only Enable IPv6 operation only\n");
|
||||
|
||||
if (strstr(name, "pasta"))
|
||||
goto pasta_opts;
|
||||
|
||||
info( " -1, --one-off Quit after handling one single client");
|
||||
info( " -t, --tcp-ports SPEC TCP port forwarding to guest");
|
||||
info( " can be specified multiple times");
|
||||
info( " SPEC can be:");
|
||||
info( " 'none': don't forward any ports");
|
||||
info( " 'all': forward all unbound, non-ephemeral ports");
|
||||
info( " a comma-separated list, optionally ranged with '-'");
|
||||
info( " and optional target ports after ':', with optional");
|
||||
info( " address specification suffixed by '/' and optional");
|
||||
info( " interface prefixed by '%%'. Ranges can be reduced by");
|
||||
info( " excluding ports or ranges prefixed by '~'");
|
||||
info( " Examples:");
|
||||
info( " -t 22 Forward local port 22 to 22 on guest");
|
||||
info( " -t 22:23 Forward local port 22 to 23 on guest");
|
||||
info( " -t 22,25 Forward ports 22, 25 to ports 22, 25");
|
||||
info( " -t 22-80 Forward ports 22 to 80");
|
||||
info( " -t 22-80:32-90 Forward ports 22 to 80 to");
|
||||
info( " corresponding port numbers plus 10");
|
||||
info( " -t 192.0.2.1/5 Bind port 5 of 192.0.2.1 to guest");
|
||||
info( " -t 5-25,~10-20 Forward ports 5 to 9, and 21 to 25");
|
||||
info( " -t ~25 Forward all ports except for 25");
|
||||
info( " default: none");
|
||||
info( " -u, --udp-ports SPEC UDP port forwarding to guest");
|
||||
info( " SPEC is as described for TCP above");
|
||||
info( " default: none");
|
||||
fprintf(f,
|
||||
" -1, --one-off Quit after handling one single client\n"
|
||||
" -t, --tcp-ports SPEC TCP port forwarding to guest\n"
|
||||
" can be specified multiple times\n"
|
||||
" SPEC can be:\n"
|
||||
" 'none': don't forward any ports\n"
|
||||
" 'all': forward all unbound, non-ephemeral ports\n"
|
||||
" a comma-separated list, optionally ranged with '-'\n"
|
||||
" and optional target ports after ':', with optional\n"
|
||||
" address specification suffixed by '/' and optional\n"
|
||||
" interface prefixed by '%%'. Ranges can be reduced by\n"
|
||||
" excluding ports or ranges prefixed by '~'\n"
|
||||
" Examples:\n"
|
||||
" -t 22 Forward local port 22 to 22 on guest\n"
|
||||
" -t 22:23 Forward local port 22 to 23 on guest\n"
|
||||
" -t 22,25 Forward ports 22, 25 to ports 22, 25\n"
|
||||
" -t 22-80 Forward ports 22 to 80\n"
|
||||
" -t 22-80:32-90 Forward ports 22 to 80 to\n"
|
||||
" corresponding port numbers plus 10\n"
|
||||
" -t 192.0.2.1/5 Bind port 5 of 192.0.2.1 to guest\n"
|
||||
" -t 5-25,~10-20 Forward ports 5 to 9, and 21 to 25\n"
|
||||
" -t ~25 Forward all ports except for 25\n"
|
||||
" default: none\n"
|
||||
" -u, --udp-ports SPEC UDP port forwarding to guest\n"
|
||||
" SPEC is as described for TCP above\n"
|
||||
" default: none\n");
|
||||
|
||||
exit(status);
|
||||
|
||||
pasta_opts:
|
||||
|
||||
info( " -t, --tcp-ports SPEC TCP port forwarding to namespace");
|
||||
info( " can be specified multiple times");
|
||||
info( " SPEC can be:");
|
||||
info( " 'none': don't forward any ports");
|
||||
info( " 'auto': forward all ports currently bound in namespace");
|
||||
info( " a comma-separated list, optionally ranged with '-'");
|
||||
info( " and optional target ports after ':', with optional");
|
||||
info( " address specification suffixed by '/' and optional");
|
||||
info( " interface prefixed by '%%'. Examples:");
|
||||
info( " -t 22 Forward local port 22 to port 22 in netns");
|
||||
info( " -t 22:23 Forward local port 22 to port 23");
|
||||
info( " -t 22,25 Forward ports 22, 25 to ports 22, 25");
|
||||
info( " -t 22-80 Forward ports 22 to 80");
|
||||
info( " -t 22-80:32-90 Forward ports 22 to 80 to");
|
||||
info( " corresponding port numbers plus 10");
|
||||
info( " -t 192.0.2.1/5 Bind port 5 of 192.0.2.1 to namespace");
|
||||
info( " -t 5-25,~10-20 Forward ports 5 to 9, and 21 to 25");
|
||||
info( " -t ~25 Forward all bound ports except for 25");
|
||||
info( " default: auto");
|
||||
info( " IPv6 bound ports are also forwarded for IPv4");
|
||||
info( " -u, --udp-ports SPEC UDP port forwarding to namespace");
|
||||
info( " SPEC is as described for TCP above");
|
||||
info( " default: auto");
|
||||
info( " IPv6 bound ports are also forwarded for IPv4");
|
||||
info( " unless specified, with '-t auto', UDP ports with numbers");
|
||||
info( " corresponding to forwarded TCP port numbers are");
|
||||
info( " forwarded too");
|
||||
info( " -T, --tcp-ns SPEC TCP port forwarding to init namespace");
|
||||
info( " SPEC is as described above");
|
||||
info( " default: auto");
|
||||
info( " -U, --udp-ns SPEC UDP port forwarding to init namespace");
|
||||
info( " SPEC is as described above");
|
||||
info( " default: auto");
|
||||
info( " --userns NSPATH Target user namespace to join");
|
||||
info( " --netns PATH|NAME Target network namespace to join");
|
||||
info( " --netns-only Don't join existing user namespace");
|
||||
info( " implied if PATH or NAME are given without --userns");
|
||||
info( " --no-netns-quit Don't quit if filesystem-bound target");
|
||||
info( " network namespace is deleted");
|
||||
info( " --config-net Configure tap interface in namespace");
|
||||
info( " --no-copy-routes DEPRECATED:");
|
||||
info( " Don't copy all routes to namespace");
|
||||
info( " --no-copy-addrs DEPRECATED:");
|
||||
info( " Don't copy all addresses to namespace");
|
||||
info( " --ns-mac-addr ADDR Set MAC address on tap interface");
|
||||
fprintf(f,
|
||||
" -t, --tcp-ports SPEC TCP port forwarding to namespace\n"
|
||||
" can be specified multiple times\n"
|
||||
" SPEC can be:\n"
|
||||
" 'none': don't forward any ports\n"
|
||||
" 'auto': forward all ports currently bound in namespace\n"
|
||||
" a comma-separated list, optionally ranged with '-'\n"
|
||||
" and optional target ports after ':', with optional\n"
|
||||
" address specification suffixed by '/' and optional\n"
|
||||
" interface prefixed by '%%'. Examples:\n"
|
||||
" -t 22 Forward local port 22 to port 22 in netns\n"
|
||||
" -t 22:23 Forward local port 22 to port 23\n"
|
||||
" -t 22,25 Forward ports 22, 25 to ports 22, 25\n"
|
||||
" -t 22-80 Forward ports 22 to 80\n"
|
||||
" -t 22-80:32-90 Forward ports 22 to 80 to\n"
|
||||
" corresponding port numbers plus 10\n"
|
||||
" -t 192.0.2.1/5 Bind port 5 of 192.0.2.1 to namespace\n"
|
||||
" -t 5-25,~10-20 Forward ports 5 to 9, and 21 to 25\n"
|
||||
" -t ~25 Forward all bound ports except for 25\n"
|
||||
" default: auto\n"
|
||||
" IPv6 bound ports are also forwarded for IPv4\n"
|
||||
" -u, --udp-ports SPEC UDP port forwarding to namespace\n"
|
||||
" SPEC is as described for TCP above\n"
|
||||
" default: auto\n"
|
||||
" IPv6 bound ports are also forwarded for IPv4\n"
|
||||
" unless specified, with '-t auto', UDP ports with numbers\n"
|
||||
" corresponding to forwarded TCP port numbers are\n"
|
||||
" forwarded too\n"
|
||||
" -T, --tcp-ns SPEC TCP port forwarding to init namespace\n"
|
||||
" SPEC is as described above\n"
|
||||
" default: auto\n"
|
||||
" -U, --udp-ns SPEC UDP port forwarding to init namespace\n"
|
||||
" SPEC is as described above\n"
|
||||
" default: auto\n"
|
||||
" --userns NSPATH Target user namespace to join\n"
|
||||
" --netns PATH|NAME Target network namespace to join\n"
|
||||
" --netns-only Don't join existing user namespace\n"
|
||||
" implied if PATH or NAME are given without --userns\n"
|
||||
" --no-netns-quit Don't quit if filesystem-bound target\n"
|
||||
" network namespace is deleted\n"
|
||||
" --config-net Configure tap interface in namespace\n"
|
||||
" --no-copy-routes DEPRECATED:\n"
|
||||
" Don't copy all routes to namespace\n"
|
||||
" --no-copy-addrs DEPRECATED:\n"
|
||||
" Don't copy all addresses to namespace\n"
|
||||
" --ns-mac-addr ADDR Set MAC address on tap interface\n");
|
||||
|
||||
exit(status);
|
||||
}
|
||||
|
||||
/**
|
||||
* usage() - Print usage and exit with failure
|
||||
* @name: Executable name
|
||||
*/
|
||||
static void usage(const char *name)
|
||||
{
|
||||
print_usage(name, EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/**
|
||||
* conf_print() - Print fundamental configuration parameters
|
||||
* @c: Execution context
|
||||
@@ -1100,10 +1099,8 @@ static void conf_ugid(char *runas, uid_t *uid, gid_t *gid)
|
||||
const struct passwd *pw;
|
||||
/* cppcheck-suppress getpwnamCalled */
|
||||
pw = getpwnam("nobody");
|
||||
if (!pw) {
|
||||
perror("getpwnam");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (!pw)
|
||||
die_perror("Can't get password file entry for nobody");
|
||||
|
||||
*uid = pw->pw_uid;
|
||||
*gid = pw->pw_gid;
|
||||
@@ -1120,12 +1117,45 @@ static void conf_ugid(char *runas, uid_t *uid, gid_t *gid)
|
||||
*/
|
||||
static void conf_open_files(struct ctx *c)
|
||||
{
|
||||
if (c->mode == MODE_PASST && c->fd_tap == -1)
|
||||
if (c->mode != MODE_PASTA && c->fd_tap == -1)
|
||||
c->fd_tap_listen = tap_sock_unix_open(c->sock_path);
|
||||
|
||||
c->pidfile_fd = pidfile_open(c->pidfile);
|
||||
}
|
||||
|
||||
/**
|
||||
* parse_mac - Parse a MAC address from a string
|
||||
* @mac: Binary MAC address, initialised on success
|
||||
* @str: String to parse
|
||||
*
|
||||
* Parses @str as an Ethernet MAC address stored in @mac on success. Exits on
|
||||
* failure.
|
||||
*/
|
||||
static void parse_mac(unsigned char mac[ETH_ALEN], const char *str)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
if (strlen(str) != (ETH_ALEN * 3 - 1))
|
||||
goto fail;
|
||||
|
||||
for (i = 0; i < ETH_ALEN; i++) {
|
||||
const char *octet = str + 3 * i;
|
||||
unsigned long b;
|
||||
char *end;
|
||||
|
||||
errno = 0;
|
||||
b = strtoul(octet, &end, 16);
|
||||
if (b > UCHAR_MAX || errno || end != octet + 2 ||
|
||||
*end != ((i == ETH_ALEN - 1) ? '\0' : ':'))
|
||||
goto fail;
|
||||
mac[i] = b;
|
||||
}
|
||||
return;
|
||||
|
||||
fail:
|
||||
die("Invalid MAC address: %s", str);
|
||||
}
|
||||
|
||||
/**
|
||||
* conf() - Process command-line arguments and set configuration
|
||||
* @c: Execution context
|
||||
@@ -1173,7 +1203,6 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
{"udp-ns", required_argument, NULL, 'U' },
|
||||
{"userns", required_argument, NULL, 2 },
|
||||
{"netns", required_argument, NULL, 3 },
|
||||
{"netns-only", no_argument, &netns_only, 1 },
|
||||
{"ns-mac-addr", required_argument, NULL, 4 },
|
||||
{"dhcp-dns", no_argument, NULL, 5 },
|
||||
{"no-dhcp-dns", no_argument, NULL, 6 },
|
||||
@@ -1190,8 +1219,10 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
{"config-net", no_argument, NULL, 17 },
|
||||
{"no-copy-routes", no_argument, NULL, 18 },
|
||||
{"no-copy-addrs", no_argument, NULL, 19 },
|
||||
{"netns-only", no_argument, NULL, 20 },
|
||||
{ 0 },
|
||||
};
|
||||
const char *logname = (c->mode == MODE_PASTA) ? "pasta" : "passt";
|
||||
char userns[PATH_MAX] = { 0 }, netns[PATH_MAX] = { 0 };
|
||||
bool copy_addrs_opt = false, copy_routes_opt = false;
|
||||
struct in6_addr *dns6 = c->ip6.dns, dns6_tmp;
|
||||
@@ -1202,9 +1233,9 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
unsigned int ifi4 = 0, ifi6 = 0;
|
||||
const char *logfile = NULL;
|
||||
const char *optstring;
|
||||
int name, ret, b, i;
|
||||
size_t logsize = 0;
|
||||
char *runas = NULL;
|
||||
int name, ret;
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
|
||||
@@ -1217,7 +1248,7 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
}
|
||||
|
||||
c->tcp.fwd_in.mode = c->tcp.fwd_out.mode = FWD_UNSET;
|
||||
c->udp.fwd_in.f.mode = c->udp.fwd_out.f.mode = FWD_UNSET;
|
||||
c->udp.fwd_in.mode = c->udp.fwd_out.mode = FWD_UNSET;
|
||||
|
||||
do {
|
||||
name = getopt_long(argc, argv, optstring, options, NULL);
|
||||
@@ -1234,6 +1265,8 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
if (ret <= 0 || ret >= (int)sizeof(userns))
|
||||
die("Invalid userns: %s", optarg);
|
||||
|
||||
netns_only = 0;
|
||||
|
||||
break;
|
||||
case 3:
|
||||
if (c->mode != MODE_PASTA)
|
||||
@@ -1245,14 +1278,7 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
if (c->mode != MODE_PASTA)
|
||||
die("--ns-mac-addr is for pasta mode only");
|
||||
|
||||
for (i = 0; i < ETH_ALEN; i++) {
|
||||
errno = 0;
|
||||
b = strtol(optarg + (intptr_t)i * 3, NULL, 16);
|
||||
if (b < 0 || b > UCHAR_MAX || errno)
|
||||
die("Invalid MAC address: %s", optarg);
|
||||
|
||||
c->mac_guest[i] = b;
|
||||
}
|
||||
parse_mac(c->mac_guest, optarg);
|
||||
break;
|
||||
case 5:
|
||||
if (c->mode != MODE_PASTA)
|
||||
@@ -1261,7 +1287,7 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
c->no_dhcp_dns = 0;
|
||||
break;
|
||||
case 6:
|
||||
if (c->mode != MODE_PASST)
|
||||
if (c->mode == MODE_PASTA)
|
||||
die("--no-dhcp-dns is for passt mode only");
|
||||
|
||||
c->no_dhcp_dns = 1;
|
||||
@@ -1273,20 +1299,18 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
c->no_dhcp_dns_search = 0;
|
||||
break;
|
||||
case 8:
|
||||
if (c->mode != MODE_PASST)
|
||||
if (c->mode == MODE_PASTA)
|
||||
die("--no-dhcp-search is for passt mode only");
|
||||
|
||||
c->no_dhcp_dns_search = 1;
|
||||
break;
|
||||
case 9:
|
||||
if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.dns_match) &&
|
||||
inet_pton(AF_INET6, optarg, &c->ip6.dns_match) &&
|
||||
if (inet_pton(AF_INET6, optarg, &c->ip6.dns_match) &&
|
||||
!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.dns_match) &&
|
||||
!IN6_IS_ADDR_LOOPBACK(&c->ip6.dns_match))
|
||||
break;
|
||||
|
||||
if (IN4_IS_ADDR_UNSPECIFIED(&c->ip4.dns_match) &&
|
||||
inet_pton(AF_INET, optarg, &c->ip4.dns_match) &&
|
||||
if (inet_pton(AF_INET, optarg, &c->ip4.dns_match) &&
|
||||
!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.dns_match) &&
|
||||
!IN4_IS_ADDR_BROADCAST(&c->ip4.dns_match) &&
|
||||
!IN4_IS_ADDR_LOOPBACK(&c->ip4.dns_match))
|
||||
@@ -1301,24 +1325,13 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
c->no_netns_quit = 1;
|
||||
break;
|
||||
case 11:
|
||||
if (c->trace)
|
||||
die("Multiple --trace options given");
|
||||
|
||||
if (c->quiet)
|
||||
die("Either --trace or --quiet");
|
||||
|
||||
c->trace = c->debug = 1;
|
||||
c->quiet = 0;
|
||||
break;
|
||||
case 12:
|
||||
if (runas)
|
||||
die("Multiple --runas options given");
|
||||
|
||||
runas = optarg;
|
||||
break;
|
||||
case 13:
|
||||
if (logsize)
|
||||
die("Multiple --log-size options given");
|
||||
|
||||
errno = 0;
|
||||
logsize = strtol(optarg, NULL, 0);
|
||||
|
||||
@@ -1328,13 +1341,10 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
break;
|
||||
case 14:
|
||||
fprintf(stdout,
|
||||
c->mode == MODE_PASST ? "passt " : "pasta ");
|
||||
c->mode == MODE_PASTA ? "pasta " : "passt ");
|
||||
fprintf(stdout, VERSION_BLOB);
|
||||
exit(EXIT_SUCCESS);
|
||||
case 15:
|
||||
if (*c->ip4.ifname_out)
|
||||
die("Redundant outbound interface: %s", optarg);
|
||||
|
||||
ret = snprintf(c->ip4.ifname_out,
|
||||
sizeof(c->ip4.ifname_out), "%s", optarg);
|
||||
if (ret <= 0 || ret >= (int)sizeof(c->ip4.ifname_out))
|
||||
@@ -1342,9 +1352,6 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
|
||||
break;
|
||||
case 16:
|
||||
if (*c->ip6.ifname_out)
|
||||
die("Redundant outbound interface: %s", optarg);
|
||||
|
||||
ret = snprintf(c->ip6.ifname_out,
|
||||
sizeof(c->ip6.ifname_out), "%s", optarg);
|
||||
if (ret <= 0 || ret >= (int)sizeof(c->ip6.ifname_out))
|
||||
@@ -1371,62 +1378,39 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
warn("--no-copy-addrs will be dropped soon");
|
||||
c->no_copy_addrs = copy_addrs_opt = true;
|
||||
break;
|
||||
case 20:
|
||||
if (c->mode != MODE_PASTA)
|
||||
die("--netns-only is for pasta mode only");
|
||||
|
||||
netns_only = 1;
|
||||
*userns = 0;
|
||||
break;
|
||||
case 'd':
|
||||
if (c->debug)
|
||||
die("Multiple --debug options given");
|
||||
|
||||
if (c->quiet)
|
||||
die("Either --debug or --quiet");
|
||||
|
||||
c->debug = 1;
|
||||
c->quiet = 0;
|
||||
break;
|
||||
case 'e':
|
||||
if (logfile)
|
||||
die("Can't log to both file and stderr");
|
||||
|
||||
if (c->force_stderr)
|
||||
die("Multiple --stderr options given");
|
||||
|
||||
c->force_stderr = 1;
|
||||
warn("--stderr will be dropped soon");
|
||||
break;
|
||||
case 'l':
|
||||
if (c->force_stderr)
|
||||
die("Can't log to both stderr and file");
|
||||
|
||||
if (logfile)
|
||||
die("Multiple --log-file options given");
|
||||
|
||||
logfile = optarg;
|
||||
break;
|
||||
case 'q':
|
||||
if (c->quiet)
|
||||
die("Multiple --quiet options given");
|
||||
|
||||
if (c->debug)
|
||||
die("Either --debug or --quiet");
|
||||
|
||||
c->quiet = 1;
|
||||
c->debug = c->trace = 0;
|
||||
break;
|
||||
case 'f':
|
||||
if (c->foreground)
|
||||
die("Multiple --foreground options given");
|
||||
|
||||
c->foreground = 1;
|
||||
break;
|
||||
case 's':
|
||||
if (*c->sock_path)
|
||||
die("Multiple --socket options given");
|
||||
|
||||
ret = snprintf(c->sock_path, UNIX_SOCK_MAX - 1, "%s",
|
||||
ret = snprintf(c->sock_path, sizeof(c->sock_path), "%s",
|
||||
optarg);
|
||||
if (ret <= 0 || ret >= (int)sizeof(c->sock_path))
|
||||
die("Invalid socket path: %s", optarg);
|
||||
|
||||
c->fd_tap = -1;
|
||||
break;
|
||||
case 'F':
|
||||
if (c->fd_tap >= 0)
|
||||
die("Multiple --fd options given");
|
||||
|
||||
errno = 0;
|
||||
c->fd_tap = strtol(optarg, NULL, 0);
|
||||
|
||||
@@ -1434,12 +1418,9 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
die("Invalid --fd: %s", optarg);
|
||||
|
||||
c->one_off = true;
|
||||
|
||||
*c->sock_path = 0;
|
||||
break;
|
||||
case 'I':
|
||||
if (*c->pasta_ifn)
|
||||
die("Multiple --ns-ifname options given");
|
||||
|
||||
ret = snprintf(c->pasta_ifn, IFNAMSIZ, "%s",
|
||||
optarg);
|
||||
if (ret <= 0 || ret >= IFNAMSIZ)
|
||||
@@ -1447,18 +1428,12 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
|
||||
break;
|
||||
case 'p':
|
||||
if (*c->pcap)
|
||||
die("Multiple --pcap options given");
|
||||
|
||||
ret = snprintf(c->pcap, sizeof(c->pcap), "%s", optarg);
|
||||
if (ret <= 0 || ret >= (int)sizeof(c->pcap))
|
||||
die("Invalid pcap path: %s", optarg);
|
||||
|
||||
break;
|
||||
case 'P':
|
||||
if (*c->pidfile)
|
||||
die("Multiple --pid options given");
|
||||
|
||||
ret = snprintf(c->pidfile, sizeof(c->pidfile), "%s",
|
||||
optarg);
|
||||
if (ret <= 0 || ret >= (int)sizeof(c->pidfile))
|
||||
@@ -1466,9 +1441,6 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
|
||||
break;
|
||||
case 'm':
|
||||
if (c->mtu)
|
||||
die("Multiple --mtu options given");
|
||||
|
||||
errno = 0;
|
||||
c->mtu = strtol(optarg, NULL, 0);
|
||||
|
||||
@@ -1486,8 +1458,7 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
if (c->mode == MODE_PASTA)
|
||||
c->no_copy_addrs = 1;
|
||||
|
||||
if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr) &&
|
||||
inet_pton(AF_INET6, optarg, &c->ip6.addr) &&
|
||||
if (inet_pton(AF_INET6, optarg, &c->ip6.addr) &&
|
||||
!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr) &&
|
||||
!IN6_IS_ADDR_LOOPBACK(&c->ip6.addr) &&
|
||||
!IN6_IS_ADDR_V4MAPPED(&c->ip6.addr) &&
|
||||
@@ -1495,8 +1466,7 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
!IN6_IS_ADDR_MULTICAST(&c->ip6.addr))
|
||||
break;
|
||||
|
||||
if (IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addr) &&
|
||||
inet_pton(AF_INET, optarg, &c->ip4.addr) &&
|
||||
if (inet_pton(AF_INET, optarg, &c->ip4.addr) &&
|
||||
!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addr) &&
|
||||
!IN4_IS_ADDR_BROADCAST(&c->ip4.addr) &&
|
||||
!IN4_IS_ADDR_LOOPBACK(&c->ip4.addr) &&
|
||||
@@ -1512,27 +1482,18 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
|
||||
break;
|
||||
case 'M':
|
||||
for (i = 0; i < ETH_ALEN; i++) {
|
||||
errno = 0;
|
||||
b = strtol(optarg + (intptr_t)i * 3, NULL, 16);
|
||||
if (b < 0 || b > UCHAR_MAX || errno)
|
||||
die("Invalid MAC address: %s", optarg);
|
||||
|
||||
c->mac[i] = b;
|
||||
}
|
||||
parse_mac(c->mac, optarg);
|
||||
break;
|
||||
case 'g':
|
||||
if (c->mode == MODE_PASTA)
|
||||
c->no_copy_routes = 1;
|
||||
|
||||
if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.gw) &&
|
||||
inet_pton(AF_INET6, optarg, &c->ip6.gw) &&
|
||||
if (inet_pton(AF_INET6, optarg, &c->ip6.gw) &&
|
||||
!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.gw) &&
|
||||
!IN6_IS_ADDR_LOOPBACK(&c->ip6.gw))
|
||||
break;
|
||||
|
||||
if (IN4_IS_ADDR_UNSPECIFIED(&c->ip4.gw) &&
|
||||
inet_pton(AF_INET, optarg, &c->ip4.gw) &&
|
||||
if (inet_pton(AF_INET, optarg, &c->ip4.gw) &&
|
||||
!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.gw) &&
|
||||
!IN4_IS_ADDR_BROADCAST(&c->ip4.gw) &&
|
||||
!IN4_IS_ADDR_LOOPBACK(&c->ip4.gw))
|
||||
@@ -1541,16 +1502,11 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
die("Invalid gateway address: %s", optarg);
|
||||
break;
|
||||
case 'i':
|
||||
if (ifi4 || ifi6)
|
||||
die("Redundant interface: %s", optarg);
|
||||
|
||||
if (!(ifi4 = ifi6 = if_nametoindex(optarg)))
|
||||
die("Invalid interface name %s: %s", optarg,
|
||||
strerror(errno));
|
||||
die_perror("Invalid interface name %s", optarg);
|
||||
break;
|
||||
case 'o':
|
||||
if (IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr_out) &&
|
||||
inet_pton(AF_INET6, optarg, &c->ip6.addr_out) &&
|
||||
if (inet_pton(AF_INET6, optarg, &c->ip6.addr_out) &&
|
||||
!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr_out) &&
|
||||
!IN6_IS_ADDR_LOOPBACK(&c->ip6.addr_out) &&
|
||||
!IN6_IS_ADDR_V4MAPPED(&c->ip6.addr_out) &&
|
||||
@@ -1558,8 +1514,7 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
!IN6_IS_ADDR_MULTICAST(&c->ip6.addr_out))
|
||||
break;
|
||||
|
||||
if (IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addr_out) &&
|
||||
inet_pton(AF_INET, optarg, &c->ip4.addr_out) &&
|
||||
if (inet_pton(AF_INET, optarg, &c->ip4.addr_out) &&
|
||||
!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addr_out) &&
|
||||
!IN4_IS_ADDR_BROADCAST(&c->ip4.addr_out) &&
|
||||
!IN4_IS_ADDR_MULTICAST(&c->ip4.addr_out))
|
||||
@@ -1570,18 +1525,23 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
break;
|
||||
case 'D':
|
||||
if (!strcmp(optarg, "none")) {
|
||||
if (c->no_dns)
|
||||
die("Redundant DNS options");
|
||||
|
||||
if (dns4 - c->ip4.dns || dns6 - c->ip6.dns)
|
||||
die("Conflicting DNS options");
|
||||
|
||||
c->no_dns = 1;
|
||||
|
||||
dns4 = &c->ip4.dns[0];
|
||||
memset(c->ip4.dns, 0, sizeof(c->ip4.dns));
|
||||
c->ip4.dns[0] = (struct in_addr){ 0 };
|
||||
c->ip4.dns_match = (struct in_addr){ 0 };
|
||||
c->ip4.dns_host = (struct in_addr){ 0 };
|
||||
|
||||
dns6 = &c->ip6.dns[0];
|
||||
memset(c->ip6.dns, 0, sizeof(c->ip6.dns));
|
||||
c->ip6.dns_match = (struct in6_addr){ 0 };
|
||||
c->ip6.dns_host = (struct in6_addr){ 0 };
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if (c->no_dns)
|
||||
die("Conflicting DNS options");
|
||||
c->no_dns = 0;
|
||||
|
||||
if (dns4 - &c->ip4.dns[0] < ARRAY_SIZE(c->ip4.dns) &&
|
||||
inet_pton(AF_INET, optarg, &dns4_tmp)) {
|
||||
@@ -1599,18 +1559,14 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
break;
|
||||
case 'S':
|
||||
if (!strcmp(optarg, "none")) {
|
||||
if (c->no_dns_search)
|
||||
die("Redundant DNS search options");
|
||||
|
||||
if (dnss != c->dns_search)
|
||||
die("Conflicting DNS search options");
|
||||
|
||||
c->no_dns_search = 1;
|
||||
|
||||
memset(c->dns_search, 0, sizeof(c->dns_search));
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if (c->no_dns_search)
|
||||
die("Conflicting DNS search options");
|
||||
c->no_dns_search = 0;
|
||||
|
||||
if (dnss - c->dns_search < ARRAY_SIZE(c->dns_search)) {
|
||||
ret = snprintf(dnss->n, sizeof(*c->dns_search),
|
||||
@@ -1626,17 +1582,16 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
break;
|
||||
case '4':
|
||||
v4_only = true;
|
||||
v6_only = false;
|
||||
break;
|
||||
case '6':
|
||||
v6_only = true;
|
||||
v4_only = false;
|
||||
break;
|
||||
case '1':
|
||||
if (c->mode != MODE_PASST)
|
||||
if (c->mode == MODE_PASTA)
|
||||
die("--one-off is for passt mode only");
|
||||
|
||||
if (c->one_off)
|
||||
die("Redundant --one-off option");
|
||||
|
||||
c->one_off = true;
|
||||
break;
|
||||
case 't':
|
||||
@@ -1646,22 +1601,15 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
/* Handle these later, once addresses are configured */
|
||||
break;
|
||||
case 'h':
|
||||
log_to_stdout = 1;
|
||||
print_usage(argv[0], EXIT_SUCCESS);
|
||||
usage(argv[0], stdout, EXIT_SUCCESS);
|
||||
break;
|
||||
case '?':
|
||||
default:
|
||||
usage(argv[0]);
|
||||
usage(argv[0], stderr, EXIT_FAILURE);
|
||||
break;
|
||||
}
|
||||
} while (name != -1);
|
||||
|
||||
if (v4_only && v6_only)
|
||||
die("Options ipv4-only and ipv6-only are mutually exclusive");
|
||||
|
||||
if (*c->sock_path && c->fd_tap >= 0)
|
||||
die("Options --socket and --fd are mutually exclusive");
|
||||
|
||||
if (c->mode == MODE_PASTA && !c->pasta_conf_ns) {
|
||||
if (copy_routes_opt)
|
||||
die("--no-copy-routes needs --config-net");
|
||||
@@ -1677,14 +1625,11 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
|
||||
conf_ugid(runas, &uid, &gid);
|
||||
|
||||
if (logfile) {
|
||||
logfile_init(c->mode == MODE_PASST ? "passt" : "pasta",
|
||||
logfile, logsize);
|
||||
}
|
||||
if (logfile)
|
||||
logfile_init(logname, logfile, logsize);
|
||||
else
|
||||
__openlog(logname, 0, LOG_DAEMON);
|
||||
|
||||
/* Once the log mask is not LOG_EARLY, we will no longer log to stderr
|
||||
* if there was a log file specified.
|
||||
*/
|
||||
if (c->debug)
|
||||
__setlogmask(LOG_UPTO(LOG_DEBUG));
|
||||
else if (c->quiet)
|
||||
@@ -1692,6 +1637,8 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
else
|
||||
__setlogmask(LOG_UPTO(LOG_INFO));
|
||||
|
||||
log_conf_parsed = true; /* Stop printing everything */
|
||||
|
||||
nl_sock_init(c, false);
|
||||
if (!v6_only)
|
||||
c->ifi4 = conf_ip4(ifi4, &c->ip4, c->mac);
|
||||
@@ -1717,7 +1664,7 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
if (name == 't')
|
||||
conf_ports(c, name, optarg, &c->tcp.fwd_in);
|
||||
else if (name == 'u')
|
||||
conf_ports(c, name, optarg, &c->udp.fwd_in.f);
|
||||
conf_ports(c, name, optarg, &c->udp.fwd_in);
|
||||
} while (name != -1);
|
||||
|
||||
if (c->mode == MODE_PASTA)
|
||||
@@ -1752,7 +1699,7 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
if (name == 'T')
|
||||
conf_ports(c, name, optarg, &c->tcp.fwd_out);
|
||||
else if (name == 'U')
|
||||
conf_ports(c, name, optarg, &c->udp.fwd_out.f);
|
||||
conf_ports(c, name, optarg, &c->udp.fwd_out);
|
||||
} while (name != -1);
|
||||
|
||||
if (!c->ifi4)
|
||||
@@ -1779,10 +1726,10 @@ void conf(struct ctx *c, int argc, char **argv)
|
||||
c->tcp.fwd_in.mode = fwd_default;
|
||||
if (!c->tcp.fwd_out.mode)
|
||||
c->tcp.fwd_out.mode = fwd_default;
|
||||
if (!c->udp.fwd_in.f.mode)
|
||||
c->udp.fwd_in.f.mode = fwd_default;
|
||||
if (!c->udp.fwd_out.f.mode)
|
||||
c->udp.fwd_out.f.mode = fwd_default;
|
||||
if (!c->udp.fwd_in.mode)
|
||||
c->udp.fwd_in.mode = fwd_default;
|
||||
if (!c->udp.fwd_out.mode)
|
||||
c->udp.fwd_out.mode = fwd_default;
|
||||
|
||||
fwd_scan_ports_init(c);
|
||||
|
||||
|
@@ -211,3 +211,4 @@ allow pasta_t ifconfig_t:process { noatsecure rlimitinh siginh };
|
||||
allow pasta_t netutils_t:process { noatsecure rlimitinh siginh };
|
||||
allow pasta_t ping_t:process { noatsecure rlimitinh siginh };
|
||||
allow pasta_t user_tty_device_t:chr_file { append read write };
|
||||
allow pasta_t user_devpts_t:chr_file { append read write };
|
||||
|
3
doc/platform-requirements/.gitignore
vendored
Normal file
3
doc/platform-requirements/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
/reuseaddr-priority
|
||||
/recv-zero
|
||||
/udp-close-dup
|
45
doc/platform-requirements/Makefile
Normal file
45
doc/platform-requirements/Makefile
Normal file
@@ -0,0 +1,45 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
#
|
||||
# Copyright Red Hat
|
||||
# Author: David Gibson <david@gibson.dropbear.id.au>
|
||||
|
||||
TARGETS = reuseaddr-priority recv-zero udp-close-dup
|
||||
SRCS = reuseaddr-priority.c recv-zero.c udp-close-dup.c
|
||||
CFLAGS = -Wall
|
||||
|
||||
all: cppcheck clang-tidy $(TARGETS:%=check-%)
|
||||
|
||||
$(TARGETS): %: %.c common.c common.h
|
||||
|
||||
check-%: %
|
||||
./$<
|
||||
|
||||
cppcheck:
|
||||
cppcheck --std=c11 --error-exitcode=1 --enable=all --force \
|
||||
--check-level=exhaustive --inline-suppr \
|
||||
--inconclusive --library=posix --quiet \
|
||||
--suppress=missingIncludeSystem \
|
||||
$(SRCS)
|
||||
|
||||
clang-tidy:
|
||||
clang-tidy --checks=*,\
|
||||
-altera-id-dependent-backward-branch,\
|
||||
-altera-unroll-loops,\
|
||||
-bugprone-easily-swappable-parameters,\
|
||||
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,\
|
||||
-concurrency-mt-unsafe,\
|
||||
-cppcoreguidelines-avoid-non-const-global-variables,\
|
||||
-cppcoreguidelines-init-variables,\
|
||||
-cppcoreguidelines-macro-to-enum,\
|
||||
-google-readability-braces-around-statements,\
|
||||
-hicpp-braces-around-statements,\
|
||||
-llvmlibc-restrict-system-libc-headers,\
|
||||
-misc-include-cleaner,\
|
||||
-modernize-macro-to-enum,\
|
||||
-readability-braces-around-statements,\
|
||||
-readability-identifier-length,\
|
||||
-readability-isolate-declaration \
|
||||
$(SRCS)
|
||||
|
||||
clean:
|
||||
rm -f $(TARGETS) *.o *~
|
18
doc/platform-requirements/README
Normal file
18
doc/platform-requirements/README
Normal file
@@ -0,0 +1,18 @@
|
||||
Platform Requirements
|
||||
=====================
|
||||
|
||||
TODO: document the various Linux-specific features we currently require
|
||||
|
||||
|
||||
Test Programs
|
||||
-------------
|
||||
|
||||
In some places we rely on quite specific behaviour of sockets.
|
||||
Although Linux, at least, seems to behave as required, it's not always
|
||||
clear from the available documentation if this is required by POSIX or
|
||||
some other specification.
|
||||
|
||||
To specifically document those expectations this directory has some
|
||||
test programs which explicitly check for the behaviour we need.
|
||||
When/if we attempt a port to a new platform, running these to check
|
||||
behaviour would be a good place to start.
|
66
doc/platform-requirements/common.c
Normal file
66
doc/platform-requirements/common.c
Normal file
@@ -0,0 +1,66 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
/* common.c
|
||||
*
|
||||
* Common helper functions for testing SO_REUSEADDR behaviour
|
||||
*
|
||||
* Copyright Red Hat
|
||||
* Author: David Gibson <david@gibson.dropbear.id.au>
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <netinet/in.h>
|
||||
#include <string.h>
|
||||
#include <sys/socket.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/* Create an IPv4 UDP socket with SO_REUSEADDR enabled.
 *
 * Returns the new socket's file descriptor. Failure of either socket() or
 * setsockopt() is fatal: the error is reported via die().
 */
int sock_reuseaddr(void)
{
	int y = 1;
	int s;

	s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
	if (s < 0)
		die("socket(): %s\n", strerror(errno));

	/* Must be a '<' comparison: with a comma operator here the condition
	 * is constantly 0 and a failing setsockopt() goes unnoticed */
	if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
		die("SO_REUSEADDR: %s\n", strerror(errno));

	return s;
}
|
||||
|
||||
/* Transmit @token as one message on the connected socket @s.
 *
 * A send() error or a partial send is fatal (reported via die()).
 */
void send_token(int s, long token)
{
	const ssize_t sent = send(s, &token, sizeof(token), 0);

	if (sent < 0)
		die("send(): %s\n", strerror(errno));

	/* sent is known to be non-negative from here on */
	if ((size_t)sent < sizeof(token))
		die("short send()\n");
}
|
||||
|
||||
/* Try a non-blocking (MSG_DONTWAIT) receive of one token from socket @s.
 *
 * Returns true if a message was received and its content equals @token,
 * false if recv() failed with EWOULDBLOCK. Any other recv() error, a short
 * message, or a content mismatch is fatal (reported via die()).
 */
bool recv_token(int s, long token)
{
	long got;
	ssize_t len;

	len = recv(s, &got, sizeof(got), MSG_DONTWAIT);

	if (len < 0 && errno == EWOULDBLOCK)
		return false;
	if (len < 0)
		die("recv(): %s\n", strerror(errno));

	/* len is known to be non-negative from here on */
	if ((size_t)len < sizeof(got))
		die("short recv()\n");
	if (got != token)
		die("data mismatch\n");

	return true;
}
|
47
doc/platform-requirements/common.h
Normal file
47
doc/platform-requirements/common.h
Normal file
@@ -0,0 +1,47 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
/* common.h
|
||||
*
|
||||
* Useful shared functions
|
||||
*
|
||||
* Copyright Red Hat
|
||||
* Author: David Gibson <david@gibson.dropbear.id.au>
|
||||
*/
|
||||
#ifndef REUSEADDR_COMMON_H
|
||||
#define REUSEADDR_COMMON_H
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Print a printf()-style message to stderr and exit with EXIT_FAILURE.
 *
 * The format attribute lets the compiler check arguments against @fmt.
 * The noreturn attribute tells compilers and static analysers that control
 * never continues past a call, so code following a die() in an error branch
 * isn't analysed as reachable.
 */
__attribute__((format(printf, 1, 2), noreturn))
static inline void die(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	(void)vfprintf(stderr, fmt, ap);
	va_end(ap);
	exit(EXIT_FAILURE);
}
|
||||
|
||||
/* Host to network byte order conversion as constant expressions, so that the
 * results can be used in static initialisers.
 *
 * These rely only on macros predefined by the compiler (GCC and clang both
 * provide __BYTE_ORDER__) and on plain arithmetic, so they don't depend on
 * which libc headers happen to have been included before this one. If the byte
 * order can't be determined, fail the build instead of silently picking one.
 *
 * Arguments are evaluated more than once: pass constants only.
 */
#if !defined(__BYTE_ORDER__) || !defined(__ORDER_BIG_ENDIAN__) || \
    !defined(__ORDER_LITTLE_ENDIAN__)
#error "Compiler doesn't report the target byte order via __BYTE_ORDER__"
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define htons_constant(x)	(x)
#define htonl_constant(x)	(x)
#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define htons_constant(x)						\
	((unsigned short)((((x) >> 8) & 0xffU) | (((x) & 0xffU) << 8)))
#define htonl_constant(x)						\
	((((x) & 0xff000000U) >> 24) | (((x) & 0x00ff0000U) >> 8) |	\
	 (((x) & 0x0000ff00U) << 8) | (((x) & 0x000000ffU) << 24))
#else
#error "Unsupported target byte order"
#endif
|
||||
|
||||
#define SOCKADDR_INIT(addr, port) \
|
||||
{ \
|
||||
.sin_family = AF_INET, \
|
||||
.sin_addr = { .s_addr = htonl_constant(addr) }, \
|
||||
.sin_port = htons_constant(port), \
|
||||
}
|
||||
|
||||
int sock_reuseaddr(void);
|
||||
void send_token(int s, long token);
|
||||
bool recv_token(int s, long token);
|
||||
|
||||
#endif /* REUSEADDR_COMMON_H */
|
118
doc/platform-requirements/recv-zero.c
Normal file
118
doc/platform-requirements/recv-zero.c
Normal file
@@ -0,0 +1,118 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
/* recv-zero.c
|
||||
*
|
||||
* Verify that we're able to discard datagrams by recv()ing into a zero-length
|
||||
* buffer.
|
||||
*
|
||||
* Copyright Red Hat
|
||||
* Author: David Gibson <david@gibson.dropbear.id.au>
|
||||
*/
|
||||
|
||||
#include <arpa/inet.h>
|
||||
#include <errno.h>
|
||||
#include <net/if.h>
|
||||
#include <netinet/in.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#define DSTPORT 13257U
|
||||
|
||||
enum discard_method {
|
||||
DISCARD_NULL_BUF,
|
||||
DISCARD_ZERO_IOV,
|
||||
DISCARD_NULL_IOV,
|
||||
NUM_METHODS,
|
||||
};
|
||||
|
||||
/* 127.0.0.1:DSTPORT */
|
||||
static const struct sockaddr_in lo_dst = SOCKADDR_INIT(INADDR_LOOPBACK, DSTPORT);
|
||||
|
||||
static void test_discard(enum discard_method method)
|
||||
{
|
||||
struct iovec zero_iov = { .iov_base = NULL, .iov_len = 0, };
|
||||
struct msghdr mh_zero = {
|
||||
.msg_iov = &zero_iov,
|
||||
.msg_iovlen = 1,
|
||||
};
|
||||
struct msghdr mh_null = {
|
||||
.msg_iov = NULL,
|
||||
.msg_iovlen = 0,
|
||||
};
|
||||
long token1, token2;
|
||||
int recv_s, send_s;
|
||||
ssize_t rc;
|
||||
|
||||
token1 = random();
|
||||
token2 = random();
|
||||
|
||||
recv_s = sock_reuseaddr();
|
||||
if (bind(recv_s, (struct sockaddr *)&lo_dst, sizeof(lo_dst)) < 0)
|
||||
die("bind(): %s\n", strerror(errno));
|
||||
|
||||
send_s = sock_reuseaddr();
|
||||
if (connect(send_s, (struct sockaddr *)&lo_dst, sizeof(lo_dst)) < 0)
|
||||
die("connect(): %s\n", strerror(errno));
|
||||
|
||||
send_token(send_s, token1);
|
||||
send_token(send_s, token2);
|
||||
|
||||
switch (method) {
|
||||
case DISCARD_NULL_BUF:
|
||||
/* cppcheck-suppress nullPointer */
|
||||
rc = recv(recv_s, NULL, 0, MSG_DONTWAIT);
|
||||
if (rc < 0)
|
||||
die("discarding recv(): %s\n", strerror(errno));
|
||||
break;
|
||||
|
||||
case DISCARD_ZERO_IOV:
|
||||
rc = recvmsg(recv_s, &mh_zero, MSG_DONTWAIT);
|
||||
if (rc < 0)
|
||||
die("recvmsg() with zero-length buffer: %s\n",
|
||||
strerror(errno));
|
||||
if (!((unsigned)mh_zero.msg_flags & MSG_TRUNC))
|
||||
die("Missing MSG_TRUNC flag\n");
|
||||
break;
|
||||
|
||||
case DISCARD_NULL_IOV:
|
||||
rc = recvmsg(recv_s, &mh_null, MSG_DONTWAIT);
|
||||
if (rc < 0)
|
||||
die("recvmsg() with zero-length iov: %s\n",
|
||||
strerror(errno));
|
||||
if (!((unsigned)mh_null.msg_flags & MSG_TRUNC))
|
||||
die("Missing MSG_TRUNC flag\n");
|
||||
break;
|
||||
|
||||
default:
|
||||
die("Bad method\n");
|
||||
}
|
||||
|
||||
recv_token(recv_s, token2);
|
||||
|
||||
/* cppcheck-suppress nullPointer */
|
||||
rc = recv(recv_s, NULL, 0, MSG_DONTWAIT);
|
||||
if (rc < 0 && errno != EAGAIN)
|
||||
die("redundant discarding recv(): %s\n", strerror(errno));
|
||||
if (rc >= 0)
|
||||
die("Unexpected receive: rc=%zd\n", rc);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
enum discard_method method;
|
||||
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
|
||||
for (method = 0; method < NUM_METHODS; method++)
|
||||
test_discard(method);
|
||||
|
||||
printf("Discarding datagrams with 0-length receives seems to work\n");
|
||||
|
||||
exit(0);
|
||||
}
|
240
doc/platform-requirements/reuseaddr-priority.c
Normal file
240
doc/platform-requirements/reuseaddr-priority.c
Normal file
@@ -0,0 +1,240 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
/* reuseaddr-priority.c
|
||||
*
|
||||
* Verify which SO_REUSEADDR UDP sockets get priority to receive
|
||||
* =============================================================
|
||||
*
|
||||
* SO_REUSEADDR allows multiple sockets to bind to overlapping addresses, so
|
||||
* there can be multiple sockets eligible to receive the same packet. The exact
|
||||
* semantics of which socket will receive in this circumstance isn't very well
|
||||
* documented.
|
||||
*
|
||||
* This program verifies that things behave the way we expect. Specifically we
|
||||
* expect:
|
||||
*
|
||||
* - If both a connected and an unconnected socket could receive a datagram, the
|
||||
* connected one will receive it in preference to the unconnected one.
|
||||
*
|
||||
* - If an unconnected socket bound to a specific address and an unconnected
|
||||
* socket bound to the "any" address (0.0.0.0 or ::) could receive a datagram,
|
||||
* then the one with a specific address will receive it in preference to the
|
||||
* other.
|
||||
*
|
||||
* These should be true regardless of the order the sockets are created in, or
|
||||
* the order they're polled in.
|
||||
*
|
||||
* Copyright Red Hat
|
||||
* Author: David Gibson <david@gibson.dropbear.id.au>
|
||||
*/
|
||||
|
||||
#include <arpa/inet.h>
|
||||
#include <errno.h>
|
||||
#include <net/if.h>
|
||||
#include <netinet/in.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#define SRCPORT 13246U
|
||||
#define DSTPORT 13247U
|
||||
|
||||
/* Different cases for receiving socket configuration */
|
||||
enum sock_type {
|
||||
/* Socket is bound to 0.0.0.0:DSTPORT and not connected */
|
||||
SOCK_BOUND_ANY = 0,
|
||||
|
||||
/* Socket is bound to 127.0.0.1:DSTPORT and not connected */
|
||||
SOCK_BOUND_LO = 1,
|
||||
|
||||
/* Socket is bound to 0.0.0.0:DSTPORT and connected to 127.0.0.1:SRCPORT */
|
||||
SOCK_CONNECTED = 2,
|
||||
|
||||
NUM_SOCK_TYPES,
|
||||
};
|
||||
|
||||
typedef enum sock_type order_t[NUM_SOCK_TYPES];
|
||||
|
||||
static order_t orders[] = {
|
||||
{0, 1, 2}, {0, 2, 1}, {1, 0, 2}, {1, 2, 0}, {2, 0, 1}, {2, 1, 0},
|
||||
};
|
||||
|
||||
/* 127.0.0.2 */
|
||||
#define INADDR_LOOPBACK2 ((in_addr_t)(0x7f000002))
|
||||
|
||||
/* 0.0.0.0:DSTPORT */
|
||||
static const struct sockaddr_in any_dst = SOCKADDR_INIT(INADDR_ANY, DSTPORT);
|
||||
/* 127.0.0.1:DSTPORT */
|
||||
static const struct sockaddr_in lo_dst = SOCKADDR_INIT(INADDR_LOOPBACK, DSTPORT);
|
||||
|
||||
/* 127.0.0.2:DSTPORT */
|
||||
static const struct sockaddr_in lo2_dst = SOCKADDR_INIT(INADDR_LOOPBACK2, DSTPORT);
|
||||
|
||||
/* 127.0.0.1:SRCPORT */
|
||||
static const struct sockaddr_in lo_src = SOCKADDR_INIT(INADDR_LOOPBACK, SRCPORT);
|
||||
|
||||
/* Random token to send in datagram */
|
||||
static long token;
|
||||
|
||||
/* Get a socket of the specified type for receiving */
|
||||
static int sock_recv(enum sock_type type)
|
||||
{
|
||||
const struct sockaddr *connect_sa = NULL;
|
||||
const struct sockaddr *bind_sa = NULL;
|
||||
int s;
|
||||
|
||||
s = sock_reuseaddr();
|
||||
|
||||
switch (type) {
|
||||
case SOCK_CONNECTED:
|
||||
connect_sa = (struct sockaddr *)&lo_src;
|
||||
/* fallthrough */
|
||||
case SOCK_BOUND_ANY:
|
||||
bind_sa = (struct sockaddr *)&any_dst;
|
||||
break;
|
||||
|
||||
case SOCK_BOUND_LO:
|
||||
bind_sa = (struct sockaddr *)&lo_dst;
|
||||
break;
|
||||
|
||||
default:
|
||||
die("bug");
|
||||
}
|
||||
|
||||
if (bind_sa)
|
||||
if (bind(s, bind_sa, sizeof(struct sockaddr_in)) < 0)
|
||||
die("bind(): %s\n", strerror(errno));
|
||||
if (connect_sa)
|
||||
if (connect(s, connect_sa, sizeof(struct sockaddr_in)) < 0)
|
||||
die("connect(): %s\n", strerror(errno));
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
/* Get a socket suitable for sending to the given type of receiving socket */
|
||||
static int sock_send(enum sock_type type)
|
||||
{
|
||||
const struct sockaddr *connect_sa = NULL;
|
||||
const struct sockaddr *bind_sa = NULL;
|
||||
int s;
|
||||
|
||||
s = sock_reuseaddr();
|
||||
|
||||
switch (type) {
|
||||
case SOCK_BOUND_ANY:
|
||||
connect_sa = (struct sockaddr *)&lo2_dst;
|
||||
break;
|
||||
|
||||
case SOCK_CONNECTED:
|
||||
bind_sa = (struct sockaddr *)&lo_src;
|
||||
/* fallthrough */
|
||||
case SOCK_BOUND_LO:
|
||||
connect_sa = (struct sockaddr *)&lo_dst;
|
||||
break;
|
||||
|
||||
default:
|
||||
die("bug");
|
||||
}
|
||||
|
||||
if (bind_sa)
|
||||
if (bind(s, bind_sa, sizeof(struct sockaddr_in)) < 0)
|
||||
die("bind(): %s\n", strerror(errno));
|
||||
if (connect_sa)
|
||||
if (connect(s, connect_sa, sizeof(struct sockaddr_in)) < 0)
|
||||
die("connect(): %s\n", strerror(errno));
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
/* Check for expected behaviour with one specific ordering for various operations:
|
||||
*
|
||||
* @recv_create_order: Order to create receiving sockets in
|
||||
* @send_create_order: Order to create sending sockets in
|
||||
* @test_order: Order to test the behaviour of different types
|
||||
* @recv_order: Order to check the receiving sockets
|
||||
*/
|
||||
static void check_one_order(const order_t recv_create_order,
|
||||
const order_t send_create_order,
|
||||
const order_t test_order,
|
||||
const order_t recv_order)
|
||||
{
|
||||
int rs[NUM_SOCK_TYPES];
|
||||
int ss[NUM_SOCK_TYPES];
|
||||
int nfds = 0;
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < NUM_SOCK_TYPES; i++) {
|
||||
enum sock_type t = recv_create_order[i];
|
||||
int s;
|
||||
|
||||
s = sock_recv(t);
|
||||
if (s >= nfds)
|
||||
nfds = s + 1;
|
||||
|
||||
rs[t] = s;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM_SOCK_TYPES; i++) {
|
||||
enum sock_type t = send_create_order[i];
|
||||
|
||||
ss[t] = sock_send(t);
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM_SOCK_TYPES; i++) {
|
||||
enum sock_type ti = test_order[i];
|
||||
int recv_via = -1;
|
||||
|
||||
send_token(ss[ti], token);
|
||||
|
||||
for (j = 0; j < NUM_SOCK_TYPES; j++) {
|
||||
enum sock_type tj = recv_order[j];
|
||||
|
||||
if (recv_token(rs[tj], token)) {
|
||||
if (recv_via != -1)
|
||||
die("Received token more than once\n");
|
||||
recv_via = tj;
|
||||
}
|
||||
}
|
||||
|
||||
if (recv_via == -1)
|
||||
die("Didn't receive token at all\n");
|
||||
if (recv_via != ti)
|
||||
die("Received token via unexpected socket\n");
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM_SOCK_TYPES; i++) {
|
||||
close(rs[i]);
|
||||
close(ss[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void check_all_orders(void)
|
||||
{
|
||||
int norders = sizeof(orders) / sizeof(orders[0]);
|
||||
int i, j, k, l;
|
||||
|
||||
for (i = 0; i < norders; i++)
|
||||
for (j = 0; j < norders; j++)
|
||||
for (k = 0; k < norders; k++)
|
||||
for (l = 0; l < norders; l++)
|
||||
check_one_order(orders[i], orders[j],
|
||||
orders[k], orders[l]);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
|
||||
token = random();
|
||||
|
||||
check_all_orders();
|
||||
|
||||
printf("SO_REUSEADDR receive priorities seem to work as expected\n");
|
||||
|
||||
exit(0);
|
||||
}
|
105
doc/platform-requirements/udp-close-dup.c
Normal file
105
doc/platform-requirements/udp-close-dup.c
Normal file
@@ -0,0 +1,105 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
/* udp-close-dup.c
|
||||
*
|
||||
* Verify that closing one dup() of a UDP socket won't stop other dups from
|
||||
* receiving packets.
|
||||
*
|
||||
* Copyright Red Hat
|
||||
* Author: David Gibson <david@gibson.dropbear.id.au>
|
||||
*/
|
||||
|
||||
#include <arpa/inet.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <net/if.h>
|
||||
#include <netinet/in.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#define DSTPORT 13257U
|
||||
|
||||
/* 127.0.0.1:DSTPORT */
|
||||
static const struct sockaddr_in lo_dst = SOCKADDR_INIT(INADDR_LOOPBACK, DSTPORT);
|
||||
|
||||
enum dup_method {
|
||||
DUP_DUP,
|
||||
DUP_FCNTL,
|
||||
NUM_METHODS,
|
||||
};
|
||||
|
||||
static void test_close_dup(enum dup_method method)
|
||||
{
|
||||
long token;
|
||||
int s1, s2, send_s;
|
||||
ssize_t rc;
|
||||
|
||||
s1 = sock_reuseaddr();
|
||||
if (bind(s1, (struct sockaddr *)&lo_dst, sizeof(lo_dst)) < 0)
|
||||
die("bind(): %s\n", strerror(errno));
|
||||
|
||||
send_s = sock_reuseaddr();
|
||||
if (connect(send_s, (struct sockaddr *)&lo_dst, sizeof(lo_dst)) < 0)
|
||||
die("connect(): %s\n", strerror(errno));
|
||||
|
||||
/* Receive before duplicating */
|
||||
token = random();
|
||||
send_token(send_s, token);
|
||||
recv_token(s1, token);
|
||||
|
||||
switch (method) {
|
||||
case DUP_DUP:
|
||||
/* NOLINTNEXTLINE(android-cloexec-dup) */
|
||||
s2 = dup(s1);
|
||||
if (s2 < 0)
|
||||
die("dup(): %s\n", strerror(errno));
|
||||
break;
|
||||
case DUP_FCNTL:
|
||||
s2 = fcntl(s1, F_DUPFD_CLOEXEC, 0);
|
||||
if (s2 < 0)
|
||||
die("F_DUPFD_CLOEXEC: %s\n", strerror(errno));
|
||||
break;
|
||||
default:
|
||||
die("Bad method\n");
|
||||
}
|
||||
|
||||
/* Receive via original handle */
|
||||
token = random();
|
||||
send_token(send_s, token);
|
||||
recv_token(s1, token);
|
||||
|
||||
/* Receive via duplicated handle */
|
||||
token = random();
|
||||
send_token(send_s, token);
|
||||
recv_token(s2, token);
|
||||
|
||||
/* Close duplicate */
|
||||
rc = close(s2);
|
||||
if (rc < 0)
|
||||
die("close() dup: %s\n", strerror(errno));
|
||||
|
||||
/* Receive after closing duplicate */
|
||||
token = random();
|
||||
send_token(send_s, token);
|
||||
recv_token(s1, token);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
enum dup_method method;
|
||||
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
|
||||
for (method = 0; method < NUM_METHODS; method++)
|
||||
test_close_dup(method);
|
||||
|
||||
printf("Closing dup()ed UDP sockets seems to work as expected\n");
|
||||
|
||||
exit(0);
|
||||
}
|
43
epoll_type.h
Normal file
43
epoll_type.h
Normal file
@@ -0,0 +1,43 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
* Copyright Red Hat
|
||||
* Author: David Gibson <david@gibson.dropbear.id.au>
|
||||
*/
|
||||
|
||||
#ifndef EPOLL_TYPE_H
|
||||
#define EPOLL_TYPE_H
|
||||
|
||||
/**
 * enum epoll_type - Kinds of file descriptor we poll over
 *
 * Values are assigned sequentially starting from EPOLL_TYPE_NONE == 0, so the
 * order of the enumerators below is significant.
 */
enum epoll_type {
	/* Invalid type: special marker value */
	EPOLL_TYPE_NONE = 0,
	/* TCP socket, connected */
	EPOLL_TYPE_TCP,
	/* TCP socket, connected, spliced */
	EPOLL_TYPE_TCP_SPLICE,
	/* TCP socket, listening */
	EPOLL_TYPE_TCP_LISTEN,
	/* timerfd used for TCP timers */
	EPOLL_TYPE_TCP_TIMER,
	/* UDP "listening" socket */
	EPOLL_TYPE_UDP_LISTEN,
	/* UDP socket carrying replies for one specific flow */
	EPOLL_TYPE_UDP_REPLY,
	/* ICMP or ICMPv6 ping socket */
	EPOLL_TYPE_PING,
	/* pasta: inotify fd watching for the end of the netns */
	EPOLL_TYPE_NSQUIT_INOTIFY,
	/* pasta: timer fd watching for the end of the netns (inotify fallback) */
	EPOLL_TYPE_NSQUIT_TIMER,
	/* tuntap character device */
	EPOLL_TYPE_TAP_PASTA,
	/* Socket connected to qemu */
	EPOLL_TYPE_TAP_PASST,
	/* Socket listening for connections from qemu */
	EPOLL_TYPE_TAP_LISTEN,

	/* Number of types above (including EPOLL_TYPE_NONE) */
	EPOLL_NUM_TYPES,
};
|
||||
|
||||
#endif /* EPOLL_TYPE_H */
|
485
flow.c
485
flow.c
@@ -5,9 +5,11 @@
|
||||
* Tracking for logical "flows" of packets.
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sched.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "util.h"
|
||||
@@ -35,6 +37,7 @@ const char *flow_type_str[] = {
|
||||
[FLOW_TCP_SPLICE] = "TCP connection (spliced)",
|
||||
[FLOW_PING4] = "ICMP ping sequence",
|
||||
[FLOW_PING6] = "ICMPv6 ping sequence",
|
||||
[FLOW_UDP] = "UDP flow",
|
||||
};
|
||||
static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES,
|
||||
"flow_type_str[] doesn't match enum flow_type");
|
||||
@@ -44,6 +47,7 @@ const uint8_t flow_proto[] = {
|
||||
[FLOW_TCP_SPLICE] = IPPROTO_TCP,
|
||||
[FLOW_PING4] = IPPROTO_ICMP,
|
||||
[FLOW_PING6] = IPPROTO_ICMPV6,
|
||||
[FLOW_UDP] = IPPROTO_UDP,
|
||||
};
|
||||
static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
|
||||
"flow_proto[] doesn't match enum flow_type");
|
||||
@@ -105,9 +109,154 @@ unsigned flow_first_free;
|
||||
union flow flowtab[FLOW_MAX];
|
||||
static const union flow *flow_new_entry; /* = NULL */
|
||||
|
||||
/* Hash table to index it */
|
||||
#define FLOW_HASH_LOAD 70 /* % */
|
||||
#define FLOW_HASH_SIZE ((2 * FLOW_MAX * 100 / FLOW_HASH_LOAD))
|
||||
|
||||
/* Table for lookup from flowside information */
|
||||
static flow_sidx_t flow_hashtab[FLOW_HASH_SIZE];
|
||||
|
||||
static_assert(ARRAY_SIZE(flow_hashtab) >= 2 * FLOW_MAX,
|
||||
"Safe linear probing requires hash table with more entries than the number of sides in the flow table");
|
||||
|
||||
/* Last time the flow timers ran */
|
||||
static struct timespec flow_timer_run;
|
||||
|
||||
/** flowside_from_af() - Initialise flowside from addresses
|
||||
* @side: flowside to initialise
|
||||
* @af: Address family (AF_INET or AF_INET6)
|
||||
* @eaddr: Endpoint address (pointer to in_addr or in6_addr)
|
||||
* @eport: Endpoint port
|
||||
* @faddr: Forwarding address (pointer to in_addr or in6_addr)
|
||||
* @fport: Forwarding port
|
||||
*/
|
||||
static void flowside_from_af(struct flowside *side, sa_family_t af,
|
||||
const void *eaddr, in_port_t eport,
|
||||
const void *faddr, in_port_t fport)
|
||||
{
|
||||
if (faddr)
|
||||
inany_from_af(&side->faddr, af, faddr);
|
||||
else
|
||||
side->faddr = inany_any6;
|
||||
side->fport = fport;
|
||||
|
||||
if (eaddr)
|
||||
inany_from_af(&side->eaddr, af, eaddr);
|
||||
else
|
||||
side->eaddr = inany_any6;
|
||||
side->eport = eport;
|
||||
}
|
||||
|
||||
/**
|
||||
* struct flowside_sock_args - Parameters for flowside_sock_splice()
|
||||
* @c: Execution context
|
||||
* @fd: Filled in with new socket fd
|
||||
* @err: Filled in with errno if something failed
|
||||
* @type: Socket epoll type
|
||||
* @sa: Socket address
|
||||
* @sl: Length of @sa
|
||||
* @data: epoll reference data
|
||||
*/
|
||||
struct flowside_sock_args {
|
||||
const struct ctx *c;
|
||||
int fd;
|
||||
int err;
|
||||
enum epoll_type type;
|
||||
const struct sockaddr *sa;
|
||||
socklen_t sl;
|
||||
const char *path;
|
||||
uint32_t data;
|
||||
};
|
||||
|
||||
/** flowside_sock_splice() - Create and bind socket for PIF_SPLICE based on flowside
|
||||
* @arg: Argument as a struct flowside_sock_args
|
||||
*
|
||||
* Return: 0
|
||||
*/
|
||||
static int flowside_sock_splice(void *arg)
|
||||
{
|
||||
struct flowside_sock_args *a = arg;
|
||||
|
||||
ns_enter(a->c);
|
||||
|
||||
a->fd = sock_l4_sa(a->c, a->type, a->sa, a->sl, NULL,
|
||||
a->sa->sa_family == AF_INET6, a->data);
|
||||
a->err = errno;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** flowside_sock_l4() - Create and bind socket based on flowside
|
||||
* @c: Execution context
|
||||
* @type: Socket epoll type
|
||||
* @pif: Interface for this socket
|
||||
* @tgt: Target flowside
|
||||
* @data: epoll reference portion for protocol handlers
|
||||
*
|
||||
* Return: socket fd of protocol @proto bound to the forwarding address and port
|
||||
* from @tgt (if specified).
|
||||
*/
|
||||
int flowside_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
|
||||
const struct flowside *tgt, uint32_t data)
|
||||
{
|
||||
const char *ifname = NULL;
|
||||
union sockaddr_inany sa;
|
||||
socklen_t sl;
|
||||
|
||||
ASSERT(pif_is_socket(pif));
|
||||
|
||||
pif_sockaddr(c, &sa, &sl, pif, &tgt->faddr, tgt->fport);
|
||||
|
||||
switch (pif) {
|
||||
case PIF_HOST:
|
||||
if (inany_is_loopback(&tgt->faddr))
|
||||
ifname = NULL;
|
||||
else if (sa.sa_family == AF_INET)
|
||||
ifname = c->ip4.ifname_out;
|
||||
else if (sa.sa_family == AF_INET6)
|
||||
ifname = c->ip6.ifname_out;
|
||||
|
||||
return sock_l4_sa(c, type, &sa, sl, ifname,
|
||||
sa.sa_family == AF_INET6, data);
|
||||
|
||||
case PIF_SPLICE: {
|
||||
struct flowside_sock_args args = {
|
||||
.c = c, .type = type,
|
||||
.sa = &sa.sa, .sl = sl, .data = data,
|
||||
};
|
||||
NS_CALL(flowside_sock_splice, &args);
|
||||
errno = args.err;
|
||||
return args.fd;
|
||||
}
|
||||
|
||||
default:
|
||||
/* If we add new socket pifs, they'll need to be implemented
|
||||
* here
|
||||
*/
|
||||
ASSERT(0);
|
||||
}
|
||||
}
|
||||
|
||||
/** flowside_connect() - Connect a socket based on flowside
|
||||
* @c: Execution context
|
||||
* @s: Socket to connect
|
||||
* @pif: Target pif
|
||||
* @tgt: Target flowside
|
||||
*
|
||||
* Connect @s to the endpoint address and port from @tgt.
|
||||
*
|
||||
* Return: 0 on success, negative on error
|
||||
*/
|
||||
int flowside_connect(const struct ctx *c, int s,
|
||||
uint8_t pif, const struct flowside *tgt)
|
||||
{
|
||||
union sockaddr_inany sa;
|
||||
socklen_t sl;
|
||||
|
||||
pif_sockaddr(c, &sa, &sl, pif, &tgt->eaddr, tgt->eport);
|
||||
return connect(s, &sa.sa, sl);
|
||||
}
|
||||
|
||||
/** flow_log_ - Log flow-related message
|
||||
* @f: flow the message is related to
|
||||
* @pri: Log priority
|
||||
@@ -140,6 +289,10 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
|
||||
*/
|
||||
static void flow_set_state(struct flow_common *f, enum flow_state state)
|
||||
{
|
||||
char estr0[INANY_ADDRSTRLEN], fstr0[INANY_ADDRSTRLEN];
|
||||
char estr1[INANY_ADDRSTRLEN], fstr1[INANY_ADDRSTRLEN];
|
||||
const struct flowside *ini = &f->side[INISIDE];
|
||||
const struct flowside *tgt = &f->side[TGTSIDE];
|
||||
uint8_t oldstate = f->state;
|
||||
|
||||
ASSERT(state < FLOW_NUM_STATES);
|
||||
@@ -150,18 +303,33 @@ static void flow_set_state(struct flow_common *f, enum flow_state state)
|
||||
FLOW_STATE(f));
|
||||
|
||||
if (MAX(state, oldstate) >= FLOW_STATE_TGT)
|
||||
flow_log_(f, LOG_DEBUG, "%s => %s", pif_name(f->pif[INISIDE]),
|
||||
pif_name(f->pif[TGTSIDE]));
|
||||
flow_log_(f, LOG_DEBUG,
|
||||
"%s [%s]:%hu -> [%s]:%hu => %s [%s]:%hu -> [%s]:%hu",
|
||||
pif_name(f->pif[INISIDE]),
|
||||
inany_ntop(&ini->eaddr, estr0, sizeof(estr0)),
|
||||
ini->eport,
|
||||
inany_ntop(&ini->faddr, fstr0, sizeof(fstr0)),
|
||||
ini->fport,
|
||||
pif_name(f->pif[TGTSIDE]),
|
||||
inany_ntop(&tgt->faddr, fstr1, sizeof(fstr1)),
|
||||
tgt->fport,
|
||||
inany_ntop(&tgt->eaddr, estr1, sizeof(estr1)),
|
||||
tgt->eport);
|
||||
else if (MAX(state, oldstate) >= FLOW_STATE_INI)
|
||||
flow_log_(f, LOG_DEBUG, "%s => ?", pif_name(f->pif[INISIDE]));
|
||||
flow_log_(f, LOG_DEBUG, "%s [%s]:%hu -> [%s]:%hu => ?",
|
||||
pif_name(f->pif[INISIDE]),
|
||||
inany_ntop(&ini->eaddr, estr0, sizeof(estr0)),
|
||||
ini->eport,
|
||||
inany_ntop(&ini->faddr, fstr0, sizeof(fstr0)),
|
||||
ini->fport);
|
||||
}
|
||||
|
||||
/**
|
||||
* flow_initiate() - Move flow to INI, setting INISIDE details
|
||||
* flow_initiate_() - Move flow to INI, setting pif[INISIDE]
|
||||
* @flow: Flow to change state
|
||||
* @pif: pif of the initiating side
|
||||
*/
|
||||
void flow_initiate(union flow *flow, uint8_t pif)
|
||||
static void flow_initiate_(union flow *flow, uint8_t pif)
|
||||
{
|
||||
struct flow_common *f = &flow->f;
|
||||
|
||||
@@ -175,21 +343,104 @@ void flow_initiate(union flow *flow, uint8_t pif)
|
||||
}
|
||||
|
||||
/**
|
||||
* flow_target() - Move flow to TGT, setting TGTSIDE details
|
||||
* flow_initiate_af() - Move flow to INI, setting INISIDE details
|
||||
* @flow: Flow to change state
|
||||
* @pif: pif of the target side
|
||||
* @pif: pif of the initiating side
|
||||
* @af: Address family of @eaddr and @faddr
|
||||
* @saddr: Source address (pointer to in_addr or in6_addr)
|
||||
* @sport: Endpoint port
|
||||
* @daddr: Destination address (pointer to in_addr or in6_addr)
|
||||
* @dport: Destination port
|
||||
*
|
||||
* Return: pointer to the initiating flowside information
|
||||
*/
|
||||
void flow_target(union flow *flow, uint8_t pif)
|
||||
const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif,
|
||||
sa_family_t af,
|
||||
const void *saddr, in_port_t sport,
|
||||
const void *daddr, in_port_t dport)
|
||||
{
|
||||
struct flow_common *f = &flow->f;
|
||||
struct flowside *ini = &flow->f.side[INISIDE];
|
||||
|
||||
flowside_from_af(ini, af, saddr, sport, daddr, dport);
|
||||
flow_initiate_(flow, pif);
|
||||
return ini;
|
||||
}
|
||||
|
||||
/**
|
||||
* flow_initiate_sa() - Move flow to INI, setting INISIDE details
|
||||
* @flow: Flow to change state
|
||||
* @pif: pif of the initiating side
|
||||
* @ssa: Source socket address
|
||||
* @dport: Destination port
|
||||
*
|
||||
* Return: pointer to the initiating flowside information
|
||||
*/
|
||||
const struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif,
|
||||
const union sockaddr_inany *ssa,
|
||||
in_port_t dport)
|
||||
{
|
||||
struct flowside *ini = &flow->f.side[INISIDE];
|
||||
|
||||
inany_from_sockaddr(&ini->eaddr, &ini->eport, ssa);
|
||||
if (inany_v4(&ini->eaddr))
|
||||
ini->faddr = inany_any4;
|
||||
else
|
||||
ini->faddr = inany_any6;
|
||||
ini->fport = dport;
|
||||
flow_initiate_(flow, pif);
|
||||
return ini;
|
||||
}
|
||||
|
||||
/**
|
||||
* flow_target() - Determine where flow should forward to, and move to TGT
|
||||
* @c: Execution context
|
||||
* @flow: Flow to forward
|
||||
* @proto: Protocol
|
||||
*
|
||||
* Return: pointer to the target flowside information
|
||||
*/
|
||||
const struct flowside *flow_target(const struct ctx *c, union flow *flow,
|
||||
uint8_t proto)
|
||||
{
|
||||
char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN];
|
||||
struct flow_common *f = &flow->f;
|
||||
const struct flowside *ini = &f->side[INISIDE];
|
||||
struct flowside *tgt = &f->side[TGTSIDE];
|
||||
uint8_t tgtpif = PIF_NONE;
|
||||
|
||||
ASSERT(pif != PIF_NONE);
|
||||
ASSERT(flow_new_entry == flow && f->state == FLOW_STATE_INI);
|
||||
ASSERT(f->type == FLOW_TYPE_NONE);
|
||||
ASSERT(f->pif[INISIDE] != PIF_NONE && f->pif[TGTSIDE] == PIF_NONE);
|
||||
ASSERT(flow->f.state == FLOW_STATE_INI);
|
||||
|
||||
f->pif[TGTSIDE] = pif;
|
||||
switch (f->pif[INISIDE]) {
|
||||
case PIF_TAP:
|
||||
tgtpif = fwd_nat_from_tap(c, proto, ini, tgt);
|
||||
break;
|
||||
|
||||
case PIF_SPLICE:
|
||||
tgtpif = fwd_nat_from_splice(c, proto, ini, tgt);
|
||||
break;
|
||||
|
||||
case PIF_HOST:
|
||||
tgtpif = fwd_nat_from_host(c, proto, ini, tgt);
|
||||
break;
|
||||
|
||||
default:
|
||||
flow_err(flow, "No rules to forward %s [%s]:%hu -> [%s]:%hu",
|
||||
pif_name(f->pif[INISIDE]),
|
||||
inany_ntop(&ini->eaddr, estr, sizeof(estr)),
|
||||
ini->eport,
|
||||
inany_ntop(&ini->faddr, fstr, sizeof(fstr)),
|
||||
ini->fport);
|
||||
}
|
||||
|
||||
if (tgtpif == PIF_NONE)
|
||||
return NULL;
|
||||
|
||||
f->pif[TGTSIDE] = tgtpif;
|
||||
flow_set_state(f, FLOW_STATE_TGT);
|
||||
return tgt;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -295,6 +546,209 @@ void flow_alloc_cancel(union flow *flow)
|
||||
flow_new_entry = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* flow_hash() - Calculate hash value for one side of a flow
|
||||
* @c: Execution context
|
||||
* @proto: Protocol of this flow (IP L4 protocol number)
|
||||
* @pif: pif of the side to hash
|
||||
* @side: Flowside (must not have unspecified parts)
|
||||
*
|
||||
* Return: hash value
|
||||
*/
|
||||
static uint64_t flow_hash(const struct ctx *c, uint8_t proto, uint8_t pif,
|
||||
const struct flowside *side)
|
||||
{
|
||||
struct siphash_state state = SIPHASH_INIT(c->hash_secret);
|
||||
|
||||
/* For the hash table to work, we need complete endpoint information,
|
||||
* and at least a forwarding port.
|
||||
*/
|
||||
ASSERT(pif != PIF_NONE && !inany_is_unspecified(&side->eaddr) &&
|
||||
side->eport != 0 && side->fport != 0);
|
||||
|
||||
inany_siphash_feed(&state, &side->faddr);
|
||||
inany_siphash_feed(&state, &side->eaddr);
|
||||
|
||||
return siphash_final(&state, 38, (uint64_t)proto << 40 |
|
||||
(uint64_t)pif << 32 |
|
||||
(uint64_t)side->fport << 16 |
|
||||
(uint64_t)side->eport);
|
||||
}
|
||||
|
||||
/**
|
||||
* flow_sidx_hash() - Calculate hash value for given side of a given flow
|
||||
* @c: Execution context
|
||||
* @sidx: Flow & side index to get hash for
|
||||
*
|
||||
* Return: hash value, of the flow & side represented by @sidx
|
||||
*/
|
||||
static uint64_t flow_sidx_hash(const struct ctx *c, flow_sidx_t sidx)
|
||||
{
|
||||
const struct flow_common *f = &flow_at_sidx(sidx)->f;
|
||||
return flow_hash(c, FLOW_PROTO(f),
|
||||
f->pif[sidx.sidei], &f->side[sidx.sidei]);
|
||||
}
|
||||
|
||||
/**
|
||||
* flow_hash_probe_() - Find hash bucket for a flow, given hash
|
||||
* @hash: Raw hash value for flow & side
|
||||
* @sidx: Flow and side to find bucket for
|
||||
*
|
||||
* Return: If @sidx is in the hash table, its current bucket, otherwise a
|
||||
* suitable free bucket for it.
|
||||
*/
|
||||
static inline unsigned flow_hash_probe_(uint64_t hash, flow_sidx_t sidx)
|
||||
{
|
||||
unsigned b = hash % FLOW_HASH_SIZE;
|
||||
|
||||
/* Linear probing */
|
||||
while (flow_sidx_valid(flow_hashtab[b]) &&
|
||||
!flow_sidx_eq(flow_hashtab[b], sidx))
|
||||
b = mod_sub(b, 1, FLOW_HASH_SIZE);
|
||||
|
||||
return b;
|
||||
}
|
||||
|
||||
/**
|
||||
* flow_hash_probe() - Find hash bucket for a flow
|
||||
* @c: Execution context
|
||||
* @sidx: Flow and side to find bucket for
|
||||
*
|
||||
* Return: If @sidx is in the hash table, its current bucket, otherwise a
|
||||
* suitable free bucket for it.
|
||||
*/
|
||||
static inline unsigned flow_hash_probe(const struct ctx *c, flow_sidx_t sidx)
|
||||
{
|
||||
return flow_hash_probe_(flow_sidx_hash(c, sidx), sidx);
|
||||
}
|
||||
|
||||
/**
|
||||
* flow_hash_insert() - Insert side of a flow into into hash table
|
||||
* @c: Execution context
|
||||
* @sidx: Flow & side index
|
||||
*
|
||||
* Return: raw (un-modded) hash value of side of flow
|
||||
*/
|
||||
uint64_t flow_hash_insert(const struct ctx *c, flow_sidx_t sidx)
|
||||
{
|
||||
uint64_t hash = flow_sidx_hash(c, sidx);
|
||||
unsigned b = flow_hash_probe_(hash, sidx);
|
||||
|
||||
flow_hashtab[b] = sidx;
|
||||
flow_dbg(flow_at_sidx(sidx), "Side %u hash table insert: bucket: %u",
|
||||
sidx.sidei, b);
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
/**
|
||||
* flow_hash_remove() - Drop side of a flow from the hash table
|
||||
* @c: Execution context
|
||||
* @sidx: Side of flow to remove
|
||||
*/
|
||||
void flow_hash_remove(const struct ctx *c, flow_sidx_t sidx)
|
||||
{
|
||||
unsigned b = flow_hash_probe(c, sidx), s;
|
||||
|
||||
if (!flow_sidx_valid(flow_hashtab[b]))
|
||||
return; /* Redundant remove */
|
||||
|
||||
flow_dbg(flow_at_sidx(sidx), "Side %u hash table remove: bucket: %u",
|
||||
sidx.sidei, b);
|
||||
|
||||
/* Scan the remainder of the cluster */
|
||||
for (s = mod_sub(b, 1, FLOW_HASH_SIZE);
|
||||
flow_sidx_valid(flow_hashtab[s]);
|
||||
s = mod_sub(s, 1, FLOW_HASH_SIZE)) {
|
||||
unsigned h = flow_sidx_hash(c, flow_hashtab[s]) % FLOW_HASH_SIZE;
|
||||
|
||||
if (!mod_between(h, s, b, FLOW_HASH_SIZE)) {
|
||||
/* flow_hashtab[s] can live in flow_hashtab[b]'s slot */
|
||||
debug("hash table remove: shuffle %u -> %u", s, b);
|
||||
flow_hashtab[b] = flow_hashtab[s];
|
||||
b = s;
|
||||
}
|
||||
}
|
||||
|
||||
flow_hashtab[b] = FLOW_SIDX_NONE;
|
||||
}
|
||||
|
||||
/**
|
||||
* flowside_lookup() - Look for a matching flowside in the flow table
|
||||
* @c: Execution context
|
||||
* @proto: Protocol of the flow (IP L4 protocol number)
|
||||
* @pif: pif to look for in the table
|
||||
* @side: Flowside to look for in the table
|
||||
*
|
||||
* Return: sidx of the matching flow & side, FLOW_SIDX_NONE if not found
|
||||
*/
|
||||
static flow_sidx_t flowside_lookup(const struct ctx *c, uint8_t proto,
|
||||
uint8_t pif, const struct flowside *side)
|
||||
{
|
||||
flow_sidx_t sidx;
|
||||
union flow *flow;
|
||||
unsigned b;
|
||||
|
||||
b = flow_hash(c, proto, pif, side) % FLOW_HASH_SIZE;
|
||||
while ((sidx = flow_hashtab[b], flow = flow_at_sidx(sidx)) &&
|
||||
!(FLOW_PROTO(&flow->f) == proto &&
|
||||
flow->f.pif[sidx.sidei] == pif &&
|
||||
flowside_eq(&flow->f.side[sidx.sidei], side)))
|
||||
b = (b + 1) % FLOW_HASH_SIZE;
|
||||
|
||||
return flow_hashtab[b];
|
||||
}
|
||||
|
||||
/**
|
||||
* flow_lookup_af() - Look up a flow given addressing information
|
||||
* @c: Execution context
|
||||
* @proto: Protocol of the flow (IP L4 protocol number)
|
||||
* @pif: Interface of the flow
|
||||
* @af: Address family, AF_INET or AF_INET6
|
||||
* @eaddr: Guest side endpoint address (guest local address)
|
||||
* @faddr: Guest side forwarding address (guest remote address)
|
||||
* @eport: Guest side endpoint port (guest local port)
|
||||
* @fport: Guest side forwarding port (guest remote port)
|
||||
*
|
||||
* Return: sidx of the matching flow & side, FLOW_SIDX_NONE if not found
|
||||
*/
|
||||
flow_sidx_t flow_lookup_af(const struct ctx *c,
|
||||
uint8_t proto, uint8_t pif, sa_family_t af,
|
||||
const void *eaddr, const void *faddr,
|
||||
in_port_t eport, in_port_t fport)
|
||||
{
|
||||
struct flowside side;
|
||||
|
||||
flowside_from_af(&side, af, eaddr, eport, faddr, fport);
|
||||
return flowside_lookup(c, proto, pif, &side);
|
||||
}
|
||||
|
||||
/**
|
||||
* flow_lookup_sa() - Look up a flow given an endpoint socket address
|
||||
* @c: Execution context
|
||||
* @proto: Protocol of the flow (IP L4 protocol number)
|
||||
* @pif: Interface of the flow
|
||||
* @esa: Socket address of the endpoint
|
||||
* @fport: Forwarding port number
|
||||
*
|
||||
* Return: sidx of the matching flow & side, FLOW_SIDX_NONE if not found
|
||||
*/
|
||||
flow_sidx_t flow_lookup_sa(const struct ctx *c, uint8_t proto, uint8_t pif,
|
||||
const void *esa, in_port_t fport)
|
||||
{
|
||||
struct flowside side = {
|
||||
.fport = fport,
|
||||
};
|
||||
|
||||
inany_from_sockaddr(&side.eaddr, &side.eport, esa);
|
||||
if (inany_v4(&side.eaddr))
|
||||
side.faddr = inany_any4;
|
||||
else
|
||||
side.faddr = inany_any6;
|
||||
|
||||
return flowside_lookup(c, proto, pif, &side);
|
||||
}
|
||||
|
||||
/**
|
||||
* flow_defer_handler() - Handler for per-flow deferred and timed tasks
|
||||
* @c: Execution context
|
||||
@@ -374,6 +828,10 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
|
||||
if (timer)
|
||||
closed = icmp_ping_timer(c, &flow->ping, now);
|
||||
break;
|
||||
case FLOW_UDP:
|
||||
if (timer)
|
||||
closed = udp_flow_timer(c, &flow->udp, now);
|
||||
break;
|
||||
default:
|
||||
/* Assume other flow types don't need any handling */
|
||||
;
|
||||
@@ -408,7 +866,12 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now)
|
||||
*/
|
||||
void flow_init(void)
|
||||
{
|
||||
unsigned b;
|
||||
|
||||
/* Initial state is a single free cluster containing the whole table */
|
||||
flowtab[0].free.n = FLOW_MAX;
|
||||
flowtab[0].free.next = FLOW_MAX;
|
||||
|
||||
for (b = 0; b < FLOW_HASH_SIZE; b++)
|
||||
flow_hashtab[b] = FLOW_SIDX_NONE;
|
||||
}
|
||||
|
74
flow.h
74
flow.h
@@ -115,6 +115,8 @@ enum flow_type {
|
||||
FLOW_PING4,
|
||||
/* ICMPv6 echo requests from guest to host and matching replies back */
|
||||
FLOW_PING6,
|
||||
/* UDP pseudo-connection */
|
||||
FLOW_UDP,
|
||||
|
||||
FLOW_NUM_TYPES,
|
||||
};
|
||||
@@ -132,14 +134,49 @@ extern const uint8_t flow_proto[];
|
||||
|
||||
#define SIDES 2
|
||||
|
||||
#define INISIDE 0 /* Initiating side */
|
||||
#define TGTSIDE 1 /* Target side */
|
||||
#define INISIDE 0 /* Initiating side index */
|
||||
#define TGTSIDE 1 /* Target side index */
|
||||
|
||||
/**
|
||||
* struct flowside - Address information for one side of a flow
|
||||
* @eaddr: Endpoint address (remote address from passt's PoV)
|
||||
* @faddr: Forwarding address (local address from passt's PoV)
|
||||
* @eport: Endpoint port
|
||||
* @fport: Forwarding port
|
||||
*/
|
||||
struct flowside {
|
||||
union inany_addr faddr;
|
||||
union inany_addr eaddr;
|
||||
in_port_t fport;
|
||||
in_port_t eport;
|
||||
};
|
||||
|
||||
/**
|
||||
* flowside_eq() - Check if two flowsides are equal
|
||||
* @left, @right: Flowsides to compare
|
||||
*
|
||||
* Return: true if equal, false otherwise
|
||||
*/
|
||||
static inline bool flowside_eq(const struct flowside *left,
|
||||
const struct flowside *right)
|
||||
{
|
||||
return inany_equals(&left->eaddr, &right->eaddr) &&
|
||||
left->eport == right->eport &&
|
||||
inany_equals(&left->faddr, &right->faddr) &&
|
||||
left->fport == right->fport;
|
||||
}
|
||||
|
||||
int flowside_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
|
||||
const struct flowside *tgt, uint32_t data);
|
||||
int flowside_connect(const struct ctx *c, int s,
|
||||
uint8_t pif, const struct flowside *tgt);
|
||||
|
||||
/**
|
||||
* struct flow_common - Common fields for packet flows
|
||||
* @state: State of the flow table entry
|
||||
* @type: Type of packet flow
|
||||
* @pif[]: Interface for each side of the flow
|
||||
* @side[]: Information for each side of the flow
|
||||
*/
|
||||
struct flow_common {
|
||||
#ifdef __GNUC__
|
||||
@@ -154,6 +191,7 @@ struct flow_common {
|
||||
"Not enough bits for type field");
|
||||
#endif
|
||||
uint8_t pif[SIDES];
|
||||
struct flowside side[SIDES];
|
||||
};
|
||||
|
||||
#define FLOW_INDEX_BITS 17 /* 128k - 1 */
|
||||
@@ -164,17 +202,28 @@ struct flow_common {
|
||||
|
||||
/**
|
||||
* struct flow_sidx - ID for one side of a specific flow
|
||||
* @side: Side referenced (0 or 1)
|
||||
* @flow: Index of flow referenced
|
||||
* @sidei: Index of side referenced (0 or 1)
|
||||
* @flowi: Index of flow referenced
|
||||
*/
|
||||
typedef struct flow_sidx {
|
||||
unsigned side :1;
|
||||
unsigned flow :FLOW_INDEX_BITS;
|
||||
unsigned sidei :1;
|
||||
unsigned flowi :FLOW_INDEX_BITS;
|
||||
} flow_sidx_t;
|
||||
static_assert(sizeof(flow_sidx_t) <= sizeof(uint32_t),
|
||||
"flow_sidx_t must fit within 32 bits");
|
||||
|
||||
#define FLOW_SIDX_NONE ((flow_sidx_t){ .flow = FLOW_MAX })
|
||||
#define FLOW_SIDX_NONE ((flow_sidx_t){ .flowi = FLOW_MAX })
|
||||
|
||||
/**
|
||||
* flow_sidx_valid() - Test if a sidx is valid
|
||||
* @sidx: sidx value
|
||||
*
|
||||
* Return: true if @sidx refers to a valid flow & side
|
||||
*/
|
||||
static inline bool flow_sidx_valid(flow_sidx_t sidx)
|
||||
{
|
||||
return sidx.flowi < FLOW_MAX;
|
||||
}
|
||||
|
||||
/**
|
||||
* flow_sidx_eq() - Test if two sidx values are equal
|
||||
@@ -184,9 +233,18 @@ static_assert(sizeof(flow_sidx_t) <= sizeof(uint32_t),
|
||||
*/
|
||||
static inline bool flow_sidx_eq(flow_sidx_t a, flow_sidx_t b)
|
||||
{
|
||||
return (a.flow == b.flow) && (a.side == b.side);
|
||||
return (a.flowi == b.flowi) && (a.sidei == b.sidei);
|
||||
}
|
||||
|
||||
uint64_t flow_hash_insert(const struct ctx *c, flow_sidx_t sidx);
|
||||
void flow_hash_remove(const struct ctx *c, flow_sidx_t sidx);
|
||||
flow_sidx_t flow_lookup_af(const struct ctx *c,
|
||||
uint8_t proto, uint8_t pif, sa_family_t af,
|
||||
const void *eaddr, const void *faddr,
|
||||
in_port_t eport, in_port_t fport);
|
||||
flow_sidx_t flow_lookup_sa(const struct ctx *c, uint8_t proto, uint8_t pif,
|
||||
const void *esa, in_port_t fport);
|
||||
|
||||
union flow;
|
||||
|
||||
void flow_init(void);
|
||||
|
95
flow_table.h
95
flow_table.h
@@ -9,6 +9,7 @@
|
||||
|
||||
#include "tcp_conn.h"
|
||||
#include "icmp_flow.h"
|
||||
#include "udp_flow.h"
|
||||
|
||||
/**
|
||||
* struct flow_free_cluster - Information about a cluster of free entries
|
||||
@@ -35,14 +36,21 @@ union flow {
|
||||
struct tcp_tap_conn tcp;
|
||||
struct tcp_splice_conn tcp_splice;
|
||||
struct icmp_ping_flow ping;
|
||||
struct udp_flow udp;
|
||||
};
|
||||
|
||||
/* Global Flow Table */
|
||||
extern unsigned flow_first_free;
|
||||
extern union flow flowtab[];
|
||||
|
||||
/**
|
||||
* flow_foreach_sidei() - 'for' type macro to step through each side of flow
|
||||
* @sidei_: Takes value INISIDE, then TGTSIDE
|
||||
*/
|
||||
#define flow_foreach_sidei(sidei_) \
|
||||
for ((sidei_) = INISIDE; (sidei_) < SIDES; (sidei_)++)
|
||||
|
||||
/** flow_idx - Index of flow from common structure
|
||||
/** flow_idx() - Index of flow from common structure
|
||||
* @f: Common flow fields pointer
|
||||
*
|
||||
* Return: index of @f in the flow table
|
||||
@@ -52,63 +60,116 @@ static inline unsigned flow_idx(const struct flow_common *f)
|
||||
return (union flow *)f - flowtab;
|
||||
}
|
||||
|
||||
/** FLOW_IDX - Find the index of a flow
|
||||
/** FLOW_IDX() - Find the index of a flow
|
||||
* @f_: Flow pointer, either union flow * or protocol specific
|
||||
*
|
||||
* Return: index of @f in the flow table
|
||||
*/
|
||||
#define FLOW_IDX(f_) (flow_idx(&(f_)->f))
|
||||
|
||||
/** FLOW - Flow entry at a given index
|
||||
/** FLOW() - Flow entry at a given index
|
||||
* @idx: Flow index
|
||||
*
|
||||
* Return: pointer to entry @idx in the flow table
|
||||
*/
|
||||
#define FLOW(idx) (&flowtab[(idx)])
|
||||
|
||||
/** flow_at_sidx - Flow entry for a given sidx
|
||||
/** flow_at_sidx() - Flow entry for a given sidx
|
||||
* @sidx: Flow & side index
|
||||
*
|
||||
* Return: pointer to the corresponding flow entry, or NULL
|
||||
*/
|
||||
static inline union flow *flow_at_sidx(flow_sidx_t sidx)
|
||||
{
|
||||
if (sidx.flow >= FLOW_MAX)
|
||||
if (!flow_sidx_valid(sidx))
|
||||
return NULL;
|
||||
return FLOW(sidx.flow);
|
||||
return FLOW(sidx.flowi);
|
||||
}
|
||||
|
||||
/** flow_sidx_t - Index of one side of a flow from common structure
|
||||
/** pif_at_sidx() - Interface for a given flow and side
|
||||
* @sidx: Flow & side index
|
||||
*
|
||||
* Return: pif for the flow & side given by @sidx
|
||||
*/
|
||||
static inline uint8_t pif_at_sidx(flow_sidx_t sidx)
|
||||
{
|
||||
const union flow *flow = flow_at_sidx(sidx);
|
||||
|
||||
if (!flow)
|
||||
return PIF_NONE;
|
||||
return flow->f.pif[sidx.sidei];
|
||||
}
|
||||
|
||||
/** flowside_at_sidx() - Retrieve a specific flowside
|
||||
* @sidx: Flow & side index
|
||||
*
|
||||
* Return: Flowside for the flow & side given by @sidx
|
||||
*/
|
||||
static inline const struct flowside *flowside_at_sidx(flow_sidx_t sidx)
|
||||
{
|
||||
const union flow *flow = flow_at_sidx(sidx);
|
||||
|
||||
if (!flow)
|
||||
return PIF_NONE;
|
||||
|
||||
return &flow->f.side[sidx.sidei];
|
||||
}
|
||||
|
||||
/** flow_sidx_opposite() - Get the other side of the same flow
|
||||
* @sidx: Flow & side index
|
||||
*
|
||||
* Return: sidx for the other side of the same flow as @sidx
|
||||
*/
|
||||
static inline flow_sidx_t flow_sidx_opposite(flow_sidx_t sidx)
|
||||
{
|
||||
if (!flow_sidx_valid(sidx))
|
||||
return FLOW_SIDX_NONE;
|
||||
|
||||
return (flow_sidx_t){.flowi = sidx.flowi, .sidei = !sidx.sidei};
|
||||
}
|
||||
|
||||
/** flow_sidx() - Index of one side of a flow from common structure
|
||||
* @f: Common flow fields pointer
|
||||
* @side: Which side to refer to (0 or 1)
|
||||
* @sidei: Which side to refer to (0 or 1)
|
||||
*
|
||||
* Return: index of @f and @side in the flow table
|
||||
*/
|
||||
static inline flow_sidx_t flow_sidx(const struct flow_common *f,
|
||||
int side)
|
||||
unsigned sidei)
|
||||
{
|
||||
/* cppcheck-suppress [knownConditionTrueFalse, unmatchedSuppression] */
|
||||
ASSERT(side == !!side);
|
||||
ASSERT(sidei == !!sidei);
|
||||
|
||||
return (flow_sidx_t){
|
||||
.side = side,
|
||||
.flow = flow_idx(f),
|
||||
.sidei = sidei,
|
||||
.flowi = flow_idx(f),
|
||||
};
|
||||
}
|
||||
|
||||
/** FLOW_SIDX() - Find the index of one side of a flow
 * @f_:		Flow pointer, either union flow * or protocol specific
 * @sidei:	Which side to index (0 or 1)
 *
 * Return: index of @f_ and @sidei in the flow table
 */
#define FLOW_SIDX(f_, sidei)	(flow_sidx(&(f_)->f, (sidei)))
|
||||
|
||||
union flow *flow_alloc(void);
|
||||
void flow_alloc_cancel(union flow *flow);
|
||||
|
||||
void flow_initiate(union flow *flow, uint8_t pif);
|
||||
void flow_target(union flow *flow, uint8_t pif);
|
||||
const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif,
|
||||
sa_family_t af,
|
||||
const void *saddr, in_port_t sport,
|
||||
const void *daddr, in_port_t dport);
|
||||
const struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif,
|
||||
const union sockaddr_inany *ssa,
|
||||
in_port_t dport);
|
||||
const struct flowside *flow_target_af(union flow *flow, uint8_t pif,
|
||||
sa_family_t af,
|
||||
const void *saddr, in_port_t sport,
|
||||
const void *daddr, in_port_t dport);
|
||||
const struct flowside *flow_target(const struct ctx *c, union flow *flow,
|
||||
uint8_t proto);
|
||||
|
||||
union flow *flow_set_type(union flow *flow, enum flow_type type);
|
||||
#define FLOW_SET_TYPE(flow_, t_, var_) (&flow_set_type((flow_), (t_))->var_)
|
||||
|
189
fwd.c
189
fwd.c
@@ -25,6 +25,7 @@
|
||||
#include "fwd.h"
|
||||
#include "passt.h"
|
||||
#include "lineread.h"
|
||||
#include "flow_table.h"
|
||||
|
||||
/* See enum in kernel's include/net/tcp_states.h */
|
||||
#define UDP_LISTEN 0x07
|
||||
@@ -52,7 +53,7 @@ static void procfs_scan_listen(int fd, unsigned int lstate,
|
||||
return;
|
||||
|
||||
if (lseek(fd, 0, SEEK_SET)) {
|
||||
warn("lseek() failed on /proc/net file: %s", strerror(errno));
|
||||
warn_perror("lseek() failed on /proc/net file");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -128,18 +129,18 @@ void fwd_scan_ports_init(struct ctx *c)
|
||||
|
||||
c->tcp.fwd_in.scan4 = c->tcp.fwd_in.scan6 = -1;
|
||||
c->tcp.fwd_out.scan4 = c->tcp.fwd_out.scan6 = -1;
|
||||
c->udp.fwd_in.f.scan4 = c->udp.fwd_in.f.scan6 = -1;
|
||||
c->udp.fwd_out.f.scan4 = c->udp.fwd_out.f.scan6 = -1;
|
||||
c->udp.fwd_in.scan4 = c->udp.fwd_in.scan6 = -1;
|
||||
c->udp.fwd_out.scan4 = c->udp.fwd_out.scan6 = -1;
|
||||
|
||||
if (c->tcp.fwd_in.mode == FWD_AUTO) {
|
||||
c->tcp.fwd_in.scan4 = open_in_ns(c, "/proc/net/tcp", flags);
|
||||
c->tcp.fwd_in.scan6 = open_in_ns(c, "/proc/net/tcp6", flags);
|
||||
fwd_scan_ports_tcp(&c->tcp.fwd_in, &c->tcp.fwd_out);
|
||||
}
|
||||
if (c->udp.fwd_in.f.mode == FWD_AUTO) {
|
||||
c->udp.fwd_in.f.scan4 = open_in_ns(c, "/proc/net/udp", flags);
|
||||
c->udp.fwd_in.f.scan6 = open_in_ns(c, "/proc/net/udp6", flags);
|
||||
fwd_scan_ports_udp(&c->udp.fwd_in.f, &c->udp.fwd_out.f,
|
||||
if (c->udp.fwd_in.mode == FWD_AUTO) {
|
||||
c->udp.fwd_in.scan4 = open_in_ns(c, "/proc/net/udp", flags);
|
||||
c->udp.fwd_in.scan6 = open_in_ns(c, "/proc/net/udp6", flags);
|
||||
fwd_scan_ports_udp(&c->udp.fwd_in, &c->udp.fwd_out,
|
||||
&c->tcp.fwd_in, &c->tcp.fwd_out);
|
||||
}
|
||||
if (c->tcp.fwd_out.mode == FWD_AUTO) {
|
||||
@@ -147,10 +148,176 @@ void fwd_scan_ports_init(struct ctx *c)
|
||||
c->tcp.fwd_out.scan6 = open("/proc/net/tcp6", flags);
|
||||
fwd_scan_ports_tcp(&c->tcp.fwd_out, &c->tcp.fwd_in);
|
||||
}
|
||||
if (c->udp.fwd_out.f.mode == FWD_AUTO) {
|
||||
c->udp.fwd_out.f.scan4 = open("/proc/net/udp", flags);
|
||||
c->udp.fwd_out.f.scan6 = open("/proc/net/udp6", flags);
|
||||
fwd_scan_ports_udp(&c->udp.fwd_out.f, &c->udp.fwd_in.f,
|
||||
if (c->udp.fwd_out.mode == FWD_AUTO) {
|
||||
c->udp.fwd_out.scan4 = open("/proc/net/udp", flags);
|
||||
c->udp.fwd_out.scan6 = open("/proc/net/udp6", flags);
|
||||
fwd_scan_ports_udp(&c->udp.fwd_out, &c->udp.fwd_in,
|
||||
&c->tcp.fwd_out, &c->tcp.fwd_in);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* fwd_nat_from_tap() - Determine to forward a flow from the tap interface
|
||||
* @c: Execution context
|
||||
* @proto: Protocol (IP L4 protocol number)
|
||||
* @ini: Flow address information of the initiating side
|
||||
* @tgt: Flow address information on the target side (updated)
|
||||
*
|
||||
* Return: pif of the target interface to forward the flow to, PIF_NONE if the
|
||||
* flow cannot or should not be forwarded at all.
|
||||
*/
|
||||
uint8_t fwd_nat_from_tap(const struct ctx *c, uint8_t proto,
|
||||
const struct flowside *ini, struct flowside *tgt)
|
||||
{
|
||||
tgt->eaddr = ini->faddr;
|
||||
tgt->eport = ini->fport;
|
||||
|
||||
if (proto == IPPROTO_UDP && tgt->eport == 53 &&
|
||||
inany_equals4(&tgt->eaddr, &c->ip4.dns_match)) {
|
||||
tgt->eaddr = inany_from_v4(c->ip4.dns_host);
|
||||
} else if (proto == IPPROTO_UDP && tgt->eport == 53 &&
|
||||
inany_equals6(&tgt->eaddr, &c->ip6.dns_match)) {
|
||||
tgt->eaddr.a6 = c->ip6.dns_host;
|
||||
} else if (!c->no_map_gw) {
|
||||
if (inany_equals4(&tgt->eaddr, &c->ip4.gw))
|
||||
tgt->eaddr = inany_loopback4;
|
||||
else if (inany_equals6(&tgt->eaddr, &c->ip6.gw))
|
||||
tgt->eaddr = inany_loopback6;
|
||||
}
|
||||
|
||||
/* The relevant addr_out controls the host side source address. This
|
||||
* may be unspecified, which allows the kernel to pick an address.
|
||||
*/
|
||||
if (inany_v4(&tgt->eaddr))
|
||||
tgt->faddr = inany_from_v4(c->ip4.addr_out);
|
||||
else
|
||||
tgt->faddr.a6 = c->ip6.addr_out;
|
||||
|
||||
/* Let the kernel pick a host side source port */
|
||||
tgt->fport = 0;
|
||||
if (proto == IPPROTO_UDP) {
|
||||
/* But for UDP we preserve the source port */
|
||||
tgt->fport = ini->eport;
|
||||
}
|
||||
|
||||
return PIF_HOST;
|
||||
}
|
||||
|
||||
/**
|
||||
* fwd_nat_from_splice() - Determine to forward a flow from the splice interface
|
||||
* @c: Execution context
|
||||
* @proto: Protocol (IP L4 protocol number)
|
||||
* @ini: Flow address information of the initiating side
|
||||
* @tgt: Flow address information on the target side (updated)
|
||||
*
|
||||
* Return: pif of the target interface to forward the flow to, PIF_NONE if the
|
||||
* flow cannot or should not be forwarded at all.
|
||||
*/
|
||||
uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto,
|
||||
const struct flowside *ini, struct flowside *tgt)
|
||||
{
|
||||
if (!inany_is_loopback(&ini->eaddr) ||
|
||||
(!inany_is_loopback(&ini->faddr) && !inany_is_unspecified(&ini->faddr))) {
|
||||
char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN];
|
||||
|
||||
debug("Non loopback address on %s: [%s]:%hu -> [%s]:%hu",
|
||||
pif_name(PIF_SPLICE),
|
||||
inany_ntop(&ini->eaddr, estr, sizeof(estr)), ini->eport,
|
||||
inany_ntop(&ini->faddr, fstr, sizeof(fstr)), ini->fport);
|
||||
return PIF_NONE;
|
||||
}
|
||||
|
||||
if (inany_v4(&ini->eaddr))
|
||||
tgt->eaddr = inany_loopback4;
|
||||
else
|
||||
tgt->eaddr = inany_loopback6;
|
||||
|
||||
/* Preserve the specific loopback adddress used, but let the kernel pick
|
||||
* a source port on the target side
|
||||
*/
|
||||
tgt->faddr = ini->eaddr;
|
||||
tgt->fport = 0;
|
||||
|
||||
tgt->eport = ini->fport;
|
||||
if (proto == IPPROTO_TCP)
|
||||
tgt->eport += c->tcp.fwd_out.delta[tgt->eport];
|
||||
else if (proto == IPPROTO_UDP)
|
||||
tgt->eport += c->udp.fwd_out.delta[tgt->eport];
|
||||
|
||||
/* Let the kernel pick a host side source port */
|
||||
tgt->fport = 0;
|
||||
if (proto == IPPROTO_UDP)
|
||||
/* But for UDP preserve the source port */
|
||||
tgt->fport = ini->eport;
|
||||
|
||||
return PIF_HOST;
|
||||
}
|
||||
|
||||
/**
|
||||
* fwd_nat_from_host() - Determine to forward a flow from the host interface
|
||||
* @c: Execution context
|
||||
* @proto: Protocol (IP L4 protocol number)
|
||||
* @ini: Flow address information of the initiating side
|
||||
* @tgt: Flow address information on the target side (updated)
|
||||
*
|
||||
* Return: pif of the target interface to forward the flow to, PIF_NONE if the
|
||||
* flow cannot or should not be forwarded at all.
|
||||
*/
|
||||
uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
|
||||
const struct flowside *ini, struct flowside *tgt)
|
||||
{
|
||||
/* Common for spliced and non-spliced cases */
|
||||
tgt->eport = ini->fport;
|
||||
if (proto == IPPROTO_TCP)
|
||||
tgt->eport += c->tcp.fwd_in.delta[tgt->eport];
|
||||
else if (proto == IPPROTO_UDP)
|
||||
tgt->eport += c->udp.fwd_in.delta[tgt->eport];
|
||||
|
||||
if (c->mode == MODE_PASTA && inany_is_loopback(&ini->eaddr) &&
|
||||
(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
|
||||
/* spliceable */
|
||||
|
||||
/* Preserve the specific loopback adddress used, but let the
|
||||
* kernel pick a source port on the target side
|
||||
*/
|
||||
tgt->faddr = ini->eaddr;
|
||||
tgt->fport = 0;
|
||||
if (proto == IPPROTO_UDP)
|
||||
/* But for UDP preserve the source port */
|
||||
tgt->fport = ini->eport;
|
||||
|
||||
if (inany_v4(&ini->eaddr))
|
||||
tgt->eaddr = inany_loopback4;
|
||||
else
|
||||
tgt->eaddr = inany_loopback6;
|
||||
|
||||
return PIF_SPLICE;
|
||||
}
|
||||
|
||||
tgt->faddr = ini->eaddr;
|
||||
tgt->fport = ini->eport;
|
||||
|
||||
if (inany_is_loopback4(&tgt->faddr) ||
|
||||
inany_is_unspecified4(&tgt->faddr) ||
|
||||
inany_equals4(&tgt->faddr, &c->ip4.addr_seen)) {
|
||||
tgt->faddr = inany_from_v4(c->ip4.gw);
|
||||
} else if (inany_is_loopback6(&tgt->faddr) ||
|
||||
inany_equals6(&tgt->faddr, &c->ip6.addr_seen) ||
|
||||
inany_equals6(&tgt->faddr, &c->ip6.addr)) {
|
||||
if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw))
|
||||
tgt->faddr.a6 = c->ip6.gw;
|
||||
else
|
||||
tgt->faddr.a6 = c->ip6.addr_ll;
|
||||
}
|
||||
|
||||
if (inany_v4(&tgt->faddr)) {
|
||||
tgt->eaddr = inany_from_v4(c->ip4.addr_seen);
|
||||
} else {
|
||||
if (inany_is_linklocal6(&tgt->faddr))
|
||||
tgt->eaddr.a6 = c->ip6.addr_ll_seen;
|
||||
else
|
||||
tgt->eaddr.a6 = c->ip6.addr_seen;
|
||||
}
|
||||
|
||||
return PIF_TAP;
|
||||
}
|
||||
|
9
fwd.h
9
fwd.h
@@ -7,6 +7,8 @@
|
||||
#ifndef FWD_H
|
||||
#define FWD_H
|
||||
|
||||
struct flowside;
|
||||
|
||||
/* Number of ports for both TCP and UDP */
|
||||
#define NUM_PORTS (1U << 16)
|
||||
|
||||
@@ -42,4 +44,11 @@ void fwd_scan_ports_udp(struct fwd_ports *fwd, const struct fwd_ports *rev,
|
||||
const struct fwd_ports *tcp_rev);
|
||||
void fwd_scan_ports_init(struct ctx *c);
|
||||
|
||||
uint8_t fwd_nat_from_tap(const struct ctx *c, uint8_t proto,
|
||||
const struct flowside *ini, struct flowside *tgt);
|
||||
uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto,
|
||||
const struct flowside *ini, struct flowside *tgt);
|
||||
uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
|
||||
const struct flowside *ini, struct flowside *tgt);
|
||||
|
||||
#endif /* FWD_H */
|
||||
|
121
icmp.c
121
icmp.c
@@ -45,10 +45,23 @@
|
||||
#define ICMP_ECHO_TIMEOUT 60 /* s, timeout for ICMP socket activity */
|
||||
#define ICMP_NUM_IDS (1U << 16)
|
||||
|
||||
#define PINGF(idx) (&(FLOW(idx)->ping))
|
||||
/**
|
||||
* ping_at_sidx() - Get ping specific flow at given sidx
|
||||
* @sidx: Flow and side to retrieve
|
||||
*
|
||||
* Return: ping specific flow at @sidx, or NULL of @sidx is invalid. Asserts if
|
||||
* the flow at @sidx is not FLOW_PING4 or FLOW_PING6
|
||||
*/
|
||||
static struct icmp_ping_flow *ping_at_sidx(flow_sidx_t sidx)
|
||||
{
|
||||
union flow *flow = flow_at_sidx(sidx);
|
||||
|
||||
/* Indexed by ICMP echo identifier */
|
||||
static struct icmp_ping_flow *icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
|
||||
if (!flow)
|
||||
return NULL;
|
||||
|
||||
ASSERT(flow->f.type == FLOW_PING4 || flow->f.type == FLOW_PING6);
|
||||
return &flow->ping;
|
||||
}
|
||||
|
||||
/**
|
||||
* icmp_sock_handler() - Handle new data from ICMP or ICMPv6 socket
|
||||
@@ -57,7 +70,8 @@ static struct icmp_ping_flow *icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS];
|
||||
*/
|
||||
void icmp_sock_handler(const struct ctx *c, union epoll_ref ref)
|
||||
{
|
||||
struct icmp_ping_flow *pingf = PINGF(ref.flowside.flow);
|
||||
struct icmp_ping_flow *pingf = ping_at_sidx(ref.flowside);
|
||||
const struct flowside *ini = &pingf->f.side[INISIDE];
|
||||
union sockaddr_inany sr;
|
||||
socklen_t sl = sizeof(sr);
|
||||
char buf[USHRT_MAX];
|
||||
@@ -83,7 +97,7 @@ void icmp_sock_handler(const struct ctx *c, union epoll_ref ref)
|
||||
goto unexpected;
|
||||
|
||||
/* Adjust packet back to guest-side ID */
|
||||
ih4->un.echo.id = htons(pingf->id);
|
||||
ih4->un.echo.id = htons(ini->eport);
|
||||
seq = ntohs(ih4->un.echo.sequence);
|
||||
} else if (pingf->f.type == FLOW_PING6) {
|
||||
struct icmp6hdr *ih6 = (struct icmp6hdr *)buf;
|
||||
@@ -93,7 +107,7 @@ void icmp_sock_handler(const struct ctx *c, union epoll_ref ref)
|
||||
goto unexpected;
|
||||
|
||||
/* Adjust packet back to guest-side ID */
|
||||
ih6->icmp6_identifier = htons(pingf->id);
|
||||
ih6->icmp6_identifier = htons(ini->eport);
|
||||
seq = ntohs(ih6->icmp6_sequence);
|
||||
} else {
|
||||
ASSERT(0);
|
||||
@@ -108,13 +122,20 @@ void icmp_sock_handler(const struct ctx *c, union epoll_ref ref)
|
||||
}
|
||||
|
||||
flow_dbg(pingf, "echo reply to tap, ID: %"PRIu16", seq: %"PRIu16,
|
||||
pingf->id, seq);
|
||||
ini->eport, seq);
|
||||
|
||||
if (pingf->f.type == FLOW_PING4)
|
||||
tap_icmp4_send(c, sr.sa4.sin_addr, tap_ip4_daddr(c), buf, n);
|
||||
else if (pingf->f.type == FLOW_PING6)
|
||||
tap_icmp6_send(c, &sr.sa6.sin6_addr,
|
||||
tap_ip6_daddr(c, &sr.sa6.sin6_addr), buf, n);
|
||||
if (pingf->f.type == FLOW_PING4) {
|
||||
const struct in_addr *saddr = inany_v4(&ini->faddr);
|
||||
const struct in_addr *daddr = inany_v4(&ini->eaddr);
|
||||
|
||||
ASSERT(saddr && daddr); /* Must have IPv4 addresses */
|
||||
tap_icmp4_send(c, *saddr, *daddr, buf, n);
|
||||
} else if (pingf->f.type == FLOW_PING6) {
|
||||
const struct in6_addr *saddr = &ini->faddr.a6;
|
||||
const struct in6_addr *daddr = &ini->eaddr.a6;
|
||||
|
||||
tap_icmp6_send(c, saddr, daddr, buf, n);
|
||||
}
|
||||
return;
|
||||
|
||||
unexpected:
|
||||
@@ -129,58 +150,54 @@ unexpected:
|
||||
static void icmp_ping_close(const struct ctx *c,
|
||||
const struct icmp_ping_flow *pingf)
|
||||
{
|
||||
uint16_t id = pingf->id;
|
||||
|
||||
epoll_ctl(c->epollfd, EPOLL_CTL_DEL, pingf->sock, NULL);
|
||||
close(pingf->sock);
|
||||
|
||||
if (pingf->f.type == FLOW_PING4)
|
||||
icmp_id_map[V4][id] = NULL;
|
||||
else
|
||||
icmp_id_map[V6][id] = NULL;
|
||||
flow_hash_remove(c, FLOW_SIDX(pingf, INISIDE));
|
||||
}
|
||||
|
||||
/**
|
||||
* icmp_ping_new() - Prepare a new ping socket for a new id
|
||||
* @c: Execution context
|
||||
* @id_sock: Pointer to ping flow entry slot in icmp_id_map[] to update
|
||||
* @af: Address family, AF_INET or AF_INET6
|
||||
* @id: ICMP id for the new socket
|
||||
* @saddr: Source address
|
||||
* @daddr: Destination address
|
||||
*
|
||||
* Return: Newly opened ping flow, or NULL on failure
|
||||
*/
|
||||
static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
|
||||
struct icmp_ping_flow **id_sock,
|
||||
sa_family_t af, uint16_t id)
|
||||
sa_family_t af, uint16_t id,
|
||||
const void *saddr, const void *daddr)
|
||||
{
|
||||
uint8_t proto = af == AF_INET ? IPPROTO_ICMP : IPPROTO_ICMPV6;
|
||||
uint8_t flowtype = af == AF_INET ? FLOW_PING4 : FLOW_PING6;
|
||||
union epoll_ref ref = { .type = EPOLL_TYPE_PING };
|
||||
union flow *flow = flow_alloc();
|
||||
struct icmp_ping_flow *pingf;
|
||||
const void *bind_addr;
|
||||
const char *bind_if;
|
||||
const struct flowside *tgt;
|
||||
|
||||
if (!flow)
|
||||
return NULL;
|
||||
|
||||
flow_initiate(flow, PIF_TAP);
|
||||
flow_target(flow, PIF_HOST);
|
||||
flow_initiate_af(flow, PIF_TAP, af, saddr, id, daddr, id);
|
||||
if (!(tgt = flow_target(c, flow, proto)))
|
||||
goto cancel;
|
||||
|
||||
if (flow->f.pif[TGTSIDE] != PIF_HOST) {
|
||||
flow_err(flow, "No support for forwarding %s from %s to %s",
|
||||
proto == IPPROTO_ICMP ? "ICMP" : "ICMPv6",
|
||||
pif_name(flow->f.pif[INISIDE]),
|
||||
pif_name(flow->f.pif[TGTSIDE]));
|
||||
goto cancel;
|
||||
}
|
||||
|
||||
pingf = FLOW_SET_TYPE(flow, flowtype, ping);
|
||||
|
||||
pingf->seq = -1;
|
||||
pingf->id = id;
|
||||
|
||||
if (af == AF_INET) {
|
||||
bind_addr = &c->ip4.addr_out;
|
||||
bind_if = c->ip4.ifname_out;
|
||||
} else {
|
||||
bind_addr = &c->ip6.addr_out;
|
||||
bind_if = c->ip6.ifname_out;
|
||||
}
|
||||
|
||||
ref.flowside = FLOW_SIDX(flow, TGTSIDE);
|
||||
pingf->sock = sock_l4(c, af, flow_proto[flowtype], bind_addr, bind_if,
|
||||
0, ref.data);
|
||||
pingf->sock = flowside_sock_l4(c, EPOLL_TYPE_PING, PIF_HOST,
|
||||
tgt, ref.data);
|
||||
|
||||
if (pingf->sock < 0) {
|
||||
warn("Cannot open \"ping\" socket. You might need to:");
|
||||
@@ -194,7 +211,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
|
||||
|
||||
flow_dbg(pingf, "new socket %i for echo ID %"PRIu16, pingf->sock, id);
|
||||
|
||||
*id_sock = pingf;
|
||||
flow_hash_insert(c, FLOW_SIDX(pingf, INISIDE));
|
||||
|
||||
FLOW_ACTIVATE(pingf);
|
||||
|
||||
@@ -221,11 +238,14 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
|
||||
const void *saddr, const void *daddr,
|
||||
const struct pool *p, const struct timespec *now)
|
||||
{
|
||||
union sockaddr_inany sa = { .sa_family = af };
|
||||
const socklen_t sl = af == AF_INET ? sizeof(sa.sa4) : sizeof(sa.sa6);
|
||||
struct icmp_ping_flow *pingf, **id_sock;
|
||||
struct icmp_ping_flow *pingf;
|
||||
const struct flowside *tgt;
|
||||
union sockaddr_inany sa;
|
||||
size_t dlen, l4len;
|
||||
uint16_t id, seq;
|
||||
union flow *flow;
|
||||
uint8_t proto;
|
||||
socklen_t sl;
|
||||
void *pkt;
|
||||
|
||||
(void)saddr;
|
||||
@@ -243,10 +263,9 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
|
||||
if (ih->type != ICMP_ECHO)
|
||||
return 1;
|
||||
|
||||
proto = IPPROTO_ICMP;
|
||||
id = ntohs(ih->un.echo.id);
|
||||
id_sock = &icmp_id_map[V4][id];
|
||||
seq = ntohs(ih->un.echo.sequence);
|
||||
sa.sa4.sin_addr = *(struct in_addr *)daddr;
|
||||
} else if (af == AF_INET6) {
|
||||
const struct icmp6hdr *ih;
|
||||
|
||||
@@ -259,21 +278,27 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
|
||||
if (ih->icmp6_type != ICMPV6_ECHO_REQUEST)
|
||||
return 1;
|
||||
|
||||
proto = IPPROTO_ICMPV6;
|
||||
id = ntohs(ih->icmp6_identifier);
|
||||
id_sock = &icmp_id_map[V6][id];
|
||||
seq = ntohs(ih->icmp6_sequence);
|
||||
sa.sa6.sin6_addr = *(struct in6_addr *)daddr;
|
||||
sa.sa6.sin6_scope_id = c->ifi6;
|
||||
} else {
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
if (!(pingf = *id_sock))
|
||||
if (!(pingf = icmp_ping_new(c, id_sock, af, id)))
|
||||
flow = flow_at_sidx(flow_lookup_af(c, proto, PIF_TAP,
|
||||
af, saddr, daddr, id, id));
|
||||
|
||||
if (flow)
|
||||
pingf = &flow->ping;
|
||||
else if (!(pingf = icmp_ping_new(c, af, id, saddr, daddr)))
|
||||
return 1;
|
||||
|
||||
tgt = &pingf->f.side[TGTSIDE];
|
||||
|
||||
ASSERT(flow_proto[pingf->f.type] == proto);
|
||||
pingf->ts = now->tv_sec;
|
||||
|
||||
pif_sockaddr(c, &sa, &sl, PIF_HOST, &tgt->eaddr, 0);
|
||||
if (sendto(pingf->sock, pkt, l4len, MSG_NOSIGNAL, &sa.sa, sl) < 0) {
|
||||
flow_dbg(pingf, "failed to relay request to socket: %s",
|
||||
strerror(errno));
|
||||
|
@@ -13,7 +13,6 @@
|
||||
* @seq: Last sequence number sent to tap, host order, -1: not sent yet
|
||||
* @sock: "ping" socket
|
||||
* @ts: Last associated activity from tap, seconds
|
||||
* @id: ICMP id for the flow as seen by the guest
|
||||
*/
|
||||
struct icmp_ping_flow {
|
||||
/* Must be first element */
|
||||
@@ -22,7 +21,6 @@ struct icmp_ping_flow {
|
||||
int seq;
|
||||
int sock;
|
||||
time_t ts;
|
||||
uint16_t id;
|
||||
};
|
||||
|
||||
bool icmp_ping_timer(const struct ctx *c, const struct icmp_ping_flow *pingf,
|
||||
|
2
inany.h
2
inany.h
@@ -187,7 +187,6 @@ static inline bool inany_is_unspecified(const union inany_addr *a)
|
||||
*
|
||||
* Return: true if @a is in fe80::/10 (IPv6 link local unicast)
|
||||
*/
|
||||
/* cppcheck-suppress unusedFunction */
|
||||
static inline bool inany_is_linklocal6(const union inany_addr *a)
|
||||
{
|
||||
return IN6_IS_ADDR_LINKLOCAL(&a->a6);
|
||||
@@ -212,7 +211,6 @@ static inline bool inany_is_multicast(const union inany_addr *a)
|
||||
*
|
||||
* Return: true if @a is specified and a unicast address
|
||||
*/
|
||||
/* cppcheck-suppress unusedFunction */
|
||||
static inline bool inany_is_unicast(const union inany_addr *a)
|
||||
{
|
||||
return !inany_is_unspecified(a) && !inany_is_multicast(a);
|
||||
|
39
iov.c
39
iov.c
@@ -156,42 +156,3 @@ size_t iov_size(const struct iovec *iov, size_t iov_cnt)
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
/**
 * iov_copy - Build a scatter/gather I/O vector (struct iovec) describing a
 *            byte range of another one.
 *
 * No payload bytes are copied: each destination element is set to point into
 * the buffer of the corresponding source element.
 *
 * @dst_iov:      Pointer to the destination array of struct iovec to fill in.
 * @dst_iov_cnt:  Number of elements in the destination iov array.
 * @iov:          Pointer to the source array of struct iovec describing
 *                the scatter/gather I/O vector to take the range from.
 * @iov_cnt:      Number of elements in the source iov array.
 * @offset:       Offset within the source iov where the range starts.
 * @bytes:        Total number of bytes the range should cover.
 *
 * Returns:       The number of elements filled in the destination iov array.
 */
/* cppcheck-suppress unusedFunction */
unsigned iov_copy(struct iovec *dst_iov, size_t dst_iov_cnt,
		  const struct iovec *iov, size_t iov_cnt,
		  size_t offset, size_t bytes)
{
	unsigned int src, dst = 0;

	/* Presumably yields the index of the source element containing
	 * @offset and rewrites @offset relative to that element -- confirm
	 * against iov_skip_bytes()
	 */
	src = iov_skip_bytes(iov, iov_cnt, offset, &offset);

	while (src < iov_cnt && dst < dst_iov_cnt && bytes) {
		size_t len = MIN(bytes, iov[src].iov_len - offset);

		dst_iov[dst].iov_base = (char *)iov[src].iov_base + offset;
		dst_iov[dst].iov_len = len;

		bytes -= len;
		offset = 0;	/* Only the first element starts mid-buffer */
		src++;
		dst++;
	}

	return dst;
}
|
||||
|
3
iov.h
3
iov.h
@@ -28,7 +28,4 @@ size_t iov_from_buf(const struct iovec *iov, size_t iov_cnt,
|
||||
size_t iov_to_buf(const struct iovec *iov, size_t iov_cnt,
|
||||
size_t offset, void *buf, size_t bytes);
|
||||
size_t iov_size(const struct iovec *iov, size_t iov_cnt);
|
||||
unsigned iov_copy(struct iovec *dst_iov, size_t dst_iov_cnt,
|
||||
const struct iovec *iov, size_t iov_cnt,
|
||||
size_t offset, size_t bytes);
|
||||
#endif /* IOVEC_H */
|
||||
|
56
isolation.c
56
isolation.c
@@ -105,7 +105,7 @@ static void drop_caps_ep_except(uint64_t keep)
|
||||
int i;
|
||||
|
||||
if (syscall(SYS_capget, &hdr, data))
|
||||
die("Couldn't get current capabilities: %s", strerror(errno));
|
||||
die_perror("Couldn't get current capabilities");
|
||||
|
||||
for (i = 0; i < CAP_WORDS; i++) {
|
||||
uint32_t mask = keep >> (32 * i);
|
||||
@@ -115,7 +115,7 @@ static void drop_caps_ep_except(uint64_t keep)
|
||||
}
|
||||
|
||||
if (syscall(SYS_capset, &hdr, data))
|
||||
die("Couldn't drop capabilities: %s", strerror(errno));
|
||||
die_perror("Couldn't drop capabilities");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -152,19 +152,17 @@ static void clamp_caps(void)
|
||||
*/
|
||||
if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0) &&
|
||||
errno != EINVAL && errno != EPERM)
|
||||
die("Couldn't drop cap %i from bounding set: %s",
|
||||
i, strerror(errno));
|
||||
die_perror("Couldn't drop cap %i from bounding set", i);
|
||||
}
|
||||
|
||||
if (syscall(SYS_capget, &hdr, data))
|
||||
die("Couldn't get current capabilities: %s", strerror(errno));
|
||||
die_perror("Couldn't get current capabilities");
|
||||
|
||||
for (i = 0; i < CAP_WORDS; i++)
|
||||
data[i].inheritable = 0;
|
||||
|
||||
if (syscall(SYS_capset, &hdr, data))
|
||||
die("Couldn't drop inheritable capabilities: %s",
|
||||
strerror(errno));
|
||||
die_perror("Couldn't drop inheritable capabilities");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -234,34 +232,30 @@ void isolate_user(uid_t uid, gid_t gid, bool use_userns, const char *userns,
|
||||
if (setgroups(0, NULL)) {
|
||||
/* If we don't have CAP_SETGID, this will EPERM */
|
||||
if (errno != EPERM)
|
||||
die("Can't drop supplementary groups: %s",
|
||||
strerror(errno));
|
||||
die_perror("Can't drop supplementary groups");
|
||||
}
|
||||
|
||||
if (setgid(gid) != 0)
|
||||
die("Can't set GID to %u: %s", gid, strerror(errno));
|
||||
die_perror("Can't set GID to %u", gid);
|
||||
|
||||
if (setuid(uid) != 0)
|
||||
die("Can't set UID to %u: %s", uid, strerror(errno));
|
||||
die_perror("Can't set UID to %u", uid);
|
||||
|
||||
if (*userns) { /* If given a userns, join it */
|
||||
int ufd;
|
||||
|
||||
ufd = open(userns, O_RDONLY | O_CLOEXEC);
|
||||
if (ufd < 0)
|
||||
die("Couldn't open user namespace %s: %s",
|
||||
userns, strerror(errno));
|
||||
die_perror("Couldn't open user namespace %s", userns);
|
||||
|
||||
if (setns(ufd, CLONE_NEWUSER) != 0)
|
||||
die("Couldn't enter user namespace %s: %s",
|
||||
userns, strerror(errno));
|
||||
die_perror("Couldn't enter user namespace %s", userns);
|
||||
|
||||
close(ufd);
|
||||
|
||||
} else if (use_userns) { /* Create and join a new userns */
|
||||
if (unshare(CLONE_NEWUSER) != 0)
|
||||
die("Couldn't create user namespace: %s",
|
||||
strerror(errno));
|
||||
die_perror("Couldn't create user namespace");
|
||||
}
|
||||
|
||||
/* Joining a new userns gives us full capabilities; drop the
|
||||
@@ -312,38 +306,38 @@ int isolate_prefork(const struct ctx *c)
|
||||
* PID namespace. For passt, use CLONE_NEWPID anyway, in case somebody
|
||||
* ever gets around seccomp profiles -- there's no harm in passing it.
|
||||
*/
|
||||
if (!c->foreground || c->mode == MODE_PASST)
|
||||
if (!c->foreground || c->mode != MODE_PASTA)
|
||||
flags |= CLONE_NEWPID;
|
||||
|
||||
if (unshare(flags)) {
|
||||
perror("unshare");
|
||||
err_perror("Failed to detach isolating namespaces");
|
||||
return -errno;
|
||||
}
|
||||
|
||||
if (mount("", "/", "", MS_UNBINDABLE | MS_REC, NULL)) {
|
||||
perror("mount /");
|
||||
err_perror("Failed to remount /");
|
||||
return -errno;
|
||||
}
|
||||
|
||||
if (mount("", TMPDIR, "tmpfs",
|
||||
MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY,
|
||||
"nr_inodes=2,nr_blocks=0")) {
|
||||
perror("mount tmpfs");
|
||||
err_perror("Failed to mount empty tmpfs for pivot_root()");
|
||||
return -errno;
|
||||
}
|
||||
|
||||
if (chdir(TMPDIR)) {
|
||||
perror("chdir");
|
||||
err_perror("Failed to change directory into empty tmpfs");
|
||||
return -errno;
|
||||
}
|
||||
|
||||
if (syscall(SYS_pivot_root, ".", ".")) {
|
||||
perror("pivot_root");
|
||||
err_perror("Failed to pivot_root() into empty tmpfs");
|
||||
return -errno;
|
||||
}
|
||||
|
||||
if (umount2(".", MNT_DETACH | UMOUNT_NOFOLLOW)) {
|
||||
perror("umount2");
|
||||
err_perror("Failed to unmount original root filesystem");
|
||||
return -errno;
|
||||
}
|
||||
|
||||
@@ -379,17 +373,15 @@ void isolate_postfork(const struct ctx *c)
|
||||
|
||||
prctl(PR_SET_DUMPABLE, 0);
|
||||
|
||||
if (c->mode == MODE_PASST) {
|
||||
prog.len = (unsigned short)ARRAY_SIZE(filter_passt);
|
||||
prog.filter = filter_passt;
|
||||
} else {
|
||||
if (c->mode == MODE_PASTA) {
|
||||
prog.len = (unsigned short)ARRAY_SIZE(filter_pasta);
|
||||
prog.filter = filter_pasta;
|
||||
} else {
|
||||
prog.len = (unsigned short)ARRAY_SIZE(filter_passt);
|
||||
prog.filter = filter_passt;
|
||||
}
|
||||
|
||||
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
|
||||
prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
|
||||
perror("prctl");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog))
|
||||
die_perror("Failed to apply seccomp filter");
|
||||
}
|
||||
|
10
lineread.c
10
lineread.c
@@ -39,13 +39,11 @@ void lineread_init(struct lineread *lr, int fd)
|
||||
*
|
||||
* Return: length of line in bytes, -1 if no line was found
|
||||
*/
|
||||
static int peek_line(struct lineread *lr, bool eof)
|
||||
static ssize_t peek_line(struct lineread *lr, bool eof)
|
||||
{
|
||||
char *nl;
|
||||
|
||||
/* Sanity checks (which also document invariants) */
|
||||
ASSERT(lr->count >= 0);
|
||||
ASSERT(lr->next_line >= 0);
|
||||
ASSERT(lr->next_line + lr->count >= lr->next_line);
|
||||
ASSERT(lr->next_line + lr->count <= LINEREAD_BUFFER_SIZE);
|
||||
|
||||
@@ -74,13 +72,13 @@ static int peek_line(struct lineread *lr, bool eof)
|
||||
*
|
||||
* Return: Length of line read on success, 0 on EOF, negative on error
|
||||
*/
|
||||
int lineread_get(struct lineread *lr, char **line)
|
||||
ssize_t lineread_get(struct lineread *lr, char **line)
|
||||
{
|
||||
bool eof = false;
|
||||
int line_len;
|
||||
ssize_t line_len;
|
||||
|
||||
while ((line_len = peek_line(lr, eof)) < 0) {
|
||||
int rc;
|
||||
ssize_t rc;
|
||||
|
||||
if ((lr->next_line + lr->count) == LINEREAD_BUFFER_SIZE) {
|
||||
/* No space at end */
|
||||
|
@@ -18,14 +18,15 @@
|
||||
* @buf: Buffer storing data read from file.
|
||||
*/
|
||||
struct lineread {
|
||||
int fd; int next_line;
|
||||
int count;
|
||||
int fd;
|
||||
ssize_t next_line;
|
||||
ssize_t count;
|
||||
|
||||
/* One extra byte for possible trailing \0 */
|
||||
char buf[LINEREAD_BUFFER_SIZE+1];
|
||||
};
|
||||
|
||||
void lineread_init(struct lineread *lr, int fd);
|
||||
int lineread_get(struct lineread *lr, char **line);
|
||||
ssize_t lineread_get(struct lineread *lr, char **line);
|
||||
|
||||
#endif /* _LINEREAD_H */
|
||||
|
73
log.c
73
log.c
@@ -30,13 +30,9 @@
|
||||
#include "util.h"
|
||||
#include "passt.h"
|
||||
|
||||
/* LOG_EARLY means we don't know yet: log everything. LOG_EMERG is unused */
|
||||
#define LOG_EARLY LOG_MASK(LOG_EMERG)
|
||||
|
||||
static int log_sock = -1; /* Optional socket to system logger */
|
||||
static char log_ident[BUFSIZ]; /* Identifier string for openlog() */
|
||||
static int log_mask = LOG_EARLY; /* Current log priority mask */
|
||||
static int log_opt; /* Options for openlog() */
|
||||
static int log_mask; /* Current log priority mask */
|
||||
|
||||
static int log_file = -1; /* Optional log file descriptor */
|
||||
static size_t log_size; /* Maximum log file size in bytes */
|
||||
@@ -45,24 +41,24 @@ static size_t log_cut_size; /* Bytes to cut at start on rotation */
|
||||
static char log_header[BUFSIZ]; /* File header, written back on cuts */
|
||||
|
||||
static time_t log_start; /* Start timestamp */
|
||||
|
||||
int log_trace; /* --trace mode enabled */
|
||||
int log_to_stdout; /* Print to stdout instead of stderr */
|
||||
bool log_conf_parsed; /* Logging options already parsed */
|
||||
bool log_runtime; /* Daemonised, or ready in foreground */
|
||||
|
||||
void vlogmsg(int pri, const char *format, va_list ap)
|
||||
{
|
||||
bool debug_print = (log_mask & LOG_MASK(LOG_DEBUG)) && log_file == -1;
|
||||
bool early_print = LOG_PRI(log_mask) == LOG_EARLY;
|
||||
FILE *out = log_to_stdout ? stdout : stderr;
|
||||
struct timespec tp;
|
||||
|
||||
if (debug_print) {
|
||||
clock_gettime(CLOCK_REALTIME, &tp);
|
||||
fprintf(out, "%lli.%04lli: ",
|
||||
fprintf(stderr, "%lli.%04lli: ",
|
||||
(long long int)tp.tv_sec - log_start,
|
||||
(long long int)tp.tv_nsec / (100L * 1000));
|
||||
}
|
||||
|
||||
if ((log_mask & LOG_MASK(LOG_PRI(pri))) || early_print) {
|
||||
if ((log_mask & LOG_MASK(LOG_PRI(pri))) || !log_conf_parsed) {
|
||||
va_list ap2;
|
||||
|
||||
va_copy(ap2, ap); /* Don't clobber ap, we need it again */
|
||||
@@ -74,13 +70,19 @@ void vlogmsg(int pri, const char *format, va_list ap)
|
||||
va_end(ap2);
|
||||
}
|
||||
|
||||
if (debug_print || (early_print && !(log_opt & LOG_PERROR))) {
|
||||
(void)vfprintf(out, format, ap);
|
||||
if (debug_print || !log_conf_parsed ||
|
||||
(!log_runtime && (log_mask & LOG_MASK(LOG_PRI(pri))))) {
|
||||
(void)vfprintf(stderr, format, ap);
|
||||
if (format[strlen(format)] != '\n')
|
||||
fprintf(out, "\n");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* logmsg() - vlogmsg() wrapper for variable argument lists
|
||||
* @pri: Facility and level map, same as priority for vsyslog()
|
||||
* @format: Message
|
||||
*/
|
||||
void logmsg(int pri, const char *format, ...)
|
||||
{
|
||||
va_list ap;
|
||||
@@ -90,6 +92,23 @@ void logmsg(int pri, const char *format, ...)
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
/**
|
||||
* logmsg_perror() - vlogmsg() wrapper with perror()-like functionality
|
||||
* @pri: Facility and level map, same as priority for vsyslog()
|
||||
* @format: Message
|
||||
*/
|
||||
void logmsg_perror(int pri, const char *format, ...)
|
||||
{
|
||||
int errno_copy = errno;
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, format);
|
||||
vlogmsg(pri, format, ap);
|
||||
va_end(ap);
|
||||
|
||||
logmsg(pri, ": %s", strerror(errno_copy));
|
||||
}
|
||||
|
||||
/* Prefixes for log file messages, indexed by priority */
|
||||
const char *logfile_prefix[] = {
|
||||
NULL, NULL, NULL, /* Unused: LOG_EMERG, LOG_ALERT, LOG_CRIT */
|
||||
@@ -112,13 +131,15 @@ void trace_init(int enable)
|
||||
/**
|
||||
* __openlog() - Non-optional openlog() implementation, for custom vsyslog()
|
||||
* @ident: openlog() identity (program name)
|
||||
* @option: openlog() options
|
||||
* @option: openlog() options, unused
|
||||
* @facility: openlog() facility (LOG_DAEMON)
|
||||
*/
|
||||
void __openlog(const char *ident, int option, int facility)
|
||||
{
|
||||
struct timespec tp;
|
||||
|
||||
(void)option;
|
||||
|
||||
clock_gettime(CLOCK_REALTIME, &tp);
|
||||
log_start = tp.tv_sec;
|
||||
|
||||
@@ -139,7 +160,6 @@ void __openlog(const char *ident, int option, int facility)
|
||||
|
||||
log_mask |= facility;
|
||||
strncpy(log_ident, ident, sizeof(log_ident) - 1);
|
||||
log_opt = option;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -160,21 +180,18 @@ void __setlogmask(int mask)
|
||||
*/
|
||||
void passt_vsyslog(int pri, const char *format, va_list ap)
|
||||
{
|
||||
int prefix_len, n;
|
||||
char buf[BUFSIZ];
|
||||
int n;
|
||||
|
||||
/* Send without timestamp, the system logger should add it */
|
||||
n = prefix_len = snprintf(buf, BUFSIZ, "<%i> %s: ", pri, log_ident);
|
||||
n = snprintf(buf, BUFSIZ, "<%i> %s: ", pri, log_ident);
|
||||
|
||||
n += vsnprintf(buf + n, BUFSIZ - n, format, ap);
|
||||
|
||||
if (format[strlen(format)] != '\n')
|
||||
n += snprintf(buf + n, BUFSIZ - n, "\n");
|
||||
|
||||
if (log_opt & LOG_PERROR)
|
||||
fprintf(stderr, "%s", buf + prefix_len);
|
||||
|
||||
if (log_sock >= 0 && send(log_sock, buf, n, 0) != n)
|
||||
if (log_sock >= 0 && send(log_sock, buf, n, 0) != n && !log_runtime)
|
||||
fprintf(stderr, "Failed to send %i bytes to syslog\n", n);
|
||||
}
|
||||
|
||||
@@ -189,15 +206,13 @@ void logfile_init(const char *name, const char *path, size_t size)
|
||||
char nl = '\n', exe[PATH_MAX] = { 0 };
|
||||
int n;
|
||||
|
||||
if (readlink("/proc/self/exe", exe, PATH_MAX - 1) < 0) {
|
||||
perror("readlink /proc/self/exe");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (readlink("/proc/self/exe", exe, PATH_MAX - 1) < 0)
|
||||
die_perror("Failed to read own /proc/self/exe link");
|
||||
|
||||
log_file = open(path, O_CREAT | O_TRUNC | O_APPEND | O_RDWR | O_CLOEXEC,
|
||||
S_IRUSR | S_IWUSR);
|
||||
if (log_file == -1)
|
||||
die("Couldn't open log file %s: %s", path, strerror(errno));
|
||||
die_perror("Couldn't open log file %s", path);
|
||||
|
||||
log_size = size ? size : LOGFILE_SIZE_DEFAULT;
|
||||
|
||||
@@ -205,10 +220,8 @@ void logfile_init(const char *name, const char *path, size_t size)
|
||||
name, exe, getpid());
|
||||
|
||||
if (write(log_file, log_header, n) <= 0 ||
|
||||
write(log_file, &nl, 1) <= 0) {
|
||||
perror("Couldn't write to log file\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
write(log_file, &nl, 1) <= 0)
|
||||
die_perror("Couldn't write to log file");
|
||||
|
||||
/* For FALLOC_FL_COLLAPSE_RANGE: VFS block size can be up to one page */
|
||||
log_cut_size = ROUND_UP(log_size * LOGFILE_CUT_RATIO / 100, PAGE_SIZE);
|
||||
|
26
log.h
26
log.h
@@ -6,6 +6,7 @@
|
||||
#ifndef LOG_H
|
||||
#define LOG_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <syslog.h>
|
||||
|
||||
#define LOGFILE_SIZE_DEFAULT (1024 * 1024UL)
|
||||
@@ -15,11 +16,18 @@
|
||||
void vlogmsg(int pri, const char *format, va_list ap);
|
||||
void logmsg(int pri, const char *format, ...)
|
||||
__attribute__((format(printf, 2, 3)));
|
||||
void logmsg_perror(int pri, const char *format, ...)
|
||||
__attribute__((format(printf, 2, 3)));
|
||||
|
||||
#define err(...) logmsg(LOG_ERR, __VA_ARGS__)
|
||||
#define warn(...) logmsg(LOG_WARNING, __VA_ARGS__)
|
||||
#define info(...) logmsg(LOG_INFO, __VA_ARGS__)
|
||||
#define debug(...) logmsg(LOG_DEBUG, __VA_ARGS__)
|
||||
#define err(...) logmsg( LOG_ERR, __VA_ARGS__)
|
||||
#define warn(...) logmsg( LOG_WARNING, __VA_ARGS__)
|
||||
#define info(...) logmsg( LOG_INFO, __VA_ARGS__)
|
||||
#define debug(...) logmsg( LOG_DEBUG, __VA_ARGS__)
|
||||
|
||||
#define err_perror(...) logmsg_perror( LOG_ERR, __VA_ARGS__)
|
||||
#define warn_perror(...) logmsg_perror( LOG_WARNING, __VA_ARGS__)
|
||||
#define info_perror(...) logmsg_perror( LOG_INFO, __VA_ARGS__)
|
||||
#define debug_perror(...) logmsg_perror( LOG_DEBUG, __VA_ARGS__)
|
||||
|
||||
#define die(...) \
|
||||
do { \
|
||||
@@ -27,8 +35,16 @@ void logmsg(int pri, const char *format, ...)
|
||||
exit(EXIT_FAILURE); \
|
||||
} while (0)
|
||||
|
||||
#define die_perror(...) \
|
||||
do { \
|
||||
err_perror(__VA_ARGS__); \
|
||||
exit(EXIT_FAILURE); \
|
||||
} while (0)
|
||||
|
||||
extern int log_trace;
|
||||
extern int log_to_stdout;
|
||||
extern bool log_conf_parsed;
|
||||
extern bool log_runtime;
|
||||
|
||||
void trace_init(int enable);
|
||||
#define trace(...) \
|
||||
do { \
|
||||
|
44
netlink.c
44
netlink.c
@@ -133,7 +133,7 @@ static uint32_t nl_send(int s, void *req, uint16_t type,
|
||||
|
||||
n = send(s, req, len, 0);
|
||||
if (n < 0)
|
||||
die("netlink: Failed to send(): %s", strerror(errno));
|
||||
die_perror("netlink: Failed to send()");
|
||||
else if (n < len)
|
||||
die("netlink: Short send (%zd of %zd bytes)", n, len);
|
||||
|
||||
@@ -189,7 +189,7 @@ static struct nlmsghdr *nl_next(int s, char *buf, struct nlmsghdr *nh, ssize_t *
|
||||
|
||||
*n = recv(s, buf, NLBUFSIZ, 0);
|
||||
if (*n < 0)
|
||||
die("netlink: Failed to recv(): %s", strerror(errno));
|
||||
die_perror("netlink: Failed to recv()");
|
||||
|
||||
nh = (struct nlmsghdr *)buf;
|
||||
if (!NLMSG_OK(nh, *n))
|
||||
@@ -269,8 +269,7 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
|
||||
size_t na;
|
||||
|
||||
/* Look for an interface with a default route first, failing that, look
|
||||
* for any interface with a route, and pick it only if it's the only
|
||||
* interface with a route.
|
||||
* for any interface with a route, and pick the first one, if any.
|
||||
*/
|
||||
seq = nl_send(s, &req, RTM_GETROUTE, NLM_F_DUMP, sizeof(req));
|
||||
nl_foreach_oftype(nh, status, s, buf, seq, RTM_NEWROUTE) {
|
||||
@@ -324,19 +323,20 @@ unsigned int nl_get_ext_if(int s, sa_family_t af)
|
||||
warn("netlink: RTM_GETROUTE failed: %s", strerror(-status));
|
||||
|
||||
if (defifi) {
|
||||
if (ndef > 1)
|
||||
if (ndef > 1) {
|
||||
info("Multiple default %s routes, picked first",
|
||||
af_name(af));
|
||||
}
|
||||
return defifi;
|
||||
}
|
||||
|
||||
if (anyifi) {
|
||||
if (nany == 1)
|
||||
return anyifi;
|
||||
|
||||
info("Multiple interfaces with %s routes, use -i to select one",
|
||||
if (nany > 1) {
|
||||
info("Multiple interfaces with %s routes, picked first",
|
||||
af_name(af));
|
||||
}
|
||||
return anyifi;
|
||||
}
|
||||
|
||||
if (!nany)
|
||||
info("No interfaces with usable %s routes", af_name(af));
|
||||
@@ -600,13 +600,22 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
|
||||
|
||||
if (discard)
|
||||
break;
|
||||
} else if (rta->rta_type == RTA_PREFSRC) {
|
||||
/* Host routes might include a preferred source
|
||||
* address, which must be one of the host's
|
||||
* addresses. However, with -a pasta will use a
|
||||
* different namespace address, making such a
|
||||
* route invalid in the namespace. Strip off
|
||||
* RTA_PREFSRC attributes to avoid that. */
|
||||
} else if (rta->rta_type == RTA_PREFSRC ||
|
||||
rta->rta_type == RTA_NH_ID) {
|
||||
/* Strip RTA_PREFSRC attributes: host routes
|
||||
* might include a preferred source address,
|
||||
* which must be one of the host's addresses.
|
||||
* However, with -a, pasta will use a different
|
||||
* namespace address, making such a route
|
||||
* invalid in the namespace.
|
||||
*
|
||||
* Strip RTA_NH_ID attributes: host routes set
|
||||
* up via routing protocols (e.g. OSPF) might
|
||||
* contain a nexthop ID (and not nexthop
|
||||
* objects, which are taken care of in the
|
||||
* RTA_MULTIPATH case above) that's not valid
|
||||
* in the target namespace.
|
||||
*/
|
||||
rta->rta_type = RTA_UNSPEC;
|
||||
}
|
||||
}
|
||||
@@ -655,7 +664,8 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
|
||||
rc = nl_do(s_dst, nh, RTM_NEWROUTE,
|
||||
(flags & ~NLM_F_DUMP_FILTERED) | NLM_F_CREATE,
|
||||
nh->nlmsg_len);
|
||||
if (rc < 0 && rc != -ENETUNREACH && rc != -EEXIST)
|
||||
if (rc < 0 && rc != -EEXIST &&
|
||||
rc != -ENETUNREACH && rc != -EHOSTUNREACH)
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
26
passt.1
26
passt.1
@@ -73,6 +73,9 @@ for performance reasons.
|
||||
|
||||
.SH OPTIONS
|
||||
|
||||
Unless otherwise noted below, \fBif conflicting or multiple options are given,
|
||||
the last one takes effect.\fR
|
||||
|
||||
.TP
|
||||
.BR \-d ", " \-\-debug
|
||||
Be verbose, don't log to the system logger.
|
||||
@@ -93,13 +96,17 @@ Default is to fork into background.
|
||||
|
||||
.TP
|
||||
.BR \-e ", " \-\-stderr
|
||||
Log to standard error too.
|
||||
Default is to log to the system logger only, if started from an interactive
|
||||
terminal, and to both system logger and standard error otherwise.
|
||||
This option has no effect, and is maintained for compatibility purposes only.
|
||||
|
||||
Note that this configuration option is \fBdeprecated\fR and will be removed in a
|
||||
future version.
|
||||
|
||||
.TP
|
||||
.BR \-l ", " \-\-log-file " " \fIPATH\fR
|
||||
Log to file \fIPATH\fR, not to standard error, and not to the system logger.
|
||||
Log to file \fIPATH\fR, and not to the system logger.
|
||||
|
||||
Specifying this option multiple times does \fInot\fR lead to multiple log files:
|
||||
the last given option takes effect.
|
||||
|
||||
.TP
|
||||
.BR \-\-log-size " " \fISIZE\fR
|
||||
@@ -128,6 +135,9 @@ Show version and exit.
|
||||
Capture tap-facing (that is, guest-side or namespace-side) network packets to
|
||||
\fIfile\fR in \fBpcap\fR format.
|
||||
|
||||
Specifying this option multiple times does \fInot\fR lead to multiple capture
|
||||
files: the last given option takes effect.
|
||||
|
||||
.TP
|
||||
.BR \-P ", " \-\-pid " " \fIfile
|
||||
Write own PID to \fIfile\fR once initialisation is done, before forking to
|
||||
@@ -149,8 +159,8 @@ This option can be specified zero (for defaults) to two times (once for IPv4,
|
||||
once for IPv6).
|
||||
By default, assigned IPv4 and IPv6 addresses are taken from the host interfaces
|
||||
with the first default route, if any, for the corresponding IP version. If no
|
||||
default routes are available and there is just one interface with any route,
|
||||
that interface will be chosen instead.
|
||||
default routes are available and there is any interface with any route for a
|
||||
given IP version, the first of these interfaces will be chosen instead.
|
||||
|
||||
.TP
|
||||
.BR \-n ", " \-\-netmask " " \fImask
|
||||
@@ -977,8 +987,8 @@ https://passt.top/passt/lists.
|
||||
Copyright (c) 2020-2022 Red Hat GmbH.
|
||||
|
||||
\fBpasst\fR and \fBpasta\fR are free software: you can redistribute them and/or
|
||||
modify them under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the License, or
|
||||
modify them under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
.SH SEE ALSO
|
||||
|
82
passt.c
82
passt.c
@@ -66,7 +66,8 @@ char *epoll_type_str[] = {
|
||||
[EPOLL_TYPE_TCP_SPLICE] = "connected spliced TCP socket",
|
||||
[EPOLL_TYPE_TCP_LISTEN] = "listening TCP socket",
|
||||
[EPOLL_TYPE_TCP_TIMER] = "TCP timer",
|
||||
[EPOLL_TYPE_UDP] = "UDP socket",
|
||||
[EPOLL_TYPE_UDP_LISTEN] = "listening UDP socket",
|
||||
[EPOLL_TYPE_UDP_REPLY] = "UDP reply socket",
|
||||
[EPOLL_TYPE_PING] = "ICMP/ICMPv6 ping socket",
|
||||
[EPOLL_TYPE_NSQUIT_INOTIFY] = "namespace inotify watch",
|
||||
[EPOLL_TYPE_NSQUIT_TIMER] = "namespace timer watch",
|
||||
@@ -84,7 +85,7 @@ static_assert(ARRAY_SIZE(epoll_type_str) == EPOLL_NUM_TYPES,
|
||||
*/
|
||||
static void post_handler(struct ctx *c, const struct timespec *now)
|
||||
{
|
||||
#define CALL_PROTO_HANDLER(c, now, lc, uc) \
|
||||
#define CALL_PROTO_HANDLER(lc, uc) \
|
||||
do { \
|
||||
extern void \
|
||||
lc ## _defer_handler (struct ctx *c) \
|
||||
@@ -103,9 +104,9 @@ static void post_handler(struct ctx *c, const struct timespec *now)
|
||||
} while (0)
|
||||
|
||||
/* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */
|
||||
CALL_PROTO_HANDLER(c, now, tcp, TCP);
|
||||
CALL_PROTO_HANDLER(tcp, TCP);
|
||||
/* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */
|
||||
CALL_PROTO_HANDLER(c, now, udp, UDP);
|
||||
CALL_PROTO_HANDLER(udp, UDP);
|
||||
|
||||
flow_defer_handler(c, now);
|
||||
#undef CALL_PROTO_HANDLER
|
||||
@@ -136,14 +137,13 @@ static void secret_init(struct ctx *c)
|
||||
}
|
||||
if (dev_random >= 0)
|
||||
close(dev_random);
|
||||
if (random_read < sizeof(c->hash_secret)) {
|
||||
|
||||
if (random_read < sizeof(c->hash_secret))
|
||||
#else
|
||||
if (getrandom(&c->hash_secret, sizeof(c->hash_secret),
|
||||
GRND_RANDOM) < 0) {
|
||||
GRND_RANDOM) < 0)
|
||||
#endif /* !HAS_GETRANDOM */
|
||||
perror("TCP initial sequence getrandom");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
die_perror("Failed to get random bytes for hash table and TCP");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -200,8 +200,8 @@ void exit_handler(int signal)
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
struct epoll_event events[EPOLL_EVENTS];
|
||||
char *log_name, argv0[PATH_MAX], *name;
|
||||
int nfds, i, devnull_fd = -1;
|
||||
char argv0[PATH_MAX], *name;
|
||||
struct ctx c = { 0 };
|
||||
struct rlimit limit;
|
||||
struct timespec now;
|
||||
@@ -225,23 +225,15 @@ int main(int argc, char **argv)
|
||||
strncpy(argv0, argv[0], PATH_MAX - 1);
|
||||
name = basename(argv0);
|
||||
if (strstr(name, "pasta")) {
|
||||
__openlog(log_name = "pasta", LOG_PERROR, LOG_DAEMON);
|
||||
|
||||
sa.sa_handler = pasta_child_handler;
|
||||
if (sigaction(SIGCHLD, &sa, NULL)) {
|
||||
die("Couldn't install signal handlers: %s",
|
||||
strerror(errno));
|
||||
}
|
||||
if (sigaction(SIGCHLD, &sa, NULL))
|
||||
die_perror("Couldn't install signal handlers");
|
||||
|
||||
if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) {
|
||||
die("Couldn't set disposition for SIGPIPE: %s",
|
||||
strerror(errno));
|
||||
}
|
||||
if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
|
||||
die_perror("Couldn't set disposition for SIGPIPE");
|
||||
|
||||
c.mode = MODE_PASTA;
|
||||
} else if (strstr(name, "passt")) {
|
||||
__openlog(log_name = "passt", LOG_PERROR, LOG_DAEMON);
|
||||
|
||||
c.mode = MODE_PASST;
|
||||
} else {
|
||||
exit(EXIT_FAILURE);
|
||||
@@ -250,20 +242,16 @@ int main(int argc, char **argv)
|
||||
madvise(pkt_buf, TAP_BUF_BYTES, MADV_HUGEPAGE);
|
||||
|
||||
c.epollfd = epoll_create1(EPOLL_CLOEXEC);
|
||||
if (c.epollfd == -1) {
|
||||
perror("epoll_create1");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (c.epollfd == -1)
|
||||
die_perror("Failed to create epoll file descriptor");
|
||||
|
||||
if (getrlimit(RLIMIT_NOFILE, &limit))
|
||||
die_perror("Failed to get maximum value of open files limit");
|
||||
|
||||
if (getrlimit(RLIMIT_NOFILE, &limit)) {
|
||||
perror("getrlimit");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
c.nofile = limit.rlim_cur = limit.rlim_max;
|
||||
if (setrlimit(RLIMIT_NOFILE, &limit)) {
|
||||
perror("setrlimit");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (setrlimit(RLIMIT_NOFILE, &limit))
|
||||
die_perror("Failed to set current limit for open files");
|
||||
|
||||
sock_probe_mem(&c);
|
||||
|
||||
conf(&c, argc, argv);
|
||||
@@ -293,23 +281,20 @@ int main(int argc, char **argv)
|
||||
pcap_init(&c);
|
||||
|
||||
if (!c.foreground) {
|
||||
if ((devnull_fd = open("/dev/null", O_RDWR | O_CLOEXEC)) < 0) {
|
||||
perror("/dev/null open");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if ((devnull_fd = open("/dev/null", O_RDWR | O_CLOEXEC)) < 0)
|
||||
die_perror("Failed to open /dev/null");
|
||||
}
|
||||
|
||||
if (isolate_prefork(&c))
|
||||
die("Failed to sandbox process, exiting");
|
||||
|
||||
if (!c.force_stderr && !isatty(fileno(stderr)))
|
||||
__openlog(log_name, 0, LOG_DAEMON);
|
||||
|
||||
if (!c.foreground)
|
||||
__daemon(c.pidfile_fd, devnull_fd);
|
||||
else
|
||||
pidfile_write(c.pidfile_fd, getpid());
|
||||
|
||||
log_runtime = true;
|
||||
|
||||
if (pasta_child_pid)
|
||||
kill(pasta_child_pid, SIGUSR1);
|
||||
|
||||
@@ -321,10 +306,8 @@ loop:
|
||||
/* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */
|
||||
/* cppcheck-suppress [duplicateValueTernary, unmatchedSuppression] */
|
||||
nfds = epoll_wait(c.epollfd, events, EPOLL_EVENTS, TIMER_INTERVAL);
|
||||
if (nfds == -1 && errno != EINTR) {
|
||||
perror("epoll_wait");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (nfds == -1 && errno != EINTR)
|
||||
die_perror("epoll_wait() failed in main loop");
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &now);
|
||||
|
||||
@@ -333,7 +316,7 @@ loop:
|
||||
uint32_t eventmask = events[i].events;
|
||||
|
||||
trace("%s: epoll event on %s %i (events: 0x%08x)",
|
||||
c.mode == MODE_PASST ? "passt" : "pasta",
|
||||
c.mode == MODE_PASTA ? "pasta" : "passt",
|
||||
EPOLL_TYPE_STR(ref.type), ref.fd, eventmask);
|
||||
|
||||
switch (ref.type) {
|
||||
@@ -364,8 +347,11 @@ loop:
|
||||
case EPOLL_TYPE_TCP_TIMER:
|
||||
tcp_timer_handler(&c, ref);
|
||||
break;
|
||||
case EPOLL_TYPE_UDP:
|
||||
udp_sock_handler(&c, ref, eventmask, &now);
|
||||
case EPOLL_TYPE_UDP_LISTEN:
|
||||
udp_listen_sock_handler(&c, ref, eventmask, &now);
|
||||
break;
|
||||
case EPOLL_TYPE_UDP_REPLY:
|
||||
udp_reply_sock_handler(&c, ref, eventmask, &now);
|
||||
break;
|
||||
case EPOLL_TYPE_PING:
|
||||
icmp_sock_handler(&c, ref);
|
||||
|
39
passt.h
39
passt.h
@@ -17,44 +17,15 @@ union epoll_ref;
|
||||
|
||||
#include "pif.h"
|
||||
#include "packet.h"
|
||||
#include "siphash.h"
|
||||
#include "ip.h"
|
||||
#include "inany.h"
|
||||
#include "flow.h"
|
||||
#include "icmp.h"
|
||||
#include "fwd.h"
|
||||
#include "tcp.h"
|
||||
#include "udp.h"
|
||||
|
||||
/**
|
||||
* enum epoll_type - Different types of fds we poll over
|
||||
*/
|
||||
enum epoll_type {
|
||||
/* Special value to indicate an invalid type */
|
||||
EPOLL_TYPE_NONE = 0,
|
||||
/* Connected TCP sockets */
|
||||
EPOLL_TYPE_TCP,
|
||||
/* Connected TCP sockets (spliced) */
|
||||
EPOLL_TYPE_TCP_SPLICE,
|
||||
/* Listening TCP sockets */
|
||||
EPOLL_TYPE_TCP_LISTEN,
|
||||
/* timerfds used for TCP timers */
|
||||
EPOLL_TYPE_TCP_TIMER,
|
||||
/* UDP sockets */
|
||||
EPOLL_TYPE_UDP,
|
||||
/* ICMP/ICMPv6 ping sockets */
|
||||
EPOLL_TYPE_PING,
|
||||
/* inotify fd watching for end of netns (pasta) */
|
||||
EPOLL_TYPE_NSQUIT_INOTIFY,
|
||||
/* timer fd watching for end of netns, fallback for inotify (pasta) */
|
||||
EPOLL_TYPE_NSQUIT_TIMER,
|
||||
/* tuntap character device */
|
||||
EPOLL_TYPE_TAP_PASTA,
|
||||
/* socket connected to qemu */
|
||||
EPOLL_TYPE_TAP_PASST,
|
||||
/* socket listening for qemu socket connections */
|
||||
EPOLL_TYPE_TAP_LISTEN,
|
||||
|
||||
EPOLL_NUM_TYPES,
|
||||
};
|
||||
|
||||
/**
|
||||
* union epoll_ref - Breakdown of reference for epoll fd bookkeeping
|
||||
* @type: Type of fd (tells us what to do with events)
|
||||
@@ -77,7 +48,7 @@ union epoll_ref {
|
||||
uint32_t flow;
|
||||
flow_sidx_t flowside;
|
||||
union tcp_listen_epoll_ref tcp_listen;
|
||||
union udp_epoll_ref udp;
|
||||
union udp_listen_epoll_ref udp;
|
||||
uint32_t data;
|
||||
int nsdir_fd;
|
||||
};
|
||||
@@ -180,7 +151,6 @@ struct ip6_ctx {
|
||||
* @trace: Enable tracing (extra debug) mode
|
||||
* @quiet: Don't print informational messages
|
||||
* @foreground: Run in foreground, don't log to stderr by default
|
||||
* @force_stderr: Force logging to stderr
|
||||
* @nofile: Maximum number of open files (ulimit -n)
|
||||
* @sock_path: Path for UNIX domain socket
|
||||
* @pcap: Path for packet capture file
|
||||
@@ -231,7 +201,6 @@ struct ctx {
|
||||
int trace;
|
||||
int quiet;
|
||||
int foreground;
|
||||
int force_stderr;
|
||||
int nofile;
|
||||
char sock_path[UNIX_PATH_MAX];
|
||||
char pcap[PATH_MAX];
|
||||
|
28
pasta.c
28
pasta.c
@@ -138,17 +138,15 @@ void pasta_open_ns(struct ctx *c, const char *netns)
|
||||
int nfd = -1;
|
||||
|
||||
nfd = open(netns, O_RDONLY | O_CLOEXEC);
|
||||
if (nfd < 0) {
|
||||
die("Couldn't open network namespace %s: %s",
|
||||
netns, strerror(errno));
|
||||
}
|
||||
if (nfd < 0)
|
||||
die_perror("Couldn't open network namespace %s", netns);
|
||||
|
||||
c->pasta_netns_fd = nfd;
|
||||
|
||||
NS_CALL(ns_check, c);
|
||||
|
||||
if (c->pasta_netns_fd < 0)
|
||||
die("Couldn't switch to pasta namespaces: %s", strerror(errno));
|
||||
die_perror("Couldn't switch to pasta namespaces");
|
||||
|
||||
if (!c->no_netns_quit) {
|
||||
char buf[PATH_MAX] = { 0 };
|
||||
@@ -176,6 +174,7 @@ struct pasta_spawn_cmd_arg {
|
||||
*
|
||||
* Return: this function never returns
|
||||
*/
|
||||
/* cppcheck-suppress [constParameterCallback, unmatchedSuppression] */
|
||||
static int pasta_spawn_cmd(void *arg)
|
||||
{
|
||||
const struct pasta_spawn_cmd_arg *a;
|
||||
@@ -183,7 +182,7 @@ static int pasta_spawn_cmd(void *arg)
|
||||
|
||||
/* We run in a detached PID and mount namespace: mount /proc over */
|
||||
if (mount("", "/proc", "proc", 0, NULL))
|
||||
warn("Couldn't mount /proc: %s", strerror(errno));
|
||||
warn_perror("Couldn't mount /proc");
|
||||
|
||||
if (write_file("/proc/sys/net/ipv4/ping_group_range", "0 0"))
|
||||
warn("Cannot set ping_group_range, ICMP requests might fail");
|
||||
@@ -196,8 +195,7 @@ static int pasta_spawn_cmd(void *arg)
|
||||
a = (const struct pasta_spawn_cmd_arg *)arg;
|
||||
execvp(a->exe, a->argv);
|
||||
|
||||
perror("execvp");
|
||||
exit(EXIT_FAILURE);
|
||||
die_perror("Failed to start command or shell");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -260,14 +258,12 @@ void pasta_start_ns(struct ctx *c, uid_t uid, gid_t gid,
|
||||
CLONE_NEWUTS | CLONE_NEWNS | SIGCHLD,
|
||||
(void *)&arg);
|
||||
|
||||
if (pasta_child_pid == -1) {
|
||||
perror("clone");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (pasta_child_pid == -1)
|
||||
die_perror("Failed to clone process with detached namespaces");
|
||||
|
||||
NS_CALL(pasta_wait_for_ns, c);
|
||||
if (c->pasta_netns_fd < 0)
|
||||
die("Failed to join network namespace: %s", strerror(errno));
|
||||
die_perror("Failed to join network namespace");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -371,12 +367,12 @@ static int pasta_netns_quit_timer(void)
|
||||
struct itimerspec it = { { 1, 0 }, { 1, 0 } }; /* one-second interval */
|
||||
|
||||
if (fd == -1) {
|
||||
err("timerfd_create(): %s", strerror(errno));
|
||||
err_perror("Failed to create timerfd for quit timer");
|
||||
return -errno;
|
||||
}
|
||||
|
||||
if (timerfd_settime(fd, 0, &it, NULL) < 0) {
|
||||
err("timerfd_settime(): %s", strerror(errno));
|
||||
err_perror("Failed to set interval for quit timer");
|
||||
close(fd);
|
||||
return -errno;
|
||||
}
|
||||
@@ -469,7 +465,7 @@ void pasta_netns_quit_timer_handler(struct ctx *c, union epoll_ref ref)
|
||||
|
||||
n = read(ref.fd, &expirations, sizeof(expirations));
|
||||
if (n < 0)
|
||||
die("Namespace watch timer read() error: %s", strerror(errno));
|
||||
die_perror("Namespace watch timer read() error");
|
||||
if ((size_t)n < sizeof(expirations))
|
||||
warn("Namespace watch timer: short read(): %zi", n);
|
||||
|
||||
|
8
pcap.c
8
pcap.c
@@ -89,10 +89,8 @@ static void pcap_frame(const struct iovec *iov, size_t iovcnt,
|
||||
struct iovec hiov = { &h, sizeof(h) };
|
||||
|
||||
if (write_remainder(pcap_fd, &hiov, 1, 0) < 0 ||
|
||||
write_remainder(pcap_fd, iov, iovcnt, offset) < 0) {
|
||||
debug("Cannot log packet, length %zu: %s",
|
||||
l2len, strerror(errno));
|
||||
}
|
||||
write_remainder(pcap_fd, iov, iovcnt, offset) < 0)
|
||||
debug_perror("Cannot log packet, length %zu", l2len);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -178,5 +176,5 @@ void pcap_init(struct ctx *c)
|
||||
info("Saving packet capture to %s", c->pcap);
|
||||
|
||||
if (write(pcap_fd, &pcap_hdr, sizeof(pcap_hdr)) < 0)
|
||||
warn("Cannot write PCAP header: %s", strerror(errno));
|
||||
warn_perror("Cannot write PCAP header");
|
||||
}
|
||||
|
40
pif.c
40
pif.c
@@ -7,9 +7,14 @@
|
||||
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <netinet/in.h>
|
||||
|
||||
#include "util.h"
|
||||
#include "pif.h"
|
||||
#include "siphash.h"
|
||||
#include "ip.h"
|
||||
#include "inany.h"
|
||||
#include "passt.h"
|
||||
|
||||
const char *pif_type_str[] = {
|
||||
[PIF_NONE] = "<none>",
|
||||
@@ -19,3 +24,38 @@ const char *pif_type_str[] = {
|
||||
};
|
||||
static_assert(ARRAY_SIZE(pif_type_str) == PIF_NUM_TYPES,
|
||||
"pif_type_str[] doesn't match enum pif_type");
|
||||
|
||||
|
||||
/** pif_sockaddr() - Construct a socket address suitable for an interface
|
||||
* @c: Execution context
|
||||
* @sa: Pointer to sockaddr to fill in
|
||||
* @sl: Updated to relevant length of initialised @sa
|
||||
* @pif: Interface to create the socket address
|
||||
* @addr: IPv[46] address
|
||||
* @port: Port (host byte order)
|
||||
*/
|
||||
void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa, socklen_t *sl,
|
||||
uint8_t pif, const union inany_addr *addr, in_port_t port)
|
||||
{
|
||||
const struct in_addr *v4 = inany_v4(addr);
|
||||
|
||||
ASSERT(pif_is_socket(pif));
|
||||
|
||||
if (v4) {
|
||||
sa->sa_family = AF_INET;
|
||||
sa->sa4.sin_addr = *v4;
|
||||
sa->sa4.sin_port = htons(port);
|
||||
memset(&sa->sa4.sin_zero, 0, sizeof(sa->sa4.sin_zero));
|
||||
*sl = sizeof(sa->sa4);
|
||||
} else {
|
||||
sa->sa_family = AF_INET6;
|
||||
sa->sa6.sin6_addr = addr->a6;
|
||||
sa->sa6.sin6_port = htons(port);
|
||||
if (pif == PIF_HOST && IN6_IS_ADDR_LINKLOCAL(&addr->a6))
|
||||
sa->sa6.sin6_scope_id = c->ifi6;
|
||||
else
|
||||
sa->sa6.sin6_scope_id = 0;
|
||||
sa->sa6.sin6_flowinfo = 0;
|
||||
*sl = sizeof(sa->sa6);
|
||||
}
|
||||
}
|
||||
|
17
pif.h
17
pif.h
@@ -7,6 +7,9 @@
|
||||
#ifndef PIF_H
|
||||
#define PIF_H
|
||||
|
||||
union inany_addr;
|
||||
union sockaddr_inany;
|
||||
|
||||
/**
|
||||
* enum pif_type - Type of passt/pasta interface ("pif")
|
||||
*
|
||||
@@ -43,4 +46,18 @@ static inline const char *pif_name(uint8_t pif)
|
||||
return pif_type(pif);
|
||||
}
|
||||
|
||||
/**
|
||||
* pif_is_socket() - Is interface implemented via L4 sockets?
|
||||
* @pif: pif to check
|
||||
*
|
||||
* Return: true if @pif is an L4 socket based interface, otherwise false
|
||||
*/
|
||||
static inline bool pif_is_socket(uint8_t pif)
|
||||
{
|
||||
return pif == PIF_HOST || pif == PIF_SPLICE;
|
||||
}
|
||||
|
||||
void pif_sockaddr(const struct ctx *c, union sockaddr_inany *sa, socklen_t *sl,
|
||||
uint8_t pif, const union inany_addr *addr, in_port_t port);
|
||||
|
||||
#endif /* PIF_H */
|
||||
|
4
qrap.1
4
qrap.1
@@ -66,8 +66,8 @@ issues to Stefano Brivio <sbrivio@redhat.com>.
|
||||
Copyright (c) 2020-2021 Red Hat GmbH.
|
||||
|
||||
\fBqrap\fR is free software: you can redistribute it and/or modify it under the
|
||||
terms of the GNU Affero General Public License as published by the Free Software
|
||||
Foundation, either version 3 of the License, or (at your option) any later
|
||||
terms of the GNU General Public License as published by the Free Software
|
||||
Foundation, either version 2 of the License, or (at your option) any later
|
||||
version.
|
||||
|
||||
.SH SEE ALSO
|
||||
|
@@ -115,10 +115,4 @@ static inline uint64_t siphash_final(struct siphash_state *state,
|
||||
return state->v[0] ^ state->v[1] ^ state->v[2] ^ state->v[3];
|
||||
}
|
||||
|
||||
uint64_t siphash_8b(const uint8_t *in, const uint64_t *k);
|
||||
uint64_t siphash_12b(const uint8_t *in, const uint64_t *k);
|
||||
uint64_t siphash_20b(const uint8_t *in, const uint64_t *k);
|
||||
uint64_t siphash_32b(const uint8_t *in, const uint64_t *k);
|
||||
uint64_t siphash_36b(const uint8_t *in, const uint64_t *k);
|
||||
|
||||
#endif /* SIPHASH_H */
|
||||
|
192
tap.c
192
tap.c
@@ -90,17 +90,6 @@ void tap_send_single(const struct ctx *c, const void *data, size_t l2len)
|
||||
tap_send_frames(c, iov, iovcnt, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* tap_ip4_daddr() - Normal IPv4 destination address for inbound packets
|
||||
* @c: Execution context
|
||||
*
|
||||
* Return: IPv4 address
|
||||
*/
|
||||
struct in_addr tap_ip4_daddr(const struct ctx *c)
|
||||
{
|
||||
return c->ip4.addr_seen;
|
||||
}
|
||||
|
||||
/**
|
||||
* tap_ip6_daddr() - Normal IPv6 destination address for inbound packets
|
||||
* @c: Execution context
|
||||
@@ -325,7 +314,7 @@ static size_t tap_send_frames_pasta(const struct ctx *c,
|
||||
size_t framelen = iov_size(iov + i, bufs_per_frame);
|
||||
|
||||
if (rc < 0) {
|
||||
debug("tap write: %s", strerror(errno));
|
||||
debug_perror("tap write");
|
||||
|
||||
switch (errno) {
|
||||
case EAGAIN:
|
||||
@@ -387,7 +376,7 @@ static size_t tap_send_frames_passt(const struct ctx *c,
|
||||
size_t rembufs = bufs_per_frame - (i % bufs_per_frame);
|
||||
|
||||
if (write_remainder(c->fd_tap, &iov[i], rembufs, buf_offset) < 0) {
|
||||
err("tap: partial frame send: %s", strerror(errno));
|
||||
err_perror("tap: partial frame send");
|
||||
return i;
|
||||
}
|
||||
i += rembufs;
|
||||
@@ -416,10 +405,10 @@ size_t tap_send_frames(const struct ctx *c, const struct iovec *iov,
|
||||
if (!nframes)
|
||||
return 0;
|
||||
|
||||
if (c->mode == MODE_PASST)
|
||||
m = tap_send_frames_passt(c, iov, bufs_per_frame, nframes);
|
||||
else
|
||||
if (c->mode == MODE_PASTA)
|
||||
m = tap_send_frames_pasta(c, iov, bufs_per_frame, nframes);
|
||||
else
|
||||
m = tap_send_frames_passt(c, iov, bufs_per_frame, nframes);
|
||||
|
||||
if (m < nframes)
|
||||
debug("tap: failed to send %zu frames of %zu",
|
||||
@@ -602,7 +591,7 @@ resume:
|
||||
if (!eh)
|
||||
continue;
|
||||
if (ntohs(eh->h_proto) == ETH_P_ARP) {
|
||||
PACKET_POOL_P(pkt, 1, in->buf, sizeof(pkt_buf));
|
||||
PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
|
||||
|
||||
packet_add(pkt, l2len, (char *)eh);
|
||||
arp(c, pkt);
|
||||
@@ -642,7 +631,7 @@ resume:
|
||||
continue;
|
||||
|
||||
if (iph->protocol == IPPROTO_ICMP) {
|
||||
PACKET_POOL_P(pkt, 1, in->buf, sizeof(pkt_buf));
|
||||
PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
|
||||
|
||||
if (c->no_icmp)
|
||||
continue;
|
||||
@@ -661,7 +650,7 @@ resume:
|
||||
continue;
|
||||
|
||||
if (iph->protocol == IPPROTO_UDP) {
|
||||
PACKET_POOL_P(pkt, 1, in->buf, sizeof(pkt_buf));
|
||||
PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
|
||||
|
||||
packet_add(pkt, l2len, (char *)eh);
|
||||
if (dhcp(c, pkt))
|
||||
@@ -675,17 +664,17 @@ resume:
|
||||
}
|
||||
|
||||
#define L4_MATCH(iph, uh, seq) \
|
||||
(seq->protocol == iph->protocol && \
|
||||
seq->source == uh->source && seq->dest == uh->dest && \
|
||||
seq->saddr.s_addr == iph->saddr && seq->daddr.s_addr == iph->daddr)
|
||||
((seq)->protocol == (iph)->protocol && \
|
||||
(seq)->source == (uh)->source && (seq)->dest == (uh)->dest && \
|
||||
(seq)->saddr.s_addr == (iph)->saddr && (seq)->daddr.s_addr == (iph)->daddr)
|
||||
|
||||
#define L4_SET(iph, uh, seq) \
|
||||
do { \
|
||||
seq->protocol = iph->protocol; \
|
||||
seq->source = uh->source; \
|
||||
seq->dest = uh->dest; \
|
||||
seq->saddr.s_addr = iph->saddr; \
|
||||
seq->daddr.s_addr = iph->daddr; \
|
||||
(seq)->protocol = (iph)->protocol; \
|
||||
(seq)->source = (uh)->source; \
|
||||
(seq)->dest = (uh)->dest; \
|
||||
(seq)->saddr.s_addr = (iph)->saddr; \
|
||||
(seq)->daddr.s_addr = (iph)->daddr; \
|
||||
} while (0)
|
||||
|
||||
if (seq && L4_MATCH(iph, uh, seq) && seq->p.count < UIO_MAXIOV)
|
||||
@@ -810,7 +799,7 @@ resume:
|
||||
}
|
||||
|
||||
if (proto == IPPROTO_ICMPV6) {
|
||||
PACKET_POOL_P(pkt, 1, in->buf, sizeof(pkt_buf));
|
||||
PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
|
||||
|
||||
if (c->no_icmp)
|
||||
continue;
|
||||
@@ -834,7 +823,7 @@ resume:
|
||||
uh = (struct udphdr *)l4h;
|
||||
|
||||
if (proto == IPPROTO_UDP) {
|
||||
PACKET_POOL_P(pkt, 1, in->buf, sizeof(pkt_buf));
|
||||
PACKET_POOL_P(pkt, 1, in->buf, in->buf_size);
|
||||
|
||||
packet_add(pkt, l4len, l4h);
|
||||
|
||||
@@ -848,18 +837,19 @@ resume:
|
||||
}
|
||||
|
||||
#define L4_MATCH(ip6h, proto, uh, seq) \
|
||||
(seq->protocol == proto && \
|
||||
seq->source == uh->source && seq->dest == uh->dest && \
|
||||
IN6_ARE_ADDR_EQUAL(&seq->saddr, saddr) && \
|
||||
IN6_ARE_ADDR_EQUAL(&seq->daddr, daddr))
|
||||
((seq)->protocol == (proto) && \
|
||||
(seq)->source == (uh)->source && \
|
||||
(seq)->dest == (uh)->dest && \
|
||||
IN6_ARE_ADDR_EQUAL(&(seq)->saddr, saddr) && \
|
||||
IN6_ARE_ADDR_EQUAL(&(seq)->daddr, daddr))
|
||||
|
||||
#define L4_SET(ip6h, proto, uh, seq) \
|
||||
do { \
|
||||
seq->protocol = proto; \
|
||||
seq->source = uh->source; \
|
||||
seq->dest = uh->dest; \
|
||||
seq->saddr = *saddr; \
|
||||
seq->daddr = *daddr; \
|
||||
(seq)->protocol = (proto); \
|
||||
(seq)->source = (uh)->source; \
|
||||
(seq)->dest = (uh)->dest; \
|
||||
(seq)->saddr = *saddr; \
|
||||
(seq)->daddr = *daddr; \
|
||||
} while (0)
|
||||
|
||||
if (seq && L4_MATCH(ip6h, proto, uh, seq) &&
|
||||
@@ -920,6 +910,58 @@ append:
|
||||
return in->count;
|
||||
}
|
||||
|
||||
/**
|
||||
* tap_flush_pools() - Flush both IPv4 and IPv6 packet pools
|
||||
*/
|
||||
void tap_flush_pools(void)
|
||||
{
|
||||
pool_flush(pool_tap4);
|
||||
pool_flush(pool_tap6);
|
||||
}
|
||||
|
||||
/**
|
||||
* tap_handler() - IPv4/IPv6 and ARP packet handler for tap file descriptor
|
||||
* @c: Execution context
|
||||
* @now: Current timestamp
|
||||
*/
|
||||
void tap_handler(struct ctx *c, const struct timespec *now)
|
||||
{
|
||||
tap4_handler(c, pool_tap4, now);
|
||||
tap6_handler(c, pool_tap6, now);
|
||||
}
|
||||
|
||||
/**
|
||||
* tap_add_packet() - Queue/capture packet, update notion of guest MAC address
|
||||
* @c: Execution context
|
||||
* @l2len: Total L2 packet length
|
||||
* @p: Packet buffer
|
||||
*/
|
||||
void tap_add_packet(struct ctx *c, ssize_t l2len, char *p)
|
||||
{
|
||||
const struct ethhdr *eh;
|
||||
|
||||
pcap(p, l2len);
|
||||
|
||||
eh = (struct ethhdr *)p;
|
||||
|
||||
if (memcmp(c->mac_guest, eh->h_source, ETH_ALEN)) {
|
||||
memcpy(c->mac_guest, eh->h_source, ETH_ALEN);
|
||||
proto_update_l2_buf(c->mac_guest, NULL);
|
||||
}
|
||||
|
||||
switch (ntohs(eh->h_proto)) {
|
||||
case ETH_P_ARP:
|
||||
case ETH_P_IP:
|
||||
packet_add(pool_tap4, l2len, p);
|
||||
break;
|
||||
case ETH_P_IPV6:
|
||||
packet_add(pool_tap6, l2len, p);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* tap_sock_reset() - Handle closing or failure of connect AF_UNIX socket
|
||||
* @c: Execution context
|
||||
@@ -946,7 +988,6 @@ static void tap_sock_reset(struct ctx *c)
|
||||
void tap_handler_passt(struct ctx *c, uint32_t events,
|
||||
const struct timespec *now)
|
||||
{
|
||||
const struct ethhdr *eh;
|
||||
ssize_t n, rem;
|
||||
char *p;
|
||||
|
||||
@@ -959,8 +1000,7 @@ redo:
|
||||
p = pkt_buf;
|
||||
rem = 0;
|
||||
|
||||
pool_flush(pool_tap4);
|
||||
pool_flush(pool_tap6);
|
||||
tap_flush_pools();
|
||||
|
||||
n = recv(c->fd_tap, p, TAP_BUF_FILL, MSG_DONTWAIT);
|
||||
if (n < 0) {
|
||||
@@ -987,38 +1027,18 @@ redo:
|
||||
/* Complete the partial read above before discarding a malformed
|
||||
* frame, otherwise the stream will be inconsistent.
|
||||
*/
|
||||
if (l2len < (ssize_t)sizeof(*eh) ||
|
||||
if (l2len < (ssize_t)sizeof(struct ethhdr) ||
|
||||
l2len > (ssize_t)ETH_MAX_MTU)
|
||||
goto next;
|
||||
|
||||
pcap(p, l2len);
|
||||
|
||||
eh = (struct ethhdr *)p;
|
||||
|
||||
if (memcmp(c->mac_guest, eh->h_source, ETH_ALEN)) {
|
||||
memcpy(c->mac_guest, eh->h_source, ETH_ALEN);
|
||||
proto_update_l2_buf(c->mac_guest, NULL);
|
||||
}
|
||||
|
||||
switch (ntohs(eh->h_proto)) {
|
||||
case ETH_P_ARP:
|
||||
case ETH_P_IP:
|
||||
packet_add(pool_tap4, l2len, p);
|
||||
break;
|
||||
case ETH_P_IPV6:
|
||||
packet_add(pool_tap6, l2len, p);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
tap_add_packet(c, l2len, p);
|
||||
|
||||
next:
|
||||
p += l2len;
|
||||
n -= l2len;
|
||||
}
|
||||
|
||||
tap4_handler(c, pool_tap4, now);
|
||||
tap6_handler(c, pool_tap6, now);
|
||||
tap_handler(c, now);
|
||||
|
||||
/* We can't use EPOLLET otherwise. */
|
||||
if (rem)
|
||||
@@ -1043,35 +1063,18 @@ void tap_handler_pasta(struct ctx *c, uint32_t events,
|
||||
redo:
|
||||
n = 0;
|
||||
|
||||
pool_flush(pool_tap4);
|
||||
pool_flush(pool_tap6);
|
||||
tap_flush_pools();
|
||||
restart:
|
||||
while ((len = read(c->fd_tap, pkt_buf + n, TAP_BUF_BYTES - n)) > 0) {
|
||||
const struct ethhdr *eh = (struct ethhdr *)(pkt_buf + n);
|
||||
|
||||
if (len < (ssize_t)sizeof(*eh) || len > (ssize_t)ETH_MAX_MTU) {
|
||||
if (len < (ssize_t)sizeof(struct ethhdr) ||
|
||||
len > (ssize_t)ETH_MAX_MTU) {
|
||||
n += len;
|
||||
continue;
|
||||
}
|
||||
|
||||
pcap(pkt_buf + n, len);
|
||||
|
||||
if (memcmp(c->mac_guest, eh->h_source, ETH_ALEN)) {
|
||||
memcpy(c->mac_guest, eh->h_source, ETH_ALEN);
|
||||
proto_update_l2_buf(c->mac_guest, NULL);
|
||||
}
|
||||
|
||||
switch (ntohs(eh->h_proto)) {
|
||||
case ETH_P_ARP:
|
||||
case ETH_P_IP:
|
||||
packet_add(pool_tap4, len, pkt_buf + n);
|
||||
break;
|
||||
case ETH_P_IPV6:
|
||||
packet_add(pool_tap6, len, pkt_buf + n);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
tap_add_packet(c, len, pkt_buf + n);
|
||||
|
||||
if ((n += len) == TAP_BUF_BYTES)
|
||||
break;
|
||||
@@ -1082,8 +1085,7 @@ restart:
|
||||
|
||||
ret = errno;
|
||||
|
||||
tap4_handler(c, pool_tap4, now);
|
||||
tap6_handler(c, pool_tap6, now);
|
||||
tap_handler(c, now);
|
||||
|
||||
if (len > 0 || ret == EAGAIN)
|
||||
return;
|
||||
@@ -1109,7 +1111,7 @@ int tap_sock_unix_open(char *sock_path)
|
||||
int i;
|
||||
|
||||
if (fd < 0)
|
||||
die("UNIX socket: %s", strerror(errno));
|
||||
die_perror("Failed to open UNIX domain socket");
|
||||
|
||||
for (i = 1; i < UNIX_SOCK_MAX; i++) {
|
||||
char *path = addr.sun_path;
|
||||
@@ -1122,7 +1124,7 @@ int tap_sock_unix_open(char *sock_path)
|
||||
|
||||
ex = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0);
|
||||
if (ex < 0)
|
||||
die("UNIX domain socket check: %s", strerror(errno));
|
||||
die_perror("Failed to check for UNIX domain conflicts");
|
||||
|
||||
ret = connect(ex, (const struct sockaddr *)&addr, sizeof(addr));
|
||||
if (!ret || (errno != ENOENT && errno != ECONNREFUSED &&
|
||||
@@ -1142,7 +1144,7 @@ int tap_sock_unix_open(char *sock_path)
|
||||
}
|
||||
|
||||
if (i == UNIX_SOCK_MAX)
|
||||
die("UNIX socket bind: %s", strerror(errno));
|
||||
die_perror("Failed to bind UNIX domain socket");
|
||||
|
||||
info("UNIX domain socket bound at %s", addr.sun_path);
|
||||
if (!*sock_path)
|
||||
@@ -1248,11 +1250,11 @@ static int tap_ns_tun(void *arg)
|
||||
|
||||
fd = open("/dev/net/tun", flags);
|
||||
if (fd < 0)
|
||||
die("Failed to open() /dev/net/tun: %s", strerror(errno));
|
||||
die_perror("Failed to open() /dev/net/tun");
|
||||
|
||||
rc = ioctl(fd, TUNSETIFF, &ifr);
|
||||
if (rc < 0)
|
||||
die("TUNSETIFF failed: %s", strerror(errno));
|
||||
die_perror("TUNSETIFF ioctl on /dev/net/tun failed");
|
||||
|
||||
if (!(c->pasta_ifi = if_nametoindex(c->pasta_ifn)))
|
||||
die("Tap device opened but no network interface found");
|
||||
@@ -1317,7 +1319,9 @@ void tap_sock_init(struct ctx *c)
|
||||
return;
|
||||
}
|
||||
|
||||
if (c->mode == MODE_PASST) {
|
||||
if (c->mode == MODE_PASTA) {
|
||||
tap_sock_tun_init(c);
|
||||
} else {
|
||||
tap_sock_unix_init(c);
|
||||
|
||||
/* In passt mode, we don't know the guest's MAC address until it
|
||||
@@ -1325,7 +1329,5 @@ void tap_sock_init(struct ctx *c)
|
||||
* first packets will reach it.
|
||||
*/
|
||||
memset(&c->mac_guest, 0xff, sizeof(c->mac_guest));
|
||||
} else {
|
||||
tap_sock_tun_init(c);
|
||||
}
|
||||
}
|
||||
|
4
tap.h
4
tap.h
@@ -43,7 +43,6 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len)
|
||||
thdr->vnet_len = htonl(l2len);
|
||||
}
|
||||
|
||||
struct in_addr tap_ip4_daddr(const struct ctx *c);
|
||||
void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
|
||||
struct in_addr dst, in_port_t dport,
|
||||
const void *in, size_t dlen);
|
||||
@@ -70,5 +69,8 @@ void tap_handler_passt(struct ctx *c, uint32_t events,
|
||||
const struct timespec *now);
|
||||
int tap_sock_unix_open(char *sock_path);
|
||||
void tap_sock_init(struct ctx *c);
|
||||
void tap_flush_pools(void);
|
||||
void tap_handler(struct ctx *c, const struct timespec *now);
|
||||
void tap_add_packet(struct ctx *c, ssize_t l2len, char *p);
|
||||
|
||||
#endif /* TAP_H */
|
||||
|
3
tcp.h
3
tcp.h
@@ -24,6 +24,9 @@ void tcp_timer(struct ctx *c, const struct timespec *now);
|
||||
void tcp_defer_handler(struct ctx *c);
|
||||
|
||||
void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s);
|
||||
int tcp_set_peek_offset(int s, int offset);
|
||||
|
||||
extern bool peek_offset_cap;
|
||||
|
||||
/**
|
||||
* union tcp_epoll_ref - epoll reference portion for TCP connections
|
||||
|
532
tcp_buf.c
Normal file
532
tcp_buf.c
Normal file
@@ -0,0 +1,532 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
/* PASST - Plug A Simple Socket Transport
|
||||
* for qemu/UNIX domain socket mode
|
||||
*
|
||||
* PASTA - Pack A Subtle Tap Abstraction
|
||||
* for network namespace/tap device mode
|
||||
*
|
||||
* tcp_buf.c - TCP L2 buffer management functions
|
||||
*
|
||||
* Copyright Red Hat
|
||||
* Author: Stefano Brivio <sbrivio@redhat.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include <netinet/ip.h>
|
||||
|
||||
#include <linux/tcp.h>
|
||||
|
||||
#include "util.h"
|
||||
#include "ip.h"
|
||||
#include "iov.h"
|
||||
#include "passt.h"
|
||||
#include "tap.h"
|
||||
#include "siphash.h"
|
||||
#include "inany.h"
|
||||
#include "tcp_conn.h"
|
||||
#include "tcp_internal.h"
|
||||
#include "tcp_buf.h"
|
||||
|
||||
/* Number of entries in each static per-frame array below */
#define TCP_FRAMES_MEM			128
/* Expands to an expression reading c->mode: a context pointer named 'c' must
 * be in scope wherever this is used
 */
#define TCP_FRAMES	(c->mode == MODE_PASTA ? 1 : TCP_FRAMES_MEM)
|
||||
|
||||
/* Static buffers */
|
||||
/**
|
||||
* struct tcp_payload_t - TCP header and data to send segments with payload
|
||||
* @th: TCP header
|
||||
* @data: TCP data
|
||||
*/
|
||||
struct tcp_payload_t {
|
||||
struct tcphdr th;
|
||||
uint8_t data[IP_MAX_MTU - sizeof(struct tcphdr)];
|
||||
#ifdef __AVX2__
|
||||
} __attribute__ ((packed, aligned(32))); /* For AVX2 checksum routines */
|
||||
#else
|
||||
} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
|
||||
#endif
|
||||
|
||||
/**
|
||||
* struct tcp_flags_t - TCP header and data to send zero-length
|
||||
* segments (flags)
|
||||
* @th: TCP header
|
||||
* @opts TCP options
|
||||
*/
|
||||
struct tcp_flags_t {
|
||||
struct tcphdr th;
|
||||
char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
|
||||
#ifdef __AVX2__
|
||||
} __attribute__ ((packed, aligned(32)));
|
||||
#else
|
||||
} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
|
||||
#endif
|
||||
|
||||
/* Ethernet header for IPv4 frames */
|
||||
static struct ethhdr tcp4_eth_src;
|
||||
|
||||
static struct tap_hdr tcp4_payload_tap_hdr[TCP_FRAMES_MEM];
|
||||
/* IPv4 headers */
|
||||
static struct iphdr tcp4_payload_ip[TCP_FRAMES_MEM];
|
||||
/* TCP segments with payload for IPv4 frames */
|
||||
static struct tcp_payload_t tcp4_payload[TCP_FRAMES_MEM];
|
||||
|
||||
static_assert(MSS4 <= sizeof(tcp4_payload[0].data), "MSS4 is greater than 65516");
|
||||
|
||||
/* References tracking the owner connection of frames in the tap outqueue */
|
||||
static struct tcp_tap_conn *tcp4_frame_conns[TCP_FRAMES_MEM];
|
||||
static unsigned int tcp4_payload_used;
|
||||
|
||||
static struct tap_hdr tcp4_flags_tap_hdr[TCP_FRAMES_MEM];
|
||||
/* IPv4 headers for TCP segment without payload */
|
||||
static struct iphdr tcp4_flags_ip[TCP_FRAMES_MEM];
|
||||
/* TCP segments without payload for IPv4 frames */
|
||||
static struct tcp_flags_t tcp4_flags[TCP_FRAMES_MEM];
|
||||
|
||||
static unsigned int tcp4_flags_used;
|
||||
|
||||
/* Ethernet header for IPv6 frames */
|
||||
static struct ethhdr tcp6_eth_src;
|
||||
|
||||
static struct tap_hdr tcp6_payload_tap_hdr[TCP_FRAMES_MEM];
|
||||
/* IPv6 headers */
|
||||
static struct ipv6hdr tcp6_payload_ip[TCP_FRAMES_MEM];
|
||||
/* TCP headers and data for IPv6 frames */
|
||||
static struct tcp_payload_t tcp6_payload[TCP_FRAMES_MEM];
|
||||
|
||||
static_assert(MSS6 <= sizeof(tcp6_payload[0].data), "MSS6 is greater than 65516");
|
||||
|
||||
/* References tracking the owner connection of frames in the tap outqueue */
|
||||
static struct tcp_tap_conn *tcp6_frame_conns[TCP_FRAMES_MEM];
|
||||
static unsigned int tcp6_payload_used;
|
||||
|
||||
static struct tap_hdr tcp6_flags_tap_hdr[TCP_FRAMES_MEM];
|
||||
/* IPv6 headers for TCP segment without payload */
|
||||
static struct ipv6hdr tcp6_flags_ip[TCP_FRAMES_MEM];
|
||||
/* TCP segment without payload for IPv6 frames */
|
||||
static struct tcp_flags_t tcp6_flags[TCP_FRAMES_MEM];
|
||||
|
||||
static unsigned int tcp6_flags_used;
|
||||
|
||||
/* recvmsg()/sendmsg() data for tap */
|
||||
static struct iovec iov_sock [TCP_FRAMES_MEM + 1];
|
||||
|
||||
static struct iovec tcp4_l2_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS];
|
||||
static struct iovec tcp6_l2_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS];
|
||||
static struct iovec tcp4_l2_flags_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS];
|
||||
static struct iovec tcp6_l2_flags_iov [TCP_FRAMES_MEM][TCP_NUM_IOVS];
|
||||
/**
|
||||
* tcp_update_l2_buf() - Update Ethernet header buffers with addresses
|
||||
* @eth_d: Ethernet destination address, NULL if unchanged
|
||||
* @eth_s: Ethernet source address, NULL if unchanged
|
||||
*/
|
||||
void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
|
||||
{
|
||||
eth_update_mac(&tcp4_eth_src, eth_d, eth_s);
|
||||
eth_update_mac(&tcp6_eth_src, eth_d, eth_s);
|
||||
}
|
||||
|
||||
/**
|
||||
* tcp_sock4_iov_init() - Initialise scatter-gather L2 buffers for IPv4 sockets
|
||||
* @c: Execution context
|
||||
*/
|
||||
void tcp_sock4_iov_init(const struct ctx *c)
|
||||
{
|
||||
struct iphdr iph = L2_BUF_IP4_INIT(IPPROTO_TCP);
|
||||
struct iovec *iov;
|
||||
int i;
|
||||
|
||||
tcp4_eth_src.h_proto = htons_constant(ETH_P_IP);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(tcp4_payload); i++) {
|
||||
tcp4_payload_ip[i] = iph;
|
||||
tcp4_payload[i].th.doff = sizeof(struct tcphdr) / 4;
|
||||
tcp4_payload[i].th.ack = 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(tcp4_flags); i++) {
|
||||
tcp4_flags_ip[i] = iph;
|
||||
tcp4_flags[i].th.doff = sizeof(struct tcphdr) / 4;
|
||||
tcp4_flags[i].th.ack = 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < TCP_FRAMES_MEM; i++) {
|
||||
iov = tcp4_l2_iov[i];
|
||||
|
||||
iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_payload_tap_hdr[i]);
|
||||
iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src);
|
||||
iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[i]);
|
||||
iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_payload[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < TCP_FRAMES_MEM; i++) {
|
||||
iov = tcp4_l2_flags_iov[i];
|
||||
|
||||
iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_flags_tap_hdr[i]);
|
||||
iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
|
||||
iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src);
|
||||
iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[i]);
|
||||
iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_flags[i];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* tcp_sock6_iov_init() - Initialise scatter-gather L2 buffers for IPv6 sockets
|
||||
* @c: Execution context
|
||||
*/
|
||||
void tcp_sock6_iov_init(const struct ctx *c)
|
||||
{
|
||||
struct ipv6hdr ip6 = L2_BUF_IP6_INIT(IPPROTO_TCP);
|
||||
struct iovec *iov;
|
||||
int i;
|
||||
|
||||
tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) {
|
||||
tcp6_payload_ip[i] = ip6;
|
||||
tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4;
|
||||
tcp6_payload[i].th.ack = 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(tcp6_flags); i++) {
|
||||
tcp6_flags_ip[i] = ip6;
|
||||
tcp6_flags[i].th.doff = sizeof(struct tcphdr) / 4;
|
||||
tcp6_flags[i].th .ack = 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < TCP_FRAMES_MEM; i++) {
|
||||
iov = tcp6_l2_iov[i];
|
||||
|
||||
iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_payload_tap_hdr[i]);
|
||||
iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
|
||||
iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[i]);
|
||||
iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_payload[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < TCP_FRAMES_MEM; i++) {
|
||||
iov = tcp6_l2_flags_iov[i];
|
||||
|
||||
iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_flags_tap_hdr[i]);
|
||||
iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
|
||||
iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[i]);
|
||||
iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_flags[i];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* tcp_flags_flush() - Send out buffers for segments with no data (flags)
|
||||
* @c: Execution context
|
||||
*/
|
||||
void tcp_flags_flush(const struct ctx *c)
|
||||
{
|
||||
tap_send_frames(c, &tcp6_l2_flags_iov[0][0], TCP_NUM_IOVS,
|
||||
tcp6_flags_used);
|
||||
tcp6_flags_used = 0;
|
||||
|
||||
tap_send_frames(c, &tcp4_l2_flags_iov[0][0], TCP_NUM_IOVS,
|
||||
tcp4_flags_used);
|
||||
tcp4_flags_used = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* tcp_revert_seq() - Revert affected conn->seq_to_tap after failed transmission
|
||||
* @conns: Array of connection pointers corresponding to queued frames
|
||||
* @frames: Two-dimensional array containing queued frames with sub-iovs
|
||||
* @num_frames: Number of entries in the two arrays to be compared
|
||||
*/
|
||||
static void tcp_revert_seq(struct tcp_tap_conn **conns, struct iovec (*frames)[TCP_NUM_IOVS],
|
||||
int num_frames)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_frames; i++) {
|
||||
const struct tcphdr *th = frames[i][TCP_IOV_PAYLOAD].iov_base;
|
||||
struct tcp_tap_conn *conn = conns[i];
|
||||
uint32_t seq = ntohl(th->seq);
|
||||
|
||||
if (SEQ_LE(conn->seq_to_tap, seq))
|
||||
continue;
|
||||
|
||||
conn->seq_to_tap = seq;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* tcp_payload_flush() - Send out buffers for segments with data
|
||||
* @c: Execution context
|
||||
*/
|
||||
void tcp_payload_flush(const struct ctx *c)
|
||||
{
|
||||
size_t m;
|
||||
|
||||
m = tap_send_frames(c, &tcp6_l2_iov[0][0], TCP_NUM_IOVS,
|
||||
tcp6_payload_used);
|
||||
if (m != tcp6_payload_used) {
|
||||
tcp_revert_seq(&tcp6_frame_conns[m], &tcp6_l2_iov[m],
|
||||
tcp6_payload_used - m);
|
||||
}
|
||||
tcp6_payload_used = 0;
|
||||
|
||||
m = tap_send_frames(c, &tcp4_l2_iov[0][0], TCP_NUM_IOVS,
|
||||
tcp4_payload_used);
|
||||
if (m != tcp4_payload_used) {
|
||||
tcp_revert_seq(&tcp4_frame_conns[m], &tcp4_l2_iov[m],
|
||||
tcp4_payload_used - m);
|
||||
}
|
||||
tcp4_payload_used = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* tcp_buf_send_flag() - Send segment with flags to tap (no payload)
|
||||
* @c: Execution context
|
||||
* @conn: Connection pointer
|
||||
* @flags: TCP flags: if not set, send segment only if ACK is due
|
||||
*
|
||||
* Return: negative error code on connection reset, 0 otherwise
|
||||
*/
|
||||
int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
|
||||
{
|
||||
struct tcp_flags_t *payload;
|
||||
struct iovec *iov;
|
||||
size_t optlen;
|
||||
size_t l4len;
|
||||
uint32_t seq;
|
||||
int ret;
|
||||
|
||||
if (CONN_V4(conn))
|
||||
iov = tcp4_l2_flags_iov[tcp4_flags_used++];
|
||||
else
|
||||
iov = tcp6_l2_flags_iov[tcp6_flags_used++];
|
||||
|
||||
payload = iov[TCP_IOV_PAYLOAD].iov_base;
|
||||
|
||||
seq = conn->seq_to_tap;
|
||||
ret = tcp_prepare_flags(c, conn, flags, &payload->th,
|
||||
payload->opts, &optlen);
|
||||
if (ret <= 0) {
|
||||
if (CONN_V4(conn))
|
||||
tcp4_flags_used--;
|
||||
else
|
||||
tcp6_flags_used--;
|
||||
return ret;
|
||||
}
|
||||
|
||||
l4len = tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq);
|
||||
iov[TCP_IOV_PAYLOAD].iov_len = l4len;
|
||||
|
||||
if (flags & DUP_ACK) {
|
||||
struct iovec *dup_iov;
|
||||
int i;
|
||||
|
||||
if (CONN_V4(conn))
|
||||
dup_iov = tcp4_l2_flags_iov[tcp4_flags_used++];
|
||||
else
|
||||
dup_iov = tcp6_l2_flags_iov[tcp6_flags_used++];
|
||||
|
||||
for (i = 0; i < TCP_NUM_IOVS; i++)
|
||||
memcpy(dup_iov[i].iov_base, iov[i].iov_base,
|
||||
iov[i].iov_len);
|
||||
dup_iov[TCP_IOV_PAYLOAD].iov_len = iov[TCP_IOV_PAYLOAD].iov_len;
|
||||
}
|
||||
|
||||
if (CONN_V4(conn)) {
|
||||
if (tcp4_flags_used > TCP_FRAMES_MEM - 2)
|
||||
tcp_flags_flush(c);
|
||||
} else {
|
||||
if (tcp6_flags_used > TCP_FRAMES_MEM - 2)
|
||||
tcp_flags_flush(c);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* tcp_data_to_tap() - Finalise (queue) highest-numbered scatter-gather buffer
|
||||
* @c: Execution context
|
||||
* @conn: Connection pointer
|
||||
* @dlen: TCP payload length
|
||||
* @no_csum: Don't compute IPv4 checksum, use the one from previous buffer
|
||||
* @seq: Sequence number to be sent
|
||||
*/
|
||||
static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
|
||||
ssize_t dlen, int no_csum, uint32_t seq)
|
||||
{
|
||||
struct iovec *iov;
|
||||
size_t l4len;
|
||||
|
||||
conn->seq_to_tap = seq + dlen;
|
||||
|
||||
if (CONN_V4(conn)) {
|
||||
struct iovec *iov_prev = tcp4_l2_iov[tcp4_payload_used - 1];
|
||||
const uint16_t *check = NULL;
|
||||
|
||||
if (no_csum) {
|
||||
struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base;
|
||||
check = &iph->check;
|
||||
}
|
||||
|
||||
tcp4_frame_conns[tcp4_payload_used] = conn;
|
||||
|
||||
iov = tcp4_l2_iov[tcp4_payload_used++];
|
||||
l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq);
|
||||
iov[TCP_IOV_PAYLOAD].iov_len = l4len;
|
||||
if (tcp4_payload_used > TCP_FRAMES_MEM - 1)
|
||||
tcp_payload_flush(c);
|
||||
} else if (CONN_V6(conn)) {
|
||||
tcp6_frame_conns[tcp6_payload_used] = conn;
|
||||
|
||||
iov = tcp6_l2_iov[tcp6_payload_used++];
|
||||
l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, NULL, seq);
|
||||
iov[TCP_IOV_PAYLOAD].iov_len = l4len;
|
||||
if (tcp6_payload_used > TCP_FRAMES_MEM - 1)
|
||||
tcp_payload_flush(c);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* tcp_buf_data_from_sock() - Handle new data from socket, queue to tap, in window
|
||||
* @c: Execution context
|
||||
* @conn: Connection pointer
|
||||
*
|
||||
* Return: negative on connection reset, 0 otherwise
|
||||
*
|
||||
* #syscalls recvmsg
|
||||
*/
|
||||
int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
|
||||
{
|
||||
uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap;
|
||||
int fill_bufs, send_bufs = 0, last_len, iov_rem = 0;
|
||||
int sendlen, len, dlen, v4 = CONN_V4(conn);
|
||||
int s = conn->sock, i, ret = 0;
|
||||
struct msghdr mh_sock = { 0 };
|
||||
uint16_t mss = MSS_GET(conn);
|
||||
uint32_t already_sent, seq;
|
||||
struct iovec *iov;
|
||||
|
||||
/* How much have we read/sent since last received ack ? */
|
||||
already_sent = conn->seq_to_tap - conn->seq_ack_from_tap;
|
||||
|
||||
if (SEQ_LT(already_sent, 0)) {
|
||||
/* RFC 761, section 2.1. */
|
||||
flow_trace(conn, "ACK sequence gap: ACK for %u, sent: %u",
|
||||
conn->seq_ack_from_tap, conn->seq_to_tap);
|
||||
conn->seq_to_tap = conn->seq_ack_from_tap;
|
||||
already_sent = 0;
|
||||
if (tcp_set_peek_offset(s, 0)) {
|
||||
tcp_rst(c, conn);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!wnd_scaled || already_sent >= wnd_scaled) {
|
||||
conn_flag(c, conn, STALLED);
|
||||
conn_flag(c, conn, ACK_FROM_TAP_DUE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Set up buffer descriptors we'll fill completely and partially. */
|
||||
fill_bufs = DIV_ROUND_UP(wnd_scaled - already_sent, mss);
|
||||
if (fill_bufs > TCP_FRAMES) {
|
||||
fill_bufs = TCP_FRAMES;
|
||||
iov_rem = 0;
|
||||
} else {
|
||||
iov_rem = (wnd_scaled - already_sent) % mss;
|
||||
}
|
||||
|
||||
/* Prepare iov according to kernel capability */
|
||||
if (!peek_offset_cap) {
|
||||
mh_sock.msg_iov = iov_sock;
|
||||
iov_sock[0].iov_base = tcp_buf_discard;
|
||||
iov_sock[0].iov_len = already_sent;
|
||||
mh_sock.msg_iovlen = fill_bufs + 1;
|
||||
} else {
|
||||
mh_sock.msg_iov = &iov_sock[1];
|
||||
mh_sock.msg_iovlen = fill_bufs;
|
||||
}
|
||||
|
||||
if (( v4 && tcp4_payload_used + fill_bufs > TCP_FRAMES_MEM) ||
|
||||
(!v4 && tcp6_payload_used + fill_bufs > TCP_FRAMES_MEM)) {
|
||||
tcp_payload_flush(c);
|
||||
|
||||
/* Silence Coverity CWE-125 false positive */
|
||||
tcp4_payload_used = tcp6_payload_used = 0;
|
||||
}
|
||||
|
||||
for (i = 0, iov = iov_sock + 1; i < fill_bufs; i++, iov++) {
|
||||
if (v4)
|
||||
iov->iov_base = &tcp4_payload[tcp4_payload_used + i].data;
|
||||
else
|
||||
iov->iov_base = &tcp6_payload[tcp6_payload_used + i].data;
|
||||
iov->iov_len = mss;
|
||||
}
|
||||
if (iov_rem)
|
||||
iov_sock[fill_bufs].iov_len = iov_rem;
|
||||
|
||||
/* Receive into buffers, don't dequeue until acknowledged by guest. */
|
||||
do
|
||||
len = recvmsg(s, &mh_sock, MSG_PEEK);
|
||||
while (len < 0 && errno == EINTR);
|
||||
|
||||
if (len < 0)
|
||||
goto err;
|
||||
|
||||
if (!len) {
|
||||
if ((conn->events & (SOCK_FIN_RCVD | TAP_FIN_SENT)) == SOCK_FIN_RCVD) {
|
||||
if ((ret = tcp_buf_send_flag(c, conn, FIN | ACK))) {
|
||||
tcp_rst(c, conn);
|
||||
return ret;
|
||||
}
|
||||
|
||||
conn_event(c, conn, TAP_FIN_SENT);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
sendlen = len;
|
||||
if (!peek_offset_cap)
|
||||
sendlen -= already_sent;
|
||||
|
||||
if (sendlen <= 0) {
|
||||
conn_flag(c, conn, STALLED);
|
||||
return 0;
|
||||
}
|
||||
|
||||
conn_flag(c, conn, ~STALLED);
|
||||
|
||||
send_bufs = DIV_ROUND_UP(sendlen, mss);
|
||||
last_len = sendlen - (send_bufs - 1) * mss;
|
||||
|
||||
/* Likely, some new data was acked too. */
|
||||
tcp_update_seqack_wnd(c, conn, 0, NULL);
|
||||
|
||||
/* Finally, queue to tap */
|
||||
dlen = mss;
|
||||
seq = conn->seq_to_tap;
|
||||
for (i = 0; i < send_bufs; i++) {
|
||||
int no_csum = i && i != send_bufs - 1 && tcp4_payload_used;
|
||||
|
||||
if (i == send_bufs - 1)
|
||||
dlen = last_len;
|
||||
|
||||
tcp_data_to_tap(c, conn, dlen, no_csum, seq);
|
||||
seq += dlen;
|
||||
}
|
||||
|
||||
conn_flag(c, conn, ACK_FROM_TAP_DUE);
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
if (errno != EAGAIN && errno != EWOULDBLOCK) {
|
||||
ret = -errno;
|
||||
tcp_rst(c, conn);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
16
tcp_buf.h
Normal file
16
tcp_buf.h
Normal file
@@ -0,0 +1,16 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
* Copyright (c) 2021 Red Hat GmbH
|
||||
* Author: Stefano Brivio <sbrivio@redhat.com>
|
||||
*/
|
||||
|
||||
#ifndef TCP_BUF_H
#define TCP_BUF_H

/* Only pointers to these types appear in the prototypes below: forward
 * declarations make this header usable regardless of inclusion order.
 */
struct ctx;
struct tcp_tap_conn;

void tcp_sock4_iov_init(const struct ctx *c);
void tcp_sock6_iov_init(const struct ctx *c);
void tcp_flags_flush(const struct ctx *c);
void tcp_payload_flush(const struct ctx *c);
int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn);
int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags);

#endif /* TCP_BUF_H */
|
58
tcp_conn.h
58
tcp_conn.h
@@ -13,19 +13,16 @@
|
||||
* struct tcp_tap_conn - Descriptor for a TCP connection (not spliced)
|
||||
* @f: Generic flow information
|
||||
* @in_epoll: Is the connection in the epoll set?
|
||||
* @retrans: Number of retransmissions occurred due to ACK_TIMEOUT
|
||||
* @ws_from_tap: Window scaling factor advertised from tap/guest
|
||||
* @ws_to_tap: Window scaling factor advertised to tap/guest
|
||||
* @tap_mss: MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS
|
||||
* @sock: Socket descriptor number
|
||||
* @events: Connection events, implying connection states
|
||||
* @timer: timerfd descriptor for timeout events
|
||||
* @flags: Connection flags representing internal attributes
|
||||
* @retrans: Number of retransmissions occurred due to ACK_TIMEOUT
|
||||
* @ws_from_tap: Window scaling factor advertised from tap/guest
|
||||
* @ws_to_tap: Window scaling factor advertised to tap/guest
|
||||
* @sndbuf: Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS
|
||||
* @seq_dup_ack_approx: Last duplicate ACK number sent to tap
|
||||
* @faddr: Guest side forwarding address (guest's remote address)
|
||||
* @eport: Guest side endpoint port (guest's local port)
|
||||
* @fport: Guest side forwarding port (guest's remote port)
|
||||
* @wnd_from_tap: Last window size from tap, unscaled (as received)
|
||||
* @wnd_to_tap: Sending window advertised to tap, unscaled (as sent)
|
||||
* @seq_to_tap: Next sequence for packets to tap
|
||||
@@ -49,6 +46,10 @@ struct tcp_tap_conn {
|
||||
unsigned int ws_from_tap :TCP_WS_BITS;
|
||||
unsigned int ws_to_tap :TCP_WS_BITS;
|
||||
|
||||
#define TCP_MSS_BITS 14
|
||||
unsigned int tap_mss :TCP_MSS_BITS;
|
||||
#define MSS_SET(conn, mss) (conn->tap_mss = (mss >> (16 - TCP_MSS_BITS)))
|
||||
#define MSS_GET(conn) (conn->tap_mss << (16 - TCP_MSS_BITS))
|
||||
|
||||
int sock :FD_REF_BITS;
|
||||
|
||||
@@ -77,13 +78,6 @@ struct tcp_tap_conn {
|
||||
#define ACK_TO_TAP_DUE BIT(3)
|
||||
#define ACK_FROM_TAP_DUE BIT(4)
|
||||
|
||||
|
||||
#define TCP_MSS_BITS 14
|
||||
unsigned int tap_mss :TCP_MSS_BITS;
|
||||
#define MSS_SET(conn, mss) (conn->tap_mss = (mss >> (16 - TCP_MSS_BITS)))
|
||||
#define MSS_GET(conn) (conn->tap_mss << (16 - TCP_MSS_BITS))
|
||||
|
||||
|
||||
#define SNDBUF_BITS 24
|
||||
unsigned int sndbuf :SNDBUF_BITS;
|
||||
#define SNDBUF_SET(conn, bytes) (conn->sndbuf = ((bytes) >> (32 - SNDBUF_BITS)))
|
||||
@@ -91,11 +85,6 @@ struct tcp_tap_conn {
|
||||
|
||||
uint8_t seq_dup_ack_approx;
|
||||
|
||||
|
||||
union inany_addr faddr;
|
||||
in_port_t eport;
|
||||
in_port_t fport;
|
||||
|
||||
uint16_t wnd_from_tap;
|
||||
uint16_t wnd_to_tap;
|
||||
|
||||
@@ -109,43 +98,38 @@ struct tcp_tap_conn {
|
||||
/**
|
||||
* struct tcp_splice_conn - Descriptor for a spliced TCP connection
|
||||
* @f: Generic flow information
|
||||
* @in_epoll: Is the connection in the epoll set?
|
||||
* @s: File descriptor for sockets
|
||||
* @pipe: File descriptors for pipes
|
||||
* @events: Events observed/actions performed on connection
|
||||
* @flags: Connection flags (attributes, not events)
|
||||
* @read: Bytes read (not fully written to other side in one shot)
|
||||
* @written: Bytes written (not fully written from one other side read)
|
||||
*/
|
||||
* @events: Events observed/actions performed on connection
|
||||
* @flags: Connection flags (attributes, not events)
|
||||
* @in_epoll: Is the connection in the epoll set?
|
||||
*/
|
||||
struct tcp_splice_conn {
|
||||
/* Must be first element */
|
||||
struct flow_common f;
|
||||
|
||||
bool in_epoll :1;
|
||||
int s[SIDES];
|
||||
int pipe[SIDES][2];
|
||||
|
||||
uint32_t read[SIDES];
|
||||
uint32_t written[SIDES];
|
||||
|
||||
uint8_t events;
|
||||
#define SPLICE_CLOSED 0
|
||||
#define SPLICE_CONNECT BIT(0)
|
||||
#define SPLICE_ESTABLISHED BIT(1)
|
||||
#define OUT_WAIT_0 BIT(2)
|
||||
#define OUT_WAIT_1 BIT(3)
|
||||
#define FIN_RCVD_0 BIT(4)
|
||||
#define FIN_RCVD_1 BIT(5)
|
||||
#define FIN_SENT_0 BIT(6)
|
||||
#define FIN_SENT_1 BIT(7)
|
||||
#define OUT_WAIT(sidei_) ((sidei_) ? BIT(3) : BIT(2))
|
||||
#define FIN_RCVD(sidei_) ((sidei_) ? BIT(5) : BIT(4))
|
||||
#define FIN_SENT(sidei_) ((sidei_) ? BIT(7) : BIT(6))
|
||||
|
||||
uint8_t flags;
|
||||
#define SPLICE_V6 BIT(0)
|
||||
#define RCVLOWAT_SET_0 BIT(1)
|
||||
#define RCVLOWAT_SET_1 BIT(2)
|
||||
#define RCVLOWAT_ACT_0 BIT(3)
|
||||
#define RCVLOWAT_ACT_1 BIT(4)
|
||||
#define CLOSING BIT(5)
|
||||
#define RCVLOWAT_SET(sidei_) ((sidei_) ? BIT(1) : BIT(0))
|
||||
#define RCVLOWAT_ACT(sidei_) ((sidei_) ? BIT(3) : BIT(2))
|
||||
#define CLOSING BIT(4)
|
||||
|
||||
uint32_t read[SIDES];
|
||||
uint32_t written[SIDES];
|
||||
bool in_epoll :1;
|
||||
};
|
||||
|
||||
/* Socket pools */
|
||||
|
100
tcp_internal.h
Normal file
100
tcp_internal.h
Normal file
@@ -0,0 +1,100 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
* Copyright (c) 2021 Red Hat GmbH
|
||||
* Author: Stefano Brivio <sbrivio@redhat.com>
|
||||
*/
|
||||
|
||||
#ifndef TCP_INTERNAL_H
|
||||
#define TCP_INTERNAL_H
|
||||
|
||||
#define MAX_WS 8
|
||||
#define MAX_WINDOW (1 << (16 + (MAX_WS)))
|
||||
|
||||
#define MSS4 ROUND_DOWN(IP_MAX_MTU - \
|
||||
sizeof(struct tcphdr) - \
|
||||
sizeof(struct iphdr), \
|
||||
sizeof(uint32_t))
|
||||
#define MSS6 ROUND_DOWN(IP_MAX_MTU - \
|
||||
sizeof(struct tcphdr) - \
|
||||
sizeof(struct ipv6hdr), \
|
||||
sizeof(uint32_t))
|
||||
|
||||
#define SEQ_LE(a, b) ((b) - (a) < MAX_WINDOW)
|
||||
#define SEQ_LT(a, b) ((b) - (a) - 1 < MAX_WINDOW)
|
||||
#define SEQ_GE(a, b) ((a) - (b) < MAX_WINDOW)
|
||||
#define SEQ_GT(a, b) ((a) - (b) - 1 < MAX_WINDOW)
|
||||
|
||||
#define FIN (1 << 0)
|
||||
#define SYN (1 << 1)
|
||||
#define RST (1 << 2)
|
||||
#define ACK (1 << 4)
|
||||
|
||||
/* Flags for internal usage */
|
||||
#define DUP_ACK (1 << 5)
|
||||
#define OPT_EOL 0
|
||||
#define OPT_NOP 1
|
||||
#define OPT_MSS 2
|
||||
#define OPT_MSS_LEN 4
|
||||
#define OPT_WS 3
|
||||
#define OPT_WS_LEN 3
|
||||
#define OPT_SACKP 4
|
||||
#define OPT_SACK 5
|
||||
#define OPT_TS 8
|
||||
|
||||
#define TAPSIDE(conn_) ((conn_)->f.pif[1] == PIF_TAP)
|
||||
#define TAPFLOW(conn_) (&((conn_)->f.side[TAPSIDE(conn_)]))
|
||||
#define TAP_SIDX(conn_) (FLOW_SIDX((conn_), TAPSIDE(conn_)))
|
||||
|
||||
#define CONN_V4(conn) (!!inany_v4(&TAPFLOW(conn)->faddr))
|
||||
#define CONN_V6(conn) (!CONN_V4(conn))
|
||||
|
||||
/*
|
||||
* enum tcp_iov_parts - I/O vector parts for one TCP frame
|
||||
* @TCP_IOV_TAP tap backend specific header
|
||||
* @TCP_IOV_ETH Ethernet header
|
||||
* @TCP_IOV_IP IP (v4/v6) header
|
||||
* @TCP_IOV_PAYLOAD IP payload (TCP header + data)
|
||||
* @TCP_NUM_IOVS the number of entries in the iovec array
|
||||
*/
|
||||
enum tcp_iov_parts {
|
||||
TCP_IOV_TAP = 0,
|
||||
TCP_IOV_ETH = 1,
|
||||
TCP_IOV_IP = 2,
|
||||
TCP_IOV_PAYLOAD = 3,
|
||||
TCP_NUM_IOVS
|
||||
};
|
||||
|
||||
extern char tcp_buf_discard [MAX_WINDOW];
|
||||
|
||||
void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
|
||||
unsigned long flag);
|
||||
#define conn_flag(c, conn, flag) \
|
||||
do { \
|
||||
flow_trace(conn, "flag at %s:%i", __func__, __LINE__); \
|
||||
conn_flag_do(c, conn, flag); \
|
||||
} while (0)
|
||||
|
||||
|
||||
void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn,
|
||||
unsigned long event);
|
||||
#define conn_event(c, conn, event) \
|
||||
do { \
|
||||
flow_trace(conn, "event at %s:%i", __func__, __LINE__); \
|
||||
conn_event_do(c, conn, event); \
|
||||
} while (0)
|
||||
|
||||
void tcp_rst_do(struct ctx *c, struct tcp_tap_conn *conn);
|
||||
#define tcp_rst(c, conn) \
|
||||
do { \
|
||||
flow_dbg((conn), "TCP reset at %s:%i", __func__, __LINE__); \
|
||||
tcp_rst_do(c, conn); \
|
||||
} while (0)
|
||||
|
||||
size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
|
||||
struct iovec *iov, size_t dlen,
|
||||
const uint16_t *check, uint32_t seq);
|
||||
int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
|
||||
int force_seq, struct tcp_info *tinfo);
|
||||
int tcp_prepare_flags(struct ctx *c, struct tcp_tap_conn *conn, int flags,
|
||||
struct tcphdr *th, char *data, size_t *optlen);
|
||||
|
||||
#endif /* TCP_INTERNAL_H */
|
264
tcp_splice.c
264
tcp_splice.c
@@ -73,10 +73,7 @@ static int ns_sock_pool6 [TCP_SOCK_POOL_SIZE];
|
||||
/* Pool of pre-opened pipes */
|
||||
static int splice_pipe_pool [TCP_SPLICE_PIPE_POOL_SIZE][2];
|
||||
|
||||
#define CONN_V6(x) (x->flags & SPLICE_V6)
|
||||
#define CONN_V4(x) (!CONN_V6(x))
|
||||
#define CONN_HAS(conn, set) ((conn->events & (set)) == (set))
|
||||
#define CONN(idx) (&FLOW(idx)->tcp_splice)
|
||||
#define CONN_HAS(conn, set) (((conn)->events & (set)) == (set))
|
||||
|
||||
/* Display strings for connection events */
|
||||
static const char *tcp_splice_event_str[] __attribute((__unused__)) = {
|
||||
@@ -94,6 +91,24 @@ static const char *tcp_splice_flag_str[] __attribute((__unused__)) = {
|
||||
static int tcp_sock_refill_ns(void *arg);
|
||||
static int tcp_conn_sock_ns(const struct ctx *c, sa_family_t af);
|
||||
|
||||
/**
|
||||
* conn_at_sidx() - Get spliced TCP connection specific flow at given sidx
|
||||
* @sidx: Flow and side to retrieve
|
||||
*
|
||||
* Return: Spliced TCP connection at @sidx, or NULL if @sidx is invalid.
|
||||
* Asserts if the flow at @sidx is not FLOW_TCP_SPLICE.
|
||||
*/
|
||||
static struct tcp_splice_conn *conn_at_sidx(flow_sidx_t sidx)
|
||||
{
|
||||
union flow *flow = flow_at_sidx(sidx);
|
||||
|
||||
if (!flow)
|
||||
return NULL;
|
||||
|
||||
ASSERT(flow->f.type == FLOW_TCP_SPLICE);
|
||||
return &flow->tcp_splice;
|
||||
}
|
||||
|
||||
/**
|
||||
* tcp_splice_conn_epoll_events() - epoll events masks for given state
|
||||
* @events: Connection event flags
|
||||
@@ -102,19 +117,22 @@ static int tcp_conn_sock_ns(const struct ctx *c, sa_family_t af);
|
||||
static void tcp_splice_conn_epoll_events(uint16_t events,
|
||||
struct epoll_event ev[])
|
||||
{
|
||||
ev[0].events = ev[1].events = 0;
|
||||
unsigned sidei;
|
||||
|
||||
flow_foreach_sidei(sidei)
|
||||
ev[sidei].events = 0;
|
||||
|
||||
if (events & SPLICE_ESTABLISHED) {
|
||||
if (!(events & FIN_SENT_1))
|
||||
ev[0].events = EPOLLIN | EPOLLRDHUP;
|
||||
if (!(events & FIN_SENT_0))
|
||||
ev[1].events = EPOLLIN | EPOLLRDHUP;
|
||||
flow_foreach_sidei(sidei) {
|
||||
if (!(events & FIN_SENT(!sidei)))
|
||||
ev[sidei].events = EPOLLIN | EPOLLRDHUP;
|
||||
}
|
||||
} else if (events & SPLICE_CONNECT) {
|
||||
ev[1].events = EPOLLOUT;
|
||||
}
|
||||
|
||||
ev[0].events |= (events & OUT_WAIT_0) ? EPOLLOUT : 0;
|
||||
ev[1].events |= (events & OUT_WAIT_1) ? EPOLLOUT : 0;
|
||||
flow_foreach_sidei(sidei)
|
||||
ev[sidei].events |= (events & OUT_WAIT(sidei)) ? EPOLLOUT : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -241,25 +259,25 @@ static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn,
|
||||
*/
|
||||
bool tcp_splice_flow_defer(struct tcp_splice_conn *conn)
|
||||
{
|
||||
unsigned side;
|
||||
unsigned sidei;
|
||||
|
||||
if (!(conn->flags & CLOSING))
|
||||
return false;
|
||||
|
||||
for (side = 0; side < SIDES; side++) {
|
||||
flow_foreach_sidei(sidei) {
|
||||
/* Flushing might need to block: don't recycle them. */
|
||||
if (conn->pipe[side][0] >= 0) {
|
||||
close(conn->pipe[side][0]);
|
||||
close(conn->pipe[side][1]);
|
||||
conn->pipe[side][0] = conn->pipe[side][1] = -1;
|
||||
if (conn->pipe[sidei][0] >= 0) {
|
||||
close(conn->pipe[sidei][0]);
|
||||
close(conn->pipe[sidei][1]);
|
||||
conn->pipe[sidei][0] = conn->pipe[sidei][1] = -1;
|
||||
}
|
||||
|
||||
if (conn->s[side] >= 0) {
|
||||
close(conn->s[side]);
|
||||
conn->s[side] = -1;
|
||||
if (conn->s[sidei] >= 0) {
|
||||
close(conn->s[sidei]);
|
||||
conn->s[sidei] = -1;
|
||||
}
|
||||
|
||||
conn->read[side] = conn->written[side] = 0;
|
||||
conn->read[sidei] = conn->written[sidei] = 0;
|
||||
}
|
||||
|
||||
conn->events = SPLICE_CLOSED;
|
||||
@@ -279,33 +297,33 @@ bool tcp_splice_flow_defer(struct tcp_splice_conn *conn)
|
||||
static int tcp_splice_connect_finish(const struct ctx *c,
|
||||
struct tcp_splice_conn *conn)
|
||||
{
|
||||
unsigned side;
|
||||
unsigned sidei;
|
||||
int i = 0;
|
||||
|
||||
for (side = 0; side < SIDES; side++) {
|
||||
flow_foreach_sidei(sidei) {
|
||||
for (; i < TCP_SPLICE_PIPE_POOL_SIZE; i++) {
|
||||
if (splice_pipe_pool[i][0] >= 0) {
|
||||
SWAP(conn->pipe[side][0],
|
||||
SWAP(conn->pipe[sidei][0],
|
||||
splice_pipe_pool[i][0]);
|
||||
SWAP(conn->pipe[side][1],
|
||||
SWAP(conn->pipe[sidei][1],
|
||||
splice_pipe_pool[i][1]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (conn->pipe[side][0] < 0) {
|
||||
if (pipe2(conn->pipe[side], O_NONBLOCK | O_CLOEXEC)) {
|
||||
if (conn->pipe[sidei][0] < 0) {
|
||||
if (pipe2(conn->pipe[sidei], O_NONBLOCK | O_CLOEXEC)) {
|
||||
flow_err(conn, "cannot create %d->%d pipe: %s",
|
||||
side, !side, strerror(errno));
|
||||
sidei, !sidei, strerror(errno));
|
||||
conn_flag(c, conn, CLOSING);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (fcntl(conn->pipe[side][0], F_SETPIPE_SZ,
|
||||
if (fcntl(conn->pipe[sidei][0], F_SETPIPE_SZ,
|
||||
c->tcp.pipe_size)) {
|
||||
flow_trace(conn,
|
||||
"cannot set %d->%d pipe size to %zu",
|
||||
side, !side, c->tcp.pipe_size);
|
||||
sidei, !sidei, c->tcp.pipe_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -320,31 +338,20 @@ static int tcp_splice_connect_finish(const struct ctx *c,
|
||||
* tcp_splice_connect() - Create and connect socket for new spliced connection
|
||||
* @c: Execution context
|
||||
* @conn: Connection pointer
|
||||
* @af: Address family
|
||||
* @pif: pif on which to create socket
|
||||
* @port: Destination port, host order
|
||||
*
|
||||
* Return: 0 for connect() succeeded or in progress, negative value on error
|
||||
*/
|
||||
static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
|
||||
sa_family_t af, uint8_t pif, in_port_t port)
|
||||
static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn)
|
||||
{
|
||||
struct sockaddr_in6 addr6 = {
|
||||
.sin6_family = AF_INET6,
|
||||
.sin6_port = htons(port),
|
||||
.sin6_addr = IN6ADDR_LOOPBACK_INIT,
|
||||
};
|
||||
struct sockaddr_in addr4 = {
|
||||
.sin_family = AF_INET,
|
||||
.sin_port = htons(port),
|
||||
.sin_addr = IN4ADDR_LOOPBACK_INIT,
|
||||
};
|
||||
const struct sockaddr *sa;
|
||||
const struct flowside *tgt = &conn->f.side[TGTSIDE];
|
||||
sa_family_t af = inany_v4(&tgt->eaddr) ? AF_INET : AF_INET6;
|
||||
uint8_t tgtpif = conn->f.pif[TGTSIDE];
|
||||
union sockaddr_inany sa;
|
||||
socklen_t sl;
|
||||
|
||||
if (pif == PIF_HOST)
|
||||
if (tgtpif == PIF_HOST)
|
||||
conn->s[1] = tcp_conn_sock(c, af);
|
||||
else if (pif == PIF_SPLICE)
|
||||
else if (tgtpif == PIF_SPLICE)
|
||||
conn->s[1] = tcp_conn_sock_ns(c, af);
|
||||
else
|
||||
ASSERT(0);
|
||||
@@ -358,15 +365,9 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
|
||||
conn->s[1]);
|
||||
}
|
||||
|
||||
if (CONN_V6(conn)) {
|
||||
sa = (struct sockaddr *)&addr6;
|
||||
sl = sizeof(addr6);
|
||||
} else {
|
||||
sa = (struct sockaddr *)&addr4;
|
||||
sl = sizeof(addr4);
|
||||
}
|
||||
pif_sockaddr(c, &sa, &sl, tgtpif, &tgt->eaddr, tgt->eport);
|
||||
|
||||
if (connect(conn->s[1], sa, sl)) {
|
||||
if (connect(conn->s[1], &sa.sa, sl)) {
|
||||
if (errno != EINPROGRESS) {
|
||||
flow_trace(conn, "Couldn't connect socket for splice: %s",
|
||||
strerror(errno));
|
||||
@@ -413,68 +414,19 @@ static int tcp_conn_sock_ns(const struct ctx *c, sa_family_t af)
|
||||
/**
|
||||
* tcp_splice_conn_from_sock() - Attempt to init state for a spliced connection
|
||||
* @c: Execution context
|
||||
* @pif0: pif id of side 0
|
||||
* @dstport: Side 0 destination port of connection
|
||||
* @flow: flow to initialise
|
||||
* @s0: Accepted (side 0) socket
|
||||
* @sa: Peer address of connection
|
||||
*
|
||||
* Return: true if able to create a spliced connection, false otherwise
|
||||
* #syscalls:pasta setsockopt
|
||||
*/
|
||||
bool tcp_splice_conn_from_sock(const struct ctx *c,
|
||||
uint8_t pif0, in_port_t dstport,
|
||||
union flow *flow, int s0,
|
||||
const union sockaddr_inany *sa)
|
||||
void tcp_splice_conn_from_sock(const struct ctx *c, union flow *flow, int s0)
|
||||
{
|
||||
struct tcp_splice_conn *conn;
|
||||
union inany_addr src;
|
||||
in_port_t srcport;
|
||||
sa_family_t af;
|
||||
uint8_t tgtpif;
|
||||
struct tcp_splice_conn *conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE,
|
||||
tcp_splice);
|
||||
|
||||
if (c->mode != MODE_PASTA)
|
||||
return false;
|
||||
ASSERT(c->mode == MODE_PASTA);
|
||||
|
||||
inany_from_sockaddr(&src, &srcport, sa);
|
||||
af = inany_v4(&src) ? AF_INET : AF_INET6;
|
||||
|
||||
switch (pif0) {
|
||||
case PIF_SPLICE:
|
||||
if (!inany_is_loopback(&src)) {
|
||||
char str[INANY_ADDRSTRLEN];
|
||||
|
||||
/* We can't use flow_err() etc. because we haven't set
|
||||
* the flow type yet
|
||||
*/
|
||||
warn("Bad source address %s for splice, closing",
|
||||
inany_ntop(&src, str, sizeof(str)));
|
||||
|
||||
/* We *don't* want to fall back to tap */
|
||||
flow_alloc_cancel(flow);
|
||||
return true;
|
||||
}
|
||||
|
||||
tgtpif = PIF_HOST;
|
||||
dstport += c->tcp.fwd_out.delta[dstport];
|
||||
break;
|
||||
|
||||
case PIF_HOST:
|
||||
if (!inany_is_loopback(&src))
|
||||
return false;
|
||||
|
||||
tgtpif = PIF_SPLICE;
|
||||
dstport += c->tcp.fwd_in.delta[dstport];
|
||||
break;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
flow_target(flow, tgtpif);
|
||||
conn = FLOW_SET_TYPE(flow, FLOW_TCP_SPLICE, tcp_splice);
|
||||
|
||||
conn->flags = af == AF_INET ? 0 : SPLICE_V6;
|
||||
conn->s[0] = s0;
|
||||
conn->s[1] = -1;
|
||||
conn->pipe[0][0] = conn->pipe[0][1] = -1;
|
||||
@@ -483,12 +435,10 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
|
||||
if (setsockopt(s0, SOL_TCP, TCP_QUICKACK, &((int){ 1 }), sizeof(int)))
|
||||
flow_trace(conn, "failed to set TCP_QUICKACK on %i", s0);
|
||||
|
||||
if (tcp_splice_connect(c, conn, af, tgtpif, dstport))
|
||||
if (tcp_splice_connect(c, conn))
|
||||
conn_flag(c, conn, CLOSING);
|
||||
|
||||
FLOW_ACTIVATE(conn);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -502,8 +452,8 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
|
||||
void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
|
||||
uint32_t events)
|
||||
{
|
||||
struct tcp_splice_conn *conn = CONN(ref.flowside.flow);
|
||||
unsigned side = ref.flowside.side, fromside;
|
||||
struct tcp_splice_conn *conn = conn_at_sidx(ref.flowside);
|
||||
unsigned evsidei = ref.flowside.sidei, fromsidei;
|
||||
uint8_t lowat_set_flag, lowat_act_flag;
|
||||
int eof, never_read;
|
||||
|
||||
@@ -535,30 +485,31 @@ void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
|
||||
}
|
||||
|
||||
if (events & EPOLLOUT) {
|
||||
fromside = !side;
|
||||
conn_event(c, conn, side == 0 ? ~OUT_WAIT_0 : ~OUT_WAIT_1);
|
||||
fromsidei = !evsidei;
|
||||
conn_event(c, conn, ~OUT_WAIT(evsidei));
|
||||
} else {
|
||||
fromside = side;
|
||||
fromsidei = evsidei;
|
||||
}
|
||||
|
||||
if (events & EPOLLRDHUP)
|
||||
/* For side 0 this is fake, but implied */
|
||||
conn_event(c, conn, side == 0 ? FIN_RCVD_0 : FIN_RCVD_1);
|
||||
conn_event(c, conn, FIN_RCVD(evsidei));
|
||||
|
||||
swap:
|
||||
eof = 0;
|
||||
never_read = 1;
|
||||
|
||||
lowat_set_flag = fromside == 0 ? RCVLOWAT_SET_0 : RCVLOWAT_SET_1;
|
||||
lowat_act_flag = fromside == 0 ? RCVLOWAT_ACT_0 : RCVLOWAT_ACT_1;
|
||||
lowat_set_flag = RCVLOWAT_SET(fromsidei);
|
||||
lowat_act_flag = RCVLOWAT_ACT(fromsidei);
|
||||
|
||||
while (1) {
|
||||
ssize_t readlen, to_write = 0, written;
|
||||
int more = 0;
|
||||
|
||||
retry:
|
||||
readlen = splice(conn->s[fromside], NULL,
|
||||
conn->pipe[fromside][1], NULL, c->tcp.pipe_size,
|
||||
readlen = splice(conn->s[fromsidei], NULL,
|
||||
conn->pipe[fromsidei][1], NULL,
|
||||
c->tcp.pipe_size,
|
||||
SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
|
||||
flow_trace(conn, "%zi from read-side call", readlen);
|
||||
if (readlen < 0) {
|
||||
@@ -583,8 +534,8 @@ retry:
|
||||
}
|
||||
|
||||
eintr:
|
||||
written = splice(conn->pipe[fromside][0], NULL,
|
||||
conn->s[!fromside], NULL, to_write,
|
||||
written = splice(conn->pipe[fromsidei][0], NULL,
|
||||
conn->s[!fromsidei], NULL, to_write,
|
||||
SPLICE_F_MOVE | more | SPLICE_F_NONBLOCK);
|
||||
flow_trace(conn, "%zi from write-side call (passed %zi)",
|
||||
written, to_write);
|
||||
@@ -598,18 +549,23 @@ eintr:
|
||||
readlen > (long)c->tcp.pipe_size / 10) {
|
||||
int lowat = c->tcp.pipe_size / 4;
|
||||
|
||||
setsockopt(conn->s[fromside], SOL_SOCKET,
|
||||
SO_RCVLOWAT, &lowat, sizeof(lowat));
|
||||
|
||||
if (setsockopt(conn->s[fromsidei], SOL_SOCKET,
|
||||
SO_RCVLOWAT,
|
||||
&lowat, sizeof(lowat))) {
|
||||
flow_trace(conn,
|
||||
"Setting SO_RCVLOWAT %i: %s",
|
||||
lowat, strerror(errno));
|
||||
} else {
|
||||
conn_flag(c, conn, lowat_set_flag);
|
||||
conn_flag(c, conn, lowat_act_flag);
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
conn->read[fromside] += readlen > 0 ? readlen : 0;
|
||||
conn->written[fromside] += written > 0 ? written : 0;
|
||||
conn->read[fromsidei] += readlen > 0 ? readlen : 0;
|
||||
conn->written[fromsidei] += written > 0 ? written : 0;
|
||||
|
||||
if (written < 0) {
|
||||
if (errno == EINTR)
|
||||
@@ -618,11 +574,10 @@ eintr:
|
||||
if (errno != EAGAIN)
|
||||
goto close;
|
||||
|
||||
if (conn->read[fromside] == conn->written[fromside])
|
||||
if (conn->read[fromsidei] == conn->written[fromsidei])
|
||||
break;
|
||||
|
||||
conn_event(c, conn,
|
||||
fromside == 0 ? OUT_WAIT_1 : OUT_WAIT_0);
|
||||
conn_event(c, conn, OUT_WAIT(fromsidei));
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -638,27 +593,25 @@ eintr:
|
||||
break;
|
||||
}
|
||||
|
||||
if ((conn->events & FIN_RCVD_0) && !(conn->events & FIN_SENT_1)) {
|
||||
if (conn->read[fromside] == conn->written[fromside] && eof) {
|
||||
shutdown(conn->s[1], SHUT_WR);
|
||||
conn_event(c, conn, FIN_SENT_1);
|
||||
if (conn->read[fromsidei] == conn->written[fromsidei] && eof) {
|
||||
unsigned sidei;
|
||||
|
||||
flow_foreach_sidei(sidei) {
|
||||
if ((conn->events & FIN_RCVD(sidei)) &&
|
||||
!(conn->events & FIN_SENT(!sidei))) {
|
||||
shutdown(conn->s[!sidei], SHUT_WR);
|
||||
conn_event(c, conn, FIN_SENT(!sidei));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((conn->events & FIN_RCVD_1) && !(conn->events & FIN_SENT_0)) {
|
||||
if (conn->read[fromside] == conn->written[fromside] && eof) {
|
||||
shutdown(conn->s[0], SHUT_WR);
|
||||
conn_event(c, conn, FIN_SENT_0);
|
||||
}
|
||||
}
|
||||
|
||||
if (CONN_HAS(conn, FIN_SENT_0 | FIN_SENT_1))
|
||||
if (CONN_HAS(conn, FIN_SENT(0) | FIN_SENT(1)))
|
||||
goto close;
|
||||
|
||||
if ((events & (EPOLLIN | EPOLLOUT)) == (EPOLLIN | EPOLLOUT)) {
|
||||
events = EPOLLIN;
|
||||
|
||||
fromside = !fromside;
|
||||
fromsidei = !fromsidei;
|
||||
goto swap;
|
||||
}
|
||||
|
||||
@@ -736,6 +689,7 @@ static void tcp_splice_pipe_refill(const struct ctx *c)
|
||||
*
|
||||
* Return: 0
|
||||
*/
|
||||
/* cppcheck-suppress [constParameterCallback, unmatchedSuppression] */
|
||||
static int tcp_sock_refill_ns(void *arg)
|
||||
{
|
||||
const struct ctx *c = (const struct ctx *)arg;
|
||||
@@ -792,24 +746,22 @@ void tcp_splice_init(struct ctx *c)
|
||||
*/
|
||||
void tcp_splice_timer(const struct ctx *c, struct tcp_splice_conn *conn)
|
||||
{
|
||||
int side;
|
||||
unsigned sidei;
|
||||
|
||||
ASSERT(!(conn->flags & CLOSING));
|
||||
|
||||
for (side = 0; side < SIDES; side++) {
|
||||
uint8_t set = side == 0 ? RCVLOWAT_SET_0 : RCVLOWAT_SET_1;
|
||||
uint8_t act = side == 0 ? RCVLOWAT_ACT_0 : RCVLOWAT_ACT_1;
|
||||
|
||||
if ((conn->flags & set) && !(conn->flags & act)) {
|
||||
if (setsockopt(conn->s[side], SOL_SOCKET, SO_RCVLOWAT,
|
||||
flow_foreach_sidei(sidei) {
|
||||
if ((conn->flags & RCVLOWAT_SET(sidei)) &&
|
||||
!(conn->flags & RCVLOWAT_ACT(sidei))) {
|
||||
if (setsockopt(conn->s[sidei], SOL_SOCKET, SO_RCVLOWAT,
|
||||
&((int){ 1 }), sizeof(int))) {
|
||||
flow_trace(conn, "can't set SO_RCVLOWAT on %d",
|
||||
conn->s[side]);
|
||||
conn->s[sidei]);
|
||||
}
|
||||
conn_flag(c, conn, ~set);
|
||||
conn_flag(c, conn, ~RCVLOWAT_SET(sidei));
|
||||
}
|
||||
}
|
||||
|
||||
conn_flag(c, conn, ~RCVLOWAT_ACT_0);
|
||||
conn_flag(c, conn, ~RCVLOWAT_ACT_1);
|
||||
flow_foreach_sidei(sidei)
|
||||
conn_flag(c, conn, ~RCVLOWAT_ACT(sidei));
|
||||
}
|
||||
|
@@ -11,10 +11,7 @@ union sockaddr_inany;
|
||||
|
||||
void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
|
||||
uint32_t events);
|
||||
bool tcp_splice_conn_from_sock(const struct ctx *c,
|
||||
uint8_t pif0, in_port_t dstport,
|
||||
union flow *flow, int s0,
|
||||
const union sockaddr_inany *sa);
|
||||
void tcp_splice_conn_from_sock(const struct ctx *c, union flow *flow, int s0);
|
||||
void tcp_splice_init(struct ctx *c);
|
||||
|
||||
#endif /* TCP_SPLICE_H */
|
||||
|
35
udp.h
35
udp.h
@@ -9,10 +9,12 @@
|
||||
#define UDP_TIMER_INTERVAL 1000 /* ms */
|
||||
|
||||
void udp_portmap_clear(void);
|
||||
void udp_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events,
|
||||
const struct timespec *now);
|
||||
int udp_tap_handler(struct ctx *c, uint8_t pif, sa_family_t af,
|
||||
const void *saddr, const void *daddr,
|
||||
void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
|
||||
uint32_t events, const struct timespec *now);
|
||||
void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
|
||||
uint32_t events, const struct timespec *now);
|
||||
int udp_tap_handler(const struct ctx *c, uint8_t pif,
|
||||
sa_family_t af, const void *saddr, const void *daddr,
|
||||
const struct pool *p, int idx, const struct timespec *now);
|
||||
int udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
|
||||
const void *addr, const char *ifname, in_port_t port);
|
||||
@@ -21,37 +23,22 @@ void udp_timer(struct ctx *c, const struct timespec *now);
|
||||
void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s);
|
||||
|
||||
/**
|
||||
* union udp_epoll_ref - epoll reference portion for TCP connections
|
||||
* union udp_listen_epoll_ref - epoll reference for "listening" UDP sockets
|
||||
* @port: Source port for connected sockets, bound port otherwise
|
||||
* @pif: pif for this socket
|
||||
* @bound: Set if this file descriptor is a bound socket
|
||||
* @splice: Set if descriptor packets to be "spliced"
|
||||
* @orig: Set if a spliced socket which can originate "connections"
|
||||
* @v6: Set for IPv6 sockets or connections
|
||||
* @u32: Opaque u32 value of reference
|
||||
*/
|
||||
union udp_epoll_ref {
|
||||
union udp_listen_epoll_ref {
|
||||
struct {
|
||||
in_port_t port;
|
||||
uint8_t pif;
|
||||
bool splice:1,
|
||||
orig:1,
|
||||
v6:1;
|
||||
bool v6:1;
|
||||
};
|
||||
uint32_t u32;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* udp_fwd_ports - UDP specific port forwarding configuration
|
||||
* @f: Generic forwarding configuration
|
||||
* @rdelta: Reversed delta map to translate source ports on return packets
|
||||
*/
|
||||
struct udp_fwd_ports {
|
||||
struct fwd_ports f;
|
||||
in_port_t rdelta[NUM_PORTS];
|
||||
};
|
||||
|
||||
/**
|
||||
* struct udp_ctx - Execution context for UDP
|
||||
* @fwd_in: Port forwarding configuration for inbound packets
|
||||
@@ -59,8 +46,8 @@ struct udp_fwd_ports {
|
||||
* @timer_run: Timestamp of most recent timer run
|
||||
*/
|
||||
struct udp_ctx {
|
||||
struct udp_fwd_ports fwd_in;
|
||||
struct udp_fwd_ports fwd_out;
|
||||
struct fwd_ports fwd_in;
|
||||
struct fwd_ports fwd_out;
|
||||
struct timespec timer_run;
|
||||
};
|
||||
|
||||
|
27
udp_flow.h
Normal file
27
udp_flow.h
Normal file
@@ -0,0 +1,27 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
* Copyright Red Hat
|
||||
* Author: David Gibson <david@gibson.dropbear.id.au>
|
||||
*
|
||||
* UDP flow tracking data structures
|
||||
*/
|
||||
#ifndef UDP_FLOW_H
|
||||
#define UDP_FLOW_H
|
||||
|
||||
/**
|
||||
* struct udp_flow - Descriptor for a flow of UDP packets
|
||||
* @f: Generic flow information
|
||||
* @ts: Activity timestamp
|
||||
* @s: Socket fd (or -1) for each side of the flow
|
||||
*/
|
||||
struct udp_flow {
|
||||
/* Must be first element */
|
||||
struct flow_common f;
|
||||
|
||||
time_t ts;
|
||||
int s[SIDES];
|
||||
};
|
||||
|
||||
bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
|
||||
const struct timespec *now);
|
||||
|
||||
#endif /* UDP_FLOW_H */
|
217
util.c
217
util.c
@@ -25,6 +25,7 @@
|
||||
#include <time.h>
|
||||
#include <errno.h>
|
||||
#include <stdbool.h>
|
||||
#include <linux/errqueue.h>
|
||||
|
||||
#include "util.h"
|
||||
#include "iov.h"
|
||||
@@ -33,63 +34,50 @@
|
||||
#include "log.h"
|
||||
|
||||
/**
|
||||
* sock_l4() - Create and bind socket for given L4, add to epoll list
|
||||
* sock_l4_sa() - Create and bind socket to socket address, add to epoll list
|
||||
* @c: Execution context
|
||||
* @af: Address family, AF_INET or AF_INET6
|
||||
* @proto: Protocol number
|
||||
* @bind_addr: Address for binding, NULL for any
|
||||
* @type: epoll type
|
||||
* @sa: Socket address to bind to
|
||||
* @sl: Length of @sa
|
||||
* @ifname: Interface for binding, NULL for any
|
||||
* @port: Port, host order
|
||||
* @v6only: Set IPV6_V6ONLY socket option
|
||||
* @data: epoll reference portion for protocol handlers
|
||||
*
|
||||
* Return: newly created socket, negative error code on failure
|
||||
*/
|
||||
int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
|
||||
const void *bind_addr, const char *ifname, uint16_t port,
|
||||
uint32_t data)
|
||||
int sock_l4_sa(const struct ctx *c, enum epoll_type type,
|
||||
const void *sa, socklen_t sl,
|
||||
const char *ifname, bool v6only, uint32_t data)
|
||||
{
|
||||
union epoll_ref ref = { .data = data };
|
||||
struct sockaddr_in addr4 = {
|
||||
.sin_family = AF_INET,
|
||||
.sin_port = htons(port),
|
||||
{ 0 }, { 0 },
|
||||
};
|
||||
struct sockaddr_in6 addr6 = {
|
||||
.sin6_family = AF_INET6,
|
||||
.sin6_port = htons(port),
|
||||
0, IN6ADDR_ANY_INIT, 0,
|
||||
};
|
||||
const struct sockaddr *sa;
|
||||
bool dual_stack = false;
|
||||
int fd, sl, y = 1, ret;
|
||||
sa_family_t af = ((const struct sockaddr *)sa)->sa_family;
|
||||
union epoll_ref ref = { .type = type, .data = data };
|
||||
struct epoll_event ev;
|
||||
int fd, y = 1, ret;
|
||||
uint8_t proto;
|
||||
int socktype;
|
||||
|
||||
switch (proto) {
|
||||
case IPPROTO_TCP:
|
||||
ref.type = EPOLL_TYPE_TCP_LISTEN;
|
||||
switch (type) {
|
||||
case EPOLL_TYPE_TCP_LISTEN:
|
||||
proto = IPPROTO_TCP;
|
||||
socktype = SOCK_STREAM | SOCK_NONBLOCK;
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
ref.type = EPOLL_TYPE_UDP;
|
||||
case EPOLL_TYPE_UDP_LISTEN:
|
||||
case EPOLL_TYPE_UDP_REPLY:
|
||||
proto = IPPROTO_UDP;
|
||||
socktype = SOCK_DGRAM | SOCK_NONBLOCK;
|
||||
break;
|
||||
case IPPROTO_ICMP:
|
||||
case IPPROTO_ICMPV6:
|
||||
ref.type = EPOLL_TYPE_PING;
|
||||
case EPOLL_TYPE_PING:
|
||||
if (af == AF_INET)
|
||||
proto = IPPROTO_ICMP;
|
||||
else
|
||||
proto = IPPROTO_ICMPV6;
|
||||
socktype = SOCK_DGRAM | SOCK_NONBLOCK;
|
||||
break;
|
||||
default:
|
||||
return -EPFNOSUPPORT; /* Not implemented. */
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
if (af == AF_UNSPEC) {
|
||||
if (!DUAL_STACK_SOCKETS || bind_addr)
|
||||
return -EINVAL;
|
||||
dual_stack = true;
|
||||
af = AF_INET6;
|
||||
}
|
||||
|
||||
if (proto == IPPROTO_TCP)
|
||||
fd = socket(af, SOCK_STREAM | SOCK_NONBLOCK, proto);
|
||||
else
|
||||
fd = socket(af, SOCK_DGRAM | SOCK_NONBLOCK, proto);
|
||||
fd = socket(af, socktype, proto);
|
||||
|
||||
ret = -errno;
|
||||
if (fd < 0) {
|
||||
@@ -104,34 +92,21 @@ int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
|
||||
|
||||
ref.fd = fd;
|
||||
|
||||
if (af == AF_INET) {
|
||||
if (bind_addr)
|
||||
addr4.sin_addr = *(struct in_addr *)bind_addr;
|
||||
|
||||
sa = (const struct sockaddr *)&addr4;
|
||||
sl = sizeof(addr4);
|
||||
} else {
|
||||
if (bind_addr) {
|
||||
addr6.sin6_addr = *(struct in6_addr *)bind_addr;
|
||||
|
||||
if (!memcmp(bind_addr, &c->ip6.addr_ll,
|
||||
sizeof(c->ip6.addr_ll)))
|
||||
addr6.sin6_scope_id = c->ifi6;
|
||||
}
|
||||
|
||||
sa = (const struct sockaddr *)&addr6;
|
||||
sl = sizeof(addr6);
|
||||
|
||||
if (!dual_stack)
|
||||
if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
|
||||
&y, sizeof(y)))
|
||||
debug("Failed to set IPV6_V6ONLY on socket %i",
|
||||
fd);
|
||||
}
|
||||
if (v6only)
|
||||
if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &y, sizeof(y)))
|
||||
debug("Failed to set IPV6_V6ONLY on socket %i", fd);
|
||||
|
||||
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)))
|
||||
debug("Failed to set SO_REUSEADDR on socket %i", fd);
|
||||
|
||||
if (proto == IPPROTO_UDP) {
|
||||
int level = af == AF_INET ? IPPROTO_IP : IPPROTO_IPV6;
|
||||
int opt = af == AF_INET ? IP_RECVERR : IPV6_RECVERR;
|
||||
|
||||
if (setsockopt(fd, level, opt, &y, sizeof(y)))
|
||||
die_perror("Failed to set RECVERR on socket %i", fd);
|
||||
}
|
||||
|
||||
if (ifname && *ifname) {
|
||||
/* Supported since kernel version 5.7, commit c427bfec18f2
|
||||
* ("net: core: enable SO_BINDTODEVICE for non-root users"). If
|
||||
@@ -140,9 +115,12 @@ int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
|
||||
*/
|
||||
if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
|
||||
ifname, strlen(ifname))) {
|
||||
char str[SOCKADDR_STRLEN];
|
||||
|
||||
ret = -errno;
|
||||
warn("Can't bind %s socket for port %u to %s, closing",
|
||||
EPOLL_TYPE_STR(proto), port, ifname);
|
||||
warn("Can't bind %s socket for %s to %s, closing",
|
||||
EPOLL_TYPE_STR(proto),
|
||||
sockaddr_ntop(sa, str, sizeof(str)), ifname);
|
||||
close(fd);
|
||||
return ret;
|
||||
}
|
||||
@@ -154,14 +132,14 @@ int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
|
||||
* this is fine. This might also fail for ICMP because of a
|
||||
* broken SELinux policy, see icmp_tap_handler().
|
||||
*/
|
||||
if (proto != IPPROTO_ICMP && proto != IPPROTO_ICMPV6) {
|
||||
if (type != EPOLL_TYPE_PING) {
|
||||
ret = -errno;
|
||||
close(fd);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (proto == IPPROTO_TCP && listen(fd, 128) < 0) {
|
||||
if (type == EPOLL_TYPE_TCP_LISTEN && listen(fd, 128) < 0) {
|
||||
ret = -errno;
|
||||
warn("TCP socket listen: %s", strerror(-ret));
|
||||
close(fd);
|
||||
@@ -178,6 +156,59 @@ int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
|
||||
|
||||
return fd;
|
||||
}
|
||||
/**
|
||||
* sock_l4() - Create and bind socket for given L4, add to epoll list
|
||||
* @c: Execution context
|
||||
* @af: Address family: AF_INET, AF_INET6, or AF_UNSPEC (dual stack, no @bind_addr)
|
||||
* @type: epoll type
|
||||
* @bind_addr: Address for binding, NULL for any
|
||||
* @ifname: Interface for binding, NULL for any
|
||||
* @port: Port, host order
|
||||
* @data: epoll reference portion for protocol handlers
|
||||
*
|
||||
* Return: newly created socket, negative error code on failure
|
||||
*/
|
||||
int sock_l4(const struct ctx *c, sa_family_t af, enum epoll_type type,
|
||||
const void *bind_addr, const char *ifname, uint16_t port,
|
||||
uint32_t data)
|
||||
{
|
||||
switch (af) {
|
||||
case AF_INET: {
|
||||
struct sockaddr_in addr4 = {
|
||||
.sin_family = AF_INET,
|
||||
.sin_port = htons(port),
|
||||
{ 0 }, { 0 },
|
||||
};
|
||||
if (bind_addr)
|
||||
addr4.sin_addr = *(struct in_addr *)bind_addr;
|
||||
return sock_l4_sa(c, type, &addr4, sizeof(addr4), ifname,
|
||||
false, data);
|
||||
}
|
||||
|
||||
case AF_UNSPEC:
|
||||
if (!DUAL_STACK_SOCKETS || bind_addr)
|
||||
return -EINVAL;
|
||||
/* fallthrough */
|
||||
case AF_INET6: {
|
||||
struct sockaddr_in6 addr6 = {
|
||||
.sin6_family = AF_INET6,
|
||||
.sin6_port = htons(port),
|
||||
0, IN6ADDR_ANY_INIT, 0,
|
||||
};
|
||||
if (bind_addr) {
|
||||
addr6.sin6_addr = *(struct in6_addr *)bind_addr;
|
||||
|
||||
if (!memcmp(bind_addr, &c->ip6.addr_ll,
|
||||
sizeof(c->ip6.addr_ll)))
|
||||
addr6.sin6_scope_id = c->ifi6;
|
||||
}
|
||||
return sock_l4_sa(c, type, &addr6, sizeof(addr6), ifname,
|
||||
af == AF_INET6, data);
|
||||
}
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* sock_probe_mem() - Check if setting high SO_SNDBUF and SO_RCVBUF is allowed
|
||||
@@ -216,7 +247,7 @@ void sock_probe_mem(struct ctx *c)
|
||||
*
|
||||
* Return: difference in milliseconds
|
||||
*/
|
||||
int timespec_diff_ms(const struct timespec *a, const struct timespec *b)
|
||||
long timespec_diff_ms(const struct timespec *a, const struct timespec *b)
|
||||
{
|
||||
if (a->tv_nsec < b->tv_nsec) {
|
||||
return (b->tv_nsec - a->tv_nsec) / 1000000 +
|
||||
@@ -232,7 +263,7 @@ int timespec_diff_ms(const struct timespec *a, const struct timespec *b)
|
||||
* @map: Pointer to bitmap
|
||||
* @bit: Bit number to set
|
||||
*/
|
||||
void bitmap_set(uint8_t *map, int bit)
|
||||
void bitmap_set(uint8_t *map, unsigned bit)
|
||||
{
|
||||
unsigned long *word = (unsigned long *)map + BITMAP_WORD(bit);
|
||||
|
||||
@@ -244,7 +275,7 @@ void bitmap_set(uint8_t *map, int bit)
|
||||
* @map: Pointer to bitmap
|
||||
* @bit: Bit number to clear
|
||||
*/
|
||||
void bitmap_clear(uint8_t *map, int bit)
|
||||
void bitmap_clear(uint8_t *map, unsigned bit)
|
||||
{
|
||||
unsigned long *word = (unsigned long *)map + BITMAP_WORD(bit);
|
||||
|
||||
@@ -256,9 +287,9 @@ void bitmap_clear(uint8_t *map, int bit)
|
||||
* @map: Pointer to bitmap
|
||||
* @bit: Bit number to check
|
||||
*
|
||||
* Return: one if given bit is set, zero if it's not
|
||||
* Return: true if given bit is set, false if it's not
|
||||
*/
|
||||
int bitmap_isset(const uint8_t *map, int bit)
|
||||
bool bitmap_isset(const uint8_t *map, unsigned bit)
|
||||
{
|
||||
const unsigned long *word
|
||||
= (const unsigned long *)map + BITMAP_WORD(bit);
|
||||
@@ -298,7 +329,7 @@ void bitmap_or(uint8_t *dst, size_t size, const uint8_t *a, const uint8_t *b)
|
||||
void ns_enter(const struct ctx *c)
|
||||
{
|
||||
if (setns(c->pasta_netns_fd, CLONE_NEWNET))
|
||||
die("setns() failed entering netns: %s", strerror(errno));
|
||||
die_perror("setns() failed entering netns");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -313,10 +344,8 @@ bool ns_is_init(void)
|
||||
bool ret = true;
|
||||
int fd;
|
||||
|
||||
if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0) {
|
||||
die("Can't determine if we're in init namespace: %s",
|
||||
strerror(errno));
|
||||
}
|
||||
if ((fd = open("/proc/self/uid_map", O_RDONLY | O_CLOEXEC)) < 0)
|
||||
die_perror("Can't determine if we're in init namespace");
|
||||
|
||||
if (read(fd, buf, sizeof(root_uid_map)) != sizeof(root_uid_map) - 1 ||
|
||||
strncmp(buf, root_uid_map, sizeof(root_uid_map)))
|
||||
@@ -492,7 +521,7 @@ int write_file(const char *path, const char *buf)
|
||||
size_t len = strlen(buf);
|
||||
|
||||
if (fd < 0) {
|
||||
warn("Could not open %s: %s", path, strerror(errno));
|
||||
warn_perror("Could not open %s", path);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -500,7 +529,7 @@ int write_file(const char *path, const char *buf)
|
||||
ssize_t rc = write(fd, buf, len);
|
||||
|
||||
if (rc <= 0) {
|
||||
warn("Couldn't write to %s: %s", path, strerror(errno));
|
||||
warn_perror("Couldn't write to %s", path);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -603,6 +632,10 @@ const char *sockaddr_ntop(const void *sa, char *dst, socklen_t size)
|
||||
} while (0)
|
||||
|
||||
switch (family) {
|
||||
case AF_UNSPEC:
|
||||
IPRINTF("<unspecified>");
|
||||
break;
|
||||
|
||||
case AF_INET: {
|
||||
const struct sockaddr_in *sa4 = sa;
|
||||
|
||||
@@ -631,3 +664,23 @@ const char *sockaddr_ntop(const void *sa, char *dst, socklen_t size)
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
||||
/** str_ee_origin() - Convert socket extended error origin to a string
|
||||
* @ee: Socket extended error structure
|
||||
*
|
||||
* Return: Static string describing error origin
|
||||
*/
|
||||
const char *str_ee_origin(const struct sock_extended_err *ee)
|
||||
{
|
||||
const char *const desc[] = {
|
||||
[SO_EE_ORIGIN_NONE] = "<no origin>",
|
||||
[SO_EE_ORIGIN_LOCAL] = "Local",
|
||||
[SO_EE_ORIGIN_ICMP] = "ICMP",
|
||||
[SO_EE_ORIGIN_ICMP6] = "ICMPv6",
|
||||
};
|
||||
|
||||
if (ee->ee_origin < ARRAY_SIZE(desc))
|
||||
return desc[ee->ee_origin];
|
||||
|
||||
return "<invalid>";
|
||||
}
|
||||
|
17
util.h
17
util.h
@@ -137,20 +137,24 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "epoll_type.h"
|
||||
#include "packet.h"
|
||||
|
||||
struct ctx;
|
||||
|
||||
/* cppcheck-suppress funcArgNamesDifferent */
|
||||
__attribute__ ((weak)) int ffsl(long int i) { return __builtin_ffsl(i); }
|
||||
int sock_l4(const struct ctx *c, sa_family_t af, uint8_t proto,
|
||||
int sock_l4_sa(const struct ctx *c, enum epoll_type type,
|
||||
const void *sa, socklen_t sl,
|
||||
const char *ifname, bool v6only, uint32_t data);
|
||||
int sock_l4(const struct ctx *c, sa_family_t af, enum epoll_type type,
|
||||
const void *bind_addr, const char *ifname, uint16_t port,
|
||||
uint32_t data);
|
||||
void sock_probe_mem(struct ctx *c);
|
||||
int timespec_diff_ms(const struct timespec *a, const struct timespec *b);
|
||||
void bitmap_set(uint8_t *map, int bit);
|
||||
void bitmap_clear(uint8_t *map, int bit);
|
||||
int bitmap_isset(const uint8_t *map, int bit);
|
||||
long timespec_diff_ms(const struct timespec *a, const struct timespec *b);
|
||||
void bitmap_set(uint8_t *map, unsigned bit);
|
||||
void bitmap_clear(uint8_t *map, unsigned bit);
|
||||
bool bitmap_isset(const uint8_t *map, unsigned bit);
|
||||
void bitmap_or(uint8_t *dst, size_t size, const uint8_t *a, const uint8_t *b);
|
||||
char *line_read(char *buf, size_t len, int fd);
|
||||
void ns_enter(const struct ctx *c);
|
||||
@@ -193,7 +197,10 @@ static inline const char *af_name(sa_family_t af)
|
||||
|
||||
#define SOCKADDR_STRLEN MAX(SOCKADDR_INET_STRLEN, SOCKADDR_INET6_STRLEN)
|
||||
|
||||
struct sock_extended_err;
|
||||
|
||||
const char *sockaddr_ntop(const void *sa, char *dst, socklen_t size);
|
||||
const char *str_ee_origin(const struct sock_extended_err *ee);
|
||||
|
||||
/**
|
||||
* mod_sub() - Modular arithmetic subtraction
|
||||
|
Reference in New Issue
Block a user