switch 4.14.32 kernel for x86/x64

This commit is contained in:
coolsnowwolf 2018-04-08 16:54:10 +08:00
parent 1fe7a68b0f
commit 0c39849215
138 changed files with 17160 additions and 552 deletions

View File

@ -5,12 +5,12 @@ LINUX_RELEASE?=1
LINUX_VERSION-3.18 = .71
LINUX_VERSION-4.4 = .112
LINUX_VERSION-4.9 = .77
LINUX_VERSION-4.14 = .14
LINUX_VERSION-4.14 = .32
LINUX_KERNEL_HASH-3.18.71 = 5abc9778ad44ce02ed6c8ab52ece8a21c6d20d21f6ed8a19287b4a38a50c1240
LINUX_KERNEL_HASH-4.4.112 = 544b42cbeed022896115c76a18fc97b4507d5b41d7ac0ce1dce9afd6ffd11ecd
LINUX_KERNEL_HASH-4.9.77 = 7c29bc3fb96f1e23d98f664e786dddd53a1599f56431b9b7fdfba402a4b3705c
LINUX_KERNEL_HASH-4.14.14 = 8b96362eb55ae152555980e7193fe2585b487176fb936cc69b8947d7dd32044a
LINUX_KERNEL_HASH-4.14.32 = cb0979bec663089a43b10cfbeae0cf9673544b0ff5968c33ede614ec0f43b680
ifdef KERNEL_PATCHVER
LINUX_VERSION:=$(KERNEL_PATCHVER)$(strip $(LINUX_VERSION-$(KERNEL_PATCHVER)))

View File

@ -10,10 +10,10 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
--- a/Makefile
+++ b/Makefile
@@ -410,8 +410,8 @@ KERNELRELEASE = $(shell cat include/conf
KERNELVERSION = $(VERSION)$(if $(PATCHLEVEL),.$(PATCHLEVEL)$(if $(SUBLEVEL),.$(SUBLEVEL)))$(EXTRAVERSION)
@@ -433,8 +433,8 @@ KBUILD_CFLAGS_MODULE := -DMODULE
KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds
GCC_PLUGINS_CFLAGS :=
export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION
-export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC
-export CPP AR NM STRIP OBJCOPY OBJDUMP HOSTLDFLAGS HOST_LOADLIBES
+export ARCH SRCARCH SUBARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD

View File

@ -44,9 +44,9 @@ Cc: Kir Kolyshkin <kir@openvz.org>
* @sk_lingertime: %SO_LINGER l_linger setting
* @sk_backlog: always used with the per-socket spinlock held
* @sk_callback_lock: used with the callbacks in the end of this struct
@@ -448,6 +449,8 @@ struct sock {
kmemcheck_bitfield_end(flags);
@@ -445,6 +446,8 @@ struct sock {
sk_type : 16;
#define SK_PROTOCOL_MAX U8_MAX
u16 sk_gso_max_segs;
+#define sk_pacing_shift sk_pacing_shift /* for backport checks */
+ u8 sk_pacing_shift;
@ -55,7 +55,7 @@ Cc: Kir Kolyshkin <kir@openvz.org>
rwlock_t sk_callback_lock;
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2744,6 +2744,7 @@ void sock_init_data(struct socket *sock,
@@ -2739,6 +2739,7 @@ void sock_init_data(struct socket *sock,
sk->sk_max_pacing_rate = ~0U;
sk->sk_pacing_rate = ~0U;
@ -74,7 +74,7 @@ Cc: Kir Kolyshkin <kir@openvz.org>
sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
/* Goal is to send at least one packet per ms,
@@ -2145,7 +2145,7 @@ static bool tcp_small_queue_check(struct
@@ -2172,7 +2172,7 @@ static bool tcp_small_queue_check(struct
{
unsigned int limit;

View File

@ -1,25 +0,0 @@
From 4b05f09db650d215632da97f2c25ceba8235102a Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Sun, 26 Nov 2017 00:09:45 +0100
Subject: [PATCH] crypto: fix typo in KPP dependency of CRYPTO_ECDH
This fixes a typo in the CRYPTO_KPP dependency of CRYPTO_ECDH.
Fixes: 3c4b23901a0c ("crypto: ecdh - Add ECDH software support")
Cc: <stable@vger.kernel.org> # v4.8+
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
---
crypto/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -130,7 +130,7 @@ config CRYPTO_DH
config CRYPTO_ECDH
tristate "ECDH algorithm"
- select CRYTPO_KPP
+ select CRYPTO_KPP
select CRYPTO_RNG_DEFAULT
help
Generic implementation of the ECDH algorithm

View File

@ -0,0 +1,74 @@
From 2c77c57d22adb05b21cdb333a0c42bdfa0e19835 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Tue, 16 Jan 2018 16:45:41 +0100
Subject: [PATCH] mtd: move code adding master MTD out of
mtd_add_device_partitions()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This change is a small cleanup of mtd_device_parse_register(). When
using MTD_PARTITIONED_MASTER it makes sure a master MTD is registered
before dealing with partitions. The advantage of this is not mixing
code handling master MTD with code handling partitions.
This commit doesn't change any behavior except from a slightly different
failure code path. The new code may need to call del_mtd_device when
something goes wrong.
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
drivers/mtd/mtdcore.c | 25 +++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -641,20 +641,12 @@ static int mtd_add_device_partitions(str
{
const struct mtd_partition *real_parts = parts->parts;
int nbparts = parts->nr_parts;
- int ret;
- if (nbparts == 0 || IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
- ret = add_mtd_device(mtd);
- if (ret)
- return ret;
- }
+ if (!nbparts && !device_is_registered(&mtd->dev))
+ return add_mtd_device(mtd);
- if (nbparts > 0) {
- ret = add_mtd_partitions(mtd, real_parts, nbparts);
- if (ret && IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
- del_mtd_device(mtd);
- return ret;
- }
+ if (nbparts > 0)
+ return add_mtd_partitions(mtd, real_parts, nbparts);
return 0;
}
@@ -714,6 +706,12 @@ int mtd_device_parse_register(struct mtd
mtd_set_dev_defaults(mtd);
+ if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
+ ret = add_mtd_device(mtd);
+ if (ret)
+ return ret;
+ }
+
memset(&parsed, 0, sizeof(parsed));
ret = parse_mtd_partitions(mtd, types, &parsed, parser_data);
@@ -753,6 +751,9 @@ int mtd_device_parse_register(struct mtd
out:
/* Cleanup any parsed partitions */
mtd_part_parser_cleanup(&parsed);
+ if (ret && device_is_registered(&mtd->dev))
+ del_mtd_device(mtd);
+
return ret;
}
EXPORT_SYMBOL_GPL(mtd_device_parse_register);

View File

@ -0,0 +1,93 @@
From 0dbe4ea78d69756efeb0bba0764f6bd4a9ee9567 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Tue, 16 Jan 2018 16:45:42 +0100
Subject: [PATCH] mtd: get rid of the mtd_add_device_partitions()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This simplifies code a bit by:
1) Avoiding an extra (tiny) function
2) Checking for amount of parsed (found) partitions just once
3) Avoiding clearing/filling struct mtd_partitions manually
With this commit proper functions are called directly from the
mtd_device_parse_register(). It doesn't need to use minor tricks like
memsetting struct to 0 to trigger an expected
mtd_add_device_partitions() behavior.
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
drivers/mtd/mtdcore.c | 43 ++++++++++++-------------------------------
1 file changed, 12 insertions(+), 31 deletions(-)
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -636,21 +636,6 @@ out_error:
return ret;
}
-static int mtd_add_device_partitions(struct mtd_info *mtd,
- struct mtd_partitions *parts)
-{
- const struct mtd_partition *real_parts = parts->parts;
- int nbparts = parts->nr_parts;
-
- if (!nbparts && !device_is_registered(&mtd->dev))
- return add_mtd_device(mtd);
-
- if (nbparts > 0)
- return add_mtd_partitions(mtd, real_parts, nbparts);
-
- return 0;
-}
-
/*
* Set a few defaults based on the parent devices, if not provided by the
* driver
@@ -701,7 +686,7 @@ int mtd_device_parse_register(struct mtd
const struct mtd_partition *parts,
int nr_parts)
{
- struct mtd_partitions parsed;
+ struct mtd_partitions parsed = { };
int ret;
mtd_set_dev_defaults(mtd);
@@ -712,24 +697,20 @@ int mtd_device_parse_register(struct mtd
return ret;
}
- memset(&parsed, 0, sizeof(parsed));
-
+ /* Prefer parsed partitions over driver-provided fallback */
ret = parse_mtd_partitions(mtd, types, &parsed, parser_data);
- if ((ret < 0 || parsed.nr_parts == 0) && parts && nr_parts) {
- /* Fall back to driver-provided partitions */
- parsed = (struct mtd_partitions){
- .parts = parts,
- .nr_parts = nr_parts,
- };
- } else if (ret < 0) {
- /* Didn't come up with parsed OR fallback partitions */
- pr_info("mtd: failed to find partitions; one or more parsers reports errors (%d)\n",
- ret);
- /* Don't abort on errors; we can still use unpartitioned MTD */
- memset(&parsed, 0, sizeof(parsed));
+ if (!ret && parsed.nr_parts) {
+ parts = parsed.parts;
+ nr_parts = parsed.nr_parts;
}
- ret = mtd_add_device_partitions(mtd, &parsed);
+ if (nr_parts)
+ ret = add_mtd_partitions(mtd, parts, nr_parts);
+ else if (!device_is_registered(&mtd->dev))
+ ret = add_mtd_device(mtd);
+ else
+ ret = 0;
+
if (ret)
goto out;

View File

@ -1,121 +0,0 @@
From bb2192123ec70470d6ea33f138846b175403a968 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Thu, 4 Jan 2018 08:05:33 +0100
Subject: [PATCH] mtd: partitions: add of_match_table parser matching
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Partition parsers can now provide an of_match_table to enable
flash<-->parser matching via device tree as documented in the
mtd/partition.txt.
It works by looking for a matching parser for every string in the
"compatibility" property (starting with the most specific one).
This support is currently limited to built-in parsers as it uses
request_module() and friends. This should be sufficient for most cases
though as compiling parsers as modules isn't a common choice.
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
drivers/mtd/mtdpart.c | 59 ++++++++++++++++++++++++++++++++++++++++++
include/linux/mtd/partitions.h | 1 +
2 files changed, 60 insertions(+)
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -30,6 +30,7 @@
#include <linux/mtd/mtd.h>
#include <linux/mtd/partitions.h>
#include <linux/err.h>
+#include <linux/of.h>
#include "mtdcore.h"
@@ -894,6 +895,45 @@ static int mtd_part_do_parse(struct mtd_
}
/**
+ * mtd_part_get_compatible_parser - find MTD parser by a compatible string
+ *
+ * @compat: compatible string describing partitions in a device tree
+ *
+ * MTD parsers can specify supported partitions by providing a table of
+ * compatibility strings. This function finds a parser that advertises support
+ * for a passed value of "compatible".
+ */
+static struct mtd_part_parser *mtd_part_get_compatible_parser(const char *compat)
+{
+ struct mtd_part_parser *p, *ret = NULL;
+
+ spin_lock(&part_parser_lock);
+
+ list_for_each_entry(p, &part_parsers, list) {
+ const struct of_device_id *matches;
+
+ matches = p->of_match_table;
+ if (!matches)
+ continue;
+
+ for (; matches->compatible[0]; matches++) {
+ if (!strcmp(matches->compatible, compat) &&
+ try_module_get(p->owner)) {
+ ret = p;
+ break;
+ }
+ }
+
+ if (ret)
+ break;
+ }
+
+ spin_unlock(&part_parser_lock);
+
+ return ret;
+}
+
+/**
* parse_mtd_partitions - parse MTD partitions
* @master: the master partition (describes whole MTD device)
* @types: names of partition parsers to try or %NULL
@@ -919,8 +959,27 @@ int parse_mtd_partitions(struct mtd_info
struct mtd_part_parser_data *data)
{
struct mtd_part_parser *parser;
+ struct device_node *np;
+ struct property *prop;
+ const char *compat;
int ret, err = 0;
+ np = of_get_child_by_name(mtd_get_of_node(master), "partitions");
+ of_property_for_each_string(np, "compatible", prop, compat) {
+ parser = mtd_part_get_compatible_parser(compat);
+ if (!parser)
+ continue;
+ ret = mtd_part_do_parse(parser, master, pparts, data);
+ if (ret > 0) {
+ of_node_put(np);
+ return 0;
+ }
+ mtd_part_parser_put(parser);
+ if (ret < 0 && !err)
+ err = ret;
+ }
+ of_node_put(np);
+
if (!types)
types = default_mtd_part_types;
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -77,6 +77,7 @@ struct mtd_part_parser {
struct list_head list;
struct module *owner;
const char *name;
+ const struct of_device_id *of_match_table;
int (*parse_fn)(struct mtd_info *, const struct mtd_partition **,
struct mtd_part_parser_data *);
void (*cleanup)(const struct mtd_partition *pparts, int nr_parts);

View File

@ -1,16 +1,20 @@
From: David Heidelberger <david.heidelberger@ixit.cz>
From 649affd04813c43e0a72886517fcfccd63230981 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Mon, 29 Jun 2015 16:53:03 +0200
Subject: uapi/if_ether.h: prevent redefinition of struct ethhdr
Musl provides its own ethhdr struct definition. Add a guard to prevent
its definition of the appropriate musl header has already been included.
Signed-off-by: John Spencer <maillist-linux@barfooze.de>
Tested-by: David Heidelberger <david.heidelberger@ixit.cz>
Signed-off-by: Jonas Gorski <jogo@openwrt.org>
glibc does not implement this header, but when glibc will implement this
they can just define __UAPI_DEF_ETHHDR 0 to make it work with the
kernel.
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
---
include/uapi/linux/if_ether.h | 3 +++
include/uapi/linux/libc-compat.h | 11 +++++++++++
2 files changed, 14 insertions(+)
include/uapi/linux/if_ether.h | 3 +++
include/uapi/linux/libc-compat.h | 6 ++++++
2 files changed, 9 insertions(+)
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@ -38,28 +42,14 @@ Signed-off-by: Jonas Gorski <jogo@openwrt.org>
#endif /* _UAPI_LINUX_IF_ETHER_H */
--- a/include/uapi/linux/libc-compat.h
+++ b/include/uapi/linux/libc-compat.h
@@ -90,6 +90,14 @@
@@ -264,4 +264,10 @@
#endif /* _NET_IF_H */
#endif /* __GLIBC__ */
+/* musl defines the ethhdr struct itself in its netinet/if_ether.h.
+ * Glibc just includes the kernel header and uses a different guard. */
+#if defined(_NETINET_IF_ETHER_H)
+#define __UAPI_DEF_ETHHDR 0
+#else
+/* Definitions for if_ether.h */
+/* allow libcs like musl to deactivate this, glibc does not implement this. */
+#ifndef __UAPI_DEF_ETHHDR
+#define __UAPI_DEF_ETHHDR 1
+#endif
+
/* Coordinate with libc netinet/in.h header. */
#if defined(_NETINET_IN_H)
@@ -185,6 +193,9 @@
/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */
#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1
+/* Definitions for if_ether.h */
+#define __UAPI_DEF_ETHHDR 1
+
/* Definitions for in.h */
#define __UAPI_DEF_IN_ADDR 1
#define __UAPI_DEF_IN_IPPROTO 1
#endif /* _UAPI_LIBC_COMPAT_H */

View File

@ -0,0 +1,291 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 10 Dec 2017 01:43:14 +0100
Subject: [PATCH] netfilter: nf_tables: explicit nft_set_pktinfo() call from
hook path
Instead of calling this function from the family specific variant, this
reduces the code size in the fast path for the netdev, bridge and inet
families. After this change, we must call nft_set_pktinfo() upfront from
the chain hook indirection.
Before:
text data bss dec hex filename
2145 208 0 2353 931 net/netfilter/nf_tables_netdev.o
After:
text data bss dec hex filename
2125 208 0 2333 91d net/netfilter/nf_tables_netdev.o
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -54,8 +54,8 @@ static inline void nft_set_pktinfo(struc
pkt->xt.state = state;
}
-static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt,
- struct sk_buff *skb)
+static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
pkt->tprot_set = false;
pkt->tprot = 0;
@@ -63,14 +63,6 @@ static inline void nft_set_pktinfo_proto
pkt->xt.fragoff = 0;
}
-static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- nft_set_pktinfo(pkt, skb, state);
- nft_set_pktinfo_proto_unspec(pkt, skb);
-}
-
/**
* struct nft_verdict - nf_tables verdict
*
--- a/include/net/netfilter/nf_tables_ipv4.h
+++ b/include/net/netfilter/nf_tables_ipv4.h
@@ -5,15 +5,11 @@
#include <net/netfilter/nf_tables.h>
#include <net/ip.h>
-static inline void
-nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
struct iphdr *ip;
- nft_set_pktinfo(pkt, skb, state);
-
ip = ip_hdr(pkt->skb);
pkt->tprot_set = true;
pkt->tprot = ip->protocol;
@@ -21,10 +17,8 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo
pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
}
-static inline int
-__nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
struct iphdr *iph, _iph;
u32 len, thoff;
@@ -52,14 +46,11 @@ __nft_set_pktinfo_ipv4_validate(struct n
return 0;
}
-static inline void
-nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
- nft_set_pktinfo(pkt, skb, state);
- if (__nft_set_pktinfo_ipv4_validate(pkt, skb, state) < 0)
- nft_set_pktinfo_proto_unspec(pkt, skb);
+ if (__nft_set_pktinfo_ipv4_validate(pkt, skb) < 0)
+ nft_set_pktinfo_unspec(pkt, skb);
}
extern struct nft_af_info nft_af_ipv4;
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -5,20 +5,16 @@
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/ipv6.h>
-static inline void
-nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
unsigned int flags = IP6_FH_F_AUTH;
int protohdr, thoff = 0;
unsigned short frag_off;
- nft_set_pktinfo(pkt, skb, state);
-
protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
if (protohdr < 0) {
- nft_set_pktinfo_proto_unspec(pkt, skb);
+ nft_set_pktinfo_unspec(pkt, skb);
return;
}
@@ -28,10 +24,8 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo
pkt->xt.fragoff = frag_off;
}
-static inline int
-__nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
unsigned int flags = IP6_FH_F_AUTH;
@@ -68,14 +62,11 @@ __nft_set_pktinfo_ipv6_validate(struct n
#endif
}
-static inline void
-nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
{
- nft_set_pktinfo(pkt, skb, state);
- if (__nft_set_pktinfo_ipv6_validate(pkt, skb, state) < 0)
- nft_set_pktinfo_proto_unspec(pkt, skb);
+ if (__nft_set_pktinfo_ipv6_validate(pkt, skb) < 0)
+ nft_set_pktinfo_unspec(pkt, skb);
}
extern struct nft_af_info nft_af_ipv6;
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -25,15 +25,17 @@ nft_do_chain_bridge(void *priv,
{
struct nft_pktinfo pkt;
+ nft_set_pktinfo(&pkt, skb, state);
+
switch (eth_hdr(skb)->h_proto) {
case htons(ETH_P_IP):
- nft_set_pktinfo_ipv4_validate(&pkt, skb, state);
+ nft_set_pktinfo_ipv4_validate(&pkt, skb);
break;
case htons(ETH_P_IPV6):
- nft_set_pktinfo_ipv6_validate(&pkt, skb, state);
+ nft_set_pktinfo_ipv6_validate(&pkt, skb);
break;
default:
- nft_set_pktinfo_unspec(&pkt, skb, state);
+ nft_set_pktinfo_unspec(&pkt, skb);
break;
}
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -21,7 +21,8 @@ nft_do_chain_arp(void *priv,
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_unspec(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_unspec(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -24,7 +24,8 @@ static unsigned int nft_do_chain_ipv4(vo
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv4(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv4(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -33,7 +33,8 @@ static unsigned int nft_nat_do_chain(voi
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv4(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv4(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -38,7 +38,8 @@ static unsigned int nf_route_table_hook(
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- nft_set_pktinfo_ipv4(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv4(&pkt, skb);
mark = skb->mark;
iph = ip_hdr(skb);
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -22,7 +22,8 @@ static unsigned int nft_do_chain_ipv6(vo
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv6(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv6(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -31,7 +31,8 @@ static unsigned int nft_nat_do_chain(voi
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv6(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv6(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -33,7 +33,8 @@ static unsigned int nf_route_table_hook(
u32 mark, flowlabel;
int err;
- nft_set_pktinfo_ipv6(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv6(&pkt, skb);
/* save source/dest address, mark, hoplimit, flowlabel, priority */
memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -21,15 +21,17 @@ nft_do_chain_netdev(void *priv, struct s
{
struct nft_pktinfo pkt;
+ nft_set_pktinfo(&pkt, skb, state);
+
switch (skb->protocol) {
case htons(ETH_P_IP):
- nft_set_pktinfo_ipv4_validate(&pkt, skb, state);
+ nft_set_pktinfo_ipv4_validate(&pkt, skb);
break;
case htons(ETH_P_IPV6):
- nft_set_pktinfo_ipv6_validate(&pkt, skb, state);
+ nft_set_pktinfo_ipv6_validate(&pkt, skb);
break;
default:
- nft_set_pktinfo_unspec(&pkt, skb, state);
+ nft_set_pktinfo_unspec(&pkt, skb);
break;
}

View File

@ -0,0 +1,146 @@
From: Florian Westphal <fw@strlen.de>
Date: Fri, 8 Dec 2017 17:01:54 +0100
Subject: [PATCH] netfilter: core: only allow one nat hook per hook point
The netfilter NAT core cannot deal with more than one NAT hook per hook
location (prerouting, input ...), because the NAT hooks install a NAT null
binding in case the iptables nat table (iptable_nat hooks) or the
corresponding nftables chain (nft nat hooks) doesn't specify a nat
transformation.
Null bindings are needed to detect port collsisions between NAT-ed and
non-NAT-ed connections.
This causes nftables NAT rules to not work when iptable_nat module is
loaded, and vice versa because nat binding has already been attached
when the second nat hook is consulted.
The netfilter core is not really the correct location to handle this
(hooks are just hooks, the core has no notion of what kinds of side
effects a hook implements), but its the only place where we can check
for conflicts between both iptables hooks and nftables hooks without
adding dependencies.
So add nat annotation to hook_ops to describe those hooks that will
add NAT bindings and then make core reject if such a hook already exists.
The annotation fills a padding hole, in case further restrictions appar
we might change this to a 'u8 type' instead of bool.
iptables error if nft nat hook active:
iptables -t nat -A POSTROUTING -j MASQUERADE
iptables v1.4.21: can't initialize iptables table `nat': File exists
Perhaps iptables or your kernel needs to be upgraded.
nftables error if iptables nat table present:
nft -f /etc/nftables/ipv4-nat
/usr/etc/nftables/ipv4-nat:3:1-2: Error: Could not process rule: File exists
table nat {
^^
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -67,6 +67,7 @@ struct nf_hook_ops {
struct net_device *dev;
void *priv;
u_int8_t pf;
+ bool nat_hook;
unsigned int hooknum;
/* Hooks are ordered in ascending priority. */
int priority;
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -72,6 +72,7 @@ static const struct nf_hook_ops nf_nat_i
{
.hook = iptable_nat_ipv4_in,
.pf = NFPROTO_IPV4,
+ .nat_hook = true,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_NAT_DST,
},
@@ -79,6 +80,7 @@ static const struct nf_hook_ops nf_nat_i
{
.hook = iptable_nat_ipv4_out,
.pf = NFPROTO_IPV4,
+ .nat_hook = true,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_NAT_SRC,
},
@@ -86,6 +88,7 @@ static const struct nf_hook_ops nf_nat_i
{
.hook = iptable_nat_ipv4_local_fn,
.pf = NFPROTO_IPV4,
+ .nat_hook = true,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST,
},
@@ -93,6 +96,7 @@ static const struct nf_hook_ops nf_nat_i
{
.hook = iptable_nat_ipv4_fn,
.pf = NFPROTO_IPV4,
+ .nat_hook = true,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC,
},
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -74,6 +74,7 @@ static const struct nf_hook_ops nf_nat_i
{
.hook = ip6table_nat_in,
.pf = NFPROTO_IPV6,
+ .nat_hook = true,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_NAT_DST,
},
@@ -81,6 +82,7 @@ static const struct nf_hook_ops nf_nat_i
{
.hook = ip6table_nat_out,
.pf = NFPROTO_IPV6,
+ .nat_hook = true,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_NAT_SRC,
},
@@ -88,12 +90,14 @@ static const struct nf_hook_ops nf_nat_i
{
.hook = ip6table_nat_local_fn,
.pf = NFPROTO_IPV6,
+ .nat_hook = true,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST,
},
/* After packet filtering, change source */
{
.hook = ip6table_nat_fn,
+ .nat_hook = true,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC,
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -135,6 +135,12 @@ nf_hook_entries_grow(const struct nf_hoo
++i;
continue;
}
+
+ if (reg->nat_hook && orig_ops[i]->nat_hook) {
+ kvfree(new);
+ return ERR_PTR(-EEXIST);
+ }
+
if (inserted || reg->priority > orig_ops[i]->priority) {
new_ops[nhooks] = (void *)orig_ops[i];
new->hooks[nhooks] = old->hooks[i];
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1400,6 +1400,8 @@ static int nf_tables_addchain(struct nft
ops->hook = hookfn;
if (afi->hook_ops_init)
afi->hook_ops_init(ops, i);
+ if (basechain->type->type == NFT_CHAIN_T_NAT)
+ ops->nat_hook = true;
}
chain->flags |= NFT_BASE_CHAIN;

View File

@ -0,0 +1,161 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 9 Dec 2017 15:36:24 +0100
Subject: [PATCH] netfilter: nf_tables_inet: don't use multihook infrastructure
anymore
Use new native NFPROTO_INET support in netfilter core, this gets rid of
ad-hoc code in the nf_tables API codebase.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_tables_ipv4.h
+++ b/include/net/netfilter/nf_tables_ipv4.h
@@ -53,6 +53,4 @@ static inline void nft_set_pktinfo_ipv4_
nft_set_pktinfo_unspec(pkt, skb);
}
-extern struct nft_af_info nft_af_ipv4;
-
#endif
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -69,6 +69,4 @@ static inline void nft_set_pktinfo_ipv6_
nft_set_pktinfo_unspec(pkt, skb);
}
-extern struct nft_af_info nft_af_ipv6;
-
#endif
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -45,7 +45,7 @@ static unsigned int nft_ipv4_output(void
return nft_do_chain_ipv4(priv, skb, state);
}
-struct nft_af_info nft_af_ipv4 __read_mostly = {
+static struct nft_af_info nft_af_ipv4 __read_mostly = {
.family = NFPROTO_IPV4,
.nhooks = NF_INET_NUMHOOKS,
.owner = THIS_MODULE,
@@ -58,7 +58,6 @@ struct nft_af_info nft_af_ipv4 __read_mo
[NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
},
};
-EXPORT_SYMBOL_GPL(nft_af_ipv4);
static int nf_tables_ipv4_init_net(struct net *net)
{
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -42,7 +42,7 @@ static unsigned int nft_ipv6_output(void
return nft_do_chain_ipv6(priv, skb, state);
}
-struct nft_af_info nft_af_ipv6 __read_mostly = {
+static struct nft_af_info nft_af_ipv6 __read_mostly = {
.family = NFPROTO_IPV6,
.nhooks = NF_INET_NUMHOOKS,
.owner = THIS_MODULE,
@@ -55,7 +55,6 @@ struct nft_af_info nft_af_ipv6 __read_mo
[NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
},
};
-EXPORT_SYMBOL_GPL(nft_af_ipv6);
static int nf_tables_ipv6_init_net(struct net *net)
{
--- a/net/netfilter/nf_tables_inet.c
+++ b/net/netfilter/nf_tables_inet.c
@@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/ip.h>
+#include <linux/ipv6.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_tables.h>
@@ -16,26 +17,71 @@
#include <net/netfilter/nf_tables_ipv6.h>
#include <net/ip.h>
-static void nft_inet_hook_ops_init(struct nf_hook_ops *ops, unsigned int n)
+static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
{
- struct nft_af_info *afi;
+ struct nft_pktinfo pkt;
- if (n == 1)
- afi = &nft_af_ipv4;
- else
- afi = &nft_af_ipv6;
-
- ops->pf = afi->family;
- if (afi->hooks[ops->hooknum])
- ops->hook = afi->hooks[ops->hooknum];
+ nft_set_pktinfo(&pkt, skb, state);
+
+ switch (state->pf) {
+ case NFPROTO_IPV4:
+ nft_set_pktinfo_ipv4(&pkt, skb);
+ break;
+ case NFPROTO_IPV6:
+ nft_set_pktinfo_ipv6(&pkt, skb);
+ break;
+ default:
+ break;
+ }
+
+ return nft_do_chain(&pkt, priv);
+}
+
+static unsigned int nft_inet_output(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, skb, state);
+
+ switch (state->pf) {
+ case NFPROTO_IPV4:
+ if (unlikely(skb->len < sizeof(struct iphdr) ||
+ ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
+ if (net_ratelimit())
+ pr_info("ignoring short SOCK_RAW packet\n");
+ return NF_ACCEPT;
+ }
+ nft_set_pktinfo_ipv4(&pkt, skb);
+ break;
+ case NFPROTO_IPV6:
+ if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
+ if (net_ratelimit())
+ pr_info("ignoring short SOCK_RAW packet\n");
+ return NF_ACCEPT;
+ }
+ nft_set_pktinfo_ipv6(&pkt, skb);
+ break;
+ default:
+ break;
+ }
+
+ return nft_do_chain(&pkt, priv);
}
static struct nft_af_info nft_af_inet __read_mostly = {
.family = NFPROTO_INET,
.nhooks = NF_INET_NUMHOOKS,
.owner = THIS_MODULE,
- .nops = 2,
- .hook_ops_init = nft_inet_hook_ops_init,
+ .nops = 1,
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_inet,
+ [NF_INET_LOCAL_OUT] = nft_inet_output,
+ [NF_INET_FORWARD] = nft_do_chain_inet,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_inet,
+ [NF_INET_POST_ROUTING] = nft_do_chain_inet,
+ },
};
static int __net_init nf_tables_inet_init_net(struct net *net)

View File

@ -0,0 +1,391 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 9 Dec 2017 15:40:25 +0100
Subject: [PATCH] netfilter: nf_tables: remove multihook chains and families
Since NFPROTO_INET is handled from the core, we don't need to maintain
extra infrastructure in nf_tables to handle the double hook
registration, one for IPv4 and another for IPv6.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -892,8 +892,6 @@ struct nft_stats {
struct u64_stats_sync syncp;
};
-#define NFT_HOOK_OPS_MAX 2
-
/**
* struct nft_base_chain - nf_tables base chain
*
@@ -905,7 +903,7 @@ struct nft_stats {
* @dev_name: device name that this base chain is attached to (if any)
*/
struct nft_base_chain {
- struct nf_hook_ops ops[NFT_HOOK_OPS_MAX];
+ struct nf_hook_ops ops;
const struct nf_chain_type *type;
u8 policy;
u8 flags;
@@ -966,8 +964,6 @@ enum nft_af_flags {
* @owner: module owner
* @tables: used internally
* @flags: family flags
- * @nops: number of hook ops in this family
- * @hook_ops_init: initialization function for chain hook ops
* @hooks: hookfn overrides for packet validation
*/
struct nft_af_info {
@@ -977,9 +973,6 @@ struct nft_af_info {
struct module *owner;
struct list_head tables;
u32 flags;
- unsigned int nops;
- void (*hook_ops_init)(struct nf_hook_ops *,
- unsigned int);
nf_hookfn *hooks[NF_MAX_HOOKS];
};
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -46,7 +46,6 @@ static struct nft_af_info nft_af_bridge
.family = NFPROTO_BRIDGE,
.nhooks = NF_BR_NUMHOOKS,
.owner = THIS_MODULE,
- .nops = 1,
.hooks = {
[NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
[NF_BR_LOCAL_IN] = nft_do_chain_bridge,
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -31,7 +31,6 @@ static struct nft_af_info nft_af_arp __r
.family = NFPROTO_ARP,
.nhooks = NF_ARP_NUMHOOKS,
.owner = THIS_MODULE,
- .nops = 1,
.hooks = {
[NF_ARP_IN] = nft_do_chain_arp,
[NF_ARP_OUT] = nft_do_chain_arp,
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -49,7 +49,6 @@ static struct nft_af_info nft_af_ipv4 __
.family = NFPROTO_IPV4,
.nhooks = NF_INET_NUMHOOKS,
.owner = THIS_MODULE,
- .nops = 1,
.hooks = {
[NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
[NF_INET_LOCAL_OUT] = nft_ipv4_output,
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -46,7 +46,6 @@ static struct nft_af_info nft_af_ipv6 __
.family = NFPROTO_IPV6,
.nhooks = NF_INET_NUMHOOKS,
.owner = THIS_MODULE,
- .nops = 1,
.hooks = {
[NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
[NF_INET_LOCAL_OUT] = nft_ipv6_output,
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -139,29 +139,26 @@ static void nft_trans_destroy(struct nft
kfree(trans);
}
-static int nf_tables_register_hooks(struct net *net,
- const struct nft_table *table,
- struct nft_chain *chain,
- unsigned int hook_nops)
+static int nf_tables_register_hook(struct net *net,
+ const struct nft_table *table,
+ struct nft_chain *chain)
{
if (table->flags & NFT_TABLE_F_DORMANT ||
!nft_is_base_chain(chain))
return 0;
- return nf_register_net_hooks(net, nft_base_chain(chain)->ops,
- hook_nops);
+ return nf_register_net_hook(net, &nft_base_chain(chain)->ops);
}
-static void nf_tables_unregister_hooks(struct net *net,
- const struct nft_table *table,
- struct nft_chain *chain,
- unsigned int hook_nops)
+static void nf_tables_unregister_hook(struct net *net,
+ const struct nft_table *table,
+ struct nft_chain *chain)
{
if (table->flags & NFT_TABLE_F_DORMANT ||
!nft_is_base_chain(chain))
return;
- nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops);
+ nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
}
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
@@ -595,8 +592,7 @@ static void _nf_tables_table_disable(str
if (cnt && i++ == cnt)
break;
- nf_unregister_net_hooks(net, nft_base_chain(chain)->ops,
- afi->nops);
+ nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
}
}
@@ -613,8 +609,7 @@ static int nf_tables_table_enable(struct
if (!nft_is_base_chain(chain))
continue;
- err = nf_register_net_hooks(net, nft_base_chain(chain)->ops,
- afi->nops);
+ err = nf_register_net_hook(net, &nft_base_chain(chain)->ops);
if (err < 0)
goto err;
@@ -1026,7 +1021,7 @@ static int nf_tables_fill_chain_info(str
if (nft_is_base_chain(chain)) {
const struct nft_base_chain *basechain = nft_base_chain(chain);
- const struct nf_hook_ops *ops = &basechain->ops[0];
+ const struct nf_hook_ops *ops = &basechain->ops;
struct nlattr *nest;
nest = nla_nest_start(skb, NFTA_CHAIN_HOOK);
@@ -1252,8 +1247,8 @@ static void nf_tables_chain_destroy(stru
free_percpu(basechain->stats);
if (basechain->stats)
static_branch_dec(&nft_counters_enabled);
- if (basechain->ops[0].dev != NULL)
- dev_put(basechain->ops[0].dev);
+ if (basechain->ops.dev != NULL)
+ dev_put(basechain->ops.dev);
kfree(chain->name);
kfree(basechain);
} else {
@@ -1349,7 +1344,6 @@ static int nf_tables_addchain(struct nft
struct nft_stats __percpu *stats;
struct net *net = ctx->net;
struct nft_chain *chain;
- unsigned int i;
int err;
if (table->use == UINT_MAX)
@@ -1388,21 +1382,18 @@ static int nf_tables_addchain(struct nft
basechain->type = hook.type;
chain = &basechain->chain;
- for (i = 0; i < afi->nops; i++) {
- ops = &basechain->ops[i];
- ops->pf = family;
- ops->hooknum = hook.num;
- ops->priority = hook.priority;
- ops->priv = chain;
- ops->hook = afi->hooks[ops->hooknum];
- ops->dev = hook.dev;
- if (hookfn)
- ops->hook = hookfn;
- if (afi->hook_ops_init)
- afi->hook_ops_init(ops, i);
- if (basechain->type->type == NFT_CHAIN_T_NAT)
- ops->nat_hook = true;
- }
+ ops = &basechain->ops;
+ ops->pf = family;
+ ops->hooknum = hook.num;
+ ops->priority = hook.priority;
+ ops->priv = chain;
+ ops->hook = afi->hooks[ops->hooknum];
+ ops->dev = hook.dev;
+ if (hookfn)
+ ops->hook = hookfn;
+
+ if (basechain->type->type == NFT_CHAIN_T_NAT)
+ ops->nat_hook = true;
chain->flags |= NFT_BASE_CHAIN;
basechain->policy = policy;
@@ -1420,7 +1411,7 @@ static int nf_tables_addchain(struct nft
goto err1;
}
- err = nf_tables_register_hooks(net, table, chain, afi->nops);
+ err = nf_tables_register_hook(net, table, chain);
if (err < 0)
goto err1;
@@ -1434,7 +1425,7 @@ static int nf_tables_addchain(struct nft
return 0;
err2:
- nf_tables_unregister_hooks(net, table, chain, afi->nops);
+ nf_tables_unregister_hook(net, table, chain);
err1:
nf_tables_chain_destroy(chain);
@@ -1447,14 +1438,13 @@ static int nf_tables_updchain(struct nft
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
struct nft_chain *chain = ctx->chain;
- struct nft_af_info *afi = ctx->afi;
struct nft_base_chain *basechain;
struct nft_stats *stats = NULL;
struct nft_chain_hook hook;
const struct nlattr *name;
struct nf_hook_ops *ops;
struct nft_trans *trans;
- int err, i;
+ int err;
if (nla[NFTA_CHAIN_HOOK]) {
if (!nft_is_base_chain(chain))
@@ -1471,14 +1461,12 @@ static int nf_tables_updchain(struct nft
return -EBUSY;
}
- for (i = 0; i < afi->nops; i++) {
- ops = &basechain->ops[i];
- if (ops->hooknum != hook.num ||
- ops->priority != hook.priority ||
- ops->dev != hook.dev) {
- nft_chain_release_hook(&hook);
- return -EBUSY;
- }
+ ops = &basechain->ops;
+ if (ops->hooknum != hook.num ||
+ ops->priority != hook.priority ||
+ ops->dev != hook.dev) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
}
nft_chain_release_hook(&hook);
}
@@ -5062,10 +5050,9 @@ static int nf_tables_commit(struct net *
case NFT_MSG_DELCHAIN:
list_del_rcu(&trans->ctx.chain->list);
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
- nf_tables_unregister_hooks(trans->ctx.net,
- trans->ctx.table,
- trans->ctx.chain,
- trans->ctx.afi->nops);
+ nf_tables_unregister_hook(trans->ctx.net,
+ trans->ctx.table,
+ trans->ctx.chain);
break;
case NFT_MSG_NEWRULE:
nft_clear(trans->ctx.net, nft_trans_rule(trans));
@@ -5202,10 +5189,9 @@ static int nf_tables_abort(struct net *n
} else {
trans->ctx.table->use--;
list_del_rcu(&trans->ctx.chain->list);
- nf_tables_unregister_hooks(trans->ctx.net,
- trans->ctx.table,
- trans->ctx.chain,
- trans->ctx.afi->nops);
+ nf_tables_unregister_hook(trans->ctx.net,
+ trans->ctx.table,
+ trans->ctx.chain);
}
break;
case NFT_MSG_DELCHAIN:
@@ -5306,7 +5292,7 @@ int nft_chain_validate_hooks(const struc
if (nft_is_base_chain(chain)) {
basechain = nft_base_chain(chain);
- if ((1 << basechain->ops[0].hooknum) & hook_flags)
+ if ((1 << basechain->ops.hooknum) & hook_flags)
return 0;
return -EOPNOTSUPP;
@@ -5788,8 +5774,7 @@ int __nft_release_basechain(struct nft_c
BUG_ON(!nft_is_base_chain(ctx->chain));
- nf_tables_unregister_hooks(ctx->net, ctx->chain->table, ctx->chain,
- ctx->afi->nops);
+ nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
list_del(&rule->list);
ctx->chain->use--;
@@ -5818,8 +5803,7 @@ static void __nft_release_afinfo(struct
list_for_each_entry_safe(table, nt, &afi->tables, list) {
list_for_each_entry(chain, &table->chains, list)
- nf_tables_unregister_hooks(net, table, chain,
- afi->nops);
+ nf_tables_unregister_hook(net, table, chain);
/* No packets are walking on these chains anymore. */
ctx.table = table;
list_for_each_entry(chain, &table->chains, list) {
--- a/net/netfilter/nf_tables_inet.c
+++ b/net/netfilter/nf_tables_inet.c
@@ -74,7 +74,6 @@ static struct nft_af_info nft_af_inet __
.family = NFPROTO_INET,
.nhooks = NF_INET_NUMHOOKS,
.owner = THIS_MODULE,
- .nops = 1,
.hooks = {
[NF_INET_LOCAL_IN] = nft_do_chain_inet,
[NF_INET_LOCAL_OUT] = nft_inet_output,
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -43,7 +43,6 @@ static struct nft_af_info nft_af_netdev
.nhooks = NF_NETDEV_NUMHOOKS,
.owner = THIS_MODULE,
.flags = NFT_AF_NEEDS_DEV,
- .nops = 1,
.hooks = {
[NF_NETDEV_INGRESS] = nft_do_chain_netdev,
},
@@ -98,7 +97,7 @@ static void nft_netdev_event(unsigned lo
__nft_release_basechain(ctx);
break;
case NETDEV_CHANGENAME:
- if (dev->ifindex != basechain->ops[0].dev->ifindex)
+ if (dev->ifindex != basechain->ops.dev->ifindex)
return;
strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -169,7 +169,7 @@ nft_target_set_tgchk_param(struct xt_tgc
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
- const struct nf_hook_ops *ops = &basechain->ops[0];
+ const struct nf_hook_ops *ops = &basechain->ops;
par->hook_mask = 1 << ops->hooknum;
} else {
@@ -302,7 +302,7 @@ static int nft_target_validate(const str
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
- const struct nf_hook_ops *ops = &basechain->ops[0];
+ const struct nf_hook_ops *ops = &basechain->ops;
hook_mask = 1 << ops->hooknum;
if (target->hooks && !(hook_mask & target->hooks))
@@ -383,7 +383,7 @@ nft_match_set_mtchk_param(struct xt_mtch
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
- const struct nf_hook_ops *ops = &basechain->ops[0];
+ const struct nf_hook_ops *ops = &basechain->ops;
par->hook_mask = 1 << ops->hooknum;
} else {
@@ -481,7 +481,7 @@ static int nft_match_validate(const stru
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
- const struct nf_hook_ops *ops = &basechain->ops[0];
+ const struct nf_hook_ops *ops = &basechain->ops;
hook_mask = 1 << ops->hooknum;
if (match->hooks && !(hook_mask & match->hooks))

View File

@ -0,0 +1,171 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 27 Nov 2017 21:55:14 +0100
Subject: [PATCH] netfilter: move checksum indirection to struct nf_ipv6_ops
We cannot make a direct call to nf_ip6_checksum() because that would
result in autoloading the 'ipv6' module because of symbol dependencies.
Therefore, define checksum indirection in nf_ipv6_ops where this really
belongs to.
For IPv4, we can indeed make a direct function call, which is faster,
given IPv4 is built-in in the networking code by default. Still,
CONFIG_INET=n and CONFIG_NETFILTER=y is possible, so define empty inline
stub for IPv4 in such case.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
create mode 100644 net/netfilter/utils.c
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -274,8 +274,6 @@ struct nf_queue_entry;
struct nf_afinfo {
unsigned short family;
- __sum16 (*checksum)(struct sk_buff *skb, unsigned int hook,
- unsigned int dataoff, u_int8_t protocol);
__sum16 (*checksum_partial)(struct sk_buff *skb,
unsigned int hook,
unsigned int dataoff,
@@ -296,20 +294,9 @@ static inline const struct nf_afinfo *nf
return rcu_dereference(nf_afinfo[family]);
}
-static inline __sum16
-nf_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff,
- u_int8_t protocol, unsigned short family)
-{
- const struct nf_afinfo *afinfo;
- __sum16 csum = 0;
-
- rcu_read_lock();
- afinfo = nf_get_afinfo(family);
- if (afinfo)
- csum = afinfo->checksum(skb, hook, dataoff, protocol);
- rcu_read_unlock();
- return csum;
-}
+__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol,
+ unsigned short family);
static inline __sum16
nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -7,6 +7,16 @@
#include <uapi/linux/netfilter_ipv4.h>
int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type);
+
+#ifdef CONFIG_INET
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol);
+#else
+static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol)
+{
+ return 0;
+}
+#endif /* CONFIG_INET */
+
#endif /*__LINUX_IP_NETFILTER_H*/
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -19,6 +19,8 @@ struct nf_ipv6_ops {
void (*route_input)(struct sk_buff *skb);
int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *));
+ __sum16 (*checksum)(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol);
};
#ifdef CONFIG_NETFILTER
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -106,12 +106,6 @@ static int nf_br_reroute(struct net *net
return 0;
}
-static __sum16 nf_br_checksum(struct sk_buff *skb, unsigned int hook,
- unsigned int dataoff, u_int8_t protocol)
-{
- return 0;
-}
-
static __sum16 nf_br_checksum_partial(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, unsigned int len,
u_int8_t protocol)
@@ -127,7 +121,6 @@ static int nf_br_route(struct net *net,
static const struct nf_afinfo nf_br_afinfo = {
.family = AF_BRIDGE,
- .checksum = nf_br_checksum,
.checksum_partial = nf_br_checksum_partial,
.route = nf_br_route,
.saveroute = nf_br_saveroute,
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -188,7 +188,6 @@ static int nf_ip_route(struct net *net,
static const struct nf_afinfo nf_ip_afinfo = {
.family = AF_INET,
- .checksum = nf_ip_checksum,
.checksum_partial = nf_ip_checksum_partial,
.route = nf_ip_route,
.saveroute = nf_ip_saveroute,
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -193,12 +193,12 @@ static __sum16 nf_ip6_checksum_partial(s
static const struct nf_ipv6_ops ipv6ops = {
.chk_addr = ipv6_chk_addr,
.route_input = ip6_route_input,
- .fragment = ip6_fragment
+ .fragment = ip6_fragment,
+ .checksum = nf_ip6_checksum,
};
static const struct nf_afinfo nf_ip6_afinfo = {
.family = AF_INET6,
- .checksum = nf_ip6_checksum,
.checksum_partial = nf_ip6_checksum_partial,
.route = nf_ip6_route,
.saveroute = nf_ip6_saveroute,
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
+netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o utils.o
nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
--- /dev/null
+++ b/net/netfilter/utils.c
@@ -0,0 +1,26 @@
+#include <linux/kernel.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+
+__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol,
+ unsigned short family)
+{
+ const struct nf_ipv6_ops *v6ops;
+ __sum16 csum = 0;
+
+ switch (family) {
+ case AF_INET:
+ csum = nf_ip_checksum(skb, hook, dataoff, protocol);
+ break;
+ case AF_INET6:
+ v6ops = rcu_dereference(nf_ipv6_ops);
+ if (v6ops)
+ csum = v6ops->checksum(skb, hook, dataoff, protocol);
+ break;
+ }
+
+ return csum;
+}
+EXPORT_SYMBOL_GPL(nf_checksum);

View File

@ -0,0 +1,204 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 20 Dec 2017 16:04:18 +0100
Subject: [PATCH] netfilter: move checksum_partial indirection to struct
nf_ipv6_ops
We cannot make a direct call to nf_ip6_checksum_partial() because that
would result in autoloading the 'ipv6' module because of symbol
dependencies. Therefore, define checksum_partial indirection in
nf_ipv6_ops where this really belongs to.
For IPv4, we can indeed make a direct function call, which is faster,
given IPv4 is built-in in the networking code by default. Still,
CONFIG_INET=n and CONFIG_NETFILTER=y is possible, so define empty inline
stub for IPv4 in such case.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -274,11 +274,6 @@ struct nf_queue_entry;
struct nf_afinfo {
unsigned short family;
- __sum16 (*checksum_partial)(struct sk_buff *skb,
- unsigned int hook,
- unsigned int dataoff,
- unsigned int len,
- u_int8_t protocol);
int (*route)(struct net *net, struct dst_entry **dst,
struct flowi *fl, bool strict);
void (*saveroute)(const struct sk_buff *skb,
@@ -298,22 +293,9 @@ __sum16 nf_checksum(struct sk_buff *skb,
unsigned int dataoff, u_int8_t protocol,
unsigned short family);
-static inline __sum16
-nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
- unsigned int dataoff, unsigned int len,
- u_int8_t protocol, unsigned short family)
-{
- const struct nf_afinfo *afinfo;
- __sum16 csum = 0;
-
- rcu_read_lock();
- afinfo = nf_get_afinfo(family);
- if (afinfo)
- csum = afinfo->checksum_partial(skb, hook, dataoff, len,
- protocol);
- rcu_read_unlock();
- return csum;
-}
+__sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u_int8_t protocol, unsigned short family);
int nf_register_afinfo(const struct nf_afinfo *afinfo);
void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -11,12 +11,23 @@ int ip_route_me_harder(struct net *net,
#ifdef CONFIG_INET
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol);
+__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u_int8_t protocol);
#else
static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
{
return 0;
}
+static inline __sum16 nf_ip_checksum_partial(struct sk_buff *skb,
+ unsigned int hook,
+ unsigned int dataoff,
+ unsigned int len,
+ u_int8_t protocol)
+{
+ return 0;
+}
#endif /* CONFIG_INET */
#endif /*__LINUX_IP_NETFILTER_H*/
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -21,6 +21,9 @@ struct nf_ipv6_ops {
int (*output)(struct net *, struct sock *, struct sk_buff *));
__sum16 (*checksum)(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol);
+ __sum16 (*checksum_partial)(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u_int8_t protocol);
};
#ifdef CONFIG_NETFILTER
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -106,13 +106,6 @@ static int nf_br_reroute(struct net *net
return 0;
}
-static __sum16 nf_br_checksum_partial(struct sk_buff *skb, unsigned int hook,
- unsigned int dataoff, unsigned int len,
- u_int8_t protocol)
-{
- return 0;
-}
-
static int nf_br_route(struct net *net, struct dst_entry **dst,
struct flowi *fl, bool strict __always_unused)
{
@@ -121,7 +114,6 @@ static int nf_br_route(struct net *net,
static const struct nf_afinfo nf_br_afinfo = {
.family = AF_BRIDGE,
- .checksum_partial = nf_br_checksum_partial,
.route = nf_br_route,
.saveroute = nf_br_saveroute,
.reroute = nf_br_reroute,
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -155,9 +155,9 @@ __sum16 nf_ip_checksum(struct sk_buff *s
}
EXPORT_SYMBOL(nf_ip_checksum);
-static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
- unsigned int dataoff, unsigned int len,
- u_int8_t protocol)
+__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u_int8_t protocol)
{
const struct iphdr *iph = ip_hdr(skb);
__sum16 csum = 0;
@@ -175,6 +175,7 @@ static __sum16 nf_ip_checksum_partial(st
}
return csum;
}
+EXPORT_SYMBOL_GPL(nf_ip_checksum_partial);
static int nf_ip_route(struct net *net, struct dst_entry **dst,
struct flowi *fl, bool strict __always_unused)
@@ -188,7 +189,6 @@ static int nf_ip_route(struct net *net,
static const struct nf_afinfo nf_ip_afinfo = {
.family = AF_INET,
- .checksum_partial = nf_ip_checksum_partial,
.route = nf_ip_route,
.saveroute = nf_ip_saveroute,
.reroute = nf_ip_reroute,
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -191,15 +191,15 @@ static __sum16 nf_ip6_checksum_partial(s
};
static const struct nf_ipv6_ops ipv6ops = {
- .chk_addr = ipv6_chk_addr,
- .route_input = ip6_route_input,
- .fragment = ip6_fragment,
- .checksum = nf_ip6_checksum,
+ .chk_addr = ipv6_chk_addr,
+ .route_input = ip6_route_input,
+ .fragment = ip6_fragment,
+ .checksum = nf_ip6_checksum,
+ .checksum_partial = nf_ip6_checksum_partial,
};
static const struct nf_afinfo nf_ip6_afinfo = {
.family = AF_INET6,
- .checksum_partial = nf_ip6_checksum_partial,
.route = nf_ip6_route,
.saveroute = nf_ip6_saveroute,
.reroute = nf_ip6_reroute,
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -24,3 +24,27 @@ __sum16 nf_checksum(struct sk_buff *skb,
return csum;
}
EXPORT_SYMBOL_GPL(nf_checksum);
+
+__sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u_int8_t protocol, unsigned short family)
+{
+ const struct nf_ipv6_ops *v6ops;
+ __sum16 csum = 0;
+
+ switch (family) {
+ case AF_INET:
+ csum = nf_ip_checksum_partial(skb, hook, dataoff, len,
+ protocol);
+ break;
+ case AF_INET6:
+ v6ops = rcu_dereference(nf_ipv6_ops);
+ if (v6ops)
+ csum = v6ops->checksum_partial(skb, hook, dataoff, len,
+ protocol);
+ break;
+ }
+
+ return csum;
+}
+EXPORT_SYMBOL_GPL(nf_checksum_partial);

View File

@ -0,0 +1,232 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 20 Dec 2017 16:12:55 +0100
Subject: [PATCH] netfilter: remove saveroute indirection in struct nf_afinfo
This is only used by nf_queue.c and this function comes with no symbol
dependencies with IPv6, it just refers to structure layouts. Therefore,
we can replace it by a direct function call from where it belongs.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -276,8 +276,6 @@ struct nf_afinfo {
unsigned short family;
int (*route)(struct net *net, struct dst_entry **dst,
struct flowi *fl, bool strict);
- void (*saveroute)(const struct sk_buff *skb,
- struct nf_queue_entry *entry);
int (*reroute)(struct net *net, struct sk_buff *skb,
const struct nf_queue_entry *entry);
int route_key_size;
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -6,6 +6,16 @@
#include <uapi/linux/netfilter_ipv4.h>
+/* Extra routing may needed on local out, as the QUEUE target never returns
+ * control to the table.
+ */
+struct ip_rt_info {
+ __be32 daddr;
+ __be32 saddr;
+ u_int8_t tos;
+ u_int32_t mark;
+};
+
int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type);
#ifdef CONFIG_INET
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -9,6 +9,15 @@
#include <uapi/linux/netfilter_ipv6.h>
+/* Extra routing may needed on local out, as the QUEUE target never returns
+ * control to the table.
+ */
+struct ip6_rt_info {
+ struct in6_addr daddr;
+ struct in6_addr saddr;
+ u_int32_t mark;
+};
+
/*
* Hook functions for ipv6 to allow xt_* modules to be built-in even
* if IPv6 is a module.
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -95,11 +95,6 @@ static const struct nf_chain_type filter
(1 << NF_BR_POST_ROUTING),
};
-static void nf_br_saveroute(const struct sk_buff *skb,
- struct nf_queue_entry *entry)
-{
-}
-
static int nf_br_reroute(struct net *net, struct sk_buff *skb,
const struct nf_queue_entry *entry)
{
@@ -115,7 +110,6 @@ static int nf_br_route(struct net *net,
static const struct nf_afinfo nf_br_afinfo = {
.family = AF_BRIDGE,
.route = nf_br_route,
- .saveroute = nf_br_saveroute,
.reroute = nf_br_reroute,
.route_key_size = 0,
};
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -80,33 +80,6 @@ int ip_route_me_harder(struct net *net,
}
EXPORT_SYMBOL(ip_route_me_harder);
-/*
- * Extra routing may needed on local out, as the QUEUE target never
- * returns control to the table.
- */
-
-struct ip_rt_info {
- __be32 daddr;
- __be32 saddr;
- u_int8_t tos;
- u_int32_t mark;
-};
-
-static void nf_ip_saveroute(const struct sk_buff *skb,
- struct nf_queue_entry *entry)
-{
- struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
-
- if (entry->state.hook == NF_INET_LOCAL_OUT) {
- const struct iphdr *iph = ip_hdr(skb);
-
- rt_info->tos = iph->tos;
- rt_info->daddr = iph->daddr;
- rt_info->saddr = iph->saddr;
- rt_info->mark = skb->mark;
- }
-}
-
static int nf_ip_reroute(struct net *net, struct sk_buff *skb,
const struct nf_queue_entry *entry)
{
@@ -190,7 +163,6 @@ static int nf_ip_route(struct net *net,
static const struct nf_afinfo nf_ip_afinfo = {
.family = AF_INET,
.route = nf_ip_route,
- .saveroute = nf_ip_saveroute,
.reroute = nf_ip_reroute,
.route_key_size = sizeof(struct ip_rt_info),
};
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -69,31 +69,6 @@ int ip6_route_me_harder(struct net *net,
}
EXPORT_SYMBOL(ip6_route_me_harder);
-/*
- * Extra routing may needed on local out, as the QUEUE target never
- * returns control to the table.
- */
-
-struct ip6_rt_info {
- struct in6_addr daddr;
- struct in6_addr saddr;
- u_int32_t mark;
-};
-
-static void nf_ip6_saveroute(const struct sk_buff *skb,
- struct nf_queue_entry *entry)
-{
- struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
-
- if (entry->state.hook == NF_INET_LOCAL_OUT) {
- const struct ipv6hdr *iph = ipv6_hdr(skb);
-
- rt_info->daddr = iph->daddr;
- rt_info->saddr = iph->saddr;
- rt_info->mark = skb->mark;
- }
-}
-
static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
const struct nf_queue_entry *entry)
{
@@ -201,7 +176,6 @@ static const struct nf_ipv6_ops ipv6ops
static const struct nf_afinfo nf_ip6_afinfo = {
.family = AF_INET6,
.route = nf_ip6_route,
- .saveroute = nf_ip6_saveroute,
.reroute = nf_ip6_reroute,
.route_key_size = sizeof(struct ip6_rt_info),
};
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -10,6 +10,8 @@
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
@@ -111,6 +113,35 @@ unsigned int nf_queue_nf_hook_drop(struc
}
EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop);
+static void nf_ip_saveroute(const struct sk_buff *skb,
+ struct nf_queue_entry *entry)
+{
+ struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+ if (entry->state.hook == NF_INET_LOCAL_OUT) {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ rt_info->tos = iph->tos;
+ rt_info->daddr = iph->daddr;
+ rt_info->saddr = iph->saddr;
+ rt_info->mark = skb->mark;
+ }
+}
+
+static void nf_ip6_saveroute(const struct sk_buff *skb,
+ struct nf_queue_entry *entry)
+{
+ struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+ if (entry->state.hook == NF_INET_LOCAL_OUT) {
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ rt_info->daddr = iph->daddr;
+ rt_info->saddr = iph->saddr;
+ rt_info->mark = skb->mark;
+ }
+}
+
static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
const struct nf_hook_entries *entries,
unsigned int index, unsigned int queuenum)
@@ -147,7 +178,16 @@ static int __nf_queue(struct sk_buff *sk
nf_queue_entry_get_refs(entry);
skb_dst_force(skb);
- afinfo->saveroute(skb, entry);
+
+ switch (entry->state.pf) {
+ case AF_INET:
+ nf_ip_saveroute(skb, entry);
+ break;
+ case AF_INET6:
+ nf_ip6_saveroute(skb, entry);
+ break;
+ }
+
status = qh->outfn(entry, queuenum);
if (status < 0) {

View File

@ -0,0 +1,349 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 27 Nov 2017 22:29:52 +0100
Subject: [PATCH] netfilter: move route indirection to struct nf_ipv6_ops
We cannot make a direct call to nf_ip6_route() because that would result
in autoloading the 'ipv6' module because of symbol dependencies.
Therefore, define route indirection in nf_ipv6_ops where this really
belongs to.
For IPv4, we can indeed make a direct function call, which is faster,
given IPv4 is built-in in the networking code by default. Still,
CONFIG_INET=n and CONFIG_NETFILTER=y is possible, so define empty inline
stub for IPv4 in such case.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -274,8 +274,6 @@ struct nf_queue_entry;
struct nf_afinfo {
unsigned short family;
- int (*route)(struct net *net, struct dst_entry **dst,
- struct flowi *fl, bool strict);
int (*reroute)(struct net *net, struct sk_buff *skb,
const struct nf_queue_entry *entry);
int route_key_size;
@@ -294,6 +292,8 @@ __sum16 nf_checksum(struct sk_buff *skb,
__sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, unsigned int len,
u_int8_t protocol, unsigned short family);
+int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
+ bool strict, unsigned short family);
int nf_register_afinfo(const struct nf_afinfo *afinfo);
void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -24,6 +24,8 @@ __sum16 nf_ip_checksum(struct sk_buff *s
__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, unsigned int len,
u_int8_t protocol);
+int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
+ bool strict);
#else
static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
@@ -38,6 +40,11 @@ static inline __sum16 nf_ip_checksum_par
{
return 0;
}
+static inline int nf_ip_route(struct net *net, struct dst_entry **dst,
+ struct flowi *fl, bool strict)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_INET */
#endif /*__LINUX_IP_NETFILTER_H*/
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -33,6 +33,8 @@ struct nf_ipv6_ops {
__sum16 (*checksum_partial)(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, unsigned int len,
u_int8_t protocol);
+ int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl,
+ bool strict);
};
#ifdef CONFIG_NETFILTER
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -101,15 +101,8 @@ static int nf_br_reroute(struct net *net
return 0;
}
-static int nf_br_route(struct net *net, struct dst_entry **dst,
- struct flowi *fl, bool strict __always_unused)
-{
- return 0;
-}
-
static const struct nf_afinfo nf_br_afinfo = {
.family = AF_BRIDGE,
- .route = nf_br_route,
.reroute = nf_br_reroute,
.route_key_size = 0,
};
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -150,8 +150,8 @@ __sum16 nf_ip_checksum_partial(struct sk
}
EXPORT_SYMBOL_GPL(nf_ip_checksum_partial);
-static int nf_ip_route(struct net *net, struct dst_entry **dst,
- struct flowi *fl, bool strict __always_unused)
+int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
+ bool strict __always_unused)
{
struct rtable *rt = ip_route_output_key(net, &fl->u.ip4);
if (IS_ERR(rt))
@@ -159,10 +159,10 @@ static int nf_ip_route(struct net *net,
*dst = &rt->dst;
return 0;
}
+EXPORT_SYMBOL_GPL(nf_ip_route);
static const struct nf_afinfo nf_ip_afinfo = {
.family = AF_INET,
- .route = nf_ip_route,
.reroute = nf_ip_reroute,
.route_key_size = sizeof(struct ip_rt_info),
};
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -171,11 +171,11 @@ static const struct nf_ipv6_ops ipv6ops
.fragment = ip6_fragment,
.checksum = nf_ip6_checksum,
.checksum_partial = nf_ip6_checksum_partial,
+ .route = nf_ip6_route,
};
static const struct nf_afinfo nf_ip6_afinfo = {
.family = AF_INET6,
- .route = nf_ip6_route,
.reroute = nf_ip6_reroute,
.route_key_size = sizeof(struct ip6_rt_info),
};
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -60,7 +60,6 @@ static u32 __nft_fib6_eval_type(const st
{
const struct net_device *dev = NULL;
const struct nf_ipv6_ops *v6ops;
- const struct nf_afinfo *afinfo;
int route_err, addrtype;
struct rt6_info *rt;
struct flowi6 fl6 = {
@@ -69,8 +68,8 @@ static u32 __nft_fib6_eval_type(const st
};
u32 ret = 0;
- afinfo = nf_get_afinfo(NFPROTO_IPV6);
- if (!afinfo)
+ v6ops = nf_get_ipv6_ops();
+ if (!v6ops)
return RTN_UNREACHABLE;
if (priv->flags & NFTA_FIB_F_IIF)
@@ -80,12 +79,11 @@ static u32 __nft_fib6_eval_type(const st
nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
- v6ops = nf_get_ipv6_ops();
- if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
+ if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
ret = RTN_LOCAL;
- route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt,
- flowi6_to_flowi(&fl6), false);
+ route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt,
+ flowi6_to_flowi(&fl6), false);
if (route_err)
goto err;
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -24,6 +24,7 @@
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/ip6_route.h>
+#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -732,14 +733,8 @@ static int callforward_do_filter(struct
const union nf_inet_addr *dst,
u_int8_t family)
{
- const struct nf_afinfo *afinfo;
int ret = 0;
- /* rcu_read_lock()ed by nf_hook_thresh */
- afinfo = nf_get_afinfo(family);
- if (!afinfo)
- return 0;
-
switch (family) {
case AF_INET: {
struct flowi4 fl1, fl2;
@@ -750,10 +745,10 @@ static int callforward_do_filter(struct
memset(&fl2, 0, sizeof(fl2));
fl2.daddr = dst->ip;
- if (!afinfo->route(net, (struct dst_entry **)&rt1,
- flowi4_to_flowi(&fl1), false)) {
- if (!afinfo->route(net, (struct dst_entry **)&rt2,
- flowi4_to_flowi(&fl2), false)) {
+ if (!nf_ip_route(net, (struct dst_entry **)&rt1,
+ flowi4_to_flowi(&fl1), false)) {
+ if (!nf_ip_route(net, (struct dst_entry **)&rt2,
+ flowi4_to_flowi(&fl2), false)) {
if (rt_nexthop(rt1, fl1.daddr) ==
rt_nexthop(rt2, fl2.daddr) &&
rt1->dst.dev == rt2->dst.dev)
@@ -766,18 +761,23 @@ static int callforward_do_filter(struct
}
#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
case AF_INET6: {
- struct flowi6 fl1, fl2;
+ const struct nf_ipv6_ops *v6ops;
struct rt6_info *rt1, *rt2;
+ struct flowi6 fl1, fl2;
+
+ v6ops = nf_get_ipv6_ops();
+ if (!v6ops)
+ return 0;
memset(&fl1, 0, sizeof(fl1));
fl1.daddr = src->in6;
memset(&fl2, 0, sizeof(fl2));
fl2.daddr = dst->in6;
- if (!afinfo->route(net, (struct dst_entry **)&rt1,
- flowi6_to_flowi(&fl1), false)) {
- if (!afinfo->route(net, (struct dst_entry **)&rt2,
- flowi6_to_flowi(&fl2), false)) {
+ if (!v6ops->route(net, (struct dst_entry **)&rt1,
+ flowi6_to_flowi(&fl1), false)) {
+ if (!v6ops->route(net, (struct dst_entry **)&rt2,
+ flowi6_to_flowi(&fl2), false)) {
if (ipv6_addr_equal(rt6_nexthop(rt1, &fl1.daddr),
rt6_nexthop(rt2, &fl2.daddr)) &&
rt1->dst.dev == rt2->dst.dev)
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -27,7 +27,7 @@ static u16 get_tcpmss(const struct nft_p
{
u32 minlen = sizeof(struct ipv6hdr), mtu = dst_mtu(skbdst);
const struct sk_buff *skb = pkt->skb;
- const struct nf_afinfo *ai;
+ struct dst_entry *dst = NULL;
struct flowi fl;
memset(&fl, 0, sizeof(fl));
@@ -43,15 +43,10 @@ static u16 get_tcpmss(const struct nft_p
break;
}
- ai = nf_get_afinfo(nft_pf(pkt));
- if (ai) {
- struct dst_entry *dst = NULL;
-
- ai->route(nft_net(pkt), &dst, &fl, false);
- if (dst) {
- mtu = min(mtu, dst_mtu(dst));
- dst_release(dst);
- }
+ nf_route(nft_net(pkt), &dst, &fl, false, nft_pf(pkt));
+ if (dst) {
+ mtu = min(mtu, dst_mtu(dst));
+ dst_release(dst);
}
if (mtu <= minlen || mtu > 0xffff)
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -48,3 +48,24 @@ __sum16 nf_checksum_partial(struct sk_bu
return csum;
}
EXPORT_SYMBOL_GPL(nf_checksum_partial);
+
+int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
+ bool strict, unsigned short family)
+{
+ const struct nf_ipv6_ops *v6ops;
+ int ret = 0;
+
+ switch (family) {
+ case AF_INET:
+ ret = nf_ip_route(net, dst, fl, strict);
+ break;
+ case AF_INET6:
+ v6ops = rcu_dereference(nf_ipv6_ops);
+ if (v6ops)
+ ret = v6ops->route(net, dst, fl, strict);
+ break;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_route);
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -48,7 +48,6 @@ static u_int32_t tcpmss_reverse_mtu(stru
unsigned int family)
{
struct flowi fl;
- const struct nf_afinfo *ai;
struct rtable *rt = NULL;
u_int32_t mtu = ~0U;
@@ -62,10 +61,8 @@ static u_int32_t tcpmss_reverse_mtu(stru
memset(fl6, 0, sizeof(*fl6));
fl6->daddr = ipv6_hdr(skb)->saddr;
}
- ai = nf_get_afinfo(family);
- if (ai != NULL)
- ai->route(net, (struct dst_entry **)&rt, &fl, false);
+ nf_route(net, (struct dst_entry **)&rt, &fl, false, family);
if (rt != NULL) {
mtu = dst_mtu(&rt->dst);
dst_release(&rt->dst);
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -36,7 +36,7 @@ MODULE_ALIAS("ip6t_addrtype");
static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
const struct in6_addr *addr, u16 mask)
{
- const struct nf_afinfo *afinfo;
+ const struct nf_ipv6_ops *v6ops;
struct flowi6 flow;
struct rt6_info *rt;
u32 ret = 0;
@@ -47,17 +47,14 @@ static u32 match_lookup_rt6(struct net *
if (dev)
flow.flowi6_oif = dev->ifindex;
- afinfo = nf_get_afinfo(NFPROTO_IPV6);
- if (afinfo != NULL) {
- const struct nf_ipv6_ops *v6ops;
-
+ v6ops = nf_get_ipv6_ops();
+ if (v6ops) {
if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
- v6ops = nf_get_ipv6_ops();
- if (v6ops && v6ops->chk_addr(net, addr, dev, true))
+ if (v6ops->chk_addr(net, addr, dev, true))
ret = XT_ADDRTYPE_LOCAL;
}
- route_err = afinfo->route(net, (struct dst_entry **)&rt,
- flowi6_to_flowi(&flow), false);
+ route_err = v6ops->route(net, (struct dst_entry **)&rt,
+ flowi6_to_flowi(&flow), false);
} else {
route_err = 1;
}

View File

@ -0,0 +1,223 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 27 Nov 2017 22:50:26 +0100
Subject: [PATCH] netfilter: move reroute indirection to struct nf_ipv6_ops
We cannot make a direct call to nf_ip6_reroute() because that would result
in autoloading the 'ipv6' module because of symbol dependencies.
Therefore, define reroute indirection in nf_ipv6_ops where this really
belongs to.
For IPv4, we can indeed make a direct function call, which is faster,
given IPv4 is built-in in the networking code by default. Still,
CONFIG_INET=n and CONFIG_NETFILTER=y is possible, so define empty inline
stub for IPv4 in such case.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -274,8 +274,6 @@ struct nf_queue_entry;
struct nf_afinfo {
unsigned short family;
- int (*reroute)(struct net *net, struct sk_buff *skb,
- const struct nf_queue_entry *entry);
int route_key_size;
};
@@ -294,6 +292,7 @@ __sum16 nf_checksum_partial(struct sk_bu
u_int8_t protocol, unsigned short family);
int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
bool strict, unsigned short family);
+int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry);
int nf_register_afinfo(const struct nf_afinfo *afinfo);
void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -18,6 +18,8 @@ struct ip_rt_info {
int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type);
+struct nf_queue_entry;
+
#ifdef CONFIG_INET
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol);
@@ -26,6 +28,7 @@ __sum16 nf_ip_checksum_partial(struct sk
u_int8_t protocol);
int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
bool strict);
+int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry);
#else
static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
@@ -45,6 +48,11 @@ static inline int nf_ip_route(struct net
{
return -EOPNOTSUPP;
}
+static inline int nf_ip_reroute(struct sk_buff *skb,
+ const struct nf_queue_entry *entry)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_INET */
#endif /*__LINUX_IP_NETFILTER_H*/
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -18,6 +18,8 @@ struct ip6_rt_info {
u_int32_t mark;
};
+struct nf_queue_entry;
+
/*
* Hook functions for ipv6 to allow xt_* modules to be built-in even
* if IPv6 is a module.
@@ -35,6 +37,7 @@ struct nf_ipv6_ops {
u_int8_t protocol);
int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl,
bool strict);
+ int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
};
#ifdef CONFIG_NETFILTER
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -95,15 +95,8 @@ static const struct nf_chain_type filter
(1 << NF_BR_POST_ROUTING),
};
-static int nf_br_reroute(struct net *net, struct sk_buff *skb,
- const struct nf_queue_entry *entry)
-{
- return 0;
-}
-
static const struct nf_afinfo nf_br_afinfo = {
.family = AF_BRIDGE,
- .reroute = nf_br_reroute,
.route_key_size = 0,
};
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -80,8 +80,7 @@ int ip_route_me_harder(struct net *net,
}
EXPORT_SYMBOL(ip_route_me_harder);
-static int nf_ip_reroute(struct net *net, struct sk_buff *skb,
- const struct nf_queue_entry *entry)
+int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
{
const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
@@ -92,10 +91,12 @@ static int nf_ip_reroute(struct net *net
skb->mark == rt_info->mark &&
iph->daddr == rt_info->daddr &&
iph->saddr == rt_info->saddr))
- return ip_route_me_harder(net, skb, RTN_UNSPEC);
+ return ip_route_me_harder(entry->state.net, skb,
+ RTN_UNSPEC);
}
return 0;
}
+EXPORT_SYMBOL_GPL(nf_ip_reroute);
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
@@ -163,7 +164,6 @@ EXPORT_SYMBOL_GPL(nf_ip_route);
static const struct nf_afinfo nf_ip_afinfo = {
.family = AF_INET,
- .reroute = nf_ip_reroute,
.route_key_size = sizeof(struct ip_rt_info),
};
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -69,7 +69,7 @@ int ip6_route_me_harder(struct net *net,
}
EXPORT_SYMBOL(ip6_route_me_harder);
-static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
+static int nf_ip6_reroute(struct sk_buff *skb,
const struct nf_queue_entry *entry)
{
struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
@@ -79,7 +79,7 @@ static int nf_ip6_reroute(struct net *ne
if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
!ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
skb->mark != rt_info->mark)
- return ip6_route_me_harder(net, skb);
+ return ip6_route_me_harder(entry->state.net, skb);
}
return 0;
}
@@ -172,11 +172,11 @@ static const struct nf_ipv6_ops ipv6ops
.checksum = nf_ip6_checksum,
.checksum_partial = nf_ip6_checksum_partial,
.route = nf_ip6_route,
+ .reroute = nf_ip6_reroute,
};
static const struct nf_afinfo nf_ip6_afinfo = {
.family = AF_INET6,
- .reroute = nf_ip6_reroute,
.route_key_size = sizeof(struct ip6_rt_info),
};
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -250,7 +250,6 @@ void nf_reinject(struct nf_queue_entry *
const struct nf_hook_entry *hook_entry;
const struct nf_hook_entries *hooks;
struct sk_buff *skb = entry->skb;
- const struct nf_afinfo *afinfo;
const struct net *net;
unsigned int i;
int err;
@@ -277,8 +276,7 @@ void nf_reinject(struct nf_queue_entry *
verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
if (verdict == NF_ACCEPT) {
- afinfo = nf_get_afinfo(entry->state.pf);
- if (!afinfo || afinfo->reroute(entry->state.net, skb, entry) < 0)
+ if (nf_reroute(skb, entry) < 0)
verdict = NF_DROP;
}
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -2,6 +2,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_queue.h>
__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol,
@@ -69,3 +70,21 @@ int nf_route(struct net *net, struct dst
return ret;
}
EXPORT_SYMBOL_GPL(nf_route);
+
+int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
+{
+ const struct nf_ipv6_ops *v6ops;
+ int ret = 0;
+
+ switch (entry->state.pf) {
+ case AF_INET:
+ ret = nf_ip_reroute(skb, entry);
+ break;
+ case AF_INET6:
+ v6ops = rcu_dereference(nf_ipv6_ops);
+ if (v6ops)
+ ret = v6ops->reroute(skb, entry);
+ break;
+ }
+ return ret;
+}

View File

@ -0,0 +1,94 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 27 Nov 2017 22:58:37 +0100
Subject: [PATCH] netfilter: remove route_key_size field in struct nf_afinfo
This is only needed by nf_queue, place this code where it belongs.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -274,7 +274,6 @@ struct nf_queue_entry;
struct nf_afinfo {
unsigned short family;
- int route_key_size;
};
extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO];
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -164,7 +164,6 @@ EXPORT_SYMBOL_GPL(nf_ip_route);
static const struct nf_afinfo nf_ip_afinfo = {
.family = AF_INET,
- .route_key_size = sizeof(struct ip_rt_info),
};
static int __init ipv4_netfilter_init(void)
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -177,7 +177,6 @@ static const struct nf_ipv6_ops ipv6ops
static const struct nf_afinfo nf_ip6_afinfo = {
.family = AF_INET6,
- .route_key_size = sizeof(struct ip6_rt_info),
};
int __init ipv6_netfilter_init(void)
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -15,6 +15,8 @@
#include <linux/netfilter_bridge.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
#include <net/protocol.h>
#include <net/netfilter/nf_queue.h>
#include <net/dst.h>
@@ -148,9 +150,9 @@ static int __nf_queue(struct sk_buff *sk
{
int status = -ENOENT;
struct nf_queue_entry *entry = NULL;
- const struct nf_afinfo *afinfo;
const struct nf_queue_handler *qh;
struct net *net = state->net;
+ unsigned int route_key_size;
/* QUEUE == DROP if no one is waiting, to be safe. */
qh = rcu_dereference(net->nf.queue_handler);
@@ -159,11 +161,19 @@ static int __nf_queue(struct sk_buff *sk
goto err;
}
- afinfo = nf_get_afinfo(state->pf);
- if (!afinfo)
- goto err;
+ switch (state->pf) {
+ case AF_INET:
+ route_key_size = sizeof(struct ip_rt_info);
+ break;
+ case AF_INET6:
+ route_key_size = sizeof(struct ip6_rt_info);
+ break;
+ default:
+ route_key_size = 0;
+ break;
+ }
- entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
+ entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC);
if (!entry) {
status = -ENOMEM;
goto err;
@@ -173,7 +183,7 @@ static int __nf_queue(struct sk_buff *sk
.skb = skb,
.state = *state,
.hook_index = index,
- .size = sizeof(*entry) + afinfo->route_key_size,
+ .size = sizeof(*entry) + route_key_size,
};
nf_queue_entry_get_refs(entry);

View File

@ -0,0 +1,173 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 9 Dec 2017 17:05:53 +0100
Subject: [PATCH] netfilter: remove struct nf_afinfo and its helper functions
This abstraction has no clients anymore, remove it.
This is what remains from previous authors, so correct copyright
statement after recent modifications and code removal.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -272,16 +272,6 @@ int skb_make_writable(struct sk_buff *sk
struct flowi;
struct nf_queue_entry;
-struct nf_afinfo {
- unsigned short family;
-};
-
-extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO];
-static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family)
-{
- return rcu_dereference(nf_afinfo[family]);
-}
-
__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol,
unsigned short family);
@@ -293,9 +283,6 @@ int nf_route(struct net *net, struct dst
bool strict, unsigned short family);
int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry);
-int nf_register_afinfo(const struct nf_afinfo *afinfo);
-void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
-
#include <net/flow.h>
extern void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -95,30 +95,23 @@ static const struct nf_chain_type filter
(1 << NF_BR_POST_ROUTING),
};
-static const struct nf_afinfo nf_br_afinfo = {
- .family = AF_BRIDGE,
- .route_key_size = 0,
-};
-
static int __init nf_tables_bridge_init(void)
{
int ret;
- nf_register_afinfo(&nf_br_afinfo);
ret = nft_register_chain_type(&filter_bridge);
if (ret < 0)
- goto err1;
+ return ret;
ret = register_pernet_subsys(&nf_tables_bridge_net_ops);
if (ret < 0)
- goto err2;
+ goto err_register_subsys;
return ret;
-err2:
+err_register_subsys:
nft_unregister_chain_type(&filter_bridge);
-err1:
- nf_unregister_afinfo(&nf_br_afinfo);
+
return ret;
}
@@ -126,7 +119,6 @@ static void __exit nf_tables_bridge_exit
{
unregister_pernet_subsys(&nf_tables_bridge_net_ops);
nft_unregister_chain_type(&filter_bridge);
- nf_unregister_afinfo(&nf_br_afinfo);
}
module_init(nf_tables_bridge_init);
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -161,13 +161,3 @@ int nf_ip_route(struct net *net, struct
return 0;
}
EXPORT_SYMBOL_GPL(nf_ip_route);
-
-static const struct nf_afinfo nf_ip_afinfo = {
- .family = AF_INET,
-};
-
-static int __init ipv4_netfilter_init(void)
-{
- return nf_register_afinfo(&nf_ip_afinfo);
-}
-subsys_initcall(ipv4_netfilter_init);
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -175,14 +175,10 @@ static const struct nf_ipv6_ops ipv6ops
.reroute = nf_ip6_reroute,
};
-static const struct nf_afinfo nf_ip6_afinfo = {
- .family = AF_INET6,
-};
-
int __init ipv6_netfilter_init(void)
{
RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
- return nf_register_afinfo(&nf_ip6_afinfo);
+ return 0;
}
/* This can be called from inet6_init() on errors, so it cannot
@@ -191,5 +187,4 @@ int __init ipv6_netfilter_init(void)
void ipv6_netfilter_fini(void)
{
RCU_INIT_POINTER(nf_ipv6_ops, NULL);
- nf_unregister_afinfo(&nf_ip6_afinfo);
}
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -4,8 +4,7 @@
* Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
* way.
*
- * Rusty Russell (C)2000 -- This code is GPL.
- * Patrick McHardy (c) 2006-2012
+ * This code is GPL.
*/
#include <linux/kernel.h>
#include <linux/netfilter.h>
@@ -28,34 +27,12 @@
#include "nf_internals.h"
-static DEFINE_MUTEX(afinfo_mutex);
-
-const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
-EXPORT_SYMBOL(nf_afinfo);
const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_ipv6_ops);
DEFINE_PER_CPU(bool, nf_skb_duplicated);
EXPORT_SYMBOL_GPL(nf_skb_duplicated);
-int nf_register_afinfo(const struct nf_afinfo *afinfo)
-{
- mutex_lock(&afinfo_mutex);
- RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo);
- mutex_unlock(&afinfo_mutex);
- return 0;
-}
-EXPORT_SYMBOL_GPL(nf_register_afinfo);
-
-void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
-{
- mutex_lock(&afinfo_mutex);
- RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL);
- mutex_unlock(&afinfo_mutex);
- synchronize_rcu();
-}
-EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
-
#ifdef HAVE_JUMP_LABEL
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);

View File

@ -0,0 +1,20 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 10 Dec 2017 01:42:58 +0100
Subject: [PATCH] netfilter: nf_tables_arp: don't set forward chain
46928a0b49f3 ("netfilter: nf_tables: remove multihook chains and
families") already removed this, this is a leftover.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -34,7 +34,6 @@ static struct nft_af_info nft_af_arp __r
.hooks = {
[NF_ARP_IN] = nft_do_chain_arp,
[NF_ARP_OUT] = nft_do_chain_arp,
- [NF_ARP_FORWARD] = nft_do_chain_arp,
},
};

View File

@ -0,0 +1,233 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 9 Dec 2017 15:43:17 +0100
Subject: [PATCH] netfilter: nf_tables: remove hooks from family definition
They don't belong to the family definition, move them to the filter
chain type definition instead.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -870,7 +870,7 @@ enum nft_chain_type {
* @family: address family
* @owner: module owner
* @hook_mask: mask of valid hooks
- * @hooks: hookfn overrides
+ * @hooks: array of hook functions
*/
struct nf_chain_type {
const char *name;
@@ -964,7 +964,6 @@ enum nft_af_flags {
* @owner: module owner
* @tables: used internally
* @flags: family flags
- * @hooks: hookfn overrides for packet validation
*/
struct nft_af_info {
struct list_head list;
@@ -973,7 +972,6 @@ struct nft_af_info {
struct module *owner;
struct list_head tables;
u32 flags;
- nf_hookfn *hooks[NF_MAX_HOOKS];
};
int nft_register_afinfo(struct net *, struct nft_af_info *);
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -46,13 +46,6 @@ static struct nft_af_info nft_af_bridge
.family = NFPROTO_BRIDGE,
.nhooks = NF_BR_NUMHOOKS,
.owner = THIS_MODULE,
- .hooks = {
- [NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
- [NF_BR_LOCAL_IN] = nft_do_chain_bridge,
- [NF_BR_FORWARD] = nft_do_chain_bridge,
- [NF_BR_LOCAL_OUT] = nft_do_chain_bridge,
- [NF_BR_POST_ROUTING] = nft_do_chain_bridge,
- },
};
static int nf_tables_bridge_init_net(struct net *net)
@@ -93,6 +86,13 @@ static const struct nf_chain_type filter
(1 << NF_BR_FORWARD) |
(1 << NF_BR_LOCAL_OUT) |
(1 << NF_BR_POST_ROUTING),
+ .hooks = {
+ [NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
+ [NF_BR_LOCAL_IN] = nft_do_chain_bridge,
+ [NF_BR_FORWARD] = nft_do_chain_bridge,
+ [NF_BR_LOCAL_OUT] = nft_do_chain_bridge,
+ [NF_BR_POST_ROUTING] = nft_do_chain_bridge,
+ },
};
static int __init nf_tables_bridge_init(void)
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -31,10 +31,6 @@ static struct nft_af_info nft_af_arp __r
.family = NFPROTO_ARP,
.nhooks = NF_ARP_NUMHOOKS,
.owner = THIS_MODULE,
- .hooks = {
- [NF_ARP_IN] = nft_do_chain_arp,
- [NF_ARP_OUT] = nft_do_chain_arp,
- },
};
static int nf_tables_arp_init_net(struct net *net)
@@ -72,6 +68,10 @@ static const struct nf_chain_type filter
.owner = THIS_MODULE,
.hook_mask = (1 << NF_ARP_IN) |
(1 << NF_ARP_OUT),
+ .hooks = {
+ [NF_ARP_IN] = nft_do_chain_arp,
+ [NF_ARP_OUT] = nft_do_chain_arp,
+ },
};
static int __init nf_tables_arp_init(void)
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -49,13 +49,6 @@ static struct nft_af_info nft_af_ipv4 __
.family = NFPROTO_IPV4,
.nhooks = NF_INET_NUMHOOKS,
.owner = THIS_MODULE,
- .hooks = {
- [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
- [NF_INET_LOCAL_OUT] = nft_ipv4_output,
- [NF_INET_FORWARD] = nft_do_chain_ipv4,
- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
- [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
- },
};
static int nf_tables_ipv4_init_net(struct net *net)
@@ -96,6 +89,13 @@ static const struct nf_chain_type filter
(1 << NF_INET_FORWARD) |
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
+ [NF_INET_LOCAL_OUT] = nft_ipv4_output,
+ [NF_INET_FORWARD] = nft_do_chain_ipv4,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
+ },
};
static int __init nf_tables_ipv4_init(void)
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -46,13 +46,6 @@ static struct nft_af_info nft_af_ipv6 __
.family = NFPROTO_IPV6,
.nhooks = NF_INET_NUMHOOKS,
.owner = THIS_MODULE,
- .hooks = {
- [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
- [NF_INET_LOCAL_OUT] = nft_ipv6_output,
- [NF_INET_FORWARD] = nft_do_chain_ipv6,
- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
- [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
- },
};
static int nf_tables_ipv6_init_net(struct net *net)
@@ -93,6 +86,13 @@ static const struct nf_chain_type filter
(1 << NF_INET_FORWARD) |
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
+ [NF_INET_LOCAL_OUT] = nft_ipv6_output,
+ [NF_INET_FORWARD] = nft_do_chain_ipv6,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
+ },
};
static int __init nf_tables_ipv6_init(void)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1352,7 +1352,6 @@ static int nf_tables_addchain(struct nft
if (nla[NFTA_CHAIN_HOOK]) {
struct nft_chain_hook hook;
struct nf_hook_ops *ops;
- nf_hookfn *hookfn;
err = nft_chain_parse_hook(net, nla, afi, &hook, create);
if (err < 0)
@@ -1378,7 +1377,6 @@ static int nf_tables_addchain(struct nft
static_branch_inc(&nft_counters_enabled);
}
- hookfn = hook.type->hooks[hook.num];
basechain->type = hook.type;
chain = &basechain->chain;
@@ -1387,10 +1385,8 @@ static int nf_tables_addchain(struct nft
ops->hooknum = hook.num;
ops->priority = hook.priority;
ops->priv = chain;
- ops->hook = afi->hooks[ops->hooknum];
+ ops->hook = hook.type->hooks[ops->hooknum];
ops->dev = hook.dev;
- if (hookfn)
- ops->hook = hookfn;
if (basechain->type->type == NFT_CHAIN_T_NAT)
ops->nat_hook = true;
--- a/net/netfilter/nf_tables_inet.c
+++ b/net/netfilter/nf_tables_inet.c
@@ -74,13 +74,6 @@ static struct nft_af_info nft_af_inet __
.family = NFPROTO_INET,
.nhooks = NF_INET_NUMHOOKS,
.owner = THIS_MODULE,
- .hooks = {
- [NF_INET_LOCAL_IN] = nft_do_chain_inet,
- [NF_INET_LOCAL_OUT] = nft_inet_output,
- [NF_INET_FORWARD] = nft_do_chain_inet,
- [NF_INET_PRE_ROUTING] = nft_do_chain_inet,
- [NF_INET_POST_ROUTING] = nft_do_chain_inet,
- },
};
static int __net_init nf_tables_inet_init_net(struct net *net)
@@ -121,6 +114,13 @@ static const struct nf_chain_type filter
(1 << NF_INET_FORWARD) |
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_inet,
+ [NF_INET_LOCAL_OUT] = nft_inet_output,
+ [NF_INET_FORWARD] = nft_do_chain_inet,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_inet,
+ [NF_INET_POST_ROUTING] = nft_do_chain_inet,
+ },
};
static int __init nf_tables_inet_init(void)
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -43,9 +43,6 @@ static struct nft_af_info nft_af_netdev
.nhooks = NF_NETDEV_NUMHOOKS,
.owner = THIS_MODULE,
.flags = NFT_AF_NEEDS_DEV,
- .hooks = {
- [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
- },
};
static int nf_tables_netdev_init_net(struct net *net)
@@ -82,6 +79,9 @@ static const struct nf_chain_type nft_fi
.family = NFPROTO_NETDEV,
.owner = THIS_MODULE,
.hook_mask = (1 << NF_NETDEV_INGRESS),
+ .hooks = {
+ [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
+ },
};
static void nft_netdev_event(unsigned long event, struct net_device *dev,

View File

@ -0,0 +1,302 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 30 Dec 2017 22:41:46 +0100
Subject: [PATCH] netfilter: remove defensive check on malformed packets from
raw sockets
Users cannot forge malformed IPv4/IPv6 headers via raw sockets that they
can inject into the stack. Specifically, not for IPv4 since 55888dfb6ba7
("AF_RAW: Augment raw_send_hdrinc to expand skb to fit iphdr->ihl
(v2)"). IPv6 raw sockets also ensure that packets have a well-formed
IPv6 header available in the skbuff.
At quick glance, br_netfilter also validates layer 3 headers and it
drops malformed both IPv4 and IPv6 packets.
Therefore, let's remove this defensive check all over the place.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -38,12 +38,6 @@ static unsigned int
iptable_filter_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (state->hook == NF_INET_LOCAL_OUT &&
- (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr)))
- /* root is playing with raw sockets. */
- return NF_ACCEPT;
-
return ipt_do_table(skb, state, state->net->ipv4.iptable_filter);
}
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -49,11 +49,6 @@ ipt_mangle_out(struct sk_buff *skb, cons
u_int32_t mark;
int err;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
/* Save things which could affect route */
mark = skb->mark;
iph = ip_hdr(skb);
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -26,12 +26,6 @@ static unsigned int
iptable_raw_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (state->hook == NF_INET_LOCAL_OUT &&
- (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr)))
- /* root is playing with raw sockets. */
- return NF_ACCEPT;
-
return ipt_do_table(skb, state, state->net->ipv4.iptable_raw);
}
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -43,12 +43,6 @@ static unsigned int
iptable_security_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (state->hook == NF_INET_LOCAL_OUT &&
- (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr)))
- /* Somebody is playing with raw sockets. */
- return NF_ACCEPT;
-
return ipt_do_table(skb, state, state->net->ipv4.iptable_security);
}
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -154,11 +154,6 @@ static unsigned int ipv4_conntrack_local
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */
return NF_ACCEPT;
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -355,11 +355,6 @@ nf_nat_ipv4_out(void *priv, struct sk_bu
#endif
unsigned int ret;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
@@ -395,11 +390,6 @@ nf_nat_ipv4_local_fn(void *priv, struct
unsigned int ret;
int err;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -30,21 +30,6 @@ static unsigned int nft_do_chain_ipv4(vo
return nft_do_chain(&pkt, priv);
}
-static unsigned int nft_ipv4_output(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- if (unlikely(skb->len < sizeof(struct iphdr) ||
- ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
- if (net_ratelimit())
- pr_info("nf_tables_ipv4: ignoring short SOCK_RAW "
- "packet\n");
- return NF_ACCEPT;
- }
-
- return nft_do_chain_ipv4(priv, skb, state);
-}
-
static struct nft_af_info nft_af_ipv4 __read_mostly = {
.family = NFPROTO_IPV4,
.nhooks = NF_INET_NUMHOOKS,
@@ -91,7 +76,7 @@ static const struct nf_chain_type filter
(1 << NF_INET_POST_ROUTING),
.hooks = {
[NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
- [NF_INET_LOCAL_OUT] = nft_ipv4_output,
+ [NF_INET_LOCAL_OUT] = nft_do_chain_ipv4,
[NF_INET_FORWARD] = nft_do_chain_ipv4,
[NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
[NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -33,11 +33,6 @@ static unsigned int nf_route_table_hook(
const struct iphdr *iph;
int err;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
nft_set_pktinfo(&pkt, skb, state);
nft_set_pktinfo_ipv4(&pkt, skb);
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -42,14 +42,6 @@ ip6t_mangle_out(struct sk_buff *skb, con
u_int8_t hop_limit;
u_int32_t flowlabel, mark;
int err;
-#if 0
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr)) {
- net_warn_ratelimited("ip6t_hook: happy cracking\n");
- return NF_ACCEPT;
- }
-#endif
/* save source/dest address, mark, hoplimit, flowlabel, priority, */
memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -176,11 +176,6 @@ static unsigned int ipv6_conntrack_local
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct ipv6hdr)) {
- net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
- return NF_ACCEPT;
- }
return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -372,10 +372,6 @@ nf_nat_ipv6_out(void *priv, struct sk_bu
#endif
unsigned int ret;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct ipv6hdr))
- return NF_ACCEPT;
-
ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
@@ -411,10 +407,6 @@ nf_nat_ipv6_local_fn(void *priv, struct
unsigned int ret;
int err;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct ipv6hdr))
- return NF_ACCEPT;
-
ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -28,20 +28,6 @@ static unsigned int nft_do_chain_ipv6(vo
return nft_do_chain(&pkt, priv);
}
-static unsigned int nft_ipv6_output(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
- if (net_ratelimit())
- pr_info("nf_tables_ipv6: ignoring short SOCK_RAW "
- "packet\n");
- return NF_ACCEPT;
- }
-
- return nft_do_chain_ipv6(priv, skb, state);
-}
-
static struct nft_af_info nft_af_ipv6 __read_mostly = {
.family = NFPROTO_IPV6,
.nhooks = NF_INET_NUMHOOKS,
@@ -88,7 +74,7 @@ static const struct nf_chain_type filter
(1 << NF_INET_POST_ROUTING),
.hooks = {
[NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
- [NF_INET_LOCAL_OUT] = nft_ipv6_output,
+ [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6,
[NF_INET_FORWARD] = nft_do_chain_ipv6,
[NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
[NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
--- a/net/netfilter/nf_tables_inet.c
+++ b/net/netfilter/nf_tables_inet.c
@@ -38,38 +38,6 @@ static unsigned int nft_do_chain_inet(vo
return nft_do_chain(&pkt, priv);
}
-static unsigned int nft_inet_output(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nft_pktinfo pkt;
-
- nft_set_pktinfo(&pkt, skb, state);
-
- switch (state->pf) {
- case NFPROTO_IPV4:
- if (unlikely(skb->len < sizeof(struct iphdr) ||
- ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
- if (net_ratelimit())
- pr_info("ignoring short SOCK_RAW packet\n");
- return NF_ACCEPT;
- }
- nft_set_pktinfo_ipv4(&pkt, skb);
- break;
- case NFPROTO_IPV6:
- if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
- if (net_ratelimit())
- pr_info("ignoring short SOCK_RAW packet\n");
- return NF_ACCEPT;
- }
- nft_set_pktinfo_ipv6(&pkt, skb);
- break;
- default:
- break;
- }
-
- return nft_do_chain(&pkt, priv);
-}
-
static struct nft_af_info nft_af_inet __read_mostly = {
.family = NFPROTO_INET,
.nhooks = NF_INET_NUMHOOKS,
@@ -116,7 +84,7 @@ static const struct nf_chain_type filter
(1 << NF_INET_POST_ROUTING),
.hooks = {
[NF_INET_LOCAL_IN] = nft_do_chain_inet,
- [NF_INET_LOCAL_OUT] = nft_inet_output,
+ [NF_INET_LOCAL_OUT] = nft_do_chain_inet,
[NF_INET_FORWARD] = nft_do_chain_inet,
[NF_INET_PRE_ROUTING] = nft_do_chain_inet,
[NF_INET_POST_ROUTING] = nft_do_chain_inet,

View File

@ -0,0 +1,101 @@
From: Florian Westphal <fw@strlen.de>
Date: Wed, 6 Dec 2017 16:18:16 +0100
Subject: [PATCH] netfilter: meta: secpath support
replacement for iptables "-m policy --dir in --policy {ipsec,none}".
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -777,6 +777,7 @@ enum nft_exthdr_attributes {
* @NFT_META_OIFGROUP: packet output interface group
* @NFT_META_CGROUP: socket control group (skb->sk->sk_classid)
* @NFT_META_PRANDOM: a 32bit pseudo-random number
+ * @NFT_META_SECPATH: boolean, secpath_exists (!!skb->sp)
*/
enum nft_meta_keys {
NFT_META_LEN,
@@ -804,6 +805,7 @@ enum nft_meta_keys {
NFT_META_OIFGROUP,
NFT_META_CGROUP,
NFT_META_PRANDOM,
+ NFT_META_SECPATH,
};
/**
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -210,6 +210,11 @@ void nft_meta_get_eval(const struct nft_
*dest = prandom_u32_state(state);
break;
}
+#ifdef CONFIG_XFRM
+ case NFT_META_SECPATH:
+ nft_reg_store8(dest, !!skb->sp);
+ break;
+#endif
default:
WARN_ON(1);
goto err;
@@ -308,6 +313,11 @@ int nft_meta_get_init(const struct nft_c
prandom_init_once(&nft_prandom_state);
len = sizeof(u32);
break;
+#ifdef CONFIG_XFRM
+ case NFT_META_SECPATH:
+ len = sizeof(u8);
+ break;
+#endif
default:
return -EOPNOTSUPP;
}
@@ -318,6 +328,38 @@ int nft_meta_get_init(const struct nft_c
}
EXPORT_SYMBOL_GPL(nft_meta_get_init);
+static int nft_meta_get_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+#ifdef CONFIG_XFRM
+ const struct nft_meta *priv = nft_expr_priv(expr);
+ unsigned int hooks;
+
+ if (priv->key != NFT_META_SECPATH)
+ return 0;
+
+ switch (ctx->afi->family) {
+ case NFPROTO_NETDEV:
+ hooks = 1 << NF_NETDEV_INGRESS;
+ break;
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+#else
+ return 0;
+#endif
+}
+
int nft_meta_set_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
@@ -434,6 +476,7 @@ static const struct nft_expr_ops nft_met
.eval = nft_meta_get_eval,
.init = nft_meta_get_init,
.dump = nft_meta_get_dump,
+ .validate = nft_meta_get_validate,
};
static const struct nft_expr_ops nft_meta_set_ops = {

View File

@ -0,0 +1,142 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 3 Nov 2017 16:26:32 +0100
Subject: [PATCH] netfilter: conntrack: move nf_ct_netns_{get,put}() to core
So we can call this from other expression that need conntrack in place
to work.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Florian Westphal <fw@strlen.de>
---
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -125,7 +125,7 @@ void nf_ct_l3proto_module_put(unsigned s
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
-int nf_ct_netns_get(struct net *net, u8 nfproto)
+static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
{
const struct nf_conntrack_l3proto *l3proto;
int ret;
@@ -150,9 +150,33 @@ int nf_ct_netns_get(struct net *net, u8
return ret;
}
+
+int nf_ct_netns_get(struct net *net, u8 nfproto)
+{
+ int err;
+
+ if (nfproto == NFPROTO_INET) {
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
+ if (err < 0)
+ goto err1;
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
+ if (err < 0)
+ goto err2;
+ } else {
+ err = nf_ct_netns_do_get(net, nfproto);
+ if (err < 0)
+ goto err1;
+ }
+ return 0;
+
+err2:
+ nf_ct_netns_put(net, NFPROTO_IPV4);
+err1:
+ return err;
+}
EXPORT_SYMBOL_GPL(nf_ct_netns_get);
-void nf_ct_netns_put(struct net *net, u8 nfproto)
+static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
{
const struct nf_conntrack_l3proto *l3proto;
@@ -171,6 +195,15 @@ void nf_ct_netns_put(struct net *net, u8
nf_ct_l3proto_module_put(nfproto);
}
+
+void nf_ct_netns_put(struct net *net, uint8_t nfproto)
+{
+ if (nfproto == NFPROTO_INET) {
+ nf_ct_netns_do_put(net, NFPROTO_IPV4);
+ nf_ct_netns_do_put(net, NFPROTO_IPV6);
+ } else
+ nf_ct_netns_do_put(net, nfproto);
+}
EXPORT_SYMBOL_GPL(nf_ct_netns_put);
const struct nf_conntrack_l4proto *
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -312,39 +312,6 @@ static const struct nla_policy nft_ct_po
[NFTA_CT_SREG] = { .type = NLA_U32 },
};
-static int nft_ct_netns_get(struct net *net, uint8_t family)
-{
- int err;
-
- if (family == NFPROTO_INET) {
- err = nf_ct_netns_get(net, NFPROTO_IPV4);
- if (err < 0)
- goto err1;
- err = nf_ct_netns_get(net, NFPROTO_IPV6);
- if (err < 0)
- goto err2;
- } else {
- err = nf_ct_netns_get(net, family);
- if (err < 0)
- goto err1;
- }
- return 0;
-
-err2:
- nf_ct_netns_put(net, NFPROTO_IPV4);
-err1:
- return err;
-}
-
-static void nft_ct_netns_put(struct net *net, uint8_t family)
-{
- if (family == NFPROTO_INET) {
- nf_ct_netns_put(net, NFPROTO_IPV4);
- nf_ct_netns_put(net, NFPROTO_IPV6);
- } else
- nf_ct_netns_put(net, family);
-}
-
#ifdef CONFIG_NF_CONNTRACK_ZONES
static void nft_ct_tmpl_put_pcpu(void)
{
@@ -489,7 +456,7 @@ static int nft_ct_get_init(const struct
if (err < 0)
return err;
- err = nft_ct_netns_get(ctx->net, ctx->afi->family);
+ err = nf_ct_netns_get(ctx->net, ctx->afi->family);
if (err < 0)
return err;
@@ -583,7 +550,7 @@ static int nft_ct_set_init(const struct
if (err < 0)
goto err1;
- err = nft_ct_netns_get(ctx->net, ctx->afi->family);
+ err = nf_ct_netns_get(ctx->net, ctx->afi->family);
if (err < 0)
goto err1;
@@ -606,7 +573,7 @@ static void nft_ct_set_destroy(const str
struct nft_ct *priv = nft_expr_priv(expr);
__nft_ct_set_destroy(ctx, priv);
- nft_ct_netns_put(ctx->net, ctx->afi->family);
+ nf_ct_netns_put(ctx->net, ctx->afi->family);
}
static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)

View File

@ -0,0 +1,169 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 7 Jan 2018 01:03:56 +0100
Subject: [PATCH] netfilter: nf_conntrack: add IPS_OFFLOAD status bit
This new bit tells us that the conntrack entry is owned by the flow
table offload infrastructure.
# cat /proc/net/nf_conntrack
ipv4 2 tcp 6 src=10.141.10.2 dst=147.75.205.195 sport=36392 dport=443 src=147.75.205.195 dst=192.168.2.195 sport=443 dport=36392 [OFFLOAD] mark=0 zone=0 use=2
Note the [OFFLOAD] tag in the listing.
The timer of such conntrack entries look like stopped from userspace.
In practise, to make sure the conntrack entry does not go away, the
conntrack timer is periodically set to an arbitrary large value that
gets refreshed on every iteration from the garbage collector, so it
never expires- and they display no internal state in the case of TCP
flows. This allows us to save a bitcheck from the packet path via
nf_ct_is_expired().
Conntrack entries that have been offloaded to the flow table
infrastructure cannot be deleted/flushed via ctnetlink. The flow table
infrastructure is also responsible for releasing this conntrack entry.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -101,12 +101,16 @@ enum ip_conntrack_status {
IPS_HELPER_BIT = 13,
IPS_HELPER = (1 << IPS_HELPER_BIT),
+ /* Conntrack has been offloaded to flow table. */
+ IPS_OFFLOAD_BIT = 14,
+ IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
+
/* Be careful here, modifying these bits can make things messy,
* so don't let users modify them directly.
*/
IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
- IPS_SEQ_ADJUST | IPS_TEMPLATE),
+ IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
__IPS_MAX_BIT = 14,
};
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -901,6 +901,9 @@ static unsigned int early_drop_list(stru
hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
+ continue;
+
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
continue;
@@ -975,6 +978,18 @@ static bool gc_worker_can_early_drop(con
return false;
}
+#define DAY (86400 * HZ)
+
+/* Set an arbitrary timeout large enough not to ever expire, this save
+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
+ * nf_ct_is_expired().
+ */
+static void nf_ct_offload_timeout(struct nf_conn *ct)
+{
+ if (nf_ct_expires(ct) < DAY / 2)
+ ct->timeout = nfct_time_stamp + DAY;
+}
+
static void gc_worker(struct work_struct *work)
{
unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
@@ -1011,6 +1026,11 @@ static void gc_worker(struct work_struct
tmp = nf_ct_tuplehash_to_ctrack(h);
scanned++;
+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
+ nf_ct_offload_timeout(tmp);
+ continue;
+ }
+
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
expired_count++;
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1105,6 +1105,14 @@ static const struct nla_policy ct_nla_po
.len = NF_CT_LABELS_MAX_SIZE },
};
+static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
+{
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ return 0;
+
+ return ctnetlink_filter_match(ct, data);
+}
+
static int ctnetlink_flush_conntrack(struct net *net,
const struct nlattr * const cda[],
u32 portid, int report)
@@ -1117,7 +1125,7 @@ static int ctnetlink_flush_conntrack(str
return PTR_ERR(filter);
}
- nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter,
+ nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter,
portid, report);
kfree(filter);
@@ -1163,6 +1171,11 @@ static int ctnetlink_del_conntrack(struc
ct = nf_ct_tuplehash_to_ctrack(h);
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
+ nf_ct_put(ct);
+ return -EBUSY;
+ }
+
if (cda[CTA_ID]) {
u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
if (id != (u32)(unsigned long)ct) {
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -305,6 +305,9 @@ static bool tcp_invert_tuple(struct nf_c
/* Print out the private part of the conntrack. */
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ return;
+
seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
}
#endif
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -309,10 +309,12 @@ static int ct_seq_show(struct seq_file *
WARN_ON(!l4proto);
ret = -ENOSPC;
- seq_printf(s, "%-8s %u %-8s %u %ld ",
+ seq_printf(s, "%-8s %u %-8s %u ",
l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
- l4proto_name(l4proto->l4proto), nf_ct_protonum(ct),
- nf_ct_expires(ct) / HZ);
+ l4proto_name(l4proto->l4proto), nf_ct_protonum(ct));
+
+ if (!test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ seq_printf(s, "%ld ", nf_ct_expires(ct) / HZ);
if (l4proto->print_conntrack)
l4proto->print_conntrack(s, ct);
@@ -339,7 +341,9 @@ static int ct_seq_show(struct seq_file *
if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
goto release;
- if (test_bit(IPS_ASSURED_BIT, &ct->status))
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ seq_puts(s, "[OFFLOAD] ");
+ else if (test_bit(IPS_ASSURED_BIT, &ct->status))
seq_puts(s, "[ASSURED] ");
if (seq_has_overflowed(s))

View File

@ -0,0 +1,586 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 7 Jan 2018 01:04:11 +0100
Subject: [PATCH] netfilter: add generic flow table infrastructure
This patch defines the API to interact with flow tables, this allows to
add, delete and lookup for entries in the flow table. This also adds the
generic garbage code that removes entries that have expired, ie. no
traffic has been seen for a while.
Users of the flow table infrastructure can delete entries via
flow_offload_dead(), which sets the dying bit, this signals the garbage
collector to release an entry from user context.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
create mode 100644 net/netfilter/nf_flow_table.c
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -1,7 +1,12 @@
#ifndef _NF_FLOW_TABLE_H
#define _NF_FLOW_TABLE_H
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
#include <linux/rhashtable.h>
+#include <linux/rcupdate.h>
+#include <net/dst.h>
struct nf_flowtable;
@@ -20,4 +25,93 @@ struct nf_flowtable {
struct delayed_work gc_work;
};
+enum flow_offload_tuple_dir {
+ FLOW_OFFLOAD_DIR_ORIGINAL,
+ FLOW_OFFLOAD_DIR_REPLY,
+ __FLOW_OFFLOAD_DIR_MAX = FLOW_OFFLOAD_DIR_REPLY,
+};
+#define FLOW_OFFLOAD_DIR_MAX (__FLOW_OFFLOAD_DIR_MAX + 1)
+
+struct flow_offload_tuple {
+ union {
+ struct in_addr src_v4;
+ struct in6_addr src_v6;
+ };
+ union {
+ struct in_addr dst_v4;
+ struct in6_addr dst_v6;
+ };
+ struct {
+ __be16 src_port;
+ __be16 dst_port;
+ };
+
+ int iifidx;
+
+ u8 l3proto;
+ u8 l4proto;
+ u8 dir;
+
+ int oifidx;
+
+ struct dst_entry *dst_cache;
+};
+
+struct flow_offload_tuple_rhash {
+ struct rhash_head node;
+ struct flow_offload_tuple tuple;
+};
+
+#define FLOW_OFFLOAD_SNAT 0x1
+#define FLOW_OFFLOAD_DNAT 0x2
+#define FLOW_OFFLOAD_DYING 0x4
+
+struct flow_offload {
+ struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
+ u32 flags;
+ union {
+ /* Your private driver data here. */
+ u32 timeout;
+ };
+};
+
+#define NF_FLOW_TIMEOUT (30 * HZ)
+
+struct nf_flow_route {
+ struct {
+ struct dst_entry *dst;
+ int ifindex;
+ } tuple[FLOW_OFFLOAD_DIR_MAX];
+};
+
+struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
+ struct nf_flow_route *route);
+void flow_offload_free(struct flow_offload *flow);
+
+int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
+void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow);
+struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
+ struct flow_offload_tuple *tuple);
+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
+ void (*iter)(struct flow_offload *flow, void *data),
+ void *data);
+void nf_flow_offload_work_gc(struct work_struct *work);
+extern const struct rhashtable_params nf_flow_offload_rhash_params;
+
+void flow_offload_dead(struct flow_offload *flow);
+
+int nf_flow_snat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir);
+int nf_flow_dnat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir);
+
+struct flow_ports {
+ __be16 source, dest;
+};
+
+#define MODULE_ALIAS_NF_FLOWTABLE(family) \
+ MODULE_ALIAS("nf-flowtable-" __stringify(family))
+
#endif /* _FLOW_OFFLOAD_H */
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -661,6 +661,13 @@ endif # NF_TABLES_NETDEV
endif # NF_TABLES
+config NF_FLOW_TABLE
+ tristate "Netfilter flow table module"
+ help
+ This option adds the flow table core infrastructure.
+
+ To compile it as a module, choose M here.
+
config NETFILTER_XTABLES
tristate "Netfilter Xtables support (required for ip_tables)"
default m if NETFILTER_ADVANCED=n
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -110,6 +110,9 @@ obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
+# flow table infrastructure
+obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
+
# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
--- /dev/null
+++ b/net/netfilter/nf_flow_table.c
@@ -0,0 +1,429 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+
+struct flow_offload_entry {
+ struct flow_offload flow;
+ struct nf_conn *ct;
+ struct rcu_head rcu_head;
+};
+
+struct flow_offload *
+flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
+{
+ struct flow_offload_entry *entry;
+ struct flow_offload *flow;
+
+ if (unlikely(nf_ct_is_dying(ct) ||
+ !atomic_inc_not_zero(&ct->ct_general.use)))
+ return NULL;
+
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (!entry)
+ goto err_ct_refcnt;
+
+ flow = &entry->flow;
+
+ if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
+ goto err_dst_cache_original;
+
+ if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
+ goto err_dst_cache_reply;
+
+ entry->ct = ct;
+
+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) {
+ case NFPROTO_IPV4:
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
+ break;
+ case NFPROTO_IPV6:
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
+ break;
+ }
+
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
+
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
+ route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
+ route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;
+
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
+
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
+ FLOW_OFFLOAD_DIR_ORIGINAL;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
+ FLOW_OFFLOAD_DIR_REPLY;
+
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
+ route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
+ route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
+ route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
+ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
+ route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
+
+ if (ct->status & IPS_SRC_NAT)
+ flow->flags |= FLOW_OFFLOAD_SNAT;
+ else if (ct->status & IPS_DST_NAT)
+ flow->flags |= FLOW_OFFLOAD_DNAT;
+
+ return flow;
+
+err_dst_cache_reply:
+ dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
+err_dst_cache_original:
+ kfree(entry);
+err_ct_refcnt:
+ nf_ct_put(ct);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(flow_offload_alloc);
+
+void flow_offload_free(struct flow_offload *flow)
+{
+ struct flow_offload_entry *e;
+
+ dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
+ dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
+ e = container_of(flow, struct flow_offload_entry, flow);
+ kfree(e);
+}
+EXPORT_SYMBOL_GPL(flow_offload_free);
+
+void flow_offload_dead(struct flow_offload *flow)
+{
+ flow->flags |= FLOW_OFFLOAD_DYING;
+}
+EXPORT_SYMBOL_GPL(flow_offload_dead);
+
+int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
+{
+ flow->timeout = (u32)jiffies;
+
+ rhashtable_insert_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+ *flow_table->type->params);
+ rhashtable_insert_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+ *flow_table->type->params);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(flow_offload_add);
+
+void flow_offload_del(struct nf_flowtable *flow_table,
+ struct flow_offload *flow)
+{
+ struct flow_offload_entry *e;
+
+ rhashtable_remove_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+ *flow_table->type->params);
+ rhashtable_remove_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+ *flow_table->type->params);
+
+ e = container_of(flow, struct flow_offload_entry, flow);
+ kfree_rcu(e, rcu_head);
+}
+EXPORT_SYMBOL_GPL(flow_offload_del);
+
+struct flow_offload_tuple_rhash *
+flow_offload_lookup(struct nf_flowtable *flow_table,
+ struct flow_offload_tuple *tuple)
+{
+ return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
+ *flow_table->type->params);
+}
+EXPORT_SYMBOL_GPL(flow_offload_lookup);
+
+static void nf_flow_release_ct(const struct flow_offload *flow)
+{
+ struct flow_offload_entry *e;
+
+ e = container_of(flow, struct flow_offload_entry, flow);
+ nf_ct_delete(e->ct, 0, 0);
+ nf_ct_put(e->ct);
+}
+
+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
+ void (*iter)(struct flow_offload *flow, void *data),
+ void *data)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct rhashtable_iter hti;
+ struct flow_offload *flow;
+ int err;
+
+ err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
+ if (err)
+ return err;
+
+ rhashtable_walk_start(&hti);
+
+ while ((tuplehash = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(tuplehash)) {
+ err = PTR_ERR(tuplehash);
+ if (err != -EAGAIN)
+ goto out;
+
+ continue;
+ }
+ if (tuplehash->tuple.dir)
+ continue;
+
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+
+ iter(flow, data);
+ }
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
+
+static inline bool nf_flow_has_expired(const struct flow_offload *flow)
+{
+ return (__s32)(flow->timeout - (u32)jiffies) <= 0;
+}
+
+static inline bool nf_flow_is_dying(const struct flow_offload *flow)
+{
+ return flow->flags & FLOW_OFFLOAD_DYING;
+}
+
+void nf_flow_offload_work_gc(struct work_struct *work)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table;
+ struct rhashtable_iter hti;
+ struct flow_offload *flow;
+ int err;
+
+ flow_table = container_of(work, struct nf_flowtable, gc_work.work);
+
+ err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
+ if (err)
+ goto schedule;
+
+ rhashtable_walk_start(&hti);
+
+ while ((tuplehash = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(tuplehash)) {
+ err = PTR_ERR(tuplehash);
+ if (err != -EAGAIN)
+ goto out;
+
+ continue;
+ }
+ if (tuplehash->tuple.dir)
+ continue;
+
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+
+ if (nf_flow_has_expired(flow) ||
+ nf_flow_is_dying(flow)) {
+ flow_offload_del(flow_table, flow);
+ nf_flow_release_ct(flow);
+ }
+ }
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+schedule:
+ queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
+
+static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
+{
+ const struct flow_offload_tuple *tuple = data;
+
+ return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
+}
+
+static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
+{
+ const struct flow_offload_tuple_rhash *tuplehash = data;
+
+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
+}
+
+static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct flow_offload_tuple *tuple = arg->key;
+ const struct flow_offload_tuple_rhash *x = ptr;
+
+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
+ return 1;
+
+ return 0;
+}
+
+const struct rhashtable_params nf_flow_offload_rhash_params = {
+ .head_offset = offsetof(struct flow_offload_tuple_rhash, node),
+ .hashfn = flow_offload_hash,
+ .obj_hashfn = flow_offload_hash_obj,
+ .obj_cmpfn = flow_offload_hash_cmp,
+ .automatic_shrinking = true,
+};
+EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
+
+static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
+ __be16 port, __be16 new_port)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+ return -1;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
+
+ return 0;
+}
+
+static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
+ __be16 port, __be16 new_port)
+{
+ struct udphdr *udph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
+ return -1;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace2(&udph->check, skb, port,
+ new_port, true);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+
+ return 0;
+}
+
+static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, __be16 port, __be16 new_port)
+{
+ switch (protocol) {
+ case IPPROTO_TCP:
+ if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
+ return NF_DROP;
+ break;
+ case IPPROTO_UDP:
+ if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
+ return NF_DROP;
+ break;
+ }
+
+ return 0;
+}
+
+int nf_flow_snat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir)
+{
+ struct flow_ports *hdr;
+ __be16 port, new_port;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*hdr)))
+ return -1;
+
+ hdr = (void *)(skb_network_header(skb) + thoff);
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ port = hdr->source;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
+ hdr->source = new_port;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ port = hdr->dest;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
+ hdr->dest = new_port;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+}
+EXPORT_SYMBOL_GPL(nf_flow_snat_port);
+
+int nf_flow_dnat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir)
+{
+ struct flow_ports *hdr;
+ __be16 port, new_port;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*hdr)))
+ return -1;
+
+ hdr = (void *)(skb_network_header(skb) + thoff);
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ port = hdr->dest;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
+ hdr->dest = new_port;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ port = hdr->source;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
+ hdr->source = new_port;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+}
+EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");

View File

@ -0,0 +1,334 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 7 Jan 2018 01:04:15 +0100
Subject: [PATCH] netfilter: flow table support for IPv4
This patch adds the IPv4 flow table type, that implements the datapath
flow table to forward IPv4 traffic. Rationale is:
1) Look up for the packet in the flow table, from the ingress hook.
2) If there's a hit, decrement ttl and pass it on to the neighbour layer
for transmission.
3) If there's a miss, packet is passed up to the classic forwarding
path.
This patch also supports layer 3 source and destination NAT.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
create mode 100644 net/ipv4/netfilter/nf_flow_table_ipv4.c
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -77,6 +77,14 @@ config NF_TABLES_ARP
endif # NF_TABLES
+config NF_FLOW_TABLE_IPV4
+ select NF_FLOW_TABLE
+ tristate "Netfilter flow table IPv4 module"
+ help
+ This option adds the flow table IPv4 support.
+
+ To compile it as a module, choose M here.
+
config NF_DUP_IPV4
tristate "Netfilter IPv4 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -43,6 +43,9 @@ obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redi
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
+# flow table support
+obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o
+
# generic IP tables
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
--- /dev/null
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -0,0 +1,283 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/ip.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/neighbour.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+/* For layer 4 checksum field offset. */
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
+ __be32 addr, __be32 new_addr)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+ return -1;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
+
+ return 0;
+}
+
+static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
+ __be32 addr, __be32 new_addr)
+{
+ struct udphdr *udph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
+ return -1;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace4(&udph->check, skb, addr,
+ new_addr, true);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+
+ return 0;
+}
+
+static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
+ unsigned int thoff, __be32 addr,
+ __be32 new_addr)
+{
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ case IPPROTO_UDP:
+ if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ }
+
+ return 0;
+}
+
+static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ struct iphdr *iph, unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ __be32 addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = iph->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
+ iph->saddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = iph->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
+ iph->daddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+ csum_replace4(&iph->check, addr, new_addr);
+
+ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+}
+
+static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ struct iphdr *iph, unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ __be32 addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = iph->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
+ iph->daddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = iph->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
+ iph->saddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+}
+
+static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ enum flow_offload_tuple_dir dir)
+{
+ struct iphdr *iph = ip_hdr(skb);
+ unsigned int thoff = iph->ihl * 4;
+
+ if (flow->flags & FLOW_OFFLOAD_SNAT &&
+ (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+ nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
+ return -1;
+ if (flow->flags & FLOW_OFFLOAD_DNAT &&
+ (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
+ return -1;
+
+ return 0;
+}
+
+static bool ip_has_options(unsigned int thoff)
+{
+ return thoff != sizeof(struct iphdr);
+}
+
+static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
+ struct flow_offload_tuple *tuple)
+{
+ struct flow_ports *ports;
+ unsigned int thoff;
+ struct iphdr *iph;
+
+ if (!pskb_may_pull(skb, sizeof(*iph)))
+ return -1;
+
+ iph = ip_hdr(skb);
+ thoff = iph->ihl * 4;
+
+ if (ip_is_fragment(iph) ||
+ unlikely(ip_has_options(thoff)))
+ return -1;
+
+ if (iph->protocol != IPPROTO_TCP &&
+ iph->protocol != IPPROTO_UDP)
+ return -1;
+
+ thoff = iph->ihl * 4;
+ if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+ return -1;
+
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+ tuple->src_v4.s_addr = iph->saddr;
+ tuple->dst_v4.s_addr = iph->daddr;
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ tuple->l3proto = AF_INET;
+ tuple->l4proto = iph->protocol;
+ tuple->iifidx = dev->ifindex;
+
+ return 0;
+}
+
+/* Based on ip_exceeds_mtu(). */
+static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu)
+ return false;
+
+ if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ return false;
+
+ return true;
+}
+
+static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt)
+{
+ u32 mtu;
+
+ mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
+ if (__nf_flow_exceeds_mtu(skb, mtu))
+ return true;
+
+ return false;
+}
+
+static unsigned int
+nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ struct flow_offload_tuple tuple = {};
+ enum flow_offload_tuple_dir dir;
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ const struct rtable *rt;
+ struct iphdr *iph;
+ __be32 nexthop;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ return NF_ACCEPT;
+
+ if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
+ return NF_ACCEPT;
+
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
+ if (tuplehash == NULL)
+ return NF_ACCEPT;
+
+ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+ if (!outdev)
+ return NF_ACCEPT;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
+ rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+ if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
+ return NF_ACCEPT;
+
+ if (skb_try_make_writable(skb, sizeof(*iph)))
+ return NF_DROP;
+
+ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+ nf_flow_nat_ip(flow, skb, dir) < 0)
+ return NF_DROP;
+
+ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ iph = ip_hdr(skb);
+ ip_decrease_ttl(iph);
+
+ skb->dev = outdev;
+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+
+ return NF_STOLEN;
+}
+
+static struct nf_flowtable_type flowtable_ipv4 = {
+ .family = NFPROTO_IPV4,
+ .params = &nf_flow_offload_rhash_params,
+ .gc = nf_flow_offload_work_gc,
+ .hook = nf_flow_offload_ip_hook,
+ .owner = THIS_MODULE,
+};
+
+static int __init nf_flow_ipv4_module_init(void)
+{
+ nft_register_flowtable_type(&flowtable_ipv4);
+
+ return 0;
+}
+
+static void __exit nf_flow_ipv4_module_exit(void)
+{
+ nft_unregister_flowtable_type(&flowtable_ipv4);
+}
+
+module_init(nf_flow_ipv4_module_init);
+module_exit(nf_flow_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NF_FLOWTABLE(AF_INET);

View File

@ -0,0 +1,354 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 7 Jan 2018 01:04:19 +0100
Subject: [PATCH] netfilter: flow table support for IPv6
This patch adds the IPv6 flow table type, that implements the datapath
flow table to forward IPv6 traffic.
This patch exports ip6_dst_mtu_forward() that is required to check for
mtu to pass up packets that need PMTUD handling to the classic
forwarding path.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
create mode 100644 net/ipv6/netfilter/nf_flow_table_ipv6.c
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -913,6 +913,8 @@ static inline struct sk_buff *ip6_finish
&inet6_sk(sk)->cork);
}
+unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst);
+
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
struct flowi6 *fl6);
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -370,7 +370,7 @@ static inline int ip6_forward_finish(str
return dst_output(net, sk, skb);
}
-static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
unsigned int mtu;
struct inet6_dev *idev;
@@ -390,6 +390,7 @@ static unsigned int ip6_dst_mtu_forward(
return mtu;
}
+EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -71,6 +71,14 @@ config NFT_FIB_IPV6
endif # NF_TABLES_IPV6
endif # NF_TABLES
+config NF_FLOW_TABLE_IPV6
+ select NF_FLOW_TABLE
+ tristate "Netfilter flow table IPv6 module"
+ help
+ This option adds the flow table IPv6 support.
+
+ To compile it as a module, choose M here.
+
config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -45,6 +45,9 @@ obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redi
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
+# flow table support
+obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o
+
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
--- /dev/null
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -0,0 +1,277 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/ipv6.h>
+#include <linux/netdevice.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/neighbour.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+/* For layer 4 checksum field offset. */
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+ return -1;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
+ new_addr->s6_addr32, true);
+
+ return 0;
+}
+
+static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ struct udphdr *udph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
+ return -1;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
+ new_addr->s6_addr32, true);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+
+ return 0;
+}
+
+static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff, struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ switch (ip6h->nexthdr) {
+ case IPPROTO_TCP:
+ if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ case IPPROTO_UDP:
+ if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ }
+
+ return 0;
+}
+
+static int nf_flow_snat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ struct in6_addr addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = ip6h->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
+ ip6h->saddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = ip6h->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
+ ip6h->daddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ struct in6_addr addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = ip6h->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
+ ip6h->daddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = ip6h->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
+ ip6h->saddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static int nf_flow_nat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb,
+ enum flow_offload_tuple_dir dir)
+{
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ unsigned int thoff = sizeof(*ip6h);
+
+ if (flow->flags & FLOW_OFFLOAD_SNAT &&
+ (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ return -1;
+ if (flow->flags & FLOW_OFFLOAD_DNAT &&
+ (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ return -1;
+
+ return 0;
+}
+
+static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
+ struct flow_offload_tuple *tuple)
+{
+ struct flow_ports *ports;
+ struct ipv6hdr *ip6h;
+ unsigned int thoff;
+
+ if (!pskb_may_pull(skb, sizeof(*ip6h)))
+ return -1;
+
+ ip6h = ipv6_hdr(skb);
+
+ if (ip6h->nexthdr != IPPROTO_TCP &&
+ ip6h->nexthdr != IPPROTO_UDP)
+ return -1;
+
+ thoff = sizeof(*ip6h);
+ if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+ return -1;
+
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+ tuple->src_v6 = ip6h->saddr;
+ tuple->dst_v6 = ip6h->daddr;
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ tuple->l3proto = AF_INET6;
+ tuple->l4proto = ip6h->nexthdr;
+ tuple->iifidx = dev->ifindex;
+
+ return 0;
+}
+
+/* Based on ip_exceeds_mtu(). */
+static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ return false;
+
+ return true;
+}
+
+static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
+{
+ u32 mtu;
+
+ mtu = ip6_dst_mtu_forward(&rt->dst);
+ if (__nf_flow_exceeds_mtu(skb, mtu))
+ return true;
+
+ return false;
+}
+
+static unsigned int
+nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ struct flow_offload_tuple tuple = {};
+ enum flow_offload_tuple_dir dir;
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ struct in6_addr *nexthop;
+ struct ipv6hdr *ip6h;
+ struct rt6_info *rt;
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ return NF_ACCEPT;
+
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
+ return NF_ACCEPT;
+
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
+ if (tuplehash == NULL)
+ return NF_ACCEPT;
+
+ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+ if (!outdev)
+ return NF_ACCEPT;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
+ rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+ if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
+ return NF_ACCEPT;
+
+ if (skb_try_make_writable(skb, sizeof(*ip6h)))
+ return NF_DROP;
+
+ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+ nf_flow_nat_ipv6(flow, skb, dir) < 0)
+ return NF_DROP;
+
+ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ ip6h = ipv6_hdr(skb);
+ ip6h->hop_limit--;
+
+ skb->dev = outdev;
+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+
+ return NF_STOLEN;
+}
+
+static struct nf_flowtable_type flowtable_ipv6 = {
+ .family = NFPROTO_IPV6,
+ .params = &nf_flow_offload_rhash_params,
+ .gc = nf_flow_offload_work_gc,
+ .hook = nf_flow_offload_ipv6_hook,
+ .owner = THIS_MODULE,
+};
+
+static int __init nf_flow_ipv6_module_init(void)
+{
+ nft_register_flowtable_type(&flowtable_ipv6);
+
+ return 0;
+}
+
+static void __exit nf_flow_ipv6_module_exit(void)
+{
+ nft_unregister_flowtable_type(&flowtable_ipv6);
+}
+
+module_init(nf_flow_ipv6_module_init);
+module_exit(nf_flow_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);

View File

@ -0,0 +1,141 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 7 Jan 2018 01:04:22 +0100
Subject: [PATCH] netfilter: flow table support for the mixed IPv4/IPv6 family
This patch adds the IPv6 flow table type, that implements the datapath
flow table to forward IPv6 traffic.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
create mode 100644 net/netfilter/nf_flow_table_inet.c
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -111,6 +111,11 @@ struct flow_ports {
__be16 source, dest;
};
+unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state);
+unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state);
+
#define MODULE_ALIAS_NF_FLOWTABLE(family) \
MODULE_ALIAS("nf-flowtable-" __stringify(family))
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -202,7 +202,7 @@ static bool nf_flow_exceeds_mtu(struct s
return false;
}
-static unsigned int
+unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -254,6 +254,7 @@ nf_flow_offload_ip_hook(void *priv, stru
return NF_STOLEN;
}
+EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
static struct nf_flowtable_type flowtable_ipv4 = {
.family = NFPROTO_IPV4,
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -196,7 +196,7 @@ static bool nf_flow_exceeds_mtu(struct s
return false;
}
-static unsigned int
+unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -248,6 +248,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
return NF_STOLEN;
}
+EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
static struct nf_flowtable_type flowtable_ipv6 = {
.family = NFPROTO_IPV6,
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -661,6 +661,14 @@ endif # NF_TABLES_NETDEV
endif # NF_TABLES
+config NF_FLOW_TABLE_INET
+ select NF_FLOW_TABLE
+ tristate "Netfilter flow table mixed IPv4/IPv6 module"
+ help
+ This option adds the flow table mixed IPv4/IPv6 support.
+
+ To compile it as a module, choose M here.
+
config NF_FLOW_TABLE
tristate "Netfilter flow table module"
help
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -112,6 +112,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_
# flow table infrastructure
obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
+obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
--- /dev/null
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -0,0 +1,48 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+
+static unsigned int
+nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ return nf_flow_offload_ip_hook(priv, skb, state);
+ case htons(ETH_P_IPV6):
+ return nf_flow_offload_ipv6_hook(priv, skb, state);
+ }
+
+ return NF_ACCEPT;
+}
+
+static struct nf_flowtable_type flowtable_inet = {
+ .family = NFPROTO_INET,
+ .params = &nf_flow_offload_rhash_params,
+ .gc = nf_flow_offload_work_gc,
+ .hook = nf_flow_offload_inet_hook,
+ .owner = THIS_MODULE,
+};
+
+static int __init nf_flow_inet_module_init(void)
+{
+ nft_register_flowtable_type(&flowtable_inet);
+
+ return 0;
+}
+
+static void __exit nf_flow_inet_module_exit(void)
+{
+ nft_unregister_flowtable_type(&flowtable_inet);
+}
+
+module_init(nf_flow_inet_module_init);
+module_exit(nf_flow_inet_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */

View File

@ -0,0 +1,332 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 7 Jan 2018 01:04:26 +0100
Subject: [PATCH] netfilter: nf_tables: flow offload expression
Add new instruction for the nf_tables VM that allows us to specify what
flows are offloaded into a given flow table via name. This new
instruction creates the flow entry and adds it to the flow table.
Only established flows, ie. we have seen traffic in both directions, are
added to the flow table. You can still decide to offload entries at a
later stage via packet counting or checking the ct status in case you
want to offload assured conntracks.
This new extension depends on the conntrack subsystem.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
create mode 100644 net/netfilter/nft_flow_offload.c
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -957,6 +957,17 @@ enum nft_ct_attributes {
};
#define NFTA_CT_MAX (__NFTA_CT_MAX - 1)
+/**
+ * enum nft_flow_attributes - ct offload expression attributes
+ * @NFTA_FLOW_TABLE_NAME: flow table name (NLA_STRING)
+ */
+enum nft_offload_attributes {
+ NFTA_FLOW_UNSPEC,
+ NFTA_FLOW_TABLE_NAME,
+ __NFTA_FLOW_MAX,
+};
+#define NFTA_FLOW_MAX (__NFTA_FLOW_MAX - 1)
+
enum nft_limit_type {
NFT_LIMIT_PKTS,
NFT_LIMIT_PKT_BYTES
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -509,6 +509,13 @@ config NFT_CT
This option adds the "ct" expression that you can use to match
connection tracking information such as the flow state.
+config NFT_FLOW_OFFLOAD
+ depends on NF_CONNTRACK
+ tristate "Netfilter nf_tables hardware flow offload module"
+ help
+ This option adds the "flow_offload" expression that you can use to
+ choose what flows are placed into the hardware.
+
config NFT_SET_RBTREE
tristate "Netfilter nf_tables rbtree set module"
help
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -87,6 +87,7 @@ obj-$(CONFIG_NFT_META) += nft_meta.o
obj-$(CONFIG_NFT_RT) += nft_rt.o
obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
obj-$(CONFIG_NFT_CT) += nft_ct.o
+obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o
obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
obj-$(CONFIG_NFT_NAT) += nft_nat.o
obj-$(CONFIG_NFT_OBJREF) += nft_objref.o
--- /dev/null
+++ b/net/netfilter/nft_flow_offload.c
@@ -0,0 +1,264 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/ip.h> /* for ipv4 options. */
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <net/netfilter/nf_flow_table.h>
+
+struct nft_flow_offload {
+ struct nft_flowtable *flowtable;
+};
+
+static int nft_flow_route(const struct nft_pktinfo *pkt,
+ const struct nf_conn *ct,
+ struct nf_flow_route *route,
+ enum ip_conntrack_dir dir)
+{
+ struct dst_entry *this_dst = skb_dst(pkt->skb);
+ struct dst_entry *other_dst = NULL;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ break;
+ case NFPROTO_IPV6:
+ fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
+ break;
+ }
+
+ nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
+ if (!other_dst)
+ return -ENOENT;
+
+ route->tuple[dir].dst = this_dst;
+ route->tuple[dir].ifindex = nft_in(pkt)->ifindex;
+ route->tuple[!dir].dst = other_dst;
+ route->tuple[!dir].ifindex = nft_out(pkt)->ifindex;
+
+ return 0;
+}
+
+static bool nft_flow_offload_skip(struct sk_buff *skb)
+{
+ struct ip_options *opt = &(IPCB(skb)->opt);
+
+ if (unlikely(opt->optlen))
+ return true;
+ if (skb_sec_path(skb))
+ return true;
+
+ return false;
+}
+
+static void nft_flow_offload_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+ struct nf_flowtable *flowtable = &priv->flowtable->data;
+ enum ip_conntrack_info ctinfo;
+ struct nf_flow_route route;
+ struct flow_offload *flow;
+ enum ip_conntrack_dir dir;
+ struct nf_conn *ct;
+ int ret;
+
+ if (nft_flow_offload_skip(pkt->skb))
+ goto out;
+
+ ct = nf_ct_get(pkt->skb, &ctinfo);
+ if (!ct)
+ goto out;
+
+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ break;
+ default:
+ goto out;
+ }
+
+ if (test_bit(IPS_HELPER_BIT, &ct->status))
+ goto out;
+
+ if (ctinfo == IP_CT_NEW ||
+ ctinfo == IP_CT_RELATED)
+ goto out;
+
+ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
+ goto out;
+
+ dir = CTINFO2DIR(ctinfo);
+ if (nft_flow_route(pkt, ct, &route, dir) < 0)
+ goto err_flow_route;
+
+ flow = flow_offload_alloc(ct, &route);
+ if (!flow)
+ goto err_flow_alloc;
+
+ ret = flow_offload_add(flowtable, flow);
+ if (ret < 0)
+ goto err_flow_add;
+
+ return;
+
+err_flow_add:
+ flow_offload_free(flow);
+err_flow_alloc:
+ dst_release(route.tuple[!dir].dst);
+err_flow_route:
+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
+out:
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_flow_offload_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ unsigned int hook_mask = (1 << NF_INET_FORWARD);
+
+ return nft_chain_validate_hooks(ctx->chain, hook_mask);
+}
+
+static int nft_flow_offload_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_flowtable *flowtable;
+
+ if (!tb[NFTA_FLOW_TABLE_NAME])
+ return -EINVAL;
+
+ flowtable = nf_tables_flowtable_lookup(ctx->table,
+ tb[NFTA_FLOW_TABLE_NAME],
+ genmask);
+ if (IS_ERR(flowtable))
+ return PTR_ERR(flowtable);
+
+ priv->flowtable = flowtable;
+ flowtable->use++;
+
+ return nf_ct_netns_get(ctx->net, ctx->afi->family);
+}
+
+static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+ priv->flowtable->use--;
+ nf_ct_netns_put(ctx->net, ctx->afi->family);
+}
+
+static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+ if (nla_put_string(skb, NFTA_FLOW_TABLE_NAME, priv->flowtable->name))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_flow_offload_type;
+static const struct nft_expr_ops nft_flow_offload_ops = {
+ .type = &nft_flow_offload_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)),
+ .eval = nft_flow_offload_eval,
+ .init = nft_flow_offload_init,
+ .destroy = nft_flow_offload_destroy,
+ .validate = nft_flow_offload_validate,
+ .dump = nft_flow_offload_dump,
+};
+
+static struct nft_expr_type nft_flow_offload_type __read_mostly = {
+ .name = "flow_offload",
+ .ops = &nft_flow_offload_ops,
+ .maxattr = NFTA_FLOW_MAX,
+ .owner = THIS_MODULE,
+};
+
+static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data)
+{
+ struct net_device *dev = data;
+
+ if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
+ return;
+
+ flow_offload_dead(flow);
+}
+
+static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable,
+ void *data)
+{
+ nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data);
+}
+
+static int flow_offload_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ if (event != NETDEV_DOWN)
+ return NOTIFY_DONE;
+
+ nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block flow_offload_netdev_notifier = {
+ .notifier_call = flow_offload_netdev_event,
+};
+
+static int __init nft_flow_offload_module_init(void)
+{
+ int err;
+
+ register_netdevice_notifier(&flow_offload_netdev_notifier);
+
+ err = nft_register_expr(&nft_flow_offload_type);
+ if (err < 0)
+ goto register_expr;
+
+ return 0;
+
+register_expr:
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+ return err;
+}
+
+static void __exit nft_flow_offload_module_exit(void)
+{
+ struct net *net;
+
+ nft_unregister_expr(&nft_flow_offload_type);
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+ rtnl_lock();
+ for_each_net(net)
+ nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL);
+ rtnl_unlock();
+}
+
+module_init(nft_flow_offload_module_init);
+module_exit(nft_flow_offload_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("flow_offload");

View File

@ -0,0 +1,113 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 19 Dec 2017 13:53:45 +0100
Subject: [PATCH] netfilter: nf_tables: remove nhooks field from struct
nft_af_info
We already validate the hook through bitmask, so this check is
superfluous. When removing this, this patch is also fixing a bug in the
new flowtable codebase, since ctx->afi points to the table family
instead of the netdev family which is where the flowtable is really
hooked in.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -963,7 +963,6 @@ enum nft_af_flags {
*
* @list: used internally
* @family: address family
- * @nhooks: number of hooks in this family
* @owner: module owner
* @tables: used internally
* @flags: family flags
@@ -971,7 +970,6 @@ enum nft_af_flags {
struct nft_af_info {
struct list_head list;
int family;
- unsigned int nhooks;
struct module *owner;
struct list_head tables;
u32 flags;
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -44,7 +44,6 @@ nft_do_chain_bridge(void *priv,
static struct nft_af_info nft_af_bridge __read_mostly = {
.family = NFPROTO_BRIDGE,
- .nhooks = NF_BR_NUMHOOKS,
.owner = THIS_MODULE,
};
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -29,7 +29,6 @@ nft_do_chain_arp(void *priv,
static struct nft_af_info nft_af_arp __read_mostly = {
.family = NFPROTO_ARP,
- .nhooks = NF_ARP_NUMHOOKS,
.owner = THIS_MODULE,
};
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -32,7 +32,6 @@ static unsigned int nft_do_chain_ipv4(vo
static struct nft_af_info nft_af_ipv4 __read_mostly = {
.family = NFPROTO_IPV4,
- .nhooks = NF_INET_NUMHOOKS,
.owner = THIS_MODULE,
};
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -30,7 +30,6 @@ static unsigned int nft_do_chain_ipv6(vo
static struct nft_af_info nft_af_ipv6 __read_mostly = {
.family = NFPROTO_IPV6,
- .nhooks = NF_INET_NUMHOOKS,
.owner = THIS_MODULE,
};
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1328,9 +1328,6 @@ static int nft_chain_parse_hook(struct n
return -EINVAL;
hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
- if (hook->num >= afi->nhooks)
- return -EINVAL;
-
hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT];
@@ -4919,7 +4916,7 @@ static int nf_tables_flowtable_parse_hoo
return -EINVAL;
hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
- if (hooknum >= ctx->afi->nhooks)
+ if (hooknum != NF_NETDEV_INGRESS)
return -EINVAL;
priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
--- a/net/netfilter/nf_tables_inet.c
+++ b/net/netfilter/nf_tables_inet.c
@@ -40,7 +40,6 @@ static unsigned int nft_do_chain_inet(vo
static struct nft_af_info nft_af_inet __read_mostly = {
.family = NFPROTO_INET,
- .nhooks = NF_INET_NUMHOOKS,
.owner = THIS_MODULE,
};
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -40,7 +40,6 @@ nft_do_chain_netdev(void *priv, struct s
static struct nft_af_info nft_af_netdev __read_mostly = {
.family = NFPROTO_NETDEV,
- .nhooks = NF_NETDEV_NUMHOOKS,
.owner = THIS_MODULE,
.flags = NFT_AF_NEEDS_DEV,
};

View File

@ -0,0 +1,22 @@
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 10 Jan 2018 07:04:54 +0000
Subject: [PATCH] netfilter: nf_tables: fix a typo in nf_tables_getflowtable()
Fix a typo, we should check 'flowtable' instead of 'table'.
Fixes: 3b49e2e94e6e ("netfilter: nf_tables: add flow table netlink frontend")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5343,7 +5343,7 @@ static int nf_tables_getflowtable(struct
flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
genmask);
- if (IS_ERR(table))
+ if (IS_ERR(flowtable))
return PTR_ERR(flowtable);
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);

View File

@ -0,0 +1,106 @@
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 10 Jan 2018 18:10:59 +0100
Subject: [PATCH] netfilter: improve flow table Kconfig dependencies
The newly added NF_FLOW_TABLE options cause some build failures in
randconfig kernels:
- when CONFIG_NF_CONNTRACK is disabled, or is a loadable module but
NF_FLOW_TABLE is built-in:
In file included from net/netfilter/nf_flow_table.c:8:0:
include/net/netfilter/nf_conntrack.h:59:22: error: field 'ct_general' has incomplete type
struct nf_conntrack ct_general;
include/net/netfilter/nf_conntrack.h: In function 'nf_ct_get':
include/net/netfilter/nf_conntrack.h:148:15: error: 'const struct sk_buff' has no member named '_nfct'
include/net/netfilter/nf_conntrack.h: In function 'nf_ct_put':
include/net/netfilter/nf_conntrack.h:157:2: error: implicit declaration of function 'nf_conntrack_put'; did you mean 'nf_ct_put'? [-Werror=implicit-function-declaration]
net/netfilter/nf_flow_table.o: In function `nf_flow_offload_work_gc':
(.text+0x1540): undefined reference to `nf_ct_delete'
- when CONFIG_NF_TABLES is disabled:
In file included from net/ipv6/netfilter/nf_flow_table_ipv6.c:13:0:
include/net/netfilter/nf_tables.h: In function 'nft_gencursor_next':
include/net/netfilter/nf_tables.h:1189:14: error: 'const struct net' has no member named 'nft'; did you mean 'nf'?
- when CONFIG_NF_FLOW_TABLE_INET is enabled, but NF_FLOW_TABLE_IPV4
or NF_FLOW_TABLE_IPV6 are not, or are loadable modules
net/netfilter/nf_flow_table_inet.o: In function `nf_flow_offload_inet_hook':
nf_flow_table_inet.c:(.text+0x94): undefined reference to `nf_flow_offload_ipv6_hook'
nf_flow_table_inet.c:(.text+0x40): undefined reference to `nf_flow_offload_ip_hook'
- when CONFIG_NF_FLOW_TABLES is disabled, but the other options are
enabled:
net/netfilter/nf_flow_table_inet.o: In function `nf_flow_offload_inet_hook':
nf_flow_table_inet.c:(.text+0x6c): undefined reference to `nf_flow_offload_ipv6_hook'
net/netfilter/nf_flow_table_inet.o: In function `nf_flow_inet_module_exit':
nf_flow_table_inet.c:(.exit.text+0x8): undefined reference to `nft_unregister_flowtable_type'
net/netfilter/nf_flow_table_inet.o: In function `nf_flow_inet_module_init':
nf_flow_table_inet.c:(.init.text+0x8): undefined reference to `nft_register_flowtable_type'
net/ipv4/netfilter/nf_flow_table_ipv4.o: In function `nf_flow_ipv4_module_exit':
nf_flow_table_ipv4.c:(.exit.text+0x8): undefined reference to `nft_unregister_flowtable_type'
net/ipv4/netfilter/nf_flow_table_ipv4.o: In function `nf_flow_ipv4_module_init':
nf_flow_table_ipv4.c:(.init.text+0x8): undefined reference to `nft_register_flowtable_type'
This adds additional Kconfig dependencies to ensure that NF_CONNTRACK and NF_TABLES
are always visible from NF_FLOW_TABLE, and that the internal dependencies between
the four new modules are met.
Fixes: 7c23b629a808 ("netfilter: flow table support for the mixed IPv4/IPv6 family")
Fixes: 0995210753a2 ("netfilter: flow table support for IPv6")
Fixes: 97add9f0d66d ("netfilter: flow table support for IPv4")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -78,8 +78,9 @@ config NF_TABLES_ARP
endif # NF_TABLES
config NF_FLOW_TABLE_IPV4
- select NF_FLOW_TABLE
tristate "Netfilter flow table IPv4 module"
+ depends on NF_CONNTRACK && NF_TABLES
+ select NF_FLOW_TABLE
help
This option adds the flow table IPv4 support.
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -72,8 +72,9 @@ endif # NF_TABLES_IPV6
endif # NF_TABLES
config NF_FLOW_TABLE_IPV6
- select NF_FLOW_TABLE
tristate "Netfilter flow table IPv6 module"
+ depends on NF_CONNTRACK && NF_TABLES
+ select NF_FLOW_TABLE
help
This option adds the flow table IPv6 support.
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -669,8 +669,9 @@ endif # NF_TABLES_NETDEV
endif # NF_TABLES
config NF_FLOW_TABLE_INET
- select NF_FLOW_TABLE
tristate "Netfilter flow table mixed IPv4/IPv6 module"
+ depends on NF_FLOW_TABLE_IPV4 && NF_FLOW_TABLE_IPV6
+ select NF_FLOW_TABLE
help
This option adds the flow table mixed IPv4/IPv6 support.
@@ -678,6 +679,7 @@ config NF_FLOW_TABLE_INET
config NF_FLOW_TABLE
tristate "Netfilter flow table module"
+ depends on NF_CONNTRACK && NF_TABLES
help
This option adds the flow table core infrastructure.

View File

@ -0,0 +1,59 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 19 Dec 2017 14:07:52 +0100
Subject: [PATCH] netfilter: nf_tables: remove flag field from struct
nft_af_info
Replace it by a direct check for the netdev protocol family.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -954,10 +954,6 @@ struct nft_table {
char *name;
};
-enum nft_af_flags {
- NFT_AF_NEEDS_DEV = (1 << 0),
-};
-
/**
* struct nft_af_info - nf_tables address family info
*
@@ -965,14 +961,12 @@ enum nft_af_flags {
* @family: address family
* @owner: module owner
* @tables: used internally
- * @flags: family flags
*/
struct nft_af_info {
struct list_head list;
int family;
struct module *owner;
struct list_head tables;
- u32 flags;
};
int nft_register_afinfo(struct net *, struct nft_af_info *);
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1345,7 +1345,7 @@ static int nft_chain_parse_hook(struct n
hook->type = type;
hook->dev = NULL;
- if (afi->flags & NFT_AF_NEEDS_DEV) {
+ if (afi->family == NFPROTO_NETDEV) {
char ifname[IFNAMSIZ];
if (!ha[NFTA_HOOK_DEV]) {
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -41,7 +41,6 @@ nft_do_chain_netdev(void *priv, struct s
static struct nft_af_info nft_af_netdev __read_mostly = {
.family = NFPROTO_NETDEV,
.owner = THIS_MODULE,
- .flags = NFT_AF_NEEDS_DEV,
};
static int nf_tables_netdev_init_net(struct net *net)

View File

@ -0,0 +1,80 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 19 Dec 2017 12:17:52 +0100
Subject: [PATCH] netfilter: nf_tables: no need for struct nft_af_info to
enable/disable table
nf_tables_table_enable() and nf_tables_table_disable() take a pointer to
struct nft_af_info that is never used, remove it.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -611,10 +611,7 @@ err:
return err;
}
-static void _nf_tables_table_disable(struct net *net,
- const struct nft_af_info *afi,
- struct nft_table *table,
- u32 cnt)
+static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt)
{
struct nft_chain *chain;
u32 i = 0;
@@ -632,9 +629,7 @@ static void _nf_tables_table_disable(str
}
}
-static int nf_tables_table_enable(struct net *net,
- const struct nft_af_info *afi,
- struct nft_table *table)
+static int nf_tables_table_enable(struct net *net, struct nft_table *table)
{
struct nft_chain *chain;
int err, i = 0;
@@ -654,15 +649,13 @@ static int nf_tables_table_enable(struct
return 0;
err:
if (i)
- _nf_tables_table_disable(net, afi, table, i);
+ nft_table_disable(net, table, i);
return err;
}
-static void nf_tables_table_disable(struct net *net,
- const struct nft_af_info *afi,
- struct nft_table *table)
+static void nf_tables_table_disable(struct net *net, struct nft_table *table)
{
- _nf_tables_table_disable(net, afi, table, 0);
+ nft_table_disable(net, table, 0);
}
static int nf_tables_updtable(struct nft_ctx *ctx)
@@ -691,7 +684,7 @@ static int nf_tables_updtable(struct nft
nft_trans_table_enable(trans) = false;
} else if (!(flags & NFT_TABLE_F_DORMANT) &&
ctx->table->flags & NFT_TABLE_F_DORMANT) {
- ret = nf_tables_table_enable(ctx->net, ctx->afi, ctx->table);
+ ret = nf_tables_table_enable(ctx->net, ctx->table);
if (ret >= 0) {
ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
nft_trans_table_enable(trans) = true;
@@ -5721,7 +5714,6 @@ static int nf_tables_commit(struct net *
if (nft_trans_table_update(trans)) {
if (!nft_trans_table_enable(trans)) {
nf_tables_table_disable(net,
- trans->ctx.afi,
trans->ctx.table);
trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
}
@@ -5883,7 +5875,6 @@ static int nf_tables_abort(struct net *n
if (nft_trans_table_update(trans)) {
if (nft_trans_table_enable(trans)) {
nf_tables_table_disable(net,
- trans->ctx.afi,
trans->ctx.table);
trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
}

View File

@ -0,0 +1,60 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 19 Dec 2017 13:40:22 +0100
Subject: [PATCH] netfilter: nf_tables: remove struct nft_af_info parameter in
nf_tables_chain_type_lookup()
Pass family number instead, this comes in preparation for the removal of
struct nft_af_info.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -423,7 +423,7 @@ static inline u64 nf_tables_alloc_handle
static const struct nf_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
static const struct nf_chain_type *
-__nf_tables_chain_type_lookup(int family, const struct nlattr *nla)
+__nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
{
int i;
@@ -436,22 +436,20 @@ __nf_tables_chain_type_lookup(int family
}
static const struct nf_chain_type *
-nf_tables_chain_type_lookup(const struct nft_af_info *afi,
- const struct nlattr *nla,
- bool autoload)
+nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload)
{
const struct nf_chain_type *type;
- type = __nf_tables_chain_type_lookup(afi->family, nla);
+ type = __nf_tables_chain_type_lookup(nla, family);
if (type != NULL)
return type;
#ifdef CONFIG_MODULES
if (autoload) {
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
- request_module("nft-chain-%u-%.*s", afi->family,
+ request_module("nft-chain-%u-%.*s", family,
nla_len(nla), (const char *)nla_data(nla));
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- type = __nf_tables_chain_type_lookup(afi->family, nla);
+ type = __nf_tables_chain_type_lookup(nla, family);
if (type != NULL)
return ERR_PTR(-EAGAIN);
}
@@ -1325,8 +1323,8 @@ static int nft_chain_parse_hook(struct n
type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT];
if (nla[NFTA_CHAIN_TYPE]) {
- type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE],
- create);
+ type = nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE],
+ afi->family, create);
if (IS_ERR(type))
return PTR_ERR(type);
}

View File

@ -0,0 +1,30 @@
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 25 Dec 2017 11:34:54 +0800
Subject: [PATCH] netfilter: nf_tables: fix potential NULL-ptr deref in
nf_tables_dump_obj_done()
If there is no NFTA_OBJ_TABLE and NFTA_OBJ_TYPE, the c.data will be NULL in
nf_tables_getobj(). So before free filter->table in nf_tables_dump_obj_done(),
we need to check if filter is NULL first.
Fixes: e46abbcc05aa ("netfilter: nf_tables: Allow table names of up to 255 chars")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5262,8 +5262,10 @@ static int nf_tables_dump_flowtable_done
if (!filter)
return 0;
- kfree(filter->table);
- kfree(filter);
+ if (filter) {
+ kfree(filter->table);
+ kfree(filter);
+ }
return 0;
}

View File

@ -0,0 +1,100 @@
From: Vasily Averin <vvs@virtuozzo.com>
Date: Sun, 12 Nov 2017 14:32:37 +0300
Subject: [PATCH] netfilter: exit_net cleanup check added
Be sure that lists initialized in net_init hook was return to initial
state.
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -829,6 +829,7 @@ static void clusterip_net_exit(struct ne
cn->procdir = NULL;
#endif
nf_unregister_net_hook(net, &cip_arp_ops);
+ WARN_ON_ONCE(!list_empty(&cn->configs));
}
static struct pernet_operations clusterip_net_ops = {
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6470,6 +6470,12 @@ static int __net_init nf_tables_init_net
return 0;
}
+static void __net_exit nf_tables_exit_net(struct net *net)
+{
+ WARN_ON_ONCE(!list_empty(&net->nft.af_info));
+ WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
+}
+
int __nft_release_basechain(struct nft_ctx *ctx)
{
struct nft_rule *rule, *nr;
@@ -6547,6 +6553,7 @@ static void __nft_release_afinfo(struct
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
+ .exit = nf_tables_exit_net,
};
static int __init nf_tables_module_init(void)
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1093,10 +1093,15 @@ static int __net_init nfnl_log_net_init(
static void __net_exit nfnl_log_net_exit(struct net *net)
{
+ struct nfnl_log_net *log = nfnl_log_pernet(net);
+ unsigned int i;
+
#ifdef CONFIG_PROC_FS
remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
#endif
nf_log_unset(net, &nfulnl_logger);
+ for (i = 0; i < INSTANCE_BUCKETS; i++)
+ WARN_ON_ONCE(!hlist_empty(&log->instance_table[i]));
}
static struct pernet_operations nfnl_log_net_ops = {
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1512,10 +1512,15 @@ static int __net_init nfnl_queue_net_ini
static void __net_exit nfnl_queue_net_exit(struct net *net)
{
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+ unsigned int i;
+
nf_unregister_queue_handler(net);
#ifdef CONFIG_PROC_FS
remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
#endif
+ for (i = 0; i < INSTANCE_BUCKETS; i++)
+ WARN_ON_ONCE(!hlist_empty(&q->instance_table[i]));
}
static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list)
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1719,8 +1719,17 @@ static int __net_init xt_net_init(struct
return 0;
}
+static void __net_exit xt_net_exit(struct net *net)
+{
+ int i;
+
+ for (i = 0; i < NFPROTO_NUMPROTO; i++)
+ WARN_ON_ONCE(!list_empty(&net->xt.tables[i]));
+}
+
static struct pernet_operations xt_net_ops = {
.init = xt_net_init,
+ .exit = xt_net_exit,
};
static int __init xt_init(void)

View File

@ -0,0 +1,598 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 9 Jan 2018 02:42:11 +0100
Subject: [PATCH] netfilter: nf_tables: get rid of pernet families
Now that we have a single table list for each netns, we can get rid of
one pointer per family and the global afinfo list, thus, shrinking
struct netns for nftables that now becomes 64 bytes smaller.
And call __nft_release_afinfo() from __net_exit path accordingly to
release netnamespace objects on removal.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -969,8 +969,8 @@ struct nft_af_info {
struct module *owner;
};
-int nft_register_afinfo(struct net *, struct nft_af_info *);
-void nft_unregister_afinfo(struct net *, struct nft_af_info *);
+int nft_register_afinfo(struct nft_af_info *);
+void nft_unregister_afinfo(struct nft_af_info *);
int nft_register_chain_type(const struct nf_chain_type *);
void nft_unregister_chain_type(const struct nf_chain_type *);
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -7,15 +7,8 @@
struct nft_af_info;
struct netns_nftables {
- struct list_head af_info;
struct list_head tables;
struct list_head commit_list;
- struct nft_af_info *ipv4;
- struct nft_af_info *ipv6;
- struct nft_af_info *inet;
- struct nft_af_info *arp;
- struct nft_af_info *bridge;
- struct nft_af_info *netdev;
unsigned int base_seq;
u8 gencursor;
};
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -47,34 +47,6 @@ static struct nft_af_info nft_af_bridge
.owner = THIS_MODULE,
};
-static int nf_tables_bridge_init_net(struct net *net)
-{
- net->nft.bridge = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.bridge == NULL)
- return -ENOMEM;
-
- memcpy(net->nft.bridge, &nft_af_bridge, sizeof(nft_af_bridge));
-
- if (nft_register_afinfo(net, net->nft.bridge) < 0)
- goto err;
-
- return 0;
-err:
- kfree(net->nft.bridge);
- return -ENOMEM;
-}
-
-static void nf_tables_bridge_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.bridge);
- kfree(net->nft.bridge);
-}
-
-static struct pernet_operations nf_tables_bridge_net_ops = {
- .init = nf_tables_bridge_init_net,
- .exit = nf_tables_bridge_exit_net,
-};
-
static const struct nf_chain_type filter_bridge = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -98,17 +70,17 @@ static int __init nf_tables_bridge_init(
{
int ret;
- ret = nft_register_chain_type(&filter_bridge);
+ ret = nft_register_afinfo(&nft_af_bridge);
if (ret < 0)
return ret;
- ret = register_pernet_subsys(&nf_tables_bridge_net_ops);
+ ret = nft_register_chain_type(&filter_bridge);
if (ret < 0)
- goto err_register_subsys;
+ goto err_register_chain;
return ret;
-err_register_subsys:
+err_register_chain:
nft_unregister_chain_type(&filter_bridge);
return ret;
@@ -116,8 +88,8 @@ err_register_subsys:
static void __exit nf_tables_bridge_exit(void)
{
- unregister_pernet_subsys(&nf_tables_bridge_net_ops);
nft_unregister_chain_type(&filter_bridge);
+ nft_unregister_afinfo(&nft_af_bridge);
}
module_init(nf_tables_bridge_init);
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -32,34 +32,6 @@ static struct nft_af_info nft_af_arp __r
.owner = THIS_MODULE,
};
-static int nf_tables_arp_init_net(struct net *net)
-{
- net->nft.arp = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.arp== NULL)
- return -ENOMEM;
-
- memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp));
-
- if (nft_register_afinfo(net, net->nft.arp) < 0)
- goto err;
-
- return 0;
-err:
- kfree(net->nft.arp);
- return -ENOMEM;
-}
-
-static void nf_tables_arp_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.arp);
- kfree(net->nft.arp);
-}
-
-static struct pernet_operations nf_tables_arp_net_ops = {
- .init = nf_tables_arp_init_net,
- .exit = nf_tables_arp_exit_net,
-};
-
static const struct nf_chain_type filter_arp = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -77,21 +49,26 @@ static int __init nf_tables_arp_init(voi
{
int ret;
- ret = nft_register_chain_type(&filter_arp);
+ ret = nft_register_afinfo(&nft_af_arp);
if (ret < 0)
return ret;
- ret = register_pernet_subsys(&nf_tables_arp_net_ops);
+ ret = nft_register_chain_type(&filter_arp);
if (ret < 0)
- nft_unregister_chain_type(&filter_arp);
+ goto err_register_chain;
+
+ return 0;
+
+err_register_chain:
+ nft_unregister_chain_type(&filter_arp);
return ret;
}
static void __exit nf_tables_arp_exit(void)
{
- unregister_pernet_subsys(&nf_tables_arp_net_ops);
nft_unregister_chain_type(&filter_arp);
+ nft_unregister_afinfo(&nft_af_arp);
}
module_init(nf_tables_arp_init);
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -35,34 +35,6 @@ static struct nft_af_info nft_af_ipv4 __
.owner = THIS_MODULE,
};
-static int nf_tables_ipv4_init_net(struct net *net)
-{
- net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.ipv4 == NULL)
- return -ENOMEM;
-
- memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4));
-
- if (nft_register_afinfo(net, net->nft.ipv4) < 0)
- goto err;
-
- return 0;
-err:
- kfree(net->nft.ipv4);
- return -ENOMEM;
-}
-
-static void nf_tables_ipv4_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.ipv4);
- kfree(net->nft.ipv4);
-}
-
-static struct pernet_operations nf_tables_ipv4_net_ops = {
- .init = nf_tables_ipv4_init_net,
- .exit = nf_tables_ipv4_exit_net,
-};
-
static const struct nf_chain_type filter_ipv4 = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -86,21 +58,25 @@ static int __init nf_tables_ipv4_init(vo
{
int ret;
- ret = nft_register_chain_type(&filter_ipv4);
+ ret = nft_register_afinfo(&nft_af_ipv4);
if (ret < 0)
return ret;
- ret = register_pernet_subsys(&nf_tables_ipv4_net_ops);
+ ret = nft_register_chain_type(&filter_ipv4);
if (ret < 0)
- nft_unregister_chain_type(&filter_ipv4);
+ goto err_register_chain;
+
+ return 0;
+err_register_chain:
+ nft_unregister_afinfo(&nft_af_ipv4);
return ret;
}
static void __exit nf_tables_ipv4_exit(void)
{
- unregister_pernet_subsys(&nf_tables_ipv4_net_ops);
nft_unregister_chain_type(&filter_ipv4);
+ nft_unregister_afinfo(&nft_af_ipv4);
}
module_init(nf_tables_ipv4_init);
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -33,34 +33,6 @@ static struct nft_af_info nft_af_ipv6 __
.owner = THIS_MODULE,
};
-static int nf_tables_ipv6_init_net(struct net *net)
-{
- net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.ipv6 == NULL)
- return -ENOMEM;
-
- memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6));
-
- if (nft_register_afinfo(net, net->nft.ipv6) < 0)
- goto err;
-
- return 0;
-err:
- kfree(net->nft.ipv6);
- return -ENOMEM;
-}
-
-static void nf_tables_ipv6_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.ipv6);
- kfree(net->nft.ipv6);
-}
-
-static struct pernet_operations nf_tables_ipv6_net_ops = {
- .init = nf_tables_ipv6_init_net,
- .exit = nf_tables_ipv6_exit_net,
-};
-
static const struct nf_chain_type filter_ipv6 = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -84,20 +56,24 @@ static int __init nf_tables_ipv6_init(vo
{
int ret;
- ret = nft_register_chain_type(&filter_ipv6);
+ ret = nft_register_afinfo(&nft_af_ipv6);
if (ret < 0)
return ret;
- ret = register_pernet_subsys(&nf_tables_ipv6_net_ops);
+ ret = nft_register_chain_type(&filter_ipv6);
if (ret < 0)
- nft_unregister_chain_type(&filter_ipv6);
+ goto err_register_chain;
+
+ return 0;
+err_register_chain:
+ nft_unregister_afinfo(&nft_af_ipv6);
return ret;
}
static void __exit nf_tables_ipv6_exit(void)
{
- unregister_pernet_subsys(&nf_tables_ipv6_net_ops);
+ nft_unregister_afinfo(&nft_af_ipv6);
nft_unregister_chain_type(&filter_ipv6);
}
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -26,6 +26,7 @@
static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
+static LIST_HEAD(nf_tables_af_info);
/**
* nft_register_afinfo - register nf_tables address family info
@@ -35,17 +36,15 @@ static LIST_HEAD(nf_tables_flowtables);
* Register the address family for use with nf_tables. Returns zero on
* success or a negative errno code otherwise.
*/
-int nft_register_afinfo(struct net *net, struct nft_af_info *afi)
+int nft_register_afinfo(struct nft_af_info *afi)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_add_tail_rcu(&afi->list, &net->nft.af_info);
+ list_add_tail_rcu(&afi->list, &nf_tables_af_info);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
return 0;
}
EXPORT_SYMBOL_GPL(nft_register_afinfo);
-static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi);
-
/**
* nft_unregister_afinfo - unregister nf_tables address family info
*
@@ -53,10 +52,9 @@ static void __nft_release_afinfo(struct
*
* Unregister the address family for use with nf_tables.
*/
-void nft_unregister_afinfo(struct net *net, struct nft_af_info *afi)
+void nft_unregister_afinfo(struct nft_af_info *afi)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- __nft_release_afinfo(net, afi);
list_del_rcu(&afi->list);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
@@ -66,7 +64,7 @@ static struct nft_af_info *nft_afinfo_lo
{
struct nft_af_info *afi;
- list_for_each_entry(afi, &net->nft.af_info, list) {
+ list_for_each_entry(afi, &nf_tables_af_info, list) {
if (afi->family == family)
return afi;
}
@@ -4968,15 +4966,12 @@ void nft_flow_table_iterate(struct net *
void *data)
{
struct nft_flowtable *flowtable;
- const struct nft_af_info *afi;
const struct nft_table *table;
rcu_read_lock();
- list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
- list_for_each_entry_rcu(table, &net->nft.tables, list) {
- list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
- iter(&flowtable->data, data);
- }
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+ iter(&flowtable->data, data);
}
}
rcu_read_unlock();
@@ -6461,21 +6456,6 @@ int nft_data_dump(struct sk_buff *skb, i
}
EXPORT_SYMBOL_GPL(nft_data_dump);
-static int __net_init nf_tables_init_net(struct net *net)
-{
- INIT_LIST_HEAD(&net->nft.af_info);
- INIT_LIST_HEAD(&net->nft.tables);
- INIT_LIST_HEAD(&net->nft.commit_list);
- net->nft.base_seq = 1;
- return 0;
-}
-
-static void __net_exit nf_tables_exit_net(struct net *net)
-{
- WARN_ON_ONCE(!list_empty(&net->nft.af_info));
- WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
-}
-
int __nft_release_basechain(struct nft_ctx *ctx)
{
struct nft_rule *rule, *nr;
@@ -6496,8 +6476,7 @@ int __nft_release_basechain(struct nft_c
}
EXPORT_SYMBOL_GPL(__nft_release_basechain);
-/* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */
-static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
+static void __nft_release_afinfo(struct net *net)
{
struct nft_flowtable *flowtable, *nf;
struct nft_table *table, *nt;
@@ -6507,10 +6486,11 @@ static void __nft_release_afinfo(struct
struct nft_set *set, *ns;
struct nft_ctx ctx = {
.net = net,
- .family = afi->family,
};
list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
+ ctx.family = table->afi->family;
+
list_for_each_entry(chain, &table->chains, list)
nf_tables_unregister_hook(net, table, chain);
list_for_each_entry(flowtable, &table->flowtables, list)
@@ -6551,6 +6531,21 @@ static void __nft_release_afinfo(struct
}
}
+static int __net_init nf_tables_init_net(struct net *net)
+{
+ INIT_LIST_HEAD(&net->nft.tables);
+ INIT_LIST_HEAD(&net->nft.commit_list);
+ net->nft.base_seq = 1;
+ return 0;
+}
+
+static void __net_exit nf_tables_exit_net(struct net *net)
+{
+ __nft_release_afinfo(net);
+ WARN_ON_ONCE(!list_empty(&net->nft.tables));
+ WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
+}
+
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
.exit = nf_tables_exit_net,
--- a/net/netfilter/nf_tables_inet.c
+++ b/net/netfilter/nf_tables_inet.c
@@ -43,34 +43,6 @@ static struct nft_af_info nft_af_inet __
.owner = THIS_MODULE,
};
-static int __net_init nf_tables_inet_init_net(struct net *net)
-{
- net->nft.inet = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.inet == NULL)
- return -ENOMEM;
- memcpy(net->nft.inet, &nft_af_inet, sizeof(nft_af_inet));
-
- if (nft_register_afinfo(net, net->nft.inet) < 0)
- goto err;
-
- return 0;
-
-err:
- kfree(net->nft.inet);
- return -ENOMEM;
-}
-
-static void __net_exit nf_tables_inet_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.inet);
- kfree(net->nft.inet);
-}
-
-static struct pernet_operations nf_tables_inet_net_ops = {
- .init = nf_tables_inet_init_net,
- .exit = nf_tables_inet_exit_net,
-};
-
static const struct nf_chain_type filter_inet = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -94,21 +66,24 @@ static int __init nf_tables_inet_init(vo
{
int ret;
- ret = nft_register_chain_type(&filter_inet);
- if (ret < 0)
+ if (nft_register_afinfo(&nft_af_inet) < 0)
return ret;
- ret = register_pernet_subsys(&nf_tables_inet_net_ops);
+ ret = nft_register_chain_type(&filter_inet);
if (ret < 0)
- nft_unregister_chain_type(&filter_inet);
+ goto err_register_chain;
+
+ return ret;
+err_register_chain:
+ nft_unregister_afinfo(&nft_af_inet);
return ret;
}
static void __exit nf_tables_inet_exit(void)
{
- unregister_pernet_subsys(&nf_tables_inet_net_ops);
nft_unregister_chain_type(&filter_inet);
+ nft_unregister_afinfo(&nft_af_inet);
}
module_init(nf_tables_inet_init);
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -43,34 +43,6 @@ static struct nft_af_info nft_af_netdev
.owner = THIS_MODULE,
};
-static int nf_tables_netdev_init_net(struct net *net)
-{
- net->nft.netdev = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.netdev == NULL)
- return -ENOMEM;
-
- memcpy(net->nft.netdev, &nft_af_netdev, sizeof(nft_af_netdev));
-
- if (nft_register_afinfo(net, net->nft.netdev) < 0)
- goto err;
-
- return 0;
-err:
- kfree(net->nft.netdev);
- return -ENOMEM;
-}
-
-static void nf_tables_netdev_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.netdev);
- kfree(net->nft.netdev);
-}
-
-static struct pernet_operations nf_tables_netdev_net_ops = {
- .init = nf_tables_netdev_init_net,
- .exit = nf_tables_netdev_exit_net,
-};
-
static const struct nf_chain_type nft_filter_chain_netdev = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -145,32 +117,32 @@ static int __init nf_tables_netdev_init(
{
int ret;
- ret = nft_register_chain_type(&nft_filter_chain_netdev);
- if (ret)
+ if (nft_register_afinfo(&nft_af_netdev) < 0)
return ret;
- ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
+ ret = nft_register_chain_type(&nft_filter_chain_netdev);
if (ret)
- goto err1;
+ goto err_register_chain_type;
ret = register_netdevice_notifier(&nf_tables_netdev_notifier);
if (ret)
- goto err2;
+ goto err_register_netdevice_notifier;
return 0;
-err2:
- unregister_pernet_subsys(&nf_tables_netdev_net_ops);
-err1:
+err_register_netdevice_notifier:
nft_unregister_chain_type(&nft_filter_chain_netdev);
+err_register_chain_type:
+ nft_unregister_afinfo(&nft_af_netdev);
+
return ret;
}
static void __exit nf_tables_netdev_exit(void)
{
unregister_netdevice_notifier(&nf_tables_netdev_notifier);
- unregister_pernet_subsys(&nf_tables_netdev_net_ops);
nft_unregister_chain_type(&nft_filter_chain_netdev);
+ nft_unregister_afinfo(&nft_af_netdev);
}
module_init(nf_tables_netdev_init);

View File

@ -0,0 +1,47 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 1 Feb 2018 18:49:00 +0100
Subject: [PATCH] netfilter: nft_flow_offload: wait for garbage collector
to run after cleanup
If netdevice goes down, then flowtable entries are scheduled to be
removed. Wait for garbage collector to have a chance to run so it can
delete them from the hashtable.
The flush call might sleep, so hold the nfnl mutex from
nft_flow_table_iterate() instead of rcu read side lock. The use of the
nfnl mutex is also implicitly fixing races between updates via nfnetlink
and netdevice event.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4818,13 +4818,13 @@ void nft_flow_table_iterate(struct net *
struct nft_flowtable *flowtable;
const struct nft_table *table;
- rcu_read_lock();
- list_for_each_entry_rcu(table, &net->nft.tables, list) {
- list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_for_each_entry(table, &net->nft.tables, list) {
+ list_for_each_entry(flowtable, &table->flowtables, list) {
iter(&flowtable->data, data);
}
}
- rcu_read_unlock();
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_flow_table_iterate);
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -208,6 +208,7 @@ static void nft_flow_offload_iterate_cle
void *data)
{
nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data);
+ flush_delayed_work(&flowtable->gc_work);
}
static int flow_offload_netdev_event(struct notifier_block *this,

View File

@ -0,0 +1,29 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 1 Feb 2018 18:49:01 +0100
Subject: [PATCH] netfilter: nft_flow_offload: no need to flush entries on
module removal
nft_flow_offload module removal does not require to flush existing
flowtables, it is valid to remove this module while keeping flowtables
around.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -247,14 +247,8 @@ register_expr:
static void __exit nft_flow_offload_module_exit(void)
{
- struct net *net;
-
nft_unregister_expr(&nft_flow_offload_type);
unregister_netdevice_notifier(&flow_offload_netdev_notifier);
- rtnl_lock();
- for_each_net(net)
- nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL);
- rtnl_unlock();
}
module_init(nft_flow_offload_module_init);

View File

@ -0,0 +1,97 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 23 Jan 2018 17:46:09 +0100
Subject: [PATCH] netfilter: nft_flow_offload: move flowtable cleanup
routines to nf_flow_table
Move the flowtable cleanup routines to nf_flow_table and expose the
nf_flow_table_cleanup() helper function.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -95,6 +95,9 @@ struct flow_offload_tuple_rhash *flow_of
int nf_flow_table_iterate(struct nf_flowtable *flow_table,
void (*iter)(struct flow_offload *flow, void *data),
void *data);
+
+void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
+
void nf_flow_offload_work_gc(struct work_struct *work);
extern const struct rhashtable_params nf_flow_offload_rhash_params;
--- a/net/netfilter/nf_flow_table.c
+++ b/net/netfilter/nf_flow_table.c
@@ -4,6 +4,7 @@
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
+#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -425,5 +426,28 @@ int nf_flow_dnat_port(const struct flow_
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
+static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
+{
+ struct net_device *dev = data;
+
+ if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
+ return;
+
+ flow_offload_dead(flow);
+}
+
+static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
+ void *data)
+{
+ nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
+ flush_delayed_work(&flowtable->gc_work);
+}
+
+void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
+{
+ nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -194,23 +194,6 @@ static struct nft_expr_type nft_flow_off
.owner = THIS_MODULE,
};
-static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data)
-{
- struct net_device *dev = data;
-
- if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
- return;
-
- flow_offload_dead(flow);
-}
-
-static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable,
- void *data)
-{
- nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data);
- flush_delayed_work(&flowtable->gc_work);
-}
-
static int flow_offload_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -219,7 +202,7 @@ static int flow_offload_netdev_event(str
if (event != NETDEV_DOWN)
return NOTIFY_DONE;
- nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev);
+ nf_flow_table_cleanup(dev_net(dev), dev);
return NOTIFY_DONE;
}

View File

@ -0,0 +1,140 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 5 Feb 2018 21:44:50 +0100
Subject: [PATCH] netfilter: nf_tables: fix flowtable free
Every flow_offload entry is added into the table twice. Because of this,
rhashtable_free_and_destroy can't be used, since it would call kfree for
each flow_offload object twice.
This patch adds a call to nf_flow_table_iterate_cleanup() to schedule
removal of entries, then there is an explicitly invocation of the
garbage collector to clean up resources.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -14,6 +14,7 @@ struct nf_flowtable_type {
struct list_head list;
int family;
void (*gc)(struct work_struct *work);
+ void (*free)(struct nf_flowtable *ft);
const struct rhashtable_params *params;
nf_hookfn *hook;
struct module *owner;
@@ -98,6 +99,7 @@ int nf_flow_table_iterate(struct nf_flow
void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
+void nf_flow_table_free(struct nf_flowtable *flow_table);
void nf_flow_offload_work_gc(struct work_struct *work);
extern const struct rhashtable_params nf_flow_offload_rhash_params;
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -260,6 +260,7 @@ static struct nf_flowtable_type flowtabl
.family = NFPROTO_IPV4,
.params = &nf_flow_offload_rhash_params,
.gc = nf_flow_offload_work_gc,
+ .free = nf_flow_table_free,
.hook = nf_flow_offload_ip_hook,
.owner = THIS_MODULE,
};
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -254,6 +254,7 @@ static struct nf_flowtable_type flowtabl
.family = NFPROTO_IPV6,
.params = &nf_flow_offload_rhash_params,
.gc = nf_flow_offload_work_gc,
+ .free = nf_flow_table_free,
.hook = nf_flow_offload_ipv6_hook,
.owner = THIS_MODULE,
};
--- a/net/netfilter/nf_flow_table.c
+++ b/net/netfilter/nf_flow_table.c
@@ -232,19 +232,16 @@ static inline bool nf_flow_is_dying(cons
return flow->flags & FLOW_OFFLOAD_DYING;
}
-void nf_flow_offload_work_gc(struct work_struct *work)
+static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
{
struct flow_offload_tuple_rhash *tuplehash;
- struct nf_flowtable *flow_table;
struct rhashtable_iter hti;
struct flow_offload *flow;
int err;
- flow_table = container_of(work, struct nf_flowtable, gc_work.work);
-
err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
if (err)
- goto schedule;
+ return 0;
rhashtable_walk_start(&hti);
@@ -270,7 +267,16 @@ void nf_flow_offload_work_gc(struct work
out:
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
-schedule:
+
+ return 1;
+}
+
+void nf_flow_offload_work_gc(struct work_struct *work)
+{
+ struct nf_flowtable *flow_table;
+
+ flow_table = container_of(work, struct nf_flowtable, gc_work.work);
+ nf_flow_offload_gc_step(flow_table);
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
@@ -449,5 +455,12 @@ void nf_flow_table_cleanup(struct net *n
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
+void nf_flow_table_free(struct nf_flowtable *flow_table)
+{
+ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
+ WARN_ON(!nf_flow_offload_gc_step(flow_table));
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_free);
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -24,6 +24,7 @@ static struct nf_flowtable_type flowtabl
.family = NFPROTO_INET,
.params = &nf_flow_offload_rhash_params,
.gc = nf_flow_offload_work_gc,
+ .free = nf_flow_table_free,
.hook = nf_flow_offload_inet_hook,
.owner = THIS_MODULE,
};
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5203,17 +5203,12 @@ err:
nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
-static void nft_flowtable_destroy(void *ptr, void *arg)
-{
- kfree(ptr);
-}
-
static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
{
cancel_delayed_work_sync(&flowtable->data.gc_work);
kfree(flowtable->name);
- rhashtable_free_and_destroy(&flowtable->data.rhashtable,
- nft_flowtable_destroy, NULL);
+ flowtable->data.type->free(&flowtable->data);
+ rhashtable_destroy(&flowtable->data.rhashtable);
module_put(flowtable->data.type->owner);
}

View File

@ -0,0 +1,96 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 25 Jan 2018 12:58:55 +0100
Subject: [PATCH] netfilter: nft_flow_offload: handle netdevice events from
nf_flow_table
Move the code that deals with device events to the core.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/netfilter/nf_flow_table.c
+++ b/net/netfilter/nf_flow_table.c
@@ -462,5 +462,35 @@ void nf_flow_table_free(struct nf_flowta
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);
+static int nf_flow_table_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ if (event != NETDEV_DOWN)
+ return NOTIFY_DONE;
+
+ nf_flow_table_cleanup(dev_net(dev), dev);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block flow_offload_netdev_notifier = {
+ .notifier_call = nf_flow_table_netdev_event,
+};
+
+static int __init nf_flow_table_module_init(void)
+{
+ return register_netdevice_notifier(&flow_offload_netdev_notifier);
+}
+
+static void __exit nf_flow_table_module_exit(void)
+{
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+}
+
+module_init(nf_flow_table_module_init);
+module_exit(nf_flow_table_module_exit);
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -194,44 +194,14 @@ static struct nft_expr_type nft_flow_off
.owner = THIS_MODULE,
};
-static int flow_offload_netdev_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
- if (event != NETDEV_DOWN)
- return NOTIFY_DONE;
-
- nf_flow_table_cleanup(dev_net(dev), dev);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block flow_offload_netdev_notifier = {
- .notifier_call = flow_offload_netdev_event,
-};
-
static int __init nft_flow_offload_module_init(void)
{
- int err;
-
- register_netdevice_notifier(&flow_offload_netdev_notifier);
-
- err = nft_register_expr(&nft_flow_offload_type);
- if (err < 0)
- goto register_expr;
-
- return 0;
-
-register_expr:
- unregister_netdevice_notifier(&flow_offload_netdev_notifier);
- return err;
+ return nft_register_expr(&nft_flow_offload_type);
}
static void __exit nft_flow_offload_module_exit(void)
{
nft_unregister_expr(&nft_flow_offload_type);
- unregister_netdevice_notifier(&flow_offload_netdev_notifier);
}
module_init(nft_flow_offload_module_init);

View File

@ -0,0 +1,468 @@
From: Harsha Sharma <harshasharmaiitr@gmail.com>
Date: Wed, 27 Dec 2017 00:59:00 +0530
Subject: [PATCH] netfilter: nf_tables: allocate handle and delete objects via
handle
This patch allows deletion of objects via unique handle which can be
listed via '-a' option.
Signed-off-by: Harsha Sharma <harshasharmaiitr@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -369,6 +369,7 @@ void nft_unregister_set(struct nft_set_t
* @list: table set list node
* @bindings: list of set bindings
* @name: name of the set
+ * @handle: unique handle of the set
* @ktype: key type (numeric type defined by userspace, not used in the kernel)
* @dtype: data type (verdict or numeric type defined by userspace)
* @objtype: object type (see NFT_OBJECT_* definitions)
@@ -391,6 +392,7 @@ struct nft_set {
struct list_head list;
struct list_head bindings;
char *name;
+ u64 handle;
u32 ktype;
u32 dtype;
u32 objtype;
@@ -936,6 +938,7 @@ unsigned int nft_do_chain(struct nft_pkt
* @objects: stateful objects in the table
* @flowtables: flow tables in the table
* @hgenerator: handle generator state
+ * @handle: table handle
* @use: number of chain references to this table
* @flags: table flag (see enum nft_table_flags)
* @genmask: generation mask
@@ -949,6 +952,7 @@ struct nft_table {
struct list_head objects;
struct list_head flowtables;
u64 hgenerator;
+ u64 handle;
u32 use;
u16 family:6,
flags:8,
@@ -973,9 +977,9 @@ int nft_verdict_dump(struct sk_buff *skb
* @name: name of this stateful object
* @genmask: generation mask
* @use: number of references to this stateful object
- * @data: object data, layout depends on type
+ * @handle: unique object handle
* @ops: object operations
- * @data: pointer to object data
+ * @data: object data, layout depends on type
*/
struct nft_object {
struct list_head list;
@@ -983,6 +987,7 @@ struct nft_object {
struct nft_table *table;
u32 genmask:2,
use:30;
+ u64 handle;
/* runtime data below here */
const struct nft_object_ops *ops ____cacheline_aligned;
unsigned char data[]
@@ -1064,6 +1069,7 @@ void nft_unregister_obj(struct nft_objec
* @ops_len: number of hooks in array
* @genmask: generation mask
* @use: number of references to this flow table
+ * @handle: unique object handle
* @data: rhashtable and garbage collector
* @ops: array of hooks
*/
@@ -1076,6 +1082,7 @@ struct nft_flowtable {
int ops_len;
u32 genmask:2,
use:30;
+ u64 handle;
/* runtime data below here */
struct nf_hook_ops *ops ____cacheline_aligned;
struct nf_flowtable data;
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -174,6 +174,8 @@ enum nft_table_attributes {
NFTA_TABLE_NAME,
NFTA_TABLE_FLAGS,
NFTA_TABLE_USE,
+ NFTA_TABLE_HANDLE,
+ NFTA_TABLE_PAD,
__NFTA_TABLE_MAX
};
#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1)
@@ -317,6 +319,7 @@ enum nft_set_desc_attributes {
* @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32)
* @NFTA_SET_USERDATA: user data (NLA_BINARY)
* @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*)
+ * @NFTA_SET_HANDLE: set handle (NLA_U64)
*/
enum nft_set_attributes {
NFTA_SET_UNSPEC,
@@ -335,6 +338,7 @@ enum nft_set_attributes {
NFTA_SET_USERDATA,
NFTA_SET_PAD,
NFTA_SET_OBJ_TYPE,
+ NFTA_SET_HANDLE,
__NFTA_SET_MAX
};
#define NFTA_SET_MAX (__NFTA_SET_MAX - 1)
@@ -1314,6 +1318,7 @@ enum nft_ct_helper_attributes {
* @NFTA_OBJ_TYPE: stateful object type (NLA_U32)
* @NFTA_OBJ_DATA: stateful object data (NLA_NESTED)
* @NFTA_OBJ_USE: number of references to this expression (NLA_U32)
+ * @NFTA_OBJ_HANDLE: object handle (NLA_U64)
*/
enum nft_object_attributes {
NFTA_OBJ_UNSPEC,
@@ -1322,6 +1327,8 @@ enum nft_object_attributes {
NFTA_OBJ_TYPE,
NFTA_OBJ_DATA,
NFTA_OBJ_USE,
+ NFTA_OBJ_HANDLE,
+ NFTA_OBJ_PAD,
__NFTA_OBJ_MAX
};
#define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1)
@@ -1333,6 +1340,7 @@ enum nft_object_attributes {
* @NFTA_FLOWTABLE_NAME: name of this flow table (NLA_STRING)
* @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32)
* @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32)
+ * @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64)
*/
enum nft_flowtable_attributes {
NFTA_FLOWTABLE_UNSPEC,
@@ -1340,6 +1348,8 @@ enum nft_flowtable_attributes {
NFTA_FLOWTABLE_NAME,
NFTA_FLOWTABLE_HOOK,
NFTA_FLOWTABLE_USE,
+ NFTA_FLOWTABLE_HANDLE,
+ NFTA_FLOWTABLE_PAD,
__NFTA_FLOWTABLE_MAX
};
#define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -26,6 +26,7 @@
static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
+static u64 table_handle;
static void nft_ctx_init(struct nft_ctx *ctx,
struct net *net,
@@ -332,6 +333,20 @@ static struct nft_table *nft_table_looku
return NULL;
}
+static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
+ const struct nlattr *nla,
+ u8 genmask)
+{
+ struct nft_table *table;
+
+ list_for_each_entry(table, &net->nft.tables, list) {
+ if (be64_to_cpu(nla_get_be64(nla)) == table->handle &&
+ nft_active_genmask(table, genmask))
+ return table;
+ }
+ return NULL;
+}
+
static struct nft_table *nf_tables_table_lookup(const struct net *net,
const struct nlattr *nla,
u8 family, u8 genmask)
@@ -348,6 +363,22 @@ static struct nft_table *nf_tables_table
return ERR_PTR(-ENOENT);
}
+static struct nft_table *nf_tables_table_lookup_byhandle(const struct net *net,
+ const struct nlattr *nla,
+ u8 genmask)
+{
+ struct nft_table *table;
+
+ if (nla == NULL)
+ return ERR_PTR(-EINVAL);
+
+ table = nft_table_lookup_byhandle(net, nla, genmask);
+ if (table != NULL)
+ return table;
+
+ return ERR_PTR(-ENOENT);
+}
+
static inline u64 nf_tables_alloc_handle(struct nft_table *table)
{
return ++table->hgenerator;
@@ -394,6 +425,7 @@ static const struct nla_policy nft_table
[NFTA_TABLE_NAME] = { .type = NLA_STRING,
.len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
+ [NFTA_TABLE_HANDLE] = { .type = NLA_U64 },
};
static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
@@ -415,7 +447,9 @@ static int nf_tables_fill_table_info(str
if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
- nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)))
+ nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) ||
+ nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle),
+ NFTA_TABLE_PAD))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -674,6 +708,7 @@ static int nf_tables_newtable(struct net
INIT_LIST_HEAD(&table->flowtables);
table->family = family;
table->flags = flags;
+ table->handle = ++table_handle;
nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
@@ -791,11 +826,18 @@ static int nf_tables_deltable(struct net
struct nft_ctx ctx;
nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla);
- if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL)
+ if (family == AF_UNSPEC ||
+ (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE]))
return nft_flush(&ctx, family);
- table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], family,
- genmask);
+ if (nla[NFTA_TABLE_HANDLE])
+ table = nf_tables_table_lookup_byhandle(net,
+ nla[NFTA_TABLE_HANDLE],
+ genmask);
+ else
+ table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME],
+ family, genmask);
+
if (IS_ERR(table))
return PTR_ERR(table);
@@ -1534,6 +1576,7 @@ static int nf_tables_delchain(struct net
struct nft_rule *rule;
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
+ u64 handle;
u32 use;
int err;
@@ -1542,7 +1585,12 @@ static int nf_tables_delchain(struct net
if (IS_ERR(table))
return PTR_ERR(table);
- chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+ if (nla[NFTA_CHAIN_HANDLE]) {
+ handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+ chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
+ } else {
+ chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+ }
if (IS_ERR(chain))
return PTR_ERR(chain);
@@ -2503,6 +2551,7 @@ static const struct nla_policy nft_set_p
[NFTA_SET_USERDATA] = { .type = NLA_BINARY,
.len = NFT_USERDATA_MAXLEN },
[NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 },
+ [NFTA_SET_HANDLE] = { .type = NLA_U64 },
};
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
@@ -2546,6 +2595,22 @@ static struct nft_set *nf_tables_set_loo
return ERR_PTR(-ENOENT);
}
+static struct nft_set *nf_tables_set_lookup_byhandle(const struct nft_table *table,
+ const struct nlattr *nla, u8 genmask)
+{
+ struct nft_set *set;
+
+ if (nla == NULL)
+ return ERR_PTR(-EINVAL);
+
+ list_for_each_entry(set, &table->sets, list) {
+ if (be64_to_cpu(nla_get_be64(nla)) == set->handle &&
+ nft_active_genmask(set, genmask))
+ return set;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
static struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
const struct nlattr *nla,
u8 genmask)
@@ -2661,6 +2726,9 @@ static int nf_tables_fill_set(struct sk_
goto nla_put_failure;
if (nla_put_string(skb, NFTA_SET_NAME, set->name))
goto nla_put_failure;
+ if (nla_put_be64(skb, NFTA_SET_HANDLE, cpu_to_be64(set->handle),
+ NFTA_SET_PAD))
+ goto nla_put_failure;
if (set->flags != 0)
if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags)))
goto nla_put_failure;
@@ -3069,6 +3137,7 @@ static int nf_tables_newset(struct net *
set->udata = udata;
set->timeout = timeout;
set->gc_int = gc_int;
+ set->handle = nf_tables_alloc_handle(table);
err = ops->init(set, &desc, nla);
if (err < 0)
@@ -3126,7 +3195,10 @@ static int nf_tables_delset(struct net *
if (err < 0)
return err;
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
+ if (nla[NFTA_SET_HANDLE])
+ set = nf_tables_set_lookup_byhandle(ctx.table, nla[NFTA_SET_HANDLE], genmask);
+ else
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
@@ -4182,6 +4254,21 @@ struct nft_object *nf_tables_obj_lookup(
}
EXPORT_SYMBOL_GPL(nf_tables_obj_lookup);
+struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table,
+ const struct nlattr *nla,
+ u32 objtype, u8 genmask)
+{
+ struct nft_object *obj;
+
+ list_for_each_entry(obj, &table->objects, list) {
+ if (be64_to_cpu(nla_get_be64(nla)) == obj->handle &&
+ objtype == obj->ops->type->type &&
+ nft_active_genmask(obj, genmask))
+ return obj;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
[NFTA_OBJ_TABLE] = { .type = NLA_STRING,
.len = NFT_TABLE_MAXNAMELEN - 1 },
@@ -4189,6 +4276,7 @@ static const struct nla_policy nft_obj_p
.len = NFT_OBJ_MAXNAMELEN - 1 },
[NFTA_OBJ_TYPE] = { .type = NLA_U32 },
[NFTA_OBJ_DATA] = { .type = NLA_NESTED },
+ [NFTA_OBJ_HANDLE] = { .type = NLA_U64},
};
static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
@@ -4336,6 +4424,8 @@ static int nf_tables_newobj(struct net *
goto err1;
}
obj->table = table;
+ obj->handle = nf_tables_alloc_handle(table);
+
obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
if (!obj->name) {
err = -ENOMEM;
@@ -4382,7 +4472,9 @@ static int nf_tables_fill_obj_info(struc
nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
- nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
+ nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) ||
+ nla_put_be64(skb, NFTA_OBJ_HANDLE, cpu_to_be64(obj->handle),
+ NFTA_OBJ_PAD))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -4580,7 +4672,7 @@ static int nf_tables_delobj(struct net *
u32 objtype;
if (!nla[NFTA_OBJ_TYPE] ||
- !nla[NFTA_OBJ_NAME])
+ (!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE]))
return -EINVAL;
table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
@@ -4589,7 +4681,12 @@ static int nf_tables_delobj(struct net *
return PTR_ERR(table);
objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
- obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
+ if (nla[NFTA_OBJ_HANDLE])
+ obj = nf_tables_obj_lookup_byhandle(table, nla[NFTA_OBJ_HANDLE],
+ objtype, genmask);
+ else
+ obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME],
+ objtype, genmask);
if (IS_ERR(obj))
return PTR_ERR(obj);
if (obj->use > 0)
@@ -4661,6 +4758,7 @@ static const struct nla_policy nft_flowt
[NFTA_FLOWTABLE_NAME] = { .type = NLA_STRING,
.len = NFT_NAME_MAXLEN - 1 },
[NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED },
+ [NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 },
};
struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
@@ -4678,6 +4776,20 @@ struct nft_flowtable *nf_tables_flowtabl
}
EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup);
+struct nft_flowtable *
+nf_tables_flowtable_lookup_byhandle(const struct nft_table *table,
+ const struct nlattr *nla, u8 genmask)
+{
+ struct nft_flowtable *flowtable;
+
+ list_for_each_entry(flowtable, &table->flowtables, list) {
+ if (be64_to_cpu(nla_get_be64(nla)) == flowtable->handle &&
+ nft_active_genmask(flowtable, genmask))
+ return flowtable;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
#define NFT_FLOWTABLE_DEVICE_MAX 8
static int nf_tables_parse_devices(const struct nft_ctx *ctx,
@@ -4886,6 +4998,8 @@ static int nf_tables_newflowtable(struct
return -ENOMEM;
flowtable->table = table;
+ flowtable->handle = nf_tables_alloc_handle(table);
+
flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
if (!flowtable->name) {
err = -ENOMEM;
@@ -4960,8 +5074,14 @@ static int nf_tables_delflowtable(struct
if (IS_ERR(table))
return PTR_ERR(table);
- flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
- genmask);
+ if (nla[NFTA_FLOWTABLE_HANDLE])
+ flowtable = nf_tables_flowtable_lookup_byhandle(table,
+ nla[NFTA_FLOWTABLE_HANDLE],
+ genmask);
+ else
+ flowtable = nf_tables_flowtable_lookup(table,
+ nla[NFTA_FLOWTABLE_NAME],
+ genmask);
if (IS_ERR(flowtable))
return PTR_ERR(flowtable);
if (flowtable->use > 0)
@@ -4994,7 +5114,9 @@ static int nf_tables_fill_flowtable_info
if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) ||
nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
- nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)))
+ nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
+ nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle),
+ NFTA_FLOWTABLE_PAD))
goto nla_put_failure;
nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);

View File

@ -0,0 +1,95 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 7 Feb 2018 09:23:25 +0100
Subject: [PATCH] netfilter: nf_flow_offload: fix use-after-free and a resource
leak
flow_offload_del frees the flow, so all associated resource must be
freed before.
Since the ct entry in struct flow_offload_entry was allocated by
flow_offload_alloc, it should be freed by flow_offload_free to take care
of the error handling path when flow_offload_add fails.
While at it, make flow_offload_del static, since it should never be
called directly, only from the gc step
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -90,7 +90,6 @@ struct flow_offload *flow_offload_alloc(
void flow_offload_free(struct flow_offload *flow);
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
-void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow);
struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload_tuple *tuple);
int nf_flow_table_iterate(struct nf_flowtable *flow_table,
--- a/net/netfilter/nf_flow_table.c
+++ b/net/netfilter/nf_flow_table.c
@@ -125,7 +125,9 @@ void flow_offload_free(struct flow_offlo
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
e = container_of(flow, struct flow_offload_entry, flow);
- kfree(e);
+ nf_ct_delete(e->ct, 0, 0);
+ nf_ct_put(e->ct);
+ kfree_rcu(e, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_free);
@@ -149,11 +151,9 @@ int flow_offload_add(struct nf_flowtable
}
EXPORT_SYMBOL_GPL(flow_offload_add);
-void flow_offload_del(struct nf_flowtable *flow_table,
- struct flow_offload *flow)
+static void flow_offload_del(struct nf_flowtable *flow_table,
+ struct flow_offload *flow)
{
- struct flow_offload_entry *e;
-
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
*flow_table->type->params);
@@ -161,10 +161,8 @@ void flow_offload_del(struct nf_flowtabl
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
*flow_table->type->params);
- e = container_of(flow, struct flow_offload_entry, flow);
- kfree_rcu(e, rcu_head);
+ flow_offload_free(flow);
}
-EXPORT_SYMBOL_GPL(flow_offload_del);
struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
@@ -175,15 +173,6 @@ flow_offload_lookup(struct nf_flowtable
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);
-static void nf_flow_release_ct(const struct flow_offload *flow)
-{
- struct flow_offload_entry *e;
-
- e = container_of(flow, struct flow_offload_entry, flow);
- nf_ct_delete(e->ct, 0, 0);
- nf_ct_put(e->ct);
-}
-
int nf_flow_table_iterate(struct nf_flowtable *flow_table,
void (*iter)(struct flow_offload *flow, void *data),
void *data)
@@ -259,10 +248,8 @@ static int nf_flow_offload_gc_step(struc
flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
if (nf_flow_has_expired(flow) ||
- nf_flow_is_dying(flow)) {
+ nf_flow_is_dying(flow))
flow_offload_del(flow_table, flow);
- nf_flow_release_ct(flow);
- }
}
out:
rhashtable_walk_stop(&hti);

View File

@ -0,0 +1,73 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 31 Jan 2018 18:13:39 +0100
Subject: [PATCH] netfilter: flowtable infrastructure depends on
NETFILTER_INGRESS
config NF_FLOW_TABLE depends on NETFILTER_INGRESS. If users forget to
enable this toggle, flowtable registration fails with EOPNOTSUPP.
Moreover, turn 'select NF_FLOW_TABLE' in every flowtable family flavour
into dependency instead, otherwise this new dependency on
NETFILTER_INGRESS causes a warning. This also allows us to remove the
explicit dependency between family flowtables <-> NF_TABLES and
NF_CONNTRACK, given they depend on the NF_FLOW_TABLE core that already
expresses the general dependencies for this new infrastructure.
Moreover, NF_FLOW_TABLE_INET depends on NF_FLOW_TABLE_IPV4 and
NF_FLOWTABLE_IPV6, which already depends on NF_FLOW_TABLE. So we can get
rid of direct dependency with NF_FLOW_TABLE.
In general, let's avoid 'select', it just makes things more complicated.
Reported-by: John Crispin <john@phrozen.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -79,8 +79,7 @@ endif # NF_TABLES
config NF_FLOW_TABLE_IPV4
tristate "Netfilter flow table IPv4 module"
- depends on NF_CONNTRACK && NF_TABLES
- select NF_FLOW_TABLE
+ depends on NF_FLOW_TABLE
help
This option adds the flow table IPv4 support.
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -73,8 +73,7 @@ endif # NF_TABLES
config NF_FLOW_TABLE_IPV6
tristate "Netfilter flow table IPv6 module"
- depends on NF_CONNTRACK && NF_TABLES
- select NF_FLOW_TABLE
+ depends on NF_FLOW_TABLE
help
This option adds the flow table IPv6 support.
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -670,8 +670,8 @@ endif # NF_TABLES
config NF_FLOW_TABLE_INET
tristate "Netfilter flow table mixed IPv4/IPv6 module"
- depends on NF_FLOW_TABLE_IPV4 && NF_FLOW_TABLE_IPV6
- select NF_FLOW_TABLE
+ depends on NF_FLOW_TABLE_IPV4
+ depends on NF_FLOW_TABLE_IPV6
help
This option adds the flow table mixed IPv4/IPv6 support.
@@ -679,7 +679,9 @@ config NF_FLOW_TABLE_INET
config NF_FLOW_TABLE
tristate "Netfilter flow table module"
- depends on NF_CONNTRACK && NF_TABLES
+ depends on NETFILTER_INGRESS
+ depends on NF_CONNTRACK
+ depends on NF_TABLES
help
This option adds the flow table core infrastructure.

View File

@ -0,0 +1,29 @@
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 10 Jan 2018 13:06:46 +0000
Subject: [PATCH] netfilter: remove duplicated include
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -5,7 +5,6 @@
#include <linux/rhashtable.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
-#include <linux/ipv6.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -15,8 +15,6 @@
#include <linux/netfilter_bridge.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv6.h>
#include <net/protocol.h>
#include <net/netfilter/nf_queue.h>
#include <net/dst.h>

View File

@ -0,0 +1,35 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 16 Feb 2018 09:41:18 +0100
Subject: [PATCH] netfilter: nf_flow_table: use IP_CT_DIR_* values for
FLOW_OFFLOAD_DIR_*
Simplifies further code cleanups
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -6,6 +6,7 @@
#include <linux/netdevice.h>
#include <linux/rhashtable.h>
#include <linux/rcupdate.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/dst.h>
struct nf_flowtable;
@@ -27,11 +28,10 @@ struct nf_flowtable {
};
enum flow_offload_tuple_dir {
- FLOW_OFFLOAD_DIR_ORIGINAL,
- FLOW_OFFLOAD_DIR_REPLY,
- __FLOW_OFFLOAD_DIR_MAX = FLOW_OFFLOAD_DIR_REPLY,
+ FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
+ FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
+ FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
};
-#define FLOW_OFFLOAD_DIR_MAX (__FLOW_OFFLOAD_DIR_MAX + 1)
struct flow_offload_tuple {
union {

View File

@ -0,0 +1,118 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 16 Feb 2018 09:42:32 +0100
Subject: [PATCH] netfilter: nf_flow_table: clean up flow_offload_alloc
Reduce code duplication and make it much easier to read
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/netfilter/nf_flow_table.c
+++ b/net/netfilter/nf_flow_table.c
@@ -16,6 +16,38 @@ struct flow_offload_entry {
struct rcu_head rcu_head;
};
+static void
+flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
+ struct nf_flow_route *route,
+ enum flow_offload_tuple_dir dir)
+{
+ struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
+ struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
+
+ ft->dir = dir;
+
+ switch (ctt->src.l3num) {
+ case NFPROTO_IPV4:
+ ft->src_v4 = ctt->src.u3.in;
+ ft->dst_v4 = ctt->dst.u3.in;
+ break;
+ case NFPROTO_IPV6:
+ ft->src_v6 = ctt->src.u3.in6;
+ ft->dst_v6 = ctt->dst.u3.in6;
+ break;
+ }
+
+ ft->l3proto = ctt->src.l3num;
+ ft->l4proto = ctt->dst.protonum;
+ ft->src_port = ctt->src.u.tcp.port;
+ ft->dst_port = ctt->dst.u.tcp.port;
+
+ ft->iifidx = route->tuple[dir].ifindex;
+ ft->oifidx = route->tuple[!dir].ifindex;
+
+ ft->dst_cache = route->tuple[dir].dst;
+}
+
struct flow_offload *
flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
{
@@ -40,65 +72,8 @@ flow_offload_alloc(struct nf_conn *ct, s
entry->ct = ct;
- switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) {
- case NFPROTO_IPV4:
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
- break;
- case NFPROTO_IPV6:
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
- break;
- }
-
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
-
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
- route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
- route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;
-
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
-
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
- FLOW_OFFLOAD_DIR_ORIGINAL;
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
- FLOW_OFFLOAD_DIR_REPLY;
-
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
- route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
- route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
- route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
- route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
+ flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
+ flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
if (ct->status & IPS_SRC_NAT)
flow->flags |= FLOW_OFFLOAD_SNAT;

View File

@ -0,0 +1,80 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 16 Feb 2018 10:54:24 +0100
Subject: [PATCH] ipv6: make ip6_dst_mtu_forward inline
Removes a direct dependency on ipv6.ko
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -252,4 +252,26 @@ static inline bool rt6_duplicate_nexthop
ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
!lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
}
+
+static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+{
+ unsigned int mtu;
+ struct inet6_dev *idev;
+
+ if (dst_metric_locked(dst, RTAX_MTU)) {
+ mtu = dst_metric_raw(dst, RTAX_MTU);
+ if (mtu)
+ return mtu;
+ }
+
+ mtu = IPV6_MIN_MTU;
+ rcu_read_lock();
+ idev = __in6_dev_get(dst->dev);
+ if (idev)
+ mtu = idev->cnf.mtu6;
+ rcu_read_unlock();
+
+ return mtu;
+}
+
#endif
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -913,8 +913,6 @@ static inline struct sk_buff *ip6_finish
&inet6_sk(sk)->cork);
}
-unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst);
-
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
struct flowi6 *fl6);
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -370,28 +370,6 @@ static inline int ip6_forward_finish(str
return dst_output(net, sk, skb);
}
-unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
-{
- unsigned int mtu;
- struct inet6_dev *idev;
-
- if (dst_metric_locked(dst, RTAX_MTU)) {
- mtu = dst_metric_raw(dst, RTAX_MTU);
- if (mtu)
- return mtu;
- }
-
- mtu = IPV6_MIN_MTU;
- rcu_read_lock();
- idev = __in6_dev_get(dst->dev);
- if (idev)
- mtu = idev->cnf.mtu6;
- rcu_read_unlock();
-
- return mtu;
-}
-EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
-
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
if (skb->len <= mtu)

View File

@ -0,0 +1,145 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 16 Feb 2018 10:57:23 +0100
Subject: [PATCH] netfilter: nf_flow_table: cache mtu in struct
flow_offload_tuple
Reduces the number of cache lines touched in the offload forwarding path
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -55,6 +55,8 @@ struct flow_offload_tuple {
int oifidx;
+ u16 mtu;
+
struct dst_entry *dst_cache;
};
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -177,7 +177,7 @@ static int nf_flow_tuple_ip(struct sk_bu
}
/* Based on ip_exceeds_mtu(). */
-static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
if (skb->len <= mtu)
return false;
@@ -191,17 +191,6 @@ static bool __nf_flow_exceeds_mtu(const
return true;
}
-static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt)
-{
- u32 mtu;
-
- mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
- if (__nf_flow_exceeds_mtu(skb, mtu))
- return true;
-
- return false;
-}
-
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -232,9 +221,9 @@ nf_flow_offload_ip_hook(void *priv, stru
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-
rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
- if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
+
+ if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
return NF_ACCEPT;
if (skb_try_make_writable(skb, sizeof(*iph)))
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -173,7 +173,7 @@ static int nf_flow_tuple_ipv6(struct sk_
}
/* Based on ip_exceeds_mtu(). */
-static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
if (skb->len <= mtu)
return false;
@@ -184,17 +184,6 @@ static bool __nf_flow_exceeds_mtu(const
return true;
}
-static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
-{
- u32 mtu;
-
- mtu = ip6_dst_mtu_forward(&rt->dst);
- if (__nf_flow_exceeds_mtu(skb, mtu))
- return true;
-
- return false;
-}
-
unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -225,9 +214,9 @@ nf_flow_offload_ipv6_hook(void *priv, st
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-
rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
- if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
+
+ if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
return NF_ACCEPT;
if (skb_try_make_writable(skb, sizeof(*ip6h)))
--- a/net/netfilter/nf_flow_table.c
+++ b/net/netfilter/nf_flow_table.c
@@ -4,6 +4,8 @@
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/ip6_route.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
@@ -23,6 +25,7 @@ flow_offload_fill_dir(struct flow_offloa
{
struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
+ struct dst_entry *dst = route->tuple[dir].dst;
ft->dir = dir;
@@ -30,10 +33,12 @@ flow_offload_fill_dir(struct flow_offloa
case NFPROTO_IPV4:
ft->src_v4 = ctt->src.u3.in;
ft->dst_v4 = ctt->dst.u3.in;
+ ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
break;
case NFPROTO_IPV6:
ft->src_v6 = ctt->src.u3.in6;
ft->dst_v6 = ctt->dst.u3.in6;
+ ft->mtu = ip6_dst_mtu_forward(dst);
break;
}
@@ -44,8 +49,7 @@ flow_offload_fill_dir(struct flow_offloa
ft->iifidx = route->tuple[dir].ifindex;
ft->oifidx = route->tuple[!dir].ifindex;
-
- ft->dst_cache = route->tuple[dir].dst;
+ ft->dst_cache = dst;
}
struct flow_offload *

View File

@ -0,0 +1,952 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 16 Feb 2018 11:08:47 +0100
Subject: [PATCH] netfilter: nf_flow_table: rename nf_flow_table.c to
nf_flow_table_core.c
Preparation for adding more code to the same module
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
rename net/netfilter/{nf_flow_table.c => nf_flow_table_core.c} (100%)
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -113,6 +113,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_
# flow table infrastructure
obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
+nf_flow_table-objs := nf_flow_table_core.o
+
obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
# generic X tables
--- a/net/netfilter/nf_flow_table.c
+++ /dev/null
@@ -1,462 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/rhashtable.h>
-#include <linux/netdevice.h>
-#include <net/ip.h>
-#include <net/ip6_route.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_flow_table.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_tuple.h>
-
-struct flow_offload_entry {
- struct flow_offload flow;
- struct nf_conn *ct;
- struct rcu_head rcu_head;
-};
-
-static void
-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
- struct nf_flow_route *route,
- enum flow_offload_tuple_dir dir)
-{
- struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
- struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
- struct dst_entry *dst = route->tuple[dir].dst;
-
- ft->dir = dir;
-
- switch (ctt->src.l3num) {
- case NFPROTO_IPV4:
- ft->src_v4 = ctt->src.u3.in;
- ft->dst_v4 = ctt->dst.u3.in;
- ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
- break;
- case NFPROTO_IPV6:
- ft->src_v6 = ctt->src.u3.in6;
- ft->dst_v6 = ctt->dst.u3.in6;
- ft->mtu = ip6_dst_mtu_forward(dst);
- break;
- }
-
- ft->l3proto = ctt->src.l3num;
- ft->l4proto = ctt->dst.protonum;
- ft->src_port = ctt->src.u.tcp.port;
- ft->dst_port = ctt->dst.u.tcp.port;
-
- ft->iifidx = route->tuple[dir].ifindex;
- ft->oifidx = route->tuple[!dir].ifindex;
- ft->dst_cache = dst;
-}
-
-struct flow_offload *
-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
-{
- struct flow_offload_entry *entry;
- struct flow_offload *flow;
-
- if (unlikely(nf_ct_is_dying(ct) ||
- !atomic_inc_not_zero(&ct->ct_general.use)))
- return NULL;
-
- entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
- if (!entry)
- goto err_ct_refcnt;
-
- flow = &entry->flow;
-
- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
- goto err_dst_cache_original;
-
- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
- goto err_dst_cache_reply;
-
- entry->ct = ct;
-
- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
-
- if (ct->status & IPS_SRC_NAT)
- flow->flags |= FLOW_OFFLOAD_SNAT;
- else if (ct->status & IPS_DST_NAT)
- flow->flags |= FLOW_OFFLOAD_DNAT;
-
- return flow;
-
-err_dst_cache_reply:
- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
-err_dst_cache_original:
- kfree(entry);
-err_ct_refcnt:
- nf_ct_put(ct);
-
- return NULL;
-}
-EXPORT_SYMBOL_GPL(flow_offload_alloc);
-
-void flow_offload_free(struct flow_offload *flow)
-{
- struct flow_offload_entry *e;
-
- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
- e = container_of(flow, struct flow_offload_entry, flow);
- nf_ct_delete(e->ct, 0, 0);
- nf_ct_put(e->ct);
- kfree_rcu(e, rcu_head);
-}
-EXPORT_SYMBOL_GPL(flow_offload_free);
-
-void flow_offload_dead(struct flow_offload *flow)
-{
- flow->flags |= FLOW_OFFLOAD_DYING;
-}
-EXPORT_SYMBOL_GPL(flow_offload_dead);
-
-int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
-{
- flow->timeout = (u32)jiffies;
-
- rhashtable_insert_fast(&flow_table->rhashtable,
- &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
- *flow_table->type->params);
- rhashtable_insert_fast(&flow_table->rhashtable,
- &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
- *flow_table->type->params);
- return 0;
-}
-EXPORT_SYMBOL_GPL(flow_offload_add);
-
-static void flow_offload_del(struct nf_flowtable *flow_table,
- struct flow_offload *flow)
-{
- rhashtable_remove_fast(&flow_table->rhashtable,
- &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
- *flow_table->type->params);
- rhashtable_remove_fast(&flow_table->rhashtable,
- &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
- *flow_table->type->params);
-
- flow_offload_free(flow);
-}
-
-struct flow_offload_tuple_rhash *
-flow_offload_lookup(struct nf_flowtable *flow_table,
- struct flow_offload_tuple *tuple)
-{
- return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
- *flow_table->type->params);
-}
-EXPORT_SYMBOL_GPL(flow_offload_lookup);
-
-int nf_flow_table_iterate(struct nf_flowtable *flow_table,
- void (*iter)(struct flow_offload *flow, void *data),
- void *data)
-{
- struct flow_offload_tuple_rhash *tuplehash;
- struct rhashtable_iter hti;
- struct flow_offload *flow;
- int err;
-
- err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
- if (err)
- return err;
-
- rhashtable_walk_start(&hti);
-
- while ((tuplehash = rhashtable_walk_next(&hti))) {
- if (IS_ERR(tuplehash)) {
- err = PTR_ERR(tuplehash);
- if (err != -EAGAIN)
- goto out;
-
- continue;
- }
- if (tuplehash->tuple.dir)
- continue;
-
- flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
-
- iter(flow, data);
- }
-out:
- rhashtable_walk_stop(&hti);
- rhashtable_walk_exit(&hti);
-
- return err;
-}
-EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
-
-static inline bool nf_flow_has_expired(const struct flow_offload *flow)
-{
- return (__s32)(flow->timeout - (u32)jiffies) <= 0;
-}
-
-static inline bool nf_flow_is_dying(const struct flow_offload *flow)
-{
- return flow->flags & FLOW_OFFLOAD_DYING;
-}
-
-static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
-{
- struct flow_offload_tuple_rhash *tuplehash;
- struct rhashtable_iter hti;
- struct flow_offload *flow;
- int err;
-
- err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
- if (err)
- return 0;
-
- rhashtable_walk_start(&hti);
-
- while ((tuplehash = rhashtable_walk_next(&hti))) {
- if (IS_ERR(tuplehash)) {
- err = PTR_ERR(tuplehash);
- if (err != -EAGAIN)
- goto out;
-
- continue;
- }
- if (tuplehash->tuple.dir)
- continue;
-
- flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
-
- if (nf_flow_has_expired(flow) ||
- nf_flow_is_dying(flow))
- flow_offload_del(flow_table, flow);
- }
-out:
- rhashtable_walk_stop(&hti);
- rhashtable_walk_exit(&hti);
-
- return 1;
-}
-
-void nf_flow_offload_work_gc(struct work_struct *work)
-{
- struct nf_flowtable *flow_table;
-
- flow_table = container_of(work, struct nf_flowtable, gc_work.work);
- nf_flow_offload_gc_step(flow_table);
- queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
-}
-EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
-
-static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
-{
- const struct flow_offload_tuple *tuple = data;
-
- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
-}
-
-static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
-{
- const struct flow_offload_tuple_rhash *tuplehash = data;
-
- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
-}
-
-static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
- const void *ptr)
-{
- const struct flow_offload_tuple *tuple = arg->key;
- const struct flow_offload_tuple_rhash *x = ptr;
-
- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
- return 1;
-
- return 0;
-}
-
-const struct rhashtable_params nf_flow_offload_rhash_params = {
- .head_offset = offsetof(struct flow_offload_tuple_rhash, node),
- .hashfn = flow_offload_hash,
- .obj_hashfn = flow_offload_hash_obj,
- .obj_cmpfn = flow_offload_hash_cmp,
- .automatic_shrinking = true,
-};
-EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
-
-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
- __be16 port, __be16 new_port)
-{
- struct tcphdr *tcph;
-
- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
- return -1;
-
- tcph = (void *)(skb_network_header(skb) + thoff);
- inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
-
- return 0;
-}
-
-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
- __be16 port, __be16 new_port)
-{
- struct udphdr *udph;
-
- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
- skb_try_make_writable(skb, thoff + sizeof(*udph)))
- return -1;
-
- udph = (void *)(skb_network_header(skb) + thoff);
- if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
- inet_proto_csum_replace2(&udph->check, skb, port,
- new_port, true);
- if (!udph->check)
- udph->check = CSUM_MANGLED_0;
- }
-
- return 0;
-}
-
-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
- u8 protocol, __be16 port, __be16 new_port)
-{
- switch (protocol) {
- case IPPROTO_TCP:
- if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
- return NF_DROP;
- break;
- case IPPROTO_UDP:
- if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
- return NF_DROP;
- break;
- }
-
- return 0;
-}
-
-int nf_flow_snat_port(const struct flow_offload *flow,
- struct sk_buff *skb, unsigned int thoff,
- u8 protocol, enum flow_offload_tuple_dir dir)
-{
- struct flow_ports *hdr;
- __be16 port, new_port;
-
- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
- return -1;
-
- hdr = (void *)(skb_network_header(skb) + thoff);
-
- switch (dir) {
- case FLOW_OFFLOAD_DIR_ORIGINAL:
- port = hdr->source;
- new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
- hdr->source = new_port;
- break;
- case FLOW_OFFLOAD_DIR_REPLY:
- port = hdr->dest;
- new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
- hdr->dest = new_port;
- break;
- default:
- return -1;
- }
-
- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
-}
-EXPORT_SYMBOL_GPL(nf_flow_snat_port);
-
-int nf_flow_dnat_port(const struct flow_offload *flow,
- struct sk_buff *skb, unsigned int thoff,
- u8 protocol, enum flow_offload_tuple_dir dir)
-{
- struct flow_ports *hdr;
- __be16 port, new_port;
-
- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
- return -1;
-
- hdr = (void *)(skb_network_header(skb) + thoff);
-
- switch (dir) {
- case FLOW_OFFLOAD_DIR_ORIGINAL:
- port = hdr->dest;
- new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
- hdr->dest = new_port;
- break;
- case FLOW_OFFLOAD_DIR_REPLY:
- port = hdr->source;
- new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
- hdr->source = new_port;
- break;
- default:
- return -1;
- }
-
- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
-}
-EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
-
-static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
-{
- struct net_device *dev = data;
-
- if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
- return;
-
- flow_offload_dead(flow);
-}
-
-static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
- void *data)
-{
- nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
- flush_delayed_work(&flowtable->gc_work);
-}
-
-void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
-{
- nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
-}
-EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
-
-void nf_flow_table_free(struct nf_flowtable *flow_table)
-{
- nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
- WARN_ON(!nf_flow_offload_gc_step(flow_table));
-}
-EXPORT_SYMBOL_GPL(nf_flow_table_free);
-
-static int nf_flow_table_netdev_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
- if (event != NETDEV_DOWN)
- return NOTIFY_DONE;
-
- nf_flow_table_cleanup(dev_net(dev), dev);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block flow_offload_netdev_notifier = {
- .notifier_call = nf_flow_table_netdev_event,
-};
-
-static int __init nf_flow_table_module_init(void)
-{
- return register_netdevice_notifier(&flow_offload_netdev_notifier);
-}
-
-static void __exit nf_flow_table_module_exit(void)
-{
- unregister_netdevice_notifier(&flow_offload_netdev_notifier);
-}
-
-module_init(nf_flow_table_module_init);
-module_exit(nf_flow_table_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
--- /dev/null
+++ b/net/netfilter/nf_flow_table_core.c
@@ -0,0 +1,462 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/ip6_route.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+
+struct flow_offload_entry {
+ struct flow_offload flow;
+ struct nf_conn *ct;
+ struct rcu_head rcu_head;
+};
+
+static void
+flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
+ struct nf_flow_route *route,
+ enum flow_offload_tuple_dir dir)
+{
+ struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
+ struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
+ struct dst_entry *dst = route->tuple[dir].dst;
+
+ ft->dir = dir;
+
+ switch (ctt->src.l3num) {
+ case NFPROTO_IPV4:
+ ft->src_v4 = ctt->src.u3.in;
+ ft->dst_v4 = ctt->dst.u3.in;
+ ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
+ break;
+ case NFPROTO_IPV6:
+ ft->src_v6 = ctt->src.u3.in6;
+ ft->dst_v6 = ctt->dst.u3.in6;
+ ft->mtu = ip6_dst_mtu_forward(dst);
+ break;
+ }
+
+ ft->l3proto = ctt->src.l3num;
+ ft->l4proto = ctt->dst.protonum;
+ ft->src_port = ctt->src.u.tcp.port;
+ ft->dst_port = ctt->dst.u.tcp.port;
+
+ ft->iifidx = route->tuple[dir].ifindex;
+ ft->oifidx = route->tuple[!dir].ifindex;
+ ft->dst_cache = dst;
+}
+
+struct flow_offload *
+flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
+{
+ struct flow_offload_entry *entry;
+ struct flow_offload *flow;
+
+ if (unlikely(nf_ct_is_dying(ct) ||
+ !atomic_inc_not_zero(&ct->ct_general.use)))
+ return NULL;
+
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (!entry)
+ goto err_ct_refcnt;
+
+ flow = &entry->flow;
+
+ if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
+ goto err_dst_cache_original;
+
+ if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
+ goto err_dst_cache_reply;
+
+ entry->ct = ct;
+
+ flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
+ flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
+
+ if (ct->status & IPS_SRC_NAT)
+ flow->flags |= FLOW_OFFLOAD_SNAT;
+ else if (ct->status & IPS_DST_NAT)
+ flow->flags |= FLOW_OFFLOAD_DNAT;
+
+ return flow;
+
+err_dst_cache_reply:
+ dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
+err_dst_cache_original:
+ kfree(entry);
+err_ct_refcnt:
+ nf_ct_put(ct);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(flow_offload_alloc);
+
+void flow_offload_free(struct flow_offload *flow)
+{
+ struct flow_offload_entry *e;
+
+ dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
+ dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
+ e = container_of(flow, struct flow_offload_entry, flow);
+ nf_ct_delete(e->ct, 0, 0);
+ nf_ct_put(e->ct);
+ kfree_rcu(e, rcu_head);
+}
+EXPORT_SYMBOL_GPL(flow_offload_free);
+
+void flow_offload_dead(struct flow_offload *flow)
+{
+ flow->flags |= FLOW_OFFLOAD_DYING;
+}
+EXPORT_SYMBOL_GPL(flow_offload_dead);
+
+int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
+{
+ flow->timeout = (u32)jiffies;
+
+ rhashtable_insert_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+ *flow_table->type->params);
+ rhashtable_insert_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+ *flow_table->type->params);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(flow_offload_add);
+
+static void flow_offload_del(struct nf_flowtable *flow_table,
+ struct flow_offload *flow)
+{
+ rhashtable_remove_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+ *flow_table->type->params);
+ rhashtable_remove_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+ *flow_table->type->params);
+
+ flow_offload_free(flow);
+}
+
+struct flow_offload_tuple_rhash *
+flow_offload_lookup(struct nf_flowtable *flow_table,
+ struct flow_offload_tuple *tuple)
+{
+ return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
+ *flow_table->type->params);
+}
+EXPORT_SYMBOL_GPL(flow_offload_lookup);
+
+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
+ void (*iter)(struct flow_offload *flow, void *data),
+ void *data)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct rhashtable_iter hti;
+ struct flow_offload *flow;
+ int err;
+
+ err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
+ if (err)
+ return err;
+
+ rhashtable_walk_start(&hti);
+
+ while ((tuplehash = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(tuplehash)) {
+ err = PTR_ERR(tuplehash);
+ if (err != -EAGAIN)
+ goto out;
+
+ continue;
+ }
+ if (tuplehash->tuple.dir)
+ continue;
+
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+
+ iter(flow, data);
+ }
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
+
+static inline bool nf_flow_has_expired(const struct flow_offload *flow)
+{
+ return (__s32)(flow->timeout - (u32)jiffies) <= 0;
+}
+
+static inline bool nf_flow_is_dying(const struct flow_offload *flow)
+{
+ return flow->flags & FLOW_OFFLOAD_DYING;
+}
+
+static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct rhashtable_iter hti;
+ struct flow_offload *flow;
+ int err;
+
+ err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
+ if (err)
+ return 0;
+
+ rhashtable_walk_start(&hti);
+
+ while ((tuplehash = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(tuplehash)) {
+ err = PTR_ERR(tuplehash);
+ if (err != -EAGAIN)
+ goto out;
+
+ continue;
+ }
+ if (tuplehash->tuple.dir)
+ continue;
+
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+
+ if (nf_flow_has_expired(flow) ||
+ nf_flow_is_dying(flow))
+ flow_offload_del(flow_table, flow);
+ }
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+
+ return 1;
+}
+
+void nf_flow_offload_work_gc(struct work_struct *work)
+{
+ struct nf_flowtable *flow_table;
+
+ flow_table = container_of(work, struct nf_flowtable, gc_work.work);
+ nf_flow_offload_gc_step(flow_table);
+ queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
+
+static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
+{
+ const struct flow_offload_tuple *tuple = data;
+
+ return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
+}
+
+static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
+{
+ const struct flow_offload_tuple_rhash *tuplehash = data;
+
+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
+}
+
+static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct flow_offload_tuple *tuple = arg->key;
+ const struct flow_offload_tuple_rhash *x = ptr;
+
+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
+ return 1;
+
+ return 0;
+}
+
+const struct rhashtable_params nf_flow_offload_rhash_params = {
+ .head_offset = offsetof(struct flow_offload_tuple_rhash, node),
+ .hashfn = flow_offload_hash,
+ .obj_hashfn = flow_offload_hash_obj,
+ .obj_cmpfn = flow_offload_hash_cmp,
+ .automatic_shrinking = true,
+};
+EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
+
+static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
+ __be16 port, __be16 new_port)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+ return -1;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
+
+ return 0;
+}
+
+static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
+ __be16 port, __be16 new_port)
+{
+ struct udphdr *udph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
+ return -1;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace2(&udph->check, skb, port,
+ new_port, true);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+
+ return 0;
+}
+
+static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, __be16 port, __be16 new_port)
+{
+ switch (protocol) {
+ case IPPROTO_TCP:
+ if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
+ return NF_DROP;
+ break;
+ case IPPROTO_UDP:
+ if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
+ return NF_DROP;
+ break;
+ }
+
+ return 0;
+}
+
+int nf_flow_snat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir)
+{
+ struct flow_ports *hdr;
+ __be16 port, new_port;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*hdr)))
+ return -1;
+
+ hdr = (void *)(skb_network_header(skb) + thoff);
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ port = hdr->source;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
+ hdr->source = new_port;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ port = hdr->dest;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
+ hdr->dest = new_port;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+}
+EXPORT_SYMBOL_GPL(nf_flow_snat_port);
+
+int nf_flow_dnat_port(const struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff,
+ u8 protocol, enum flow_offload_tuple_dir dir)
+{
+ struct flow_ports *hdr;
+ __be16 port, new_port;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*hdr)))
+ return -1;
+
+ hdr = (void *)(skb_network_header(skb) + thoff);
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ port = hdr->dest;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
+ hdr->dest = new_port;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ port = hdr->source;
+ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
+ hdr->source = new_port;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+}
+EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
+
+static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
+{
+ struct net_device *dev = data;
+
+ if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
+ return;
+
+ flow_offload_dead(flow);
+}
+
+static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
+ void *data)
+{
+ nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
+ flush_delayed_work(&flowtable->gc_work);
+}
+
+void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
+{
+ nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
+
+void nf_flow_table_free(struct nf_flowtable *flow_table)
+{
+ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
+ WARN_ON(!nf_flow_offload_gc_step(flow_table));
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_free);
+
+static int nf_flow_table_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ if (event != NETDEV_DOWN)
+ return NOTIFY_DONE;
+
+ nf_flow_table_cleanup(dev_net(dev), dev);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block flow_offload_netdev_notifier = {
+ .notifier_call = nf_flow_table_netdev_event,
+};
+
+static int __init nf_flow_table_module_init(void)
+{
+ return register_netdevice_notifier(&flow_offload_netdev_notifier);
+}
+
+static void __exit nf_flow_table_module_exit(void)
+{
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+}
+
+module_init(nf_flow_table_module_init);
+module_exit(nf_flow_table_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");

View File

@ -0,0 +1,522 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 17 Feb 2018 11:49:44 +0100
Subject: [PATCH] netfilter: nf_flow_table: move ipv4 offload hook code to
nf_flow_table
Allows some minor code sharing with the ipv6 hook code and is also
useful as preparation for adding iptables support for offload
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
create mode 100644 net/netfilter/nf_flow_table_ip.c
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -2,248 +2,8 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
-#include <linux/rhashtable.h>
-#include <linux/ip.h>
-#include <linux/netdevice.h>
-#include <net/ip.h>
-#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
-/* For layer 4 checksum field offset. */
-#include <linux/tcp.h>
-#include <linux/udp.h>
-
-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
- __be32 addr, __be32 new_addr)
-{
- struct tcphdr *tcph;
-
- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
- return -1;
-
- tcph = (void *)(skb_network_header(skb) + thoff);
- inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
-
- return 0;
-}
-
-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
- __be32 addr, __be32 new_addr)
-{
- struct udphdr *udph;
-
- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
- skb_try_make_writable(skb, thoff + sizeof(*udph)))
- return -1;
-
- udph = (void *)(skb_network_header(skb) + thoff);
- if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
- inet_proto_csum_replace4(&udph->check, skb, addr,
- new_addr, true);
- if (!udph->check)
- udph->check = CSUM_MANGLED_0;
- }
-
- return 0;
-}
-
-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
- unsigned int thoff, __be32 addr,
- __be32 new_addr)
-{
- switch (iph->protocol) {
- case IPPROTO_TCP:
- if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
- return NF_DROP;
- break;
- case IPPROTO_UDP:
- if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
- return NF_DROP;
- break;
- }
-
- return 0;
-}
-
-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
- struct iphdr *iph, unsigned int thoff,
- enum flow_offload_tuple_dir dir)
-{
- __be32 addr, new_addr;
-
- switch (dir) {
- case FLOW_OFFLOAD_DIR_ORIGINAL:
- addr = iph->saddr;
- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
- iph->saddr = new_addr;
- break;
- case FLOW_OFFLOAD_DIR_REPLY:
- addr = iph->daddr;
- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
- iph->daddr = new_addr;
- break;
- default:
- return -1;
- }
- csum_replace4(&iph->check, addr, new_addr);
-
- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
-}
-
-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
- struct iphdr *iph, unsigned int thoff,
- enum flow_offload_tuple_dir dir)
-{
- __be32 addr, new_addr;
-
- switch (dir) {
- case FLOW_OFFLOAD_DIR_ORIGINAL:
- addr = iph->daddr;
- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
- iph->daddr = new_addr;
- break;
- case FLOW_OFFLOAD_DIR_REPLY:
- addr = iph->saddr;
- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
- iph->saddr = new_addr;
- break;
- default:
- return -1;
- }
-
- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
-}
-
-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
- enum flow_offload_tuple_dir dir)
-{
- struct iphdr *iph = ip_hdr(skb);
- unsigned int thoff = iph->ihl * 4;
-
- if (flow->flags & FLOW_OFFLOAD_SNAT &&
- (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
- return -1;
- if (flow->flags & FLOW_OFFLOAD_DNAT &&
- (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
- return -1;
-
- return 0;
-}
-
-static bool ip_has_options(unsigned int thoff)
-{
- return thoff != sizeof(struct iphdr);
-}
-
-static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple)
-{
- struct flow_ports *ports;
- unsigned int thoff;
- struct iphdr *iph;
-
- if (!pskb_may_pull(skb, sizeof(*iph)))
- return -1;
-
- iph = ip_hdr(skb);
- thoff = iph->ihl * 4;
-
- if (ip_is_fragment(iph) ||
- unlikely(ip_has_options(thoff)))
- return -1;
-
- if (iph->protocol != IPPROTO_TCP &&
- iph->protocol != IPPROTO_UDP)
- return -1;
-
- thoff = iph->ihl * 4;
- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
- return -1;
-
- ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
-
- tuple->src_v4.s_addr = iph->saddr;
- tuple->dst_v4.s_addr = iph->daddr;
- tuple->src_port = ports->source;
- tuple->dst_port = ports->dest;
- tuple->l3proto = AF_INET;
- tuple->l4proto = iph->protocol;
- tuple->iifidx = dev->ifindex;
-
- return 0;
-}
-
-/* Based on ip_exceeds_mtu(). */
-static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
-{
- if (skb->len <= mtu)
- return false;
-
- if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
- return false;
-
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
- return false;
-
- return true;
-}
-
-unsigned int
-nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct flow_offload_tuple_rhash *tuplehash;
- struct nf_flowtable *flow_table = priv;
- struct flow_offload_tuple tuple = {};
- enum flow_offload_tuple_dir dir;
- struct flow_offload *flow;
- struct net_device *outdev;
- const struct rtable *rt;
- struct iphdr *iph;
- __be32 nexthop;
-
- if (skb->protocol != htons(ETH_P_IP))
- return NF_ACCEPT;
-
- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
- return NF_ACCEPT;
-
- tuplehash = flow_offload_lookup(flow_table, &tuple);
- if (tuplehash == NULL)
- return NF_ACCEPT;
-
- outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
- if (!outdev)
- return NF_ACCEPT;
-
- dir = tuplehash->tuple.dir;
- flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
- rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
-
- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
- return NF_ACCEPT;
-
- if (skb_try_make_writable(skb, sizeof(*iph)))
- return NF_DROP;
-
- if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
- nf_flow_nat_ip(flow, skb, dir) < 0)
- return NF_DROP;
-
- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
- iph = ip_hdr(skb);
- ip_decrease_ttl(iph);
-
- skb->dev = outdev;
- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
-
- return NF_STOLEN;
-}
-EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
static struct nf_flowtable_type flowtable_ipv4 = {
.family = NFPROTO_IPV4,
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -113,7 +113,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_
# flow table infrastructure
obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
-nf_flow_table-objs := nf_flow_table_core.o
+nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
--- /dev/null
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -0,0 +1,245 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/ip.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/neighbour.h>
+#include <net/netfilter/nf_flow_table.h>
+/* For layer 4 checksum field offset. */
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
+ __be32 addr, __be32 new_addr)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+ return -1;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
+
+ return 0;
+}
+
+static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
+ __be32 addr, __be32 new_addr)
+{
+ struct udphdr *udph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
+ return -1;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace4(&udph->check, skb, addr,
+ new_addr, true);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+
+ return 0;
+}
+
+static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
+ unsigned int thoff, __be32 addr,
+ __be32 new_addr)
+{
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ case IPPROTO_UDP:
+ if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ }
+
+ return 0;
+}
+
+static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ struct iphdr *iph, unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ __be32 addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = iph->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
+ iph->saddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = iph->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
+ iph->daddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+ csum_replace4(&iph->check, addr, new_addr);
+
+ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+}
+
+static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ struct iphdr *iph, unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ __be32 addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = iph->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
+ iph->daddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = iph->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
+ iph->saddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+}
+
+static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ enum flow_offload_tuple_dir dir)
+{
+ struct iphdr *iph = ip_hdr(skb);
+ unsigned int thoff = iph->ihl * 4;
+
+ if (flow->flags & FLOW_OFFLOAD_SNAT &&
+ (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+ nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
+ return -1;
+ if (flow->flags & FLOW_OFFLOAD_DNAT &&
+ (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
+ return -1;
+
+ return 0;
+}
+
+static bool ip_has_options(unsigned int thoff)
+{
+ return thoff != sizeof(struct iphdr);
+}
+
+static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
+ struct flow_offload_tuple *tuple)
+{
+ struct flow_ports *ports;
+ unsigned int thoff;
+ struct iphdr *iph;
+
+ if (!pskb_may_pull(skb, sizeof(*iph)))
+ return -1;
+
+ iph = ip_hdr(skb);
+ thoff = iph->ihl * 4;
+
+ if (ip_is_fragment(iph) ||
+ unlikely(ip_has_options(thoff)))
+ return -1;
+
+ if (iph->protocol != IPPROTO_TCP &&
+ iph->protocol != IPPROTO_UDP)
+ return -1;
+
+ thoff = iph->ihl * 4;
+ if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+ return -1;
+
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+ tuple->src_v4.s_addr = iph->saddr;
+ tuple->dst_v4.s_addr = iph->daddr;
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ tuple->l3proto = AF_INET;
+ tuple->l4proto = iph->protocol;
+ tuple->iifidx = dev->ifindex;
+
+ return 0;
+}
+
+/* Based on ip_exceeds_mtu(). */
+static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu)
+ return false;
+
+ if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ return false;
+
+ return true;
+}
+
+unsigned int
+nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ struct flow_offload_tuple tuple = {};
+ enum flow_offload_tuple_dir dir;
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ const struct rtable *rt;
+ struct iphdr *iph;
+ __be32 nexthop;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ return NF_ACCEPT;
+
+ if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
+ return NF_ACCEPT;
+
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
+ if (tuplehash == NULL)
+ return NF_ACCEPT;
+
+ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+ if (!outdev)
+ return NF_ACCEPT;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+ rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+
+ if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+ return NF_ACCEPT;
+
+ if (skb_try_make_writable(skb, sizeof(*iph)))
+ return NF_DROP;
+
+ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+ nf_flow_nat_ip(flow, skb, dir) < 0)
+ return NF_DROP;
+
+ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ iph = ip_hdr(skb);
+ ip_decrease_ttl(iph);
+
+ skb->dev = outdev;
+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+
+ return NF_STOLEN;
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);

View File

@ -0,0 +1,32 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 17 Feb 2018 11:51:20 +0100
Subject: [PATCH] netfilter: nf_flow_table: move ip header check out of
nf_flow_exceeds_mtu
Allows the function to be shared with the IPv6 hook code
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -181,9 +181,6 @@ static bool nf_flow_exceeds_mtu(const st
if (skb->len <= mtu)
return false;
- if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
- return false;
-
if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
return false;
@@ -222,7 +219,8 @@ nf_flow_offload_ip_hook(void *priv, stru
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+ if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
+ (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
return NF_ACCEPT;
if (skb_try_make_writable(skb, sizeof(*iph)))

View File

@ -0,0 +1,483 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 17 Feb 2018 11:55:51 +0100
Subject: [PATCH] netfilter: nf_flow_table: move ipv6 offload hook code to
nf_flow_table
Useful as preparation for adding iptables support for offload
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -3,240 +3,8 @@
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
-#include <linux/ipv6.h>
-#include <linux/netdevice.h>
-#include <net/ipv6.h>
-#include <net/ip6_route.h>
-#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
-/* For layer 4 checksum field offset. */
-#include <linux/tcp.h>
-#include <linux/udp.h>
-
-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
- struct in6_addr *addr,
- struct in6_addr *new_addr)
-{
- struct tcphdr *tcph;
-
- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
- return -1;
-
- tcph = (void *)(skb_network_header(skb) + thoff);
- inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
- new_addr->s6_addr32, true);
-
- return 0;
-}
-
-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
- struct in6_addr *addr,
- struct in6_addr *new_addr)
-{
- struct udphdr *udph;
-
- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
- skb_try_make_writable(skb, thoff + sizeof(*udph)))
- return -1;
-
- udph = (void *)(skb_network_header(skb) + thoff);
- if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
- inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
- new_addr->s6_addr32, true);
- if (!udph->check)
- udph->check = CSUM_MANGLED_0;
- }
-
- return 0;
-}
-
-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
- unsigned int thoff, struct in6_addr *addr,
- struct in6_addr *new_addr)
-{
- switch (ip6h->nexthdr) {
- case IPPROTO_TCP:
- if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
- return NF_DROP;
- break;
- case IPPROTO_UDP:
- if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
- return NF_DROP;
- break;
- }
-
- return 0;
-}
-
-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
- struct sk_buff *skb, struct ipv6hdr *ip6h,
- unsigned int thoff,
- enum flow_offload_tuple_dir dir)
-{
- struct in6_addr addr, new_addr;
-
- switch (dir) {
- case FLOW_OFFLOAD_DIR_ORIGINAL:
- addr = ip6h->saddr;
- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
- ip6h->saddr = new_addr;
- break;
- case FLOW_OFFLOAD_DIR_REPLY:
- addr = ip6h->daddr;
- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
- ip6h->daddr = new_addr;
- break;
- default:
- return -1;
- }
-
- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
-}
-
-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
- struct sk_buff *skb, struct ipv6hdr *ip6h,
- unsigned int thoff,
- enum flow_offload_tuple_dir dir)
-{
- struct in6_addr addr, new_addr;
-
- switch (dir) {
- case FLOW_OFFLOAD_DIR_ORIGINAL:
- addr = ip6h->daddr;
- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
- ip6h->daddr = new_addr;
- break;
- case FLOW_OFFLOAD_DIR_REPLY:
- addr = ip6h->saddr;
- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
- ip6h->saddr = new_addr;
- break;
- default:
- return -1;
- }
-
- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
-}
-
-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
- struct sk_buff *skb,
- enum flow_offload_tuple_dir dir)
-{
- struct ipv6hdr *ip6h = ipv6_hdr(skb);
- unsigned int thoff = sizeof(*ip6h);
-
- if (flow->flags & FLOW_OFFLOAD_SNAT &&
- (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
- return -1;
- if (flow->flags & FLOW_OFFLOAD_DNAT &&
- (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
- return -1;
-
- return 0;
-}
-
-static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple)
-{
- struct flow_ports *ports;
- struct ipv6hdr *ip6h;
- unsigned int thoff;
-
- if (!pskb_may_pull(skb, sizeof(*ip6h)))
- return -1;
-
- ip6h = ipv6_hdr(skb);
-
- if (ip6h->nexthdr != IPPROTO_TCP &&
- ip6h->nexthdr != IPPROTO_UDP)
- return -1;
-
- thoff = sizeof(*ip6h);
- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
- return -1;
-
- ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
-
- tuple->src_v6 = ip6h->saddr;
- tuple->dst_v6 = ip6h->daddr;
- tuple->src_port = ports->source;
- tuple->dst_port = ports->dest;
- tuple->l3proto = AF_INET6;
- tuple->l4proto = ip6h->nexthdr;
- tuple->iifidx = dev->ifindex;
-
- return 0;
-}
-
-/* Based on ip_exceeds_mtu(). */
-static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
-{
- if (skb->len <= mtu)
- return false;
-
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
- return false;
-
- return true;
-}
-
-unsigned int
-nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct flow_offload_tuple_rhash *tuplehash;
- struct nf_flowtable *flow_table = priv;
- struct flow_offload_tuple tuple = {};
- enum flow_offload_tuple_dir dir;
- struct flow_offload *flow;
- struct net_device *outdev;
- struct in6_addr *nexthop;
- struct ipv6hdr *ip6h;
- struct rt6_info *rt;
-
- if (skb->protocol != htons(ETH_P_IPV6))
- return NF_ACCEPT;
-
- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
- return NF_ACCEPT;
-
- tuplehash = flow_offload_lookup(flow_table, &tuple);
- if (tuplehash == NULL)
- return NF_ACCEPT;
-
- outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
- if (!outdev)
- return NF_ACCEPT;
-
- dir = tuplehash->tuple.dir;
- flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
-
- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
- return NF_ACCEPT;
-
- if (skb_try_make_writable(skb, sizeof(*ip6h)))
- return NF_DROP;
-
- if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
- nf_flow_nat_ipv6(flow, skb, dir) < 0)
- return NF_DROP;
-
- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
- ip6h = ipv6_hdr(skb);
- ip6h->hop_limit--;
-
- skb->dev = outdev;
- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
-
- return NF_STOLEN;
-}
-EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
static struct nf_flowtable_type flowtable_ipv6 = {
.family = NFPROTO_IPV6,
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -4,8 +4,11 @@
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
+#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
/* For layer 4 checksum field offset. */
@@ -241,3 +244,215 @@ nf_flow_offload_ip_hook(void *priv, stru
return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
+
+static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+ return -1;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
+ new_addr->s6_addr32, true);
+
+ return 0;
+}
+
+static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ struct udphdr *udph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
+ return -1;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
+ new_addr->s6_addr32, true);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+
+ return 0;
+}
+
+static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff, struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ switch (ip6h->nexthdr) {
+ case IPPROTO_TCP:
+ if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ case IPPROTO_UDP:
+ if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ }
+
+ return 0;
+}
+
+static int nf_flow_snat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ struct in6_addr addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = ip6h->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
+ ip6h->saddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = ip6h->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
+ ip6h->daddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ struct in6_addr addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = ip6h->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
+ ip6h->daddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = ip6h->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
+ ip6h->saddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static int nf_flow_nat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb,
+ enum flow_offload_tuple_dir dir)
+{
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ unsigned int thoff = sizeof(*ip6h);
+
+ if (flow->flags & FLOW_OFFLOAD_SNAT &&
+ (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ return -1;
+ if (flow->flags & FLOW_OFFLOAD_DNAT &&
+ (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ return -1;
+
+ return 0;
+}
+
+static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
+ struct flow_offload_tuple *tuple)
+{
+ struct flow_ports *ports;
+ struct ipv6hdr *ip6h;
+ unsigned int thoff;
+
+ if (!pskb_may_pull(skb, sizeof(*ip6h)))
+ return -1;
+
+ ip6h = ipv6_hdr(skb);
+
+ if (ip6h->nexthdr != IPPROTO_TCP &&
+ ip6h->nexthdr != IPPROTO_UDP)
+ return -1;
+
+ thoff = sizeof(*ip6h);
+ if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+ return -1;
+
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+ tuple->src_v6 = ip6h->saddr;
+ tuple->dst_v6 = ip6h->daddr;
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ tuple->l3proto = AF_INET6;
+ tuple->l4proto = ip6h->nexthdr;
+ tuple->iifidx = dev->ifindex;
+
+ return 0;
+}
+
+unsigned int
+nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ struct flow_offload_tuple tuple = {};
+ enum flow_offload_tuple_dir dir;
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ struct in6_addr *nexthop;
+ struct ipv6hdr *ip6h;
+ struct rt6_info *rt;
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ return NF_ACCEPT;
+
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
+ return NF_ACCEPT;
+
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
+ if (tuplehash == NULL)
+ return NF_ACCEPT;
+
+ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+ if (!outdev)
+ return NF_ACCEPT;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+ rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+
+ if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+ return NF_ACCEPT;
+
+ if (skb_try_make_writable(skb, sizeof(*ip6h)))
+ return NF_DROP;
+
+ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+ nf_flow_nat_ipv6(flow, skb, dir) < 0)
+ return NF_DROP;
+
+ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ ip6h = ipv6_hdr(skb);
+ ip6h->hop_limit--;
+
+ skb->dev = outdev;
+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+
+ return NF_STOLEN;
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);

View File

@ -0,0 +1,23 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 17 Feb 2018 12:02:28 +0100
Subject: [PATCH] netfilter: nf_flow_table: relax mixed ipv4/ipv6 flowtable
dependencies
Since the offload hook code was moved, this table no longer depends on
the IPv4 and IPv6 flowtable modules
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -670,8 +670,7 @@ endif # NF_TABLES
config NF_FLOW_TABLE_INET
tristate "Netfilter flow table mixed IPv4/IPv6 module"
- depends on NF_FLOW_TABLE_IPV4
- depends on NF_FLOW_TABLE_IPV6
+ depends on NF_FLOW_TABLE
help
This option adds the flow table mixed IPv4/IPv6 support.

View File

@ -0,0 +1,298 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 18 Feb 2018 18:16:31 +0100
Subject: [PATCH] netfilter: nf_flow_table: move init code to
nf_flow_table_core.c
Reduces duplication of .gc and .params in flowtable type definitions and
makes the API clearer
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -14,9 +14,8 @@ struct nf_flowtable;
struct nf_flowtable_type {
struct list_head list;
int family;
- void (*gc)(struct work_struct *work);
+ int (*init)(struct nf_flowtable *ft);
void (*free)(struct nf_flowtable *ft);
- const struct rhashtable_params *params;
nf_hookfn *hook;
struct module *owner;
};
@@ -100,9 +99,8 @@ int nf_flow_table_iterate(struct nf_flow
void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
+int nf_flow_table_init(struct nf_flowtable *flow_table);
void nf_flow_table_free(struct nf_flowtable *flow_table);
-void nf_flow_offload_work_gc(struct work_struct *work);
-extern const struct rhashtable_params nf_flow_offload_rhash_params;
void flow_offload_dead(struct flow_offload *flow);
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -7,8 +7,7 @@
static struct nf_flowtable_type flowtable_ipv4 = {
.family = NFPROTO_IPV4,
- .params = &nf_flow_offload_rhash_params,
- .gc = nf_flow_offload_work_gc,
+ .init = nf_flow_table_init,
.free = nf_flow_table_free,
.hook = nf_flow_offload_ip_hook,
.owner = THIS_MODULE,
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -8,8 +8,7 @@
static struct nf_flowtable_type flowtable_ipv6 = {
.family = NFPROTO_IPV6,
- .params = &nf_flow_offload_rhash_params,
- .gc = nf_flow_offload_work_gc,
+ .init = nf_flow_table_init,
.free = nf_flow_table_free,
.hook = nf_flow_offload_ipv6_hook,
.owner = THIS_MODULE,
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -116,16 +116,50 @@ void flow_offload_dead(struct flow_offlo
}
EXPORT_SYMBOL_GPL(flow_offload_dead);
+static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
+{
+ const struct flow_offload_tuple *tuple = data;
+
+ return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
+}
+
+static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
+{
+ const struct flow_offload_tuple_rhash *tuplehash = data;
+
+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
+}
+
+static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct flow_offload_tuple *tuple = arg->key;
+ const struct flow_offload_tuple_rhash *x = ptr;
+
+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
+ return 1;
+
+ return 0;
+}
+
+static const struct rhashtable_params nf_flow_offload_rhash_params = {
+ .head_offset = offsetof(struct flow_offload_tuple_rhash, node),
+ .hashfn = flow_offload_hash,
+ .obj_hashfn = flow_offload_hash_obj,
+ .obj_cmpfn = flow_offload_hash_cmp,
+ .automatic_shrinking = true,
+};
+
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
flow->timeout = (u32)jiffies;
rhashtable_insert_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
- *flow_table->type->params);
+ nf_flow_offload_rhash_params);
rhashtable_insert_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
- *flow_table->type->params);
+ nf_flow_offload_rhash_params);
return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);
@@ -135,10 +169,10 @@ static void flow_offload_del(struct nf_f
{
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
- *flow_table->type->params);
+ nf_flow_offload_rhash_params);
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
- *flow_table->type->params);
+ nf_flow_offload_rhash_params);
flow_offload_free(flow);
}
@@ -148,7 +182,7 @@ flow_offload_lookup(struct nf_flowtable
struct flow_offload_tuple *tuple)
{
return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
- *flow_table->type->params);
+ nf_flow_offload_rhash_params);
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);
@@ -237,7 +271,7 @@ out:
return 1;
}
-void nf_flow_offload_work_gc(struct work_struct *work)
+static void nf_flow_offload_work_gc(struct work_struct *work)
{
struct nf_flowtable *flow_table;
@@ -245,42 +279,6 @@ void nf_flow_offload_work_gc(struct work
nf_flow_offload_gc_step(flow_table);
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
-EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
-
-static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
-{
- const struct flow_offload_tuple *tuple = data;
-
- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
-}
-
-static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
-{
- const struct flow_offload_tuple_rhash *tuplehash = data;
-
- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
-}
-
-static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
- const void *ptr)
-{
- const struct flow_offload_tuple *tuple = arg->key;
- const struct flow_offload_tuple_rhash *x = ptr;
-
- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
- return 1;
-
- return 0;
-}
-
-const struct rhashtable_params nf_flow_offload_rhash_params = {
- .head_offset = offsetof(struct flow_offload_tuple_rhash, node),
- .hashfn = flow_offload_hash,
- .obj_hashfn = flow_offload_hash_obj,
- .obj_cmpfn = flow_offload_hash_cmp,
- .automatic_shrinking = true,
-};
-EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
__be16 port, __be16 new_port)
@@ -398,6 +396,24 @@ int nf_flow_dnat_port(const struct flow_
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
+int nf_flow_table_init(struct nf_flowtable *flowtable)
+{
+ int err;
+
+ INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
+
+ err = rhashtable_init(&flowtable->rhashtable,
+ &nf_flow_offload_rhash_params);
+ if (err < 0)
+ return err;
+
+ queue_delayed_work(system_power_efficient_wq,
+ &flowtable->gc_work, HZ);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_init);
+
static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
{
struct net_device *dev = data;
@@ -423,8 +439,10 @@ EXPORT_SYMBOL_GPL(nf_flow_table_cleanup)
void nf_flow_table_free(struct nf_flowtable *flow_table)
{
+ cancel_delayed_work_sync(&flow_table->gc_work);
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
WARN_ON(!nf_flow_offload_gc_step(flow_table));
+ rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -22,8 +22,7 @@ nf_flow_offload_inet_hook(void *priv, st
static struct nf_flowtable_type flowtable_inet = {
.family = NFPROTO_INET,
- .params = &nf_flow_offload_rhash_params,
- .gc = nf_flow_offload_work_gc,
+ .init = nf_flow_table_init,
.free = nf_flow_table_free,
.hook = nf_flow_offload_inet_hook,
.owner = THIS_MODULE,
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5013,40 +5013,38 @@ static int nf_tables_newflowtable(struct
}
flowtable->data.type = type;
- err = rhashtable_init(&flowtable->data.rhashtable, type->params);
+ err = type->init(&flowtable->data);
if (err < 0)
goto err3;
err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
flowtable);
if (err < 0)
- goto err3;
+ goto err4;
for (i = 0; i < flowtable->ops_len; i++) {
err = nf_register_net_hook(net, &flowtable->ops[i]);
if (err < 0)
- goto err4;
+ goto err5;
}
err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
if (err < 0)
- goto err5;
-
- INIT_DEFERRABLE_WORK(&flowtable->data.gc_work, type->gc);
- queue_delayed_work(system_power_efficient_wq,
- &flowtable->data.gc_work, HZ);
+ goto err6;
list_add_tail_rcu(&flowtable->list, &table->flowtables);
table->use++;
return 0;
-err5:
+err6:
i = flowtable->ops_len;
-err4:
+err5:
for (k = i - 1; k >= 0; k--)
nf_unregister_net_hook(net, &flowtable->ops[i]);
kfree(flowtable->ops);
+err4:
+ flowtable->data.type->free(&flowtable->data);
err3:
module_put(type->owner);
err2:
@@ -5327,10 +5325,8 @@ err:
static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
{
- cancel_delayed_work_sync(&flowtable->data.gc_work);
kfree(flowtable->name);
flowtable->data.type->free(&flowtable->data);
- rhashtable_destroy(&flowtable->data.rhashtable);
module_put(flowtable->data.type->owner);
}

View File

@ -0,0 +1,22 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 20 Feb 2018 14:48:51 +0100
Subject: [PATCH] netfilter: nf_flow_table: fix priv pointer for netdev hook
The offload ip hook expects a pointer to the flowtable, not to the
rhashtable. Since the rhashtable is the first member, this is safe for
the moment, but breaks as soon as the structure layout changes
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4879,7 +4879,7 @@ static int nf_tables_flowtable_parse_hoo
flowtable->ops[i].pf = NFPROTO_NETDEV;
flowtable->ops[i].hooknum = hooknum;
flowtable->ops[i].priority = priority;
- flowtable->ops[i].priv = &flowtable->data.rhashtable;
+ flowtable->ops[i].priv = &flowtable->data;
flowtable->ops[i].hook = flowtable->data.type->hook;
flowtable->ops[i].dev = dev_array[i];
}

View File

@ -0,0 +1,114 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 20 Feb 2018 14:08:14 +0100
Subject: [PATCH] netfilter: nf_flow_table: track flow tables in nf_flow_table
directly
Avoids having nf_flow_table depend on nftables (useful for future
iptables backport work)
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -21,6 +21,7 @@ struct nf_flowtable_type {
};
struct nf_flowtable {
+ struct list_head list;
struct rhashtable rhashtable;
const struct nf_flowtable_type *type;
struct delayed_work gc_work;
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1091,9 +1091,6 @@ struct nft_flowtable {
struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
const struct nlattr *nla,
u8 genmask);
-void nft_flow_table_iterate(struct net *net,
- void (*iter)(struct nf_flowtable *flowtable, void *data),
- void *data);
void nft_register_flowtable_type(struct nf_flowtable_type *type);
void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -18,6 +18,9 @@ struct flow_offload_entry {
struct rcu_head rcu_head;
};
+static DEFINE_MUTEX(flowtable_lock);
+static LIST_HEAD(flowtables);
+
static void
flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
struct nf_flow_route *route,
@@ -410,6 +413,10 @@ int nf_flow_table_init(struct nf_flowtab
queue_delayed_work(system_power_efficient_wq,
&flowtable->gc_work, HZ);
+ mutex_lock(&flowtable_lock);
+ list_add(&flowtable->list, &flowtables);
+ mutex_unlock(&flowtable_lock);
+
return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_table_init);
@@ -425,20 +432,28 @@ static void nf_flow_table_do_cleanup(str
}
static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
- void *data)
+ struct net_device *dev)
{
- nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
+ nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
flush_delayed_work(&flowtable->gc_work);
}
void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
{
- nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
+ struct nf_flowtable *flowtable;
+
+ mutex_lock(&flowtable_lock);
+ list_for_each_entry(flowtable, &flowtables, list)
+ nf_flow_table_iterate_cleanup(flowtable, dev);
+ mutex_unlock(&flowtable_lock);
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
void nf_flow_table_free(struct nf_flowtable *flow_table)
{
+ mutex_lock(&flowtable_lock);
+ list_del(&flow_table->list);
+ mutex_unlock(&flowtable_lock);
cancel_delayed_work_sync(&flow_table->gc_work);
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
WARN_ON(!nf_flow_offload_gc_step(flow_table));
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4923,23 +4923,6 @@ static const struct nf_flowtable_type *n
return ERR_PTR(-ENOENT);
}
-void nft_flow_table_iterate(struct net *net,
- void (*iter)(struct nf_flowtable *flowtable, void *data),
- void *data)
-{
- struct nft_flowtable *flowtable;
- const struct nft_table *table;
-
- nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_for_each_entry(table, &net->nft.tables, list) {
- list_for_each_entry(flowtable, &table->flowtables, list) {
- iter(&flowtable->data, data);
- }
- }
- nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-}
-EXPORT_SYMBOL_GPL(nft_flow_table_iterate);
-
static void nft_unregister_flowtable_net_hooks(struct net *net,
struct nft_flowtable *flowtable)
{

View File

@ -0,0 +1,38 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 25 Feb 2018 15:37:27 +0100
Subject: [PATCH] netfilter: nf_flow_table: make flow_offload_dead inline
It is too trivial to keep as a separate exported function
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -103,7 +103,10 @@ void nf_flow_table_cleanup(struct net *n
int nf_flow_table_init(struct nf_flowtable *flow_table);
void nf_flow_table_free(struct nf_flowtable *flow_table);
-void flow_offload_dead(struct flow_offload *flow);
+static inline void flow_offload_dead(struct flow_offload *flow)
+{
+ flow->flags |= FLOW_OFFLOAD_DYING;
+}
int nf_flow_snat_port(const struct flow_offload *flow,
struct sk_buff *skb, unsigned int thoff,
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -113,12 +113,6 @@ void flow_offload_free(struct flow_offlo
}
EXPORT_SYMBOL_GPL(flow_offload_free);
-void flow_offload_dead(struct flow_offload *flow)
-{
- flow->flags |= FLOW_OFFLOAD_DYING;
-}
-EXPORT_SYMBOL_GPL(flow_offload_dead);
-
static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
const struct flow_offload_tuple *tuple = data;

View File

@ -0,0 +1,83 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 25 Feb 2018 15:38:31 +0100
Subject: [PATCH] netfilter: nf_flow_table: add a new flow state for
tearing down offloading
Will be used to tear down the offload entry while keeping the conntrack
entry alive.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -68,6 +68,7 @@ struct flow_offload_tuple_rhash {
#define FLOW_OFFLOAD_SNAT 0x1
#define FLOW_OFFLOAD_DNAT 0x2
#define FLOW_OFFLOAD_DYING 0x4
+#define FLOW_OFFLOAD_TEARDOWN 0x8
struct flow_offload {
struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
@@ -103,6 +104,7 @@ void nf_flow_table_cleanup(struct net *n
int nf_flow_table_init(struct nf_flowtable *flow_table);
void nf_flow_table_free(struct nf_flowtable *flow_table);
+void flow_offload_teardown(struct flow_offload *flow);
static inline void flow_offload_dead(struct flow_offload *flow)
{
flow->flags |= FLOW_OFFLOAD_DYING;
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -174,6 +174,12 @@ static void flow_offload_del(struct nf_f
flow_offload_free(flow);
}
+void flow_offload_teardown(struct flow_offload *flow)
+{
+ flow->flags |= FLOW_OFFLOAD_TEARDOWN;
+}
+EXPORT_SYMBOL_GPL(flow_offload_teardown);
+
struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload_tuple *tuple)
@@ -226,11 +232,6 @@ static inline bool nf_flow_has_expired(c
return (__s32)(flow->timeout - (u32)jiffies) <= 0;
}
-static inline bool nf_flow_is_dying(const struct flow_offload *flow)
-{
- return flow->flags & FLOW_OFFLOAD_DYING;
-}
-
static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
{
struct flow_offload_tuple_rhash *tuplehash;
@@ -258,7 +259,8 @@ static int nf_flow_offload_gc_step(struc
flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
if (nf_flow_has_expired(flow) ||
- nf_flow_is_dying(flow))
+ (flow->flags & (FLOW_OFFLOAD_DYING |
+ FLOW_OFFLOAD_TEARDOWN)))
flow_offload_del(flow_table, flow);
}
out:
@@ -419,10 +421,14 @@ static void nf_flow_table_do_cleanup(str
{
struct net_device *dev = data;
- if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
+ if (!dev) {
+ flow_offload_teardown(flow);
return;
+ }
- flow_offload_dead(flow);
+ if (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
+ flow->tuplehash[1].tuple.iifidx == dev->ifindex)
+ flow_offload_dead(flow);
}
static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,

View File

@ -0,0 +1,36 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 25 Feb 2018 15:39:56 +0100
Subject: [PATCH] netfilter: nf_flow_table: in flow_offload_lookup, skip
entries being deleted
Preparation for sending flows back to the slow path
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -184,8 +184,21 @@ struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload_tuple *tuple)
{
- return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
- nf_flow_offload_rhash_params);
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct flow_offload *flow;
+ int dir;
+
+ tuplehash = rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
+ nf_flow_offload_rhash_params);
+ if (!tuplehash)
+ return NULL;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+ if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
+ return NULL;
+
+ return tuplehash;
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);

View File

@ -0,0 +1,99 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 25 Feb 2018 15:41:11 +0100
Subject: [PATCH] netfilter: nf_flow_table: add support for sending flows
back to the slow path
Reset the timeout. For TCP, also set the state to indicate to use the
next incoming packets to reset window tracking.
This allows the slow path to take over again once the offload state has
been torn down
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -100,6 +100,43 @@ err_ct_refcnt:
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);
+static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
+{
+ tcp->state = TCP_CONNTRACK_ESTABLISHED;
+ tcp->seen[0].td_maxwin = 0;
+ tcp->seen[1].td_maxwin = 0;
+}
+
+static void flow_offload_fixup_ct_state(struct nf_conn *ct)
+{
+ const struct nf_conntrack_l4proto *l4proto;
+ struct net *net = nf_ct_net(ct);
+ unsigned int *timeouts;
+ unsigned int timeout;
+ int l4num;
+
+ l4num = nf_ct_protonum(ct);
+ if (l4num == IPPROTO_TCP)
+ flow_offload_fixup_tcp(&ct->proto.tcp);
+
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), l4num);
+ if (!l4proto)
+ return;
+
+ timeouts = l4proto->get_timeouts(net);
+ if (!timeouts)
+ return;
+
+ if (l4num == IPPROTO_TCP)
+ timeout = timeouts[TCP_CONNTRACK_ESTABLISHED];
+ else if (l4num == IPPROTO_UDP)
+ timeout = timeouts[UDP_CT_REPLIED];
+ else
+ return;
+
+ ct->timeout = nfct_time_stamp + timeout;
+}
+
void flow_offload_free(struct flow_offload *flow)
{
struct flow_offload_entry *e;
@@ -107,7 +144,8 @@ void flow_offload_free(struct flow_offlo
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
e = container_of(flow, struct flow_offload_entry, flow);
- nf_ct_delete(e->ct, 0, 0);
+ if (flow->flags & FLOW_OFFLOAD_DYING)
+ nf_ct_delete(e->ct, 0, 0);
nf_ct_put(e->ct);
kfree_rcu(e, rcu_head);
}
@@ -164,6 +202,8 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
static void flow_offload_del(struct nf_flowtable *flow_table,
struct flow_offload *flow)
{
+ struct flow_offload_entry *e;
+
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
nf_flow_offload_rhash_params);
@@ -171,12 +211,20 @@ static void flow_offload_del(struct nf_f
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
nf_flow_offload_rhash_params);
+ e = container_of(flow, struct flow_offload_entry, flow);
+ clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
+
flow_offload_free(flow);
}
void flow_offload_teardown(struct flow_offload *flow)
{
+ struct flow_offload_entry *e;
+
flow->flags |= FLOW_OFFLOAD_TEARDOWN;
+
+ e = container_of(flow, struct flow_offload_entry, flow);
+ flow_offload_fixup_ct_state(e->ct);
}
EXPORT_SYMBOL_GPL(flow_offload_teardown);

View File

@ -0,0 +1,81 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 25 Feb 2018 15:42:58 +0100
Subject: [PATCH] netfilter: nf_flow_table: tear down TCP flows if RST or
FIN was seen
Allow the slow path to handle the shutdown of the connection with proper
timeouts
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -15,6 +15,23 @@
#include <linux/tcp.h>
#include <linux/udp.h>
+static int nf_flow_tcp_state_check(struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
+ return -1;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ if (unlikely(tcph->fin || tcph->rst)) {
+ flow_offload_teardown(flow);
+ return -1;
+ }
+
+ return 0;
+}
+
static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
__be32 addr, __be32 new_addr)
{
@@ -118,10 +135,9 @@ static int nf_flow_dnat_ip(const struct
}
static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
- enum flow_offload_tuple_dir dir)
+ unsigned int thoff, enum flow_offload_tuple_dir dir)
{
struct iphdr *iph = ip_hdr(skb);
- unsigned int thoff = iph->ihl * 4;
if (flow->flags & FLOW_OFFLOAD_SNAT &&
(nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
@@ -201,6 +217,7 @@ nf_flow_offload_ip_hook(void *priv, stru
struct flow_offload *flow;
struct net_device *outdev;
const struct rtable *rt;
+ unsigned int thoff;
struct iphdr *iph;
__be32 nexthop;
@@ -229,8 +246,12 @@ nf_flow_offload_ip_hook(void *priv, stru
if (skb_try_make_writable(skb, sizeof(*iph)))
return NF_DROP;
+ thoff = ip_hdr(skb)->ihl * 4;
+ if (nf_flow_tcp_state_check(flow, skb, thoff))
+ return NF_ACCEPT;
+
if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
- nf_flow_nat_ip(flow, skb, dir) < 0)
+ nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
return NF_DROP;
flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
@@ -438,6 +459,9 @@ nf_flow_offload_ipv6_hook(void *priv, st
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
return NF_ACCEPT;
+ if (nf_flow_tcp_state_check(flow, skb, sizeof(*ip6h)))
+ return NF_ACCEPT;
+
if (skb_try_make_writable(skb, sizeof(*ip6h)))
return NF_DROP;

View File

@ -0,0 +1,19 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 25 Feb 2018 17:22:55 +0100
Subject: [PATCH] netfilter: nf_flow_table: fix checksum when handling DNAT
Add a missing call to csum_replace4 like on SNAT
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -130,6 +130,7 @@ static int nf_flow_dnat_ip(const struct
default:
return -1;
}
+ csum_replace4(&iph->check, addr, new_addr);
return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

View File

@ -0,0 +1,89 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 15 Mar 2018 18:21:43 +0100
Subject: [PATCH] netfilter: nf_flow_table: clean up and fix dst handling
dst handling in the code is inconsistent and possibly wrong. In my test,
skb_dst(skb) holds the dst entry after routing but before NAT, so the
code could possibly return the same dst entry for both directions of a
connection.
Additionally, there was some confusion over the dst entry vs the address
passed as parameter to rt_nexthop/rt6_nexthop.
Do an explicit dst lookup for both ends of the connection and always use
the source address for it. When running the IP hook, use the dst entry
for the opposite direction for determining the route.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -238,7 +238,7 @@ nf_flow_offload_ip_hook(void *priv, stru
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
- rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+ rt = (const struct rtable *)flow->tuplehash[!dir].tuple.dst_cache;
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
(ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
@@ -455,7 +455,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+ rt = (struct rt6_info *)flow->tuplehash[!dir].tuple.dst_cache;
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
return NF_ACCEPT;
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -17,27 +17,38 @@ struct nft_flow_offload {
struct nft_flowtable *flowtable;
};
-static int nft_flow_route(const struct nft_pktinfo *pkt,
- const struct nf_conn *ct,
- struct nf_flow_route *route,
- enum ip_conntrack_dir dir)
+static struct dst_entry *
+nft_flow_dst(const struct nf_conn *ct, enum ip_conntrack_dir dir,
+ const struct nft_pktinfo *pkt)
{
- struct dst_entry *this_dst = skb_dst(pkt->skb);
- struct dst_entry *other_dst = NULL;
+ struct dst_entry *dst;
struct flowi fl;
memset(&fl, 0, sizeof(fl));
switch (nft_pf(pkt)) {
case NFPROTO_IPV4:
- fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
break;
case NFPROTO_IPV6:
- fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
break;
}
- nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
- if (!other_dst)
+ nf_route(nft_net(pkt), &dst, &fl, false, nft_pf(pkt));
+
+ return dst;
+}
+
+static int nft_flow_route(const struct nft_pktinfo *pkt,
+ const struct nf_conn *ct,
+ struct nf_flow_route *route,
+ enum ip_conntrack_dir dir)
+{
+ struct dst_entry *this_dst, *other_dst;
+
+ this_dst = nft_flow_dst(ct, dir, pkt);
+ other_dst = nft_flow_dst(ct, !dir, pkt);
+ if (!this_dst || !other_dst)
return -ENOENT;
route->tuple[dir].dst = this_dst;

View File

@ -0,0 +1,48 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 23 Mar 2018 17:15:22 +0100
Subject: [PATCH] netfilter: nf_flow_table: add missing condition for TCP state
check
Avoid looking at unrelated fields in UDP packets
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -15,11 +15,14 @@
#include <linux/tcp.h>
#include <linux/udp.h>
-static int nf_flow_tcp_state_check(struct flow_offload *flow,
- struct sk_buff *skb, unsigned int thoff)
+static int nf_flow_state_check(struct flow_offload *flow, int proto,
+ struct sk_buff *skb, unsigned int thoff)
{
struct tcphdr *tcph;
+ if (proto != IPPROTO_TCP)
+ return 0;
+
if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
return -1;
@@ -248,7 +251,7 @@ nf_flow_offload_ip_hook(void *priv, stru
return NF_DROP;
thoff = ip_hdr(skb)->ihl * 4;
- if (nf_flow_tcp_state_check(flow, skb, thoff))
+ if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
return NF_ACCEPT;
if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
@@ -460,7 +463,8 @@ nf_flow_offload_ipv6_hook(void *priv, st
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
return NF_ACCEPT;
- if (nf_flow_tcp_state_check(flow, skb, sizeof(*ip6h)))
+ if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
+ sizeof(*ip6h)))
return NF_ACCEPT;
if (skb_try_make_writable(skb, sizeof(*ip6h)))

View File

@ -0,0 +1,23 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 23 Mar 2018 19:12:30 +0100
Subject: [PATCH] netfilter: nf_flow_table: fix offloading connections with
SNAT+DNAT
Pass all NAT types to the flow offload struct, otherwise parts of the
address/port pair do not get translated properly, causing connection
stalls
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -84,7 +84,7 @@ flow_offload_alloc(struct nf_conn *ct, s
if (ct->status & IPS_SRC_NAT)
flow->flags |= FLOW_OFFLOAD_SNAT;
- else if (ct->status & IPS_DST_NAT)
+ if (ct->status & IPS_DST_NAT)
flow->flags |= FLOW_OFFLOAD_DNAT;
return flow;

View File

@ -433,6 +433,7 @@ CONFIG_BASE_FULL=y
CONFIG_BASE_SMALL=0
# CONFIG_BATMAN_ADV is not set
# CONFIG_BATTERY_BQ27XXX is not set
# CONFIG_BATTERY_BQ27XXX_HDQ is not set
# CONFIG_BATTERY_DS2760 is not set
# CONFIG_BATTERY_DS2780 is not set
# CONFIG_BATTERY_DS2781 is not set
@ -461,6 +462,7 @@ CONFIG_BCMA_POSSIBLE=y
# CONFIG_BCMGENET is not set
# CONFIG_BCM_IPROC_ADC is not set
# CONFIG_BCM_KONA_USB2_PHY is not set
# CONFIG_BCM_SBA_RAID is not set
# CONFIG_BDI_SWITCH is not set
# CONFIG_BE2ISCSI is not set
# CONFIG_BE2NET is not set
@ -590,6 +592,7 @@ CONFIG_BOOKE_WDT_DEFAULT_TIMEOUT=3
CONFIG_BOOT_RAW=y
CONFIG_BPF=y
# CONFIG_BPF_JIT is not set
# CONFIG_BPF_JIT_ALWAYS_ON is not set
# CONFIG_BPF_STREAM_PARSER is not set
CONFIG_BPF_SYSCALL=y
# CONFIG_BPQETHER is not set
@ -702,6 +705,7 @@ CONFIG_CARDBUS=y
# CONFIG_CARDMAN_4040 is not set
# CONFIG_CARL9170 is not set
# CONFIG_CASSINI is not set
# CONFIG_CAVIUM_CPT is not set
# CONFIG_CAVIUM_ERRATUM_22375 is not set
# CONFIG_CAVIUM_ERRATUM_23144 is not set
# CONFIG_CAVIUM_ERRATUM_23154 is not set
@ -711,8 +715,8 @@ CONFIG_CARDBUS=y
# CONFIG_CB710_CORE is not set
# CONFIG_CC10001_ADC is not set
# CONFIG_CCS811 is not set
# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
# CONFIG_CC_STACKPROTECTOR is not set
CONFIG_CC_STACKPROTECTOR_NONE=y
# CONFIG_CC_STACKPROTECTOR_REGULAR is not set
@ -725,6 +729,7 @@ CONFIG_CC_STACKPROTECTOR_NONE=y
# CONFIG_CGROUPS is not set
# CONFIG_CGROUP_BPF is not set
# CONFIG_CGROUP_DEBUG is not set
# CONFIG_CGROUP_NET_CLASSID is not set
# CONFIG_CGROUP_NET_PRIO is not set
# CONFIG_CGROUP_RDMA is not set
# CONFIG_CHARGER_BQ2415X is not set
@ -803,6 +808,8 @@ CONFIG_CMDLINE=""
# CONFIG_COMMON_CLK_SI5351 is not set
# CONFIG_COMMON_CLK_SI570 is not set
# CONFIG_COMMON_CLK_VC5 is not set
# CONFIG_COMMON_CLK_VERSATILE is not set
# CONFIG_COMMON_CLK_XGENE is not set
# CONFIG_COMMON_CLK_XLNX_CLKWZRD is not set
# CONFIG_COMPACTION is not set
# CONFIG_COMPAL_LAPTOP is not set
@ -890,6 +897,7 @@ CONFIG_CRYPTO_BLKCIPHER2=y
# CONFIG_CRYPTO_DEV_ATMEL_AES is not set
# CONFIG_CRYPTO_DEV_ATMEL_SHA is not set
# CONFIG_CRYPTO_DEV_ATMEL_TDES is not set
# CONFIG_CRYPTO_DEV_CAVIUM_ZIP is not set
# CONFIG_CRYPTO_DEV_CCP is not set
# CONFIG_CRYPTO_DEV_CCREE is not set
# CONFIG_CRYPTO_DEV_FSL_CAAM is not set
@ -907,6 +915,7 @@ CONFIG_CRYPTO_BLKCIPHER2=y
# CONFIG_CRYPTO_DEV_QAT_DH895xCC is not set
# CONFIG_CRYPTO_DEV_QAT_DH895xCCVF is not set
# CONFIG_CRYPTO_DEV_QCE is not set
# CONFIG_CRYPTO_DEV_SAFEXCEL is not set
# CONFIG_CRYPTO_DEV_SAHARA is not set
# CONFIG_CRYPTO_DEV_TALITOS is not set
# CONFIG_CRYPTO_DEV_VIRTIO is not set
@ -1079,6 +1088,7 @@ CONFIG_DEVPORT=y
# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
# CONFIG_DEVTMPFS is not set
# CONFIG_DEVTMPFS_MOUNT is not set
# CONFIG_DEV_DAX is not set
# CONFIG_DGAP is not set
# CONFIG_DGNC is not set
# CONFIG_DHT11 is not set
@ -1181,6 +1191,7 @@ CONFIG_DQL=y
# CONFIG_DRM_SII902X is not set
# CONFIG_DRM_SIL_SII8620 is not set
# CONFIG_DRM_STI is not set
# CONFIG_DRM_STM is not set
# CONFIG_DRM_TILCDC is not set
# CONFIG_DRM_TINYDRM is not set
# CONFIG_DRM_TI_TFP410 is not set
@ -1269,6 +1280,7 @@ CONFIG_EXPERT=y
CONFIG_EXT4_USE_FOR_EXT2=y
# CONFIG_EXTCON is not set
# CONFIG_EXTCON_ADC_JACK is not set
# CONFIG_EXTCON_AXP288 is not set
# CONFIG_EXTCON_GPIO is not set
# CONFIG_EXTCON_INTEL_INT3496 is not set
# CONFIG_EXTCON_MAX3355 is not set
@ -1522,6 +1534,7 @@ CONFIG_GENERIC_NET_UTILS=y
# CONFIG_HAMRADIO is not set
# CONFIG_HAPPYMEAL is not set
# CONFIG_HARDENED_USERCOPY is not set
# CONFIG_HARDEN_BRANCH_PREDICTOR is not set
# CONFIG_HARDLOCKUP_DETECTOR is not set
# CONFIG_HAVE_AOUT is not set
CONFIG_HAVE_ARCH_HARDENED_USERCOPY=y
@ -1690,6 +1703,7 @@ CONFIG_HW_PERF_EVENTS=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HW_RANDOM_AMD is not set
# CONFIG_HW_RANDOM_ATMEL is not set
# CONFIG_HW_RANDOM_CAVIUM is not set
# CONFIG_HW_RANDOM_EXYNOS is not set
# CONFIG_HW_RANDOM_GEODE is not set
# CONFIG_HW_RANDOM_INTEL is not set
@ -2609,6 +2623,7 @@ CONFIG_MISC_FILESYSTEMS=y
# CONFIG_MMC_BLOCK is not set
CONFIG_MMC_BLOCK_BOUNCE=y
CONFIG_MMC_BLOCK_MINORS=8
# CONFIG_MMC_CAVIUM_THUNDERX is not set
# CONFIG_MMC_CB710 is not set
# CONFIG_MMC_DEBUG is not set
# CONFIG_MMC_DW is not set
@ -2801,6 +2816,7 @@ CONFIG_MTD_SPLIT=y
# CONFIG_MTD_SPLIT_FIRMWARE is not set
CONFIG_MTD_SPLIT_FIRMWARE_NAME="firmware"
# CONFIG_MTD_SPLIT_FIT_FW is not set
# CONFIG_MTD_SPLIT_JIMAGE_FW is not set
# CONFIG_MTD_SPLIT_LZMA_FW is not set
# CONFIG_MTD_SPLIT_MINOR_FW is not set
# CONFIG_MTD_SPLIT_SEAMA_FW is not set
@ -2975,6 +2991,7 @@ CONFIG_NET_CORE=y
# CONFIG_NET_DSA_MV88E6XXX is not set
# CONFIG_NET_DSA_MV88E6XXX_NEED_PPU is not set
# CONFIG_NET_DSA_QCA8K is not set
# CONFIG_NET_DSA_SMSC_LAN9303_I2C is not set
# CONFIG_NET_DSA_SMSC_LAN9303_MDIO is not set
# CONFIG_NET_DSA_TAG_DSA is not set
# CONFIG_NET_DSA_TAG_EDSA is not set
@ -3132,6 +3149,7 @@ CONFIG_NFS_V3=y
# CONFIG_NFT_DUP_IPV6 is not set
# CONFIG_NFT_FIB_IPV4 is not set
# CONFIG_NFT_FIB_IPV6 is not set
# CONFIG_NFT_FLOW_OFFLOAD is not set
# CONFIG_NFT_OBJREF is not set
# CONFIG_NFT_RT is not set
# CONFIG_NFT_SET_BITMAP is not set
@ -3164,6 +3182,7 @@ CONFIG_NF_CONNTRACK_PROCFS=y
# CONFIG_NF_DEFRAG_IPV4 is not set
# CONFIG_NF_DUP_IPV4 is not set
# CONFIG_NF_DUP_IPV6 is not set
# CONFIG_NF_FLOW_TABLE is not set
# CONFIG_NF_LOG_ARP is not set
# CONFIG_NF_LOG_IPV4 is not set
# CONFIG_NF_LOG_NETDEV is not set
@ -3264,6 +3283,7 @@ CONFIG_NMI_LOG_BUF_SHIFT=13
# CONFIG_NTP_PPS is not set
# CONFIG_NVM is not set
# CONFIG_NVMEM is not set
# CONFIG_NVMEM_BCM_OCOTP is not set
# CONFIG_NVMEM_IMX_OCOTP is not set
# CONFIG_NVME_FC is not set
# CONFIG_NVME_TARGET is not set
@ -3391,6 +3411,7 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_PCIE_DW_PLAT is not set
# CONFIG_PCIE_ECRC is not set
# CONFIG_PCIE_IPROC is not set
# CONFIG_PCIE_KIRIN is not set
# CONFIG_PCIE_PTM is not set
# CONFIG_PCIPCWATCHDOG is not set
# CONFIG_PCI_ATMEL is not set
@ -3401,6 +3422,7 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_PCI_ENDPOINT_TEST is not set
# CONFIG_PCI_FTPCI100 is not set
# CONFIG_PCI_HERMES is not set
# CONFIG_PCI_HISI is not set
# CONFIG_PCI_HOST_GENERIC is not set
# CONFIG_PCI_HOST_THUNDER_ECAM is not set
# CONFIG_PCI_HOST_THUNDER_PEM is not set
@ -3414,6 +3436,7 @@ CONFIG_PCI_QUIRKS=y
# CONFIG_PCI_STUB is not set
# CONFIG_PCI_SW_SWITCHTEC is not set
CONFIG_PCI_SYSCALL=y
# CONFIG_PCI_XGENE is not set
# CONFIG_PCMCIA is not set
# CONFIG_PCMCIA_3C574 is not set
# CONFIG_PCMCIA_3C589 is not set
@ -3457,6 +3480,7 @@ CONFIG_PCI_SYSCALL=y
# CONFIG_PHY_PXA_28NM_USB2 is not set
# CONFIG_PHY_QCOM_DWC3 is not set
# CONFIG_PHY_SAMSUNG_USB2 is not set
# CONFIG_PHY_XGENE is not set
# CONFIG_PI433 is not set
# CONFIG_PID_IN_CONTEXTIDR is not set
# CONFIG_PID_NS is not set
@ -3569,9 +3593,11 @@ CONFIG_PWRSEQ_EMMC=y
CONFIG_PWRSEQ_SIMPLE=y
# CONFIG_QCA7000 is not set
# CONFIG_QCA7000_SPI is not set
# CONFIG_QCA7000_UART is not set
# CONFIG_QCOM_EMAC is not set
# CONFIG_QCOM_FALKOR_ERRATUM_1003 is not set
# CONFIG_QCOM_FALKOR_ERRATUM_1009 is not set
# CONFIG_QCOM_FALKOR_ERRATUM_E1041 is not set
# CONFIG_QCOM_HIDMA is not set
# CONFIG_QCOM_HIDMA_MGMT is not set
# CONFIG_QCOM_QDF2400_ERRATUM_0065 is not set
@ -4486,6 +4512,7 @@ CONFIG_SND_PROC_FS=y
# CONFIG_SND_SOC_MEDIATEK is not set
# CONFIG_SND_SOC_MPC5200_AC97 is not set
# CONFIG_SND_SOC_MPC5200_I2S is not set
# CONFIG_SND_SOC_MSM8916_WCD_ANALOG is not set
# CONFIG_SND_SOC_MSM8916_WCD_DIGITAL is not set
# CONFIG_SND_SOC_MT2701 is not set
# CONFIG_SND_SOC_MT8173 is not set
@ -4719,6 +4746,7 @@ CONFIG_SWAP=y
# CONFIG_SWCONFIG_B53 is not set
# CONFIG_SWCONFIG_B53_SPI_DRIVER is not set
# CONFIG_SWCONFIG_LEDS is not set
# CONFIG_SW_SYNC is not set
# CONFIG_SX9500 is not set
# CONFIG_SXGBE_ETH is not set
# CONFIG_SYNCLINK_CS is not set
@ -4943,7 +4971,7 @@ CONFIG_TMPFS_XATTR=y
# CONFIG_TRACER_SNAPSHOT is not set
# CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP is not set
# CONFIG_TRACE_BRANCH_PROFILING is not set
# CONFIG_TRACE_ENUM_MAP_FILE is not set
# CONFIG_TRACE_EVAL_MAP_FILE is not set
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
# CONFIG_TRACE_SINK is not set
# CONFIG_TRACING_EVENTS_GPIO is not set
@ -4974,6 +5002,7 @@ CONFIG_TTY=y
# CONFIG_UACCESS_WITH_MEMCPY is not set
# CONFIG_UBIFS_ATIME_SUPPORT is not set
# CONFIG_UBIFS_FS_ENCRYPTION is not set
CONFIG_UBIFS_FS_FORMAT4=y
# CONFIG_UBIFS_FS_SECURITY is not set
# CONFIG_UBSAN is not set
# CONFIG_UCB1400_CORE is not set
@ -4992,6 +5021,7 @@ CONFIG_UNIX=y
CONFIG_UNIX98_PTYS=y
# CONFIG_UNIXWARE_DISKLABEL is not set
# CONFIG_UNIX_DIAG is not set
# CONFIG_UNMAP_KERNEL_AT_EL0 is not set
# CONFIG_UNUSED_SYMBOLS is not set
# CONFIG_UPROBES is not set
# CONFIG_UPROBE_EVENT is not set

View File

@ -13,7 +13,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
--- a/Makefile
+++ b/Makefile
@@ -402,7 +402,7 @@ KBUILD_AFLAGS_KERNEL :=
@@ -430,7 +430,7 @@ KBUILD_AFLAGS_KERNEL :=
KBUILD_CFLAGS_KERNEL :=
KBUILD_AFLAGS_MODULE := -DMODULE
KBUILD_CFLAGS_MODULE := -DMODULE
@ -21,4 +21,4 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
+KBUILD_LDFLAGS_MODULE = -T $(srctree)/scripts/module-common.lds $(if $(CONFIG_PROFILING),,-s)
GCC_PLUGINS_CFLAGS :=
# Read KERNELRELEASE from include/config/kernel.release (if it exists)
export ARCH SRCARCH SUBARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD

View File

@ -98,7 +98,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1896,6 +1896,13 @@ config TRIM_UNUSED_KSYMS
@@ -1903,6 +1903,13 @@ config TRIM_UNUSED_KSYMS
If unsure, or if you need to build out-of-tree modules, say N.
@ -114,7 +114,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
config MODULES_TREE_LOOKUP
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2997,9 +2997,11 @@ static struct module *setup_load_info(st
@@ -3006,9 +3006,11 @@ static struct module *setup_load_info(st
static int check_modinfo(struct module *mod, struct load_info *info, int flags)
{
@ -127,14 +127,14 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
if (flags & MODULE_INIT_IGNORE_VERMAGIC)
modmagic = NULL;
@@ -3020,6 +3022,7 @@ static int check_modinfo(struct module *
@@ -3029,6 +3031,7 @@ static int check_modinfo(struct module *
mod->name);
add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
}
+#endif
if (get_modinfo(info, "staging")) {
add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK);
check_modinfo_retpoline(mod, info);
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1982,7 +1982,9 @@ static void read_symbols(char *modname)
@ -158,7 +158,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
buf_printf(b, "\n");
buf_printf(b, "__visible struct module __this_module\n");
buf_printf(b, "__attribute__((section(\".gnu.linkonce.this_module\"))) = {\n");
@@ -2161,16 +2165,20 @@ static void add_header(struct buffer *b,
@@ -2161,8 +2165,10 @@ static void add_header(struct buffer *b,
static void add_intree_flag(struct buffer *b, int is_intree)
{
@ -168,6 +168,9 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
+#endif
}
/* Cannot check for assembler */
@@ -2175,10 +2181,12 @@ static void add_retpoline(struct buffer
static void add_staging_flag(struct buffer *b, const char *name)
{
+#ifndef CONFIG_MODULE_STRIPPED
@ -179,7 +182,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
}
/**
@@ -2269,11 +2277,13 @@ static void add_depends(struct buffer *b
@@ -2277,11 +2285,13 @@ static void add_depends(struct buffer *b
static void add_srcversion(struct buffer *b, struct module *mod)
{
@ -193,7 +196,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
}
static void write_if_changed(struct buffer *b, const char *fname)
@@ -2509,7 +2519,9 @@ int main(int argc, char **argv)
@@ -2518,7 +2528,9 @@ int main(int argc, char **argv)
add_staging_flag(&buf, mod->name);
err |= add_versions(&buf, mod);
add_depends(&buf, mod, modules);

View File

@ -15,7 +15,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
--- a/Makefile
+++ b/Makefile
@@ -1212,7 +1212,6 @@ all: modules
@@ -1233,7 +1233,6 @@ all: modules
PHONY += modules
modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux) modules.builtin
@ -23,7 +23,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
@$(kecho) ' Building modules, stage 2.';
$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
@@ -1241,7 +1240,6 @@ _modinst_:
@@ -1262,7 +1261,6 @@ _modinst_:
rm -f $(MODLIB)/build ; \
ln -s $(CURDIR) $(MODLIB)/build ; \
fi

View File

@ -21,9 +21,9 @@ Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
--- a/Makefile
+++ b/Makefile
@@ -405,6 +405,11 @@ KBUILD_CFLAGS_MODULE := -DMODULE
KBUILD_LDFLAGS_MODULE = -T $(srctree)/scripts/module-common.lds $(if $(CONFIG_PROFILING),,-s)
GCC_PLUGINS_CFLAGS :=
@@ -272,6 +272,11 @@ else
scripts/Kbuild.include: ;
include scripts/Kbuild.include
+ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
+KBUILD_CFLAGS_KERNEL += $(call cc-option,-ffunction-sections,)
@ -33,7 +33,7 @@ Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
# Read KERNELRELEASE from include/config/kernel.release (if it exists)
KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null)
KERNELVERSION = $(VERSION)$(if $(PATCHLEVEL),.$(PATCHLEVEL)$(if $(SUBLEVEL),.$(SUBLEVEL)))$(EXTRAVERSION)
@@ -784,11 +789,6 @@ ifdef CONFIG_DEBUG_SECTION_MISMATCH
@@ -787,11 +792,6 @@ ifdef CONFIG_DEBUG_SECTION_MISMATCH
KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once)
endif

View File

@ -90,7 +90,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
__used \
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -420,7 +420,7 @@ targets += $(extra-y) $(MAKECMDGOALS) $(
@@ -434,7 +434,7 @@ targets += $(extra-y) $(MAKECMDGOALS) $(
# Linker scripts preprocessor (.lds.S -> .lds)
# ---------------------------------------------------------------------------
quiet_cmd_cpp_lds_S = LDS $@

View File

@ -17,7 +17,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
depends on NETFILTER_ADVANCED
help
H.323 is a VoIP signalling protocol from ITU-T. As one of the most
@@ -1012,7 +1011,6 @@ config NETFILTER_XT_TARGET_SECMARK
@@ -1037,7 +1036,6 @@ config NETFILTER_XT_TARGET_SECMARK
config NETFILTER_XT_TARGET_TCPMSS
tristate '"TCPMSS" target support'

View File

@ -0,0 +1,66 @@
From: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
Date: Fri, 7 Jun 2013 18:35:22 -0500
Subject: MIPS: r4k_cache: use more efficient cache blast
Optimize the compiler output for larger cache blast cases that are
common for DMA-based networking.
Signed-off-by: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -682,16 +682,48 @@ static inline void prot##extra##blast_##
unsigned long end) \
{ \
unsigned long lsize = cpu_##desc##_line_size(); \
+ unsigned long lsize_2 = lsize * 2; \
+ unsigned long lsize_3 = lsize * 3; \
+ unsigned long lsize_4 = lsize * 4; \
+ unsigned long lsize_5 = lsize * 5; \
+ unsigned long lsize_6 = lsize * 6; \
+ unsigned long lsize_7 = lsize * 7; \
+ unsigned long lsize_8 = lsize * 8; \
unsigned long addr = start & ~(lsize - 1); \
- unsigned long aend = (end - 1) & ~(lsize - 1); \
+ unsigned long aend = (end + lsize - 1) & ~(lsize - 1); \
+ int lines = (aend - addr) / lsize; \
\
__##pfx##flush_prologue \
\
- while (1) { \
+ while (lines >= 8) { \
+ prot##cache_op(hitop, addr); \
+ prot##cache_op(hitop, addr + lsize); \
+ prot##cache_op(hitop, addr + lsize_2); \
+ prot##cache_op(hitop, addr + lsize_3); \
+ prot##cache_op(hitop, addr + lsize_4); \
+ prot##cache_op(hitop, addr + lsize_5); \
+ prot##cache_op(hitop, addr + lsize_6); \
+ prot##cache_op(hitop, addr + lsize_7); \
+ addr += lsize_8; \
+ lines -= 8; \
+ } \
+ \
+ if (lines & 0x4) { \
+ prot##cache_op(hitop, addr); \
+ prot##cache_op(hitop, addr + lsize); \
+ prot##cache_op(hitop, addr + lsize_2); \
+ prot##cache_op(hitop, addr + lsize_3); \
+ addr += lsize_4; \
+ } \
+ \
+ if (lines & 0x2) { \
+ prot##cache_op(hitop, addr); \
+ prot##cache_op(hitop, addr + lsize); \
+ addr += lsize_2; \
+ } \
+ \
+ if (lines & 0x1) { \
prot##cache_op(hitop, addr); \
- if (addr == aend) \
- break; \
- addr += lsize; \
} \
\
__##pfx##flush_epilogue \

View File

@ -29,7 +29,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
void br_forward(const struct net_bridge_port *to,
struct sk_buff *skb, bool local_rcv, bool local_orig)
{
+ if (to->flags & BR_ISOLATE_MODE)
+ if (to->flags & BR_ISOLATE_MODE && !local_orig)
+ to = NULL;
+
if (to && should_deliver(to, skb)) {

View File

@ -0,0 +1,496 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 20 Feb 2018 15:56:02 +0100
Subject: [PATCH] netfilter: add xt_OFFLOAD target
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
create mode 100644 net/netfilter/xt_OFFLOAD.c
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -75,8 +75,6 @@ config NF_TABLES_ARP
help
This option enables the ARP support for nf_tables.
-endif # NF_TABLES
-
config NF_FLOW_TABLE_IPV4
tristate "Netfilter flow table IPv4 module"
depends on NF_FLOW_TABLE
@@ -85,6 +83,8 @@ config NF_FLOW_TABLE_IPV4
To compile it as a module, choose M here.
+endif # NF_TABLES
+
config NF_DUP_IPV4
tristate "Netfilter IPv4 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -69,7 +69,6 @@ config NFT_FIB_IPV6
multicast or blackhole.
endif # NF_TABLES_IPV6
-endif # NF_TABLES
config NF_FLOW_TABLE_IPV6
tristate "Netfilter flow table IPv6 module"
@@ -79,6 +78,8 @@ config NF_FLOW_TABLE_IPV6
To compile it as a module, choose M here.
+endif # NF_TABLES
+
config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -665,8 +665,6 @@ config NFT_FIB_NETDEV
endif # NF_TABLES_NETDEV
-endif # NF_TABLES
-
config NF_FLOW_TABLE_INET
tristate "Netfilter flow table mixed IPv4/IPv6 module"
depends on NF_FLOW_TABLE
@@ -675,11 +673,12 @@ config NF_FLOW_TABLE_INET
To compile it as a module, choose M here.
+endif # NF_TABLES
+
config NF_FLOW_TABLE
tristate "Netfilter flow table module"
depends on NETFILTER_INGRESS
depends on NF_CONNTRACK
- depends on NF_TABLES
help
This option adds the flow table core infrastructure.
@@ -968,6 +967,15 @@ config NETFILTER_XT_TARGET_NOTRACK
depends on NETFILTER_ADVANCED
select NETFILTER_XT_TARGET_CT
+config NETFILTER_XT_TARGET_FLOWOFFLOAD
+ tristate '"FLOWOFFLOAD" target support'
+ depends on NF_FLOW_TABLE
+ depends on NETFILTER_INGRESS
+ help
+ This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
+ module to speed up processing of packets by bypassing the usual
+ netfilter chains
+
config NETFILTER_XT_TARGET_RATEEST
tristate '"RATEEST" target support'
depends on NETFILTER_ADVANCED
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -134,6 +134,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIF
obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
--- /dev/null
+++ b/net/netfilter/xt_FLOWOFFLOAD.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (C) 2018 Felix Fietkau <nbd@nbd.name>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/xt_FLOWOFFLOAD.h>
+#include <net/ip.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_flow_table.h>
+
+static struct nf_flowtable nf_flowtable;
+static HLIST_HEAD(hooks);
+static DEFINE_SPINLOCK(hooks_lock);
+static struct delayed_work hook_work;
+
+struct xt_flowoffload_hook {
+ struct hlist_node list;
+ struct nf_hook_ops ops;
+ struct net *net;
+ bool registered;
+ bool used;
+};
+
+static unsigned int
+xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ return nf_flow_offload_ip_hook(priv, skb, state);
+ case htons(ETH_P_IPV6):
+ return nf_flow_offload_ipv6_hook(priv, skb, state);
+ }
+
+ return NF_ACCEPT;
+}
+
+static int
+xt_flowoffload_create_hook(struct net_device *dev)
+{
+ struct xt_flowoffload_hook *hook;
+ struct nf_hook_ops *ops;
+
+ hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
+ if (!hook)
+ return -ENOMEM;
+
+ ops = &hook->ops;
+ ops->pf = NFPROTO_NETDEV;
+ ops->hooknum = NF_NETDEV_INGRESS;
+ ops->priority = 10;
+ ops->priv = &nf_flowtable;
+ ops->hook = xt_flowoffload_net_hook;
+ ops->dev = dev;
+
+ hlist_add_head(&hook->list, &hooks);
+ mod_delayed_work(system_power_efficient_wq, &hook_work, 0);
+
+ return 0;
+}
+
+static struct xt_flowoffload_hook *
+flow_offload_lookup_hook(struct net_device *dev)
+{
+ struct xt_flowoffload_hook *hook;
+
+ hlist_for_each_entry(hook, &hooks, list) {
+ if (hook->ops.dev == dev)
+ return hook;
+ }
+
+ return NULL;
+}
+
+static void
+xt_flowoffload_check_device(struct net_device *dev)
+{
+ struct xt_flowoffload_hook *hook;
+
+ spin_lock_bh(&hooks_lock);
+ hook = flow_offload_lookup_hook(dev);
+ if (hook)
+ hook->used = true;
+ else
+ xt_flowoffload_create_hook(dev);
+ spin_unlock_bh(&hooks_lock);
+}
+
+static void
+xt_flowoffload_register_hooks(void)
+{
+ struct xt_flowoffload_hook *hook;
+
+restart:
+ hlist_for_each_entry(hook, &hooks, list) {
+ if (hook->registered)
+ continue;
+
+ hook->registered = true;
+ hook->net = dev_net(hook->ops.dev);
+ spin_unlock_bh(&hooks_lock);
+ nf_register_net_hook(hook->net, &hook->ops);
+ spin_lock_bh(&hooks_lock);
+ goto restart;
+ }
+
+}
+
+static void
+xt_flowoffload_cleanup_hooks(void)
+{
+ struct xt_flowoffload_hook *hook;
+
+restart:
+ hlist_for_each_entry(hook, &hooks, list) {
+ if (hook->used || !hook->registered)
+ continue;
+
+ hlist_del(&hook->list);
+ spin_unlock_bh(&hooks_lock);
+ nf_unregister_net_hook(hook->net, &hook->ops);
+ kfree(hook);
+ spin_lock_bh(&hooks_lock);
+ goto restart;
+ }
+
+}
+
+static void
+xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
+{
+ struct flow_offload_tuple *tuple = &flow->tuplehash[0].tuple;
+ struct xt_flowoffload_hook *hook;
+ bool *found = data;
+
+ spin_lock_bh(&hooks_lock);
+ hlist_for_each_entry(hook, &hooks, list) {
+ if (hook->ops.dev->ifindex != tuple->iifidx &&
+ hook->ops.dev->ifindex != tuple->oifidx)
+ continue;
+
+ hook->used = true;
+ *found = true;
+ }
+ spin_unlock_bh(&hooks_lock);
+}
+
+static void
+xt_flowoffload_hook_work(struct work_struct *work)
+{
+ struct xt_flowoffload_hook *hook;
+ bool found = false;
+ int err;
+
+ spin_lock_bh(&hooks_lock);
+ xt_flowoffload_register_hooks();
+ hlist_for_each_entry(hook, &hooks, list)
+ hook->used = false;
+ spin_unlock_bh(&hooks_lock);
+
+ err = nf_flow_table_iterate(&nf_flowtable, xt_flowoffload_check_hook,
+ &found);
+ if (err && err != -EAGAIN)
+ goto out;
+
+ spin_lock_bh(&hooks_lock);
+ xt_flowoffload_cleanup_hooks();
+ spin_unlock_bh(&hooks_lock);
+
+out:
+ if (found)
+ queue_delayed_work(system_power_efficient_wq, &hook_work, HZ);
+}
+
+static bool
+xt_flowoffload_skip(struct sk_buff *skb)
+{
+ struct ip_options *opt = &(IPCB(skb)->opt);
+
+ if (unlikely(opt->optlen))
+ return true;
+ if (skb_sec_path(skb))
+ return true;
+
+ return false;
+}
+
+static struct dst_entry *
+xt_flowoffload_dst(const struct nf_conn *ct, enum ip_conntrack_dir dir,
+ const struct xt_action_param *par)
+{
+ struct dst_entry *dst = NULL;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+ switch (xt_family(par)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
+ break;
+ case NFPROTO_IPV6:
+ fl.u.ip6.saddr = ct->tuplehash[dir].tuple.dst.u3.in6;
+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
+ break;
+ }
+
+ nf_route(xt_net(par), &dst, &fl, false, xt_family(par));
+
+ return dst;
+}
+
+static int
+xt_flowoffload_route(struct sk_buff *skb, const struct nf_conn *ct,
+ const struct xt_action_param *par,
+ struct nf_flow_route *route, enum ip_conntrack_dir dir)
+{
+ struct dst_entry *this_dst, *other_dst;
+
+ this_dst = xt_flowoffload_dst(ct, dir, par);
+ other_dst = xt_flowoffload_dst(ct, !dir, par);
+ if (!this_dst || !other_dst)
+ return -ENOENT;
+
+ route->tuple[dir].dst = this_dst;
+ route->tuple[dir].ifindex = xt_in(par)->ifindex;
+ route->tuple[!dir].dst = other_dst;
+ route->tuple[!dir].ifindex = xt_out(par)->ifindex;
+
+ return 0;
+}
+
+static unsigned int
+flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_flowoffload_target_info *info = par->targinfo;
+ enum ip_conntrack_info ctinfo;
+ enum ip_conntrack_dir dir;
+ struct nf_flow_route route;
+ struct flow_offload *flow;
+ struct nf_conn *ct;
+
+ if (xt_flowoffload_skip(skb))
+ return XT_CONTINUE;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return XT_CONTINUE;
+
+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
+ case IPPROTO_TCP:
+ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
+ return XT_CONTINUE;
+ break;
+ case IPPROTO_UDP:
+ break;
+ default:
+ return XT_CONTINUE;
+ }
+
+ if (test_bit(IPS_HELPER_BIT, &ct->status))
+ return XT_CONTINUE;
+
+ if (ctinfo == IP_CT_NEW ||
+ ctinfo == IP_CT_RELATED)
+ return XT_CONTINUE;
+
+ if (!xt_in(par) || !xt_out(par))
+ return XT_CONTINUE;
+
+ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
+ return XT_CONTINUE;
+
+ dir = CTINFO2DIR(ctinfo);
+
+ if (xt_flowoffload_route(skb, ct, par, &route, dir) < 0)
+ goto err_flow_route;
+
+ flow = flow_offload_alloc(ct, &route);
+ if (!flow)
+ goto err_flow_alloc;
+
+ if (flow_offload_add(&nf_flowtable, flow) < 0)
+ goto err_flow_add;
+
+ xt_flowoffload_check_device(xt_in(par));
+ xt_flowoffload_check_device(xt_out(par));
+
+ if (info->flags & XT_FLOWOFFLOAD_HW)
+ nf_flow_offload_hw_add(xt_net(par), flow, ct);
+
+ return XT_CONTINUE;
+
+err_flow_add:
+ flow_offload_free(flow);
+err_flow_alloc:
+ dst_release(route.tuple[!dir].dst);
+err_flow_route:
+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
+ return XT_CONTINUE;
+}
+
+
+static int flowoffload_chk(const struct xt_tgchk_param *par)
+{
+ struct xt_flowoffload_target_info *info = par->targinfo;
+
+ if (info->flags & ~XT_FLOWOFFLOAD_MASK)
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct xt_target offload_tg_reg __read_mostly = {
+ .family = NFPROTO_UNSPEC,
+ .name = "FLOWOFFLOAD",
+ .revision = 0,
+ .targetsize = sizeof(struct xt_flowoffload_target_info),
+ .usersize = sizeof(struct xt_flowoffload_target_info),
+ .checkentry = flowoffload_chk,
+ .target = flowoffload_tg,
+ .me = THIS_MODULE,
+};
+
+static int xt_flowoffload_table_init(struct nf_flowtable *table)
+{
+ table->flags = NF_FLOWTABLE_F_HW;
+ nf_flow_table_init(table);
+ return 0;
+}
+
+static void xt_flowoffload_table_cleanup(struct nf_flowtable *table)
+{
+ nf_flow_table_free(table);
+}
+
+static int __init xt_flowoffload_tg_init(void)
+{
+ int ret;
+
+ INIT_DELAYED_WORK(&hook_work, xt_flowoffload_hook_work);
+
+ ret = xt_flowoffload_table_init(&nf_flowtable);
+ if (ret)
+ return ret;
+
+ ret = xt_register_target(&offload_tg_reg);
+ if (ret)
+ xt_flowoffload_table_cleanup(&nf_flowtable);
+
+ return ret;
+}
+
+static void __exit xt_flowoffload_tg_exit(void)
+{
+ xt_unregister_target(&offload_tg_reg);
+ xt_flowoffload_table_cleanup(&nf_flowtable);
+}
+
+MODULE_LICENSE("GPL");
+module_init(xt_flowoffload_tg_init);
+module_exit(xt_flowoffload_tg_exit);
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -6,7 +6,6 @@
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ip6_route.h>
-#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
--- /dev/null
+++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _XT_FLOWOFFLOAD_H
+#define _XT_FLOWOFFLOAD_H
+
+#include <linux/types.h>
+
+enum {
+ XT_FLOWOFFLOAD_HW = 1 << 0,
+
+ XT_FLOWOFFLOAD_MASK = XT_FLOWOFFLOAD_HW
+};
+
+struct xt_flowoffload_target_info {
+ __u32 flags;
+};
+
+#endif /* _XT_FLOWOFFLOAD_H */

View File

@ -13,7 +13,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -465,7 +465,11 @@ static int fq_codel_init(struct Qdisc *s
@@ -471,7 +471,11 @@ static int fq_codel_init(struct Qdisc *s
sch->limit = 10*1024;
q->flows_cnt = 1024;

View File

@ -14,7 +14,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -369,12 +369,13 @@ extern struct Qdisc_ops noop_qdisc_ops;
@@ -368,12 +368,13 @@ extern struct Qdisc_ops noop_qdisc_ops;
extern struct Qdisc_ops pfifo_fast_ops;
extern struct Qdisc_ops mq_qdisc_ops;
extern struct Qdisc_ops noqueue_qdisc_ops;
@ -55,7 +55,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
register_qdisc(&pfifo_head_drop_qdisc_ops);
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -694,7 +694,7 @@ static const struct Qdisc_class_ops fq_c
@@ -700,7 +700,7 @@ static const struct Qdisc_class_ops fq_c
.walk = fq_codel_walk,
};
@ -64,7 +64,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
.cl_ops = &fq_codel_class_ops,
.id = "fq_codel",
.priv_size = sizeof(struct fq_codel_sched_data),
@@ -709,6 +709,7 @@ static struct Qdisc_ops fq_codel_qdisc_o
@@ -715,6 +715,7 @@ static struct Qdisc_ops fq_codel_qdisc_o
.dump_stats = fq_codel_dump_stats,
.owner = THIS_MODULE,
};
@ -83,7 +83,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
EXPORT_SYMBOL(default_qdisc_ops);
/* Main transmission queue. */
@@ -764,7 +764,7 @@ static void attach_one_default_qdisc(str
@@ -760,7 +760,7 @@ static void attach_one_default_qdisc(str
void *_unused)
{
struct Qdisc *qdisc;

View File

@ -15,7 +15,7 @@
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1437,6 +1437,9 @@ int genphy_update_link(struct phy_device
@@ -1458,6 +1458,9 @@ int genphy_update_link(struct phy_device
{
int status;

View File

@ -56,7 +56,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
*/
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2494,6 +2494,10 @@ static inline int pskb_trim(struct sk_bu
@@ -2491,6 +2491,10 @@ static inline int pskb_trim(struct sk_bu
return (len < skb->len) ? __pskb_trim(skb, len) : 0;
}
@ -67,7 +67,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
/**
* pskb_trim_unique - remove end from a paged unique (not cloned) buffer
* @skb: buffer to alter
@@ -2624,16 +2628,6 @@ static inline struct sk_buff *dev_alloc_
@@ -2621,16 +2625,6 @@ static inline struct sk_buff *dev_alloc_
}
@ -101,7 +101,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
help
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2975,10 +2975,20 @@ static int xmit_one(struct sk_buff *skb,
@@ -2982,10 +2982,20 @@ static int xmit_one(struct sk_buff *skb,
if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
dev_queue_xmit_nit(skb, dev);
@ -128,7 +128,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
}
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -64,6 +64,7 @@
@@ -63,6 +63,7 @@
#include <linux/errqueue.h>
#include <linux/prefetch.h>
#include <linux/if_vlan.h>
@ -136,7 +136,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
#include <net/protocol.h>
#include <net/dst.h>
@@ -503,6 +504,22 @@ skb_fail:
@@ -499,6 +500,22 @@ skb_fail:
}
EXPORT_SYMBOL(__napi_alloc_skb);

View File

@ -61,7 +61,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
struct dst_entry *dst = __sk_dst_get(sk);
@@ -1597,9 +1609,11 @@ void sk_destruct(struct sock *sk)
@@ -1595,9 +1607,11 @@ void sk_destruct(struct sock *sk)
static void __sk_free(struct sock *sk)
{

View File

@ -232,7 +232,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
return -ENOMEM;
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2765,6 +2765,8 @@ static const struct file_operations proc
@@ -2769,6 +2769,8 @@ static const struct file_operations proc
static int __init proc_vmalloc_init(void)
{
@ -327,7 +327,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3383,6 +3383,8 @@ static __net_initdata struct pernet_oper
@@ -3378,6 +3378,8 @@ static __net_initdata struct pernet_oper
static int __init proto_init(void)
{
@ -393,7 +393,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -424,6 +424,9 @@ static struct pernet_operations ip_rt_pr
@@ -427,6 +427,9 @@ static struct pernet_operations ip_rt_pr
static int __init ip_rt_proc_init(void)
{

View File

@ -21,7 +21,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
+ tristate
default n
select ANON_INODES
help
select IRQ_WORK
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -1,3 +1,7 @@
@ -54,7 +54,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
+MODULE_LICENSE("GPL");
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2146,6 +2146,7 @@ int wake_up_state(struct task_struct *p,
@@ -2147,6 +2147,7 @@ int wake_up_state(struct task_struct *p,
{
return try_to_wake_up(p, state, 0);
}

View File

@ -0,0 +1,23 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 21 Feb 2018 13:40:12 +0100
Subject: [PATCH] clocksource: mips-gic-timer: fix clocksource counter width
This code needs to use ffs instead of fls on the mask to determine the
shift for reading the GIC_CONFIG_COUNTBITS field.
Fixes: e07127a077c7 ("clocksource: mips-gic-timer: Use new GIC accessor functions")
Cc: Paul Burton <paul.burton@imgtec.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/drivers/clocksource/mips-gic-timer.c
+++ b/drivers/clocksource/mips-gic-timer.c
@@ -164,7 +164,7 @@ static int __init __gic_clocksource_init
/* Set clocksource mask. */
count_width = read_gic_config() & GIC_CONFIG_COUNTBITS;
- count_width >>= __fls(GIC_CONFIG_COUNTBITS);
+ count_width >>= __ffs(GIC_CONFIG_COUNTBITS);
count_width *= 4;
count_width += 32;
gic_clocksource.mask = CLOCKSOURCE_MASK(count_width);

View File

@ -71,7 +71,7 @@ Signed-off-by: Tobias Wolf <dev-NTEO@vplace.de>
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6170,7 +6170,7 @@ static void __ref alloc_node_mem_map(str
@@ -6147,7 +6147,7 @@ static void __ref alloc_node_mem_map(str
mem_map = NODE_DATA(0)->node_mem_map;
#if defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) || defined(CONFIG_FLATMEM)
if (page_to_pfn(mem_map) != pgdat->node_start_pfn)

View File

@ -0,0 +1,194 @@
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Tue, 30 Jan 2018 11:55:16 +0100
Subject: [PATCH V10 1/3] mtd: partitions: add of_match_table parser matching
for the "ofpart" type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
In order to properly support compatibility strings as described in the
bindings/mtd/partition.txt "ofpart" type should be treated as an
indication for looking into OF. MTD should check "compatible" property
and search for a matching parser rather than blindly trying the one
supporting "fixed-partitions".
It also means that existing "fixed-partitions" parser should get renamed
to use a more meaningful name.
This commit achievies that aim by introducing a new mtd_part_of_parse().
It works by looking for a matching parser for every string in the
"compatibility" property (starting with the most specific one).
Please note that driver-specified parsers still take a precedence. It's
assumed that driver providing a parser type has a good reason for that
(e.g. having platform data with device-specific info). Also doing
otherwise could break existing setups. The same applies to using default
parsers (including "cmdlinepart") as some overwrite DT data with cmdline
argument.
Partition parsers can now provide an of_match_table to enable
flash<-->parser matching via device tree as documented in the
mtd/partition.txt.
This support is currently limited to built-in parsers as it uses
request_module() and friends. This should be sufficient for most cases
though as compiling parsers as modules isn't a common choice.
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Tested-by: Peter Rosin <peda@axentia.se>
---
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -30,6 +30,7 @@
#include <linux/mtd/mtd.h>
#include <linux/mtd/partitions.h>
#include <linux/err.h>
+#include <linux/of.h>
#include "mtdcore.h"
@@ -894,6 +895,92 @@ static int mtd_part_do_parse(struct mtd_
}
/**
+ * mtd_part_get_compatible_parser - find MTD parser by a compatible string
+ *
+ * @compat: compatible string describing partitions in a device tree
+ *
+ * MTD parsers can specify supported partitions by providing a table of
+ * compatibility strings. This function finds a parser that advertises support
+ * for a passed value of "compatible".
+ */
+static struct mtd_part_parser *mtd_part_get_compatible_parser(const char *compat)
+{
+ struct mtd_part_parser *p, *ret = NULL;
+
+ spin_lock(&part_parser_lock);
+
+ list_for_each_entry(p, &part_parsers, list) {
+ const struct of_device_id *matches;
+
+ matches = p->of_match_table;
+ if (!matches)
+ continue;
+
+ for (; matches->compatible[0]; matches++) {
+ if (!strcmp(matches->compatible, compat) &&
+ try_module_get(p->owner)) {
+ ret = p;
+ break;
+ }
+ }
+
+ if (ret)
+ break;
+ }
+
+ spin_unlock(&part_parser_lock);
+
+ return ret;
+}
+
+static int mtd_part_of_parse(struct mtd_info *master,
+ struct mtd_partitions *pparts)
+{
+ struct mtd_part_parser *parser;
+ struct device_node *np;
+ struct property *prop;
+ const char *compat;
+ const char *fixed = "ofpart";
+ int ret, err = 0;
+
+ np = of_get_child_by_name(mtd_get_of_node(master), "partitions");
+ of_property_for_each_string(np, "compatible", prop, compat) {
+ parser = mtd_part_get_compatible_parser(compat);
+ if (!parser)
+ continue;
+ ret = mtd_part_do_parse(parser, master, pparts, NULL);
+ if (ret > 0) {
+ of_node_put(np);
+ return ret;
+ }
+ mtd_part_parser_put(parser);
+ if (ret < 0 && !err)
+ err = ret;
+ }
+ of_node_put(np);
+
+ /*
+ * For backward compatibility we have to try the "ofpart"
+ * parser. It supports old DT format with partitions specified as a
+ * direct subnodes of a flash device DT node without any compatibility
+ * specified we could match.
+ */
+ parser = mtd_part_parser_get(fixed);
+ if (!parser && !request_module("%s", fixed))
+ parser = mtd_part_parser_get(fixed);
+ if (parser) {
+ ret = mtd_part_do_parse(parser, master, pparts, NULL);
+ if (ret > 0)
+ return ret;
+ mtd_part_parser_put(parser);
+ if (ret < 0 && !err)
+ err = ret;
+ }
+
+ return err;
+}
+
+/**
* parse_mtd_partitions - parse MTD partitions
* @master: the master partition (describes whole MTD device)
* @types: names of partition parsers to try or %NULL
@@ -925,19 +1012,30 @@ int parse_mtd_partitions(struct mtd_info
types = default_mtd_part_types;
for ( ; *types; types++) {
- pr_debug("%s: parsing partitions %s\n", master->name, *types);
- parser = mtd_part_parser_get(*types);
- if (!parser && !request_module("%s", *types))
+ /*
+ * ofpart is a special type that means OF partitioning info
+ * should be used. It requires a bit different logic so it is
+ * handled in a separated function.
+ */
+ if (!strcmp(*types, "ofpart")) {
+ ret = mtd_part_of_parse(master, pparts);
+ } else {
+ pr_debug("%s: parsing partitions %s\n", master->name,
+ *types);
parser = mtd_part_parser_get(*types);
- pr_debug("%s: got parser %s\n", master->name,
- parser ? parser->name : NULL);
- if (!parser)
- continue;
- ret = mtd_part_do_parse(parser, master, pparts, data);
+ if (!parser && !request_module("%s", *types))
+ parser = mtd_part_parser_get(*types);
+ pr_debug("%s: got parser %s\n", master->name,
+ parser ? parser->name : NULL);
+ if (!parser)
+ continue;
+ ret = mtd_part_do_parse(parser, master, pparts, data);
+ if (ret <= 0)
+ mtd_part_parser_put(parser);
+ }
/* Found partitions! */
if (ret > 0)
return 0;
- mtd_part_parser_put(parser);
/*
* Stash the first error we see; only report it if no parser
* succeeds
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -77,6 +77,7 @@ struct mtd_part_parser {
struct list_head list;
struct module *owner;
const char *name;
+ const struct of_device_id *of_match_table;
int (*parse_fn)(struct mtd_info *, const struct mtd_partition **,
struct mtd_part_parser_data *);
void (*cleanup)(const struct mtd_partition *pparts, int nr_parts);

View File

@ -0,0 +1,68 @@
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Tue, 30 Jan 2018 12:09:58 +0100
Subject: [PATCH V10 2/3] mtd: rename "ofpart" parser to "fixed-partitions" as
it fits it better
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Type "ofpart" means that OF should be used to get partitioning info and
this driver supports "fixed-partitions" binding only. Renaming it should
lead to less confusion especially when parsers for new compatibility
strings start to appear.
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
---
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -940,7 +940,7 @@ static int mtd_part_of_parse(struct mtd_
struct device_node *np;
struct property *prop;
const char *compat;
- const char *fixed = "ofpart";
+ const char *fixed = "fixed-partitions";
int ret, err = 0;
np = of_get_child_by_name(mtd_get_of_node(master), "partitions");
@@ -960,7 +960,7 @@ static int mtd_part_of_parse(struct mtd_
of_node_put(np);
/*
- * For backward compatibility we have to try the "ofpart"
+ * For backward compatibility we have to try the "fixed-partitions"
* parser. It supports old DT format with partitions specified as a
* direct subnodes of a flash device DT node without any compatibility
* specified we could match.
--- a/drivers/mtd/ofpart.c
+++ b/drivers/mtd/ofpart.c
@@ -25,9 +25,9 @@ static bool node_has_compatible(struct d
return of_get_property(pp, "compatible", NULL);
}
-static int parse_ofpart_partitions(struct mtd_info *master,
- const struct mtd_partition **pparts,
- struct mtd_part_parser_data *data)
+static int parse_fixed_partitions(struct mtd_info *master,
+ const struct mtd_partition **pparts,
+ struct mtd_part_parser_data *data)
{
struct mtd_partition *parts;
struct device_node *mtd_node;
@@ -141,8 +141,8 @@ ofpart_none:
}
static struct mtd_part_parser ofpart_parser = {
- .parse_fn = parse_ofpart_partitions,
- .name = "ofpart",
+ .parse_fn = parse_fixed_partitions,
+ .name = "fixed-partitions",
};
static int parse_ofoldpart_partitions(struct mtd_info *master,
@@ -229,4 +229,5 @@ MODULE_AUTHOR("Vitaly Wool, David Gibson
* with the same name. Since we provide the ofoldpart parser, we should have
* the corresponding alias.
*/
+MODULE_ALIAS("fixed-partitions");
MODULE_ALIAS("ofoldpart");

View File

@ -1,7 +1,7 @@
From 4ac9222778478a00c7fc9d347b7ed1e0e595120d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Thu, 4 Jan 2018 08:05:34 +0100
Subject: [PATCH] mtd: ofpart: add of_match_table with "fixed-partitions"
Subject: [PATCH V10 3/3] mtd: ofpart: add of_match_table with
"fixed-partitions"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
@ -17,10 +17,7 @@ This matches existing bindings documentation.
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Reviewed-by: Brian Norris <computersforpeace@gmail.com>
Tested-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
drivers/mtd/ofpart.c | 7 +++++++
1 file changed, 7 insertions(+)
--- a/drivers/mtd/ofpart.c
+++ b/drivers/mtd/ofpart.c
@ -35,8 +32,8 @@ Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
+MODULE_DEVICE_TABLE(of, parse_ofpart_match_table);
+
static struct mtd_part_parser ofpart_parser = {
.parse_fn = parse_ofpart_partitions,
.name = "ofpart",
.parse_fn = parse_fixed_partitions,
.name = "fixed-partitions",
+ .of_match_table = parse_ofpart_match_table,
};

View File

@ -137,9 +137,9 @@ Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
* Do not forget to update 'parse_mtd_partitions()' kerneldoc comment if you
* are changing this array!
*/
@@ -963,6 +990,13 @@ int parse_mtd_partitions(struct mtd_info
struct property *prop;
const char *compat;
@@ -1007,6 +1034,13 @@ int parse_mtd_partitions(struct mtd_info
{
struct mtd_part_parser *parser;
int ret, err = 0;
+ const char *const *types_of = NULL;
+
@ -149,9 +149,9 @@ Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
+ types = types_of;
+ }
np = of_get_child_by_name(mtd_get_of_node(master), "partitions");
of_property_for_each_string(np, "compatible", prop, compat) {
@@ -1004,6 +1038,7 @@ int parse_mtd_partitions(struct mtd_info
if (!types)
types = default_mtd_part_types;
@@ -1043,6 +1077,7 @@ int parse_mtd_partitions(struct mtd_info
if (ret < 0 && !err)
err = ret;
}

View File

@ -16,7 +16,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
#define ATH8035_PHY_ID 0x004dd072
#define AT803X_PHY_ID_MASK 0xffffffef
@@ -260,7 +261,8 @@ static int at803x_probe(struct phy_devic
@@ -256,7 +257,8 @@ static int at803x_probe(struct phy_devic
if (!priv)
return -ENOMEM;
@ -26,7 +26,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
goto does_not_require_reset_workaround;
gpiod_reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
@@ -336,7 +338,7 @@ static void at803x_link_change_notify(st
@@ -332,7 +334,7 @@ static void at803x_link_change_notify(st
struct at803x_priv *priv = phydev->priv;
/*
@ -35,7 +35,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
* signalled. This is necessary to circumvent a hardware bug that
* occurs when the cable is unplugged while TX packets are pending
* in the FIFO. In such cases, the FIFO enters an error mode it
@@ -448,6 +450,24 @@ static struct phy_driver at803x_driver[]
@@ -444,6 +446,24 @@ static struct phy_driver at803x_driver[]
.aneg_done = at803x_aneg_done,
.ack_interrupt = &at803x_ack_interrupt,
.config_intr = &at803x_config_intr,
@ -60,7 +60,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
} };
module_phy_driver(at803x_driver);
@@ -455,6 +475,7 @@ module_phy_driver(at803x_driver);
@@ -451,6 +471,7 @@ module_phy_driver(at803x_driver);
static struct mdio_device_id __maybe_unused atheros_tbl[] = {
{ ATH8030_PHY_ID, AT803X_PHY_ID_MASK },
{ ATH8031_PHY_ID, AT803X_PHY_ID_MASK },

View File

@ -0,0 +1,61 @@
From d4c4bc11353f3bea6754f7d21e3612c9f32d1d64 Mon Sep 17 00:00:00 2001
From: Giuseppe Lippolis <giu.lippolis@gmail.com>
Date: Mon, 26 Mar 2018 16:34:39 +0200
Subject: [PATCH] net-usb: add qmi_wwan if on lte modem wistron neweb d18q1
This modem is embedded on dlink dwr-921 router.
The oem configuration states:
T: Bus=02 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0
D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1
P: Vendor=1435 ProdID=0918 Rev= 2.32
S: Manufacturer=Android
S: Product=Android
S: SerialNumber=0123456789ABCDEF
C:* #Ifs= 7 Cfg#= 1 Atr=80 MxPwr=500mA
I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none)
E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
E: Ad=84(I) Atr=03(Int.) MxPS= 64 Ivl=32ms
E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan
E: Ad=86(I) Atr=03(Int.) MxPS= 64 Ivl=32ms
E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan
E: Ad=88(I) Atr=03(Int.) MxPS= 64 Ivl=32ms
E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan
E: Ad=8a(I) Atr=03(Int.) MxPS= 64 Ivl=32ms
E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 6 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=(none)
E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=125us
Tested on openwrt distribution
Signed-off-by: Giuseppe Lippolis <giu.lippolis@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/usb/qmi_wwan.c | 3 +++
1 file changed, 3 insertions(+)
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1104,6 +1104,9 @@ static const struct usb_device_id produc
{QMI_FIXED_INTF(0x0846, 0x68a2, 8)},
{QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */
{QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */
+ {QMI_FIXED_INTF(0x1435, 0xd181, 3)}, /* Wistron NeWeb D18Q1 */
+ {QMI_FIXED_INTF(0x1435, 0xd181, 4)}, /* Wistron NeWeb D18Q1 */
+ {QMI_FIXED_INTF(0x1435, 0xd181, 5)}, /* Wistron NeWeb D18Q1 */
{QMI_FIXED_INTF(0x16d8, 0x6003, 0)}, /* CMOTech 6003 */
{QMI_FIXED_INTF(0x16d8, 0x6007, 0)}, /* CMOTech CHE-628S */
{QMI_FIXED_INTF(0x16d8, 0x6008, 0)}, /* CMOTech CMU-301 */

View File

@ -0,0 +1,28 @@
From 743989254ea9f132517806d8893ca9b6cf9dc86b Mon Sep 17 00:00:00 2001
From: Pawel Dembicki <paweldembicki@gmail.com>
Date: Sat, 24 Mar 2018 22:08:14 +0100
Subject: [PATCH] net: qmi_wwan: add BroadMobi BM806U 2020:2033
BroadMobi BM806U is an Qualcomm MDM9225 based 3G/4G modem.
Tested hardware BM806U is mounted on D-Link DWR-921-C3 router.
The USB id is added to qmi_wwan.c to allow QMI communication with
the BM806U.
Tested on 4.14 kernel and OpenWRT.
Signed-off-by: Pawel Dembicki <paweldembicki@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/usb/qmi_wwan.c | 1 +
1 file changed, 1 insertion(+)
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1183,6 +1183,7 @@ static const struct usb_device_id produc
{QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */
{QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */
{QMI_FIXED_INTF(0x2001, 0x7e35, 4)}, /* D-Link DWM-222 */
+ {QMI_FIXED_INTF(0x2020, 0x2033, 4)}, /* BroadMobi BM806U */
{QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */
{QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */
{QMI_FIXED_INTF(0x1199, 0x68a2, 8)}, /* Sierra Wireless MC7710 in QMI mode */

View File

@ -14,7 +14,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
--- a/Makefile
+++ b/Makefile
@@ -638,12 +638,12 @@ KBUILD_CFLAGS += $(call cc-disable-warni
@@ -650,12 +650,12 @@ KBUILD_CFLAGS += $(call cc-disable-warni
ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
KBUILD_CFLAGS += $(call cc-option,-Oz,-Os)

View File

@ -1,32 +0,0 @@
From: David Heidelberger <david.heidelberger@ixit.cz>
Subject: uapi/kernel.h: glibc specific inclusion of sysinfo.h
including sysinfo.h from kernel.h makes no sense whatsoever,
but removing it breaks glibc's userspace header,
which includes kernel.h instead of sysinfo.h from their sys/sysinfo.h.
this seems to be a historical mistake.
on musl, including any header that uses kernel.h directly or indirectly
plus sys/sysinfo.h will produce a compile error due to redefinition of
struct sysinfo from sys/sysinfo.h.
so for now, only include it on glibc or when including from kernel
in order not to break their headers.
Signed-off-by: John Spencer <maillist-linux@barfooze.de>
Signed-off-by: David Heidelberger <david.heidelberger@ixit.cz>
Signed-off-by: Jonas Gorski <jogo@openwrt.org>
---
include/uapi/linux/kernel.h | 2 ++
1 file changed, 2 insertions(+)
--- a/include/uapi/linux/kernel.h
+++ b/include/uapi/linux/kernel.h
@@ -2,7 +2,9 @@
#ifndef _UAPI_LINUX_KERNEL_H
#define _UAPI_LINUX_KERNEL_H
+#if defined(__KERNEL__) || defined( __GLIBC__)
#include <linux/sysinfo.h>
+#endif
/*
* 'kernel.h' contains some often-used function prototypes etc

Some files were not shown because too many files have changed in this diff Show More