shortcut-fe: added support for Linux kernel 4.14

coolsnowwolf 2020-07-01 02:56:41 +08:00
parent c0f3364974
commit cdca955c8e
21 changed files with 1481 additions and 534 deletions

View File

@ -0,0 +1,91 @@
include $(TOPDIR)/rules.mk
include $(INCLUDE_DIR)/kernel.mk
PKG_NAME:=fast-classifier
PKG_RELEASE:=1
include $(INCLUDE_DIR)/package.mk
define KernelPackage/$(PKG_NAME)/Default
SECTION:=kernel
CATEGORY:=Kernel modules
SUBMENU:=Network Support
DEPENDS:=+kmod-ipt-conntrack +kmod-shortcut-fe
TITLE:=Kernel driver for FAST Classifier
FILES:=$(PKG_BUILD_DIR)/fast-classifier.ko
KCONFIG:=CONFIG_NF_CONNTRACK_CHAIN_EVENTS=y CONFIG_NF_CONNTRACK_MARK=y
PROVIDES:=$(PKG_NAME)
endef
define KernelPackage/$(PKG_NAME)
$(call KernelPackage/$(PKG_NAME)/Default)
endef
define KernelPackage/$(PKG_NAME)-noload
$(call KernelPackage/$(PKG_NAME)/Default)
endef
define KernelPackage/$(PKG_NAME)/Default/description
FAST Classifier talks to SFE to make decisions about offloading connections
endef
define KernelPackage/$(PKG_NAME)/description
$(call KernelPackage/$(PKG_NAME)/Default/description)
endef
define KernelPackage/$(PKG_NAME)-noload/description
$(call KernelPackage/$(PKG_NAME)/Default/description)
This package does not load $(PKG_NAME) at boot by default
endef
define Package/fast-classifier-example
TITLE:=Example user space program for fast-classifier
DEPENDS:=+libnl +kmod-fast-classifier
endef
define Package/fast-classifier-example/description
Example user space program that communicates with the
fast-classifier kernel module
endef
SFE_MAKE_OPTS:=SFE_SUPPORT_IPV6=y
define Build/Compile/kmod
+$(MAKE) $(PKG_JOBS) -C "$(LINUX_DIR)" $(strip $(SFE_MAKE_OPTS)) \
$(KERNEL_MAKE_FLAGS) \
$(PKG_MAKE_FLAGS) \
SUBDIRS="$(PKG_BUILD_DIR)" \
CONFIG_FAST_CLASSIFIER=m \
EXTRA_CFLAGS="$(EXTRA_CFLAGS)" \
modules
endef
define Build/Compile/example
$(TARGET_CC) -o $(PKG_BUILD_DIR)/userspace_fast_classifier \
-I $(PKG_BUILD_DIR) \
-I$(STAGING_DIR)/usr/include/libnl \
-I$(STAGING_DIR)/usr/include/libnl3 \
-lnl-genl-3 -lnl-3 \
$(PKG_BUILD_DIR)/nl_classifier_test.c
endef
define Build/Compile
$(Build/Compile/kmod)
$(if $(CONFIG_PACKAGE_fast-classifier-example),$(Build/Compile/example))
endef
define Build/InstallDev
$(INSTALL_DIR) $(1)/usr/include
$(CP) $(PKG_BUILD_DIR)/fast-classifier.h $(1)/usr/include/
endef
define Package/fast-classifier-example/install
$(INSTALL_DIR) $(1)/sbin
$(CP) $(PKG_BUILD_DIR)/userspace_fast_classifier $(1)/sbin/
endef
$(eval $(call KernelPackage,$(PKG_NAME)))
$(eval $(call KernelPackage,$(PKG_NAME)-noload))
$(eval $(call BuildPackage,fast-classifier-example))

View File

@ -0,0 +1,10 @@
obj-$(CONFIG_FAST_CLASSIFIER) += fast-classifier.o
ifeq ($(SFE_SUPPORT_IPV6),)
SFE_SUPPORT_IPV6=y
endif
ccflags-$(SFE_SUPPORT_IPV6) += -DSFE_SUPPORT_IPV6
ccflags-y += -I$(obj)/../shortcut-fe
obj ?= .
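The Kbuild fragment above only injects -DSFE_SUPPORT_IPV6 when SFE_SUPPORT_IPV6=y, since Kbuild consumes just ccflags-y. A minimal sketch of how the C side would consume that define (illustrative wrapper; sfe_ipv4_create_rule()/sfe_ipv6_create_rule() are the SFE entry points used elsewhere in this commit):

/* Sketch only: the IPv6 path compiles out when -DSFE_SUPPORT_IPV6 is absent. */
static void example_create_rule(struct sfe_connection_create *sic, bool is_v4)
{
	if (is_v4) {
		sfe_ipv4_create_rule(sic);
		return;
	}
#ifdef SFE_SUPPORT_IPV6
	sfe_ipv6_create_rule(sic);
#endif
}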

View File

@ -3,7 +3,7 @@
* Shortcut forwarding engine connection manager.
* fast-classifier
*
* Copyright (c) 2013-2016 The Linux Foundation. All rights reserved.
* Copyright (c) 2013-2018 The Linux Foundation. All rights reserved.
* Permission to use, copy, modify, and/or distribute this software for
* any purpose with or without fee is hereby granted, provided that the
* above copyright notice and this permission notice appear in all copies.
@ -29,17 +29,16 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <linux/netfilter/xt_dscp.h>
#include <net/genetlink.h>
#include <linux/spinlock.h>
#include <linux/if_bridge.h>
#include <linux/hashtable.h>
#include <net/pkt_sched.h>
#include <linux/version.h>
#include "sfe_backport.h"
#include "sfe.h"
#include "sfe_cm.h"
#include <sfe_backport.h>
#include <sfe.h>
#include <sfe_cm.h>
#include "fast-classifier.h"
typedef enum fast_classifier_exception {
@ -111,14 +110,16 @@ struct fast_classifier {
u32 exceptions[FAST_CL_EXCEPTION_MAX];
};
static struct fast_classifier __fsc;
static struct fast_classifier __sc;
#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0))
static struct nla_policy fast_classifier_genl_policy[FAST_CLASSIFIER_A_MAX + 1] = {
[FAST_CLASSIFIER_A_TUPLE] = {
.type = NLA_UNSPEC,
.len = sizeof(struct fast_classifier_tuple)
},
};
#endif /*KERNEL_VERSION(5, 2, 0)*/
static struct genl_multicast_group fast_classifier_genl_mcgrp[] = {
{
@ -126,14 +127,6 @@ static struct genl_multicast_group fast_classifier_genl_mcgrp[] = {
},
};
static struct genl_family fast_classifier_gnl_family = {
.id = GENL_ID_GENERATE,
.hdrsize = FAST_CLASSIFIER_GENL_HDRSIZE,
.name = FAST_CLASSIFIER_GENL_NAME,
.version = FAST_CLASSIFIER_GENL_VERSION,
.maxattr = FAST_CLASSIFIER_A_MAX,
};
static int fast_classifier_offload_genl_msg(struct sk_buff *skb, struct genl_info *info);
static int fast_classifier_nl_genl_msg_DUMP(struct sk_buff *skb, struct netlink_callback *cb);
@ -141,26 +134,48 @@ static struct genl_ops fast_classifier_gnl_ops[] = {
{
.cmd = FAST_CLASSIFIER_C_OFFLOAD,
.flags = 0,
#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0))
.policy = fast_classifier_genl_policy,
#endif /*KERNEL_VERSION(5, 2, 0)*/
.doit = fast_classifier_offload_genl_msg,
.dumpit = NULL,
},
{
.cmd = FAST_CLASSIFIER_C_OFFLOADED,
.flags = 0,
#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0))
.policy = fast_classifier_genl_policy,
#endif /*KERNEL_VERSION(5, 2, 0)*/
.doit = NULL,
.dumpit = fast_classifier_nl_genl_msg_DUMP,
},
{
.cmd = FAST_CLASSIFIER_C_DONE,
.flags = 0,
#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0))
.policy = fast_classifier_genl_policy,
#endif /*KERNEL_VERSION(5, 2, 0)*/
.doit = NULL,
.dumpit = fast_classifier_nl_genl_msg_DUMP,
},
};
static struct genl_family fast_classifier_gnl_family = {
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
.id = GENL_ID_GENERATE,
#endif /*KERNEL_VERSION(4, 10, 0)*/
.hdrsize = FAST_CLASSIFIER_GENL_HDRSIZE,
.name = FAST_CLASSIFIER_GENL_NAME,
.version = FAST_CLASSIFIER_GENL_VERSION,
.maxattr = FAST_CLASSIFIER_A_MAX,
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
.ops = fast_classifier_gnl_ops,
.n_ops = ARRAY_SIZE(fast_classifier_gnl_ops),
.mcgrps = fast_classifier_genl_mcgrp,
.n_mcgrps = ARRAY_SIZE(fast_classifier_genl_mcgrp),
#endif /*KERNEL_VERSION(4, 10, 0)*/
};
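On kernels >= 4.10 the ops and multicast groups travel inside the family struct itself and genl_register_family() picks them up; userspace then resolves the family by name. A hedged libnl sketch of that lookup (error handling trimmed; FAST_CLASSIFIER_GENL_NAME comes from fast-classifier.h):

#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <fast-classifier.h>

/* Returns the numeric genl family id, or a negative libnl error. */
static int resolve_fast_classifier(void)
{
	struct nl_sock *sock = nl_socket_alloc();
	int family;

	if (!sock)
		return -1;
	if (genl_connect(sock) < 0) {		/* bind to the generic netlink bus */
		nl_socket_free(sock);
		return -1;
	}
	family = genl_ctrl_resolve(sock, FAST_CLASSIFIER_GENL_NAME);
	nl_socket_free(sock);
	return family;
}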
static atomic_t offload_msgs = ATOMIC_INIT(0);
static atomic_t offload_no_match_msgs = ATOMIC_INIT(0);
static atomic_t offloaded_msgs = ATOMIC_INIT(0);
@ -189,7 +204,7 @@ static bool skip_to_bridge_ingress;
*/
static inline void fast_classifier_incr_exceptions(fast_classifier_exception_t except)
{
struct fast_classifier *sc = &__fsc;
struct fast_classifier *sc = &__sc;
spin_lock_bh(&sc->lock);
sc->exceptions[except]++;
@ -202,7 +217,7 @@ static inline void fast_classifier_incr_exceptions(fast_classifier_exception_t e
*
* Returns 1 if the packet is forwarded or 0 if it isn't.
*/
static int fast_classifier_recv(struct sk_buff *skb)
int fast_classifier_recv(struct sk_buff *skb)
{
struct net_device *dev;
struct net_device *master_dev = NULL;
@ -224,22 +239,12 @@ static int fast_classifier_recv(struct sk_buff *skb)
(dev->priv_flags & IFF_BRIDGE_PORT)) {
master_dev = sfe_dev_get_master(dev);
if (!master_dev) {
DEBUG_WARN("master dev is NULL %s\n");
DEBUG_WARN("master dev is NULL %s\n", dev->name);
goto rx_exit;
}
dev = master_dev;
}
#ifdef CONFIG_NET_CLS_ACT
/*
* If ingress Qdisc configured, and packet not processed by ingress Qdisc yet
* We cannot accelerate this packet.
*/
if (dev->ingress_queue && !(skb->tc_verd & TC_NCLS)) {
goto rx_exit;
}
#endif
/*
* We're only interested in IPv4 and IPv6 packets.
*/
@ -311,7 +316,7 @@ rx_exit:
* structure, obtain the hardware address. This means this function also
* works if the neighbours are routers too.
*/
static bool fast_classifier_find_dev_and_mac_addr(sfe_ip_addr_t *addr, struct net_device **dev, u8 *mac_addr, bool is_v4)
static bool fast_classifier_find_dev_and_mac_addr(struct sk_buff *skb, sfe_ip_addr_t *addr, struct net_device **dev, u8 *mac_addr, bool is_v4)
{
struct neighbour *neigh;
struct rtable *rt;
@ -319,12 +324,21 @@ static bool fast_classifier_find_dev_and_mac_addr(sfe_ip_addr_t *addr, struct ne
struct dst_entry *dst;
struct net_device *mac_dev;
/*
* If an skb is provided, use it, since the original code is unable
* to look up routes that are policy routed.
*/
if (unlikely(skb)) {
dst = skb_dst(skb);
goto skip_dst_lookup;
}
/*
* Look up the rtable entry for the IP address then get the hardware
* address from its neighbour structure. This means this works when the
* neighbours are routers too.
*/
if (is_v4) {
if (likely(is_v4)) {
rt = ip_route_output(&init_net, addr->ip, 0, 0, 0);
if (unlikely(IS_ERR(rt))) {
goto ret_fail;
@ -332,7 +346,11 @@ static bool fast_classifier_find_dev_and_mac_addr(sfe_ip_addr_t *addr, struct ne
dst = (struct dst_entry *)rt;
} else {
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0))
rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, NULL, 0);
#else
rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, 0);
#endif /*KERNEL_VERSION(4, 17, 0)*/
if (!rt6) {
goto ret_fail;
}
@ -340,18 +358,23 @@ static bool fast_classifier_find_dev_and_mac_addr(sfe_ip_addr_t *addr, struct ne
dst = (struct dst_entry *)rt6;
}
skip_dst_lookup:
rcu_read_lock();
neigh = dst_neigh_lookup(dst, addr);
neigh = sfe_dst_get_neighbour(dst, addr);
if (unlikely(!neigh)) {
rcu_read_unlock();
dst_release(dst);
if (likely(!skb))
dst_release(dst);
goto ret_fail;
}
if (unlikely(!(neigh->nud_state & NUD_VALID))) {
rcu_read_unlock();
neigh_release(neigh);
dst_release(dst);
if (likely(!skb))
dst_release(dst);
goto ret_fail;
}
@ -359,7 +382,9 @@ static bool fast_classifier_find_dev_and_mac_addr(sfe_ip_addr_t *addr, struct ne
if (!mac_dev) {
rcu_read_unlock();
neigh_release(neigh);
dst_release(dst);
if (likely(!skb))
dst_release(dst);
goto ret_fail;
}
@ -369,12 +394,18 @@ static bool fast_classifier_find_dev_and_mac_addr(sfe_ip_addr_t *addr, struct ne
*dev = mac_dev;
rcu_read_unlock();
neigh_release(neigh);
dst_release(dst);
if (likely(!skb))
dst_release(dst);
return true;
ret_fail:
DEBUG_TRACE("failed to find MAC address for IP: %pIS\n", addr);
if (is_v4) {
DEBUG_TRACE("failed to find MAC address for IP: %pI4\n", addr);
} else {
DEBUG_TRACE("failed to find MAC address for IP: %pI6\n", addr);
}
return false;
}
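A note on the recurring %pIS to %pI4/%pI6 conversions in this commit: %pIS expects a struct sockaddr, while sfe_ip_addr_t is a bare union of raw addresses, so the per-family specifiers are the correct ones. A tiny illustration (field names as used elsewhere in this diff; the address is arbitrary):

sfe_ip_addr_t a = { .ip = htonl(0xc0a80001) };	/* 192.168.0.1 */

DEBUG_TRACE("v4: %pI4\n", &a.ip);	/* %pI4 dereferences a __be32 */
DEBUG_TRACE("v6: %pI6\n", &a.ip6);	/* %pI6 dereferences 16 raw address bytes */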
@ -439,16 +470,16 @@ static int fast_classifier_update_protocol(struct sfe_connection_create *p_sic,
* state can not be SYN_SENT, SYN_RECV because connection is assured
* Not managed states: FIN_WAIT, CLOSE_WAIT, LAST_ACK, TIME_WAIT, CLOSE.
*/
spin_lock_bh(&ct->lock);
spin_lock(&ct->lock);
if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) {
spin_unlock_bh(&ct->lock);
spin_unlock(&ct->lock);
fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_TCP_NOT_ESTABLISHED);
DEBUG_TRACE("connection in termination state: %#x, s: %pI4:%u, d: %pI4:%u\n",
ct->proto.tcp.state, &p_sic->src_ip, ntohs(p_sic->src_port),
&p_sic->dest_ip, ntohs(p_sic->dest_port));
return 0;
}
spin_unlock_bh(&ct->lock);
spin_unlock(&ct->lock);
break;
case IPPROTO_UDP:
@ -511,12 +542,17 @@ static void fast_classifier_send_genl_msg(int msg, struct fast_classifier_tuple
return;
}
genlmsg_end(skb, msg_head);
#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 19, 0))
rc = genlmsg_end(skb, msg_head);
if (rc < 0) {
genlmsg_cancel(skb, msg_head);
nlmsg_free(skb);
return;
}
#else
genlmsg_end(skb, msg_head);
#endif
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
rc = genlmsg_multicast(&fast_classifier_gnl_family, skb, 0, 0, GFP_ATOMIC);
@ -525,14 +561,18 @@ static void fast_classifier_send_genl_msg(int msg, struct fast_classifier_tuple
#endif
switch (msg) {
case FAST_CLASSIFIER_C_OFFLOADED:
atomic_inc(&offloaded_msgs);
if (rc != 0)
if (rc == 0) {
atomic_inc(&offloaded_msgs);
} else {
atomic_inc(&offloaded_fail_msgs);
}
break;
case FAST_CLASSIFIER_C_DONE:
atomic_inc(&done_msgs);
if (rc != 0)
if (rc == 0) {
atomic_inc(&done_msgs);
} else {
atomic_inc(&done_fail_msgs);
}
break;
default:
DEBUG_ERROR("fast-classifer: Unknown message type sent!\n");
@ -540,7 +580,11 @@ static void fast_classifier_send_genl_msg(int msg, struct fast_classifier_tuple
}
DEBUG_TRACE("Notify NL message %d ", msg);
DEBUG_TRACE("sip=%pIS dip=%pIS ", &fc_msg->src_saddr, &fc_msg->dst_saddr);
if (fc_msg->ethertype == AF_INET) {
DEBUG_TRACE("sip=%pI4 dip=%pI4 ", &fc_msg->src_saddr, &fc_msg->dst_saddr);
} else {
DEBUG_TRACE("sip=%pI6 dip=%pI6 ", &fc_msg->src_saddr, &fc_msg->dst_saddr);
}
DEBUG_TRACE("protocol=%d sport=%d dport=%d smac=%pM dmac=%pM\n",
fc_msg->proto, fc_msg->sport, fc_msg->dport, fc_msg->smac, fc_msg->dmac);
}
@ -558,10 +602,13 @@ fast_classifier_find_conn(sfe_ip_addr_t *saddr, sfe_ip_addr_t *daddr,
struct sfe_connection_create *p_sic;
struct sfe_connection *conn;
u32 key;
#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0))
struct hlist_node *node;
#endif
key = fc_conn_hash(saddr, daddr, sport, dport, is_v4);
sfe_hash_for_each_possible(fc_conn_ht, conn, hl, key) {
sfe_hash_for_each_possible(fc_conn_ht, conn, node, hl, key) {
if (conn->is_v4 != is_v4) {
continue;
}
@ -595,10 +642,13 @@ fast_classifier_sb_find_conn(sfe_ip_addr_t *saddr, sfe_ip_addr_t *daddr,
struct sfe_connection_create *p_sic;
struct sfe_connection *conn;
u32 key;
#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0))
struct hlist_node *node;
#endif
key = fc_conn_hash(saddr, daddr, sport, dport, is_v4);
sfe_hash_for_each_possible(fc_conn_ht, conn, hl, key) {
sfe_hash_for_each_possible(fc_conn_ht, conn, node, hl, key) {
if (conn->is_v4 != is_v4) {
continue;
}
@ -619,7 +669,7 @@ fast_classifier_sb_find_conn(sfe_ip_addr_t *saddr, sfe_ip_addr_t *daddr,
*/
key = fc_conn_hash(daddr, saddr, dport, sport, is_v4);
sfe_hash_for_each_possible(fc_conn_ht, conn, hl, key) {
sfe_hash_for_each_possible(fc_conn_ht, conn, node, hl, key) {
if (conn->is_v4 != is_v4) {
continue;
}
@ -667,8 +717,13 @@ fast_classifier_add_conn(struct sfe_connection *conn)
DEBUG_TRACE(" -> adding item to sfe_connections, new size: %d\n", sfe_connections_size);
DEBUG_TRACE("new offloadable: key: %u proto: %d src_ip: %pIS dst_ip: %pIS, src_port: %d, dst_port: %d\n",
key, sic->protocol, &(sic->src_ip), &(sic->dest_ip), sic->src_port, sic->dest_port);
if (conn->is_v4) {
DEBUG_TRACE("new offloadable: key: %u proto: %d src_ip: %pI4 dst_ip: %pI4, src_port: %d, dst_port: %d\n",
key, sic->protocol, &(sic->src_ip), &(sic->dest_ip), sic->src_port, sic->dest_port);
} else {
DEBUG_TRACE("new offloadable: key: %u proto: %d src_ip: %pI6 dst_ip: %pI6, src_port: %d, dst_port: %d\n",
key, sic->protocol, &(sic->src_ip), &(sic->dest_ip), sic->src_port, sic->dest_port);
}
return conn;
}
@ -687,15 +742,27 @@ fast_classifier_offload_genl_msg(struct sk_buff *skb, struct genl_info *info)
na = info->attrs[FAST_CLASSIFIER_A_TUPLE];
fc_msg = nla_data(na);
DEBUG_TRACE("want to offload: %d-%d, %pIS, %pIS, %d, %d SMAC=%pM DMAC=%pM\n",
fc_msg->ethertype,
fc_msg->proto,
&fc_msg->src_saddr,
&fc_msg->dst_saddr,
fc_msg->sport,
fc_msg->dport,
fc_msg->smac,
fc_msg->dmac);
if (fc_msg->ethertype == AF_INET) {
DEBUG_TRACE("want to offload: %d-%d, %pI4, %pI4, %d, %d SMAC=%pM DMAC=%pM\n",
fc_msg->ethertype,
fc_msg->proto,
&fc_msg->src_saddr,
&fc_msg->dst_saddr,
fc_msg->sport,
fc_msg->dport,
fc_msg->smac,
fc_msg->dmac);
} else {
DEBUG_TRACE("want to offload: %d-%d, %pI6, %pI6, %d, %d SMAC=%pM DMAC=%pM\n",
fc_msg->ethertype,
fc_msg->proto,
&fc_msg->src_saddr,
&fc_msg->dst_saddr,
fc_msg->sport,
fc_msg->dport,
fc_msg->smac,
fc_msg->dmac);
}
spin_lock_bh(&sfe_connections_lock);
conn = fast_classifier_sb_find_conn((sfe_ip_addr_t *)&fc_msg->src_saddr,
@ -747,12 +814,14 @@ static unsigned int fast_classifier_post_routing(struct sk_buff *skb, bool is_v4
struct net_device *dev;
struct net_device *src_dev;
struct net_device *dest_dev;
struct net_device *src_dev_tmp;
struct net_device *dest_dev_tmp;
struct net_device *src_br_dev = NULL;
struct net_device *dest_br_dev = NULL;
struct nf_conntrack_tuple orig_tuple;
struct nf_conntrack_tuple reply_tuple;
struct sfe_connection *conn;
SFE_NF_CONN_ACCT(acct);
struct sk_buff *tmp_skb = NULL;
/*
* Don't process broadcast or multicast packets.
@ -790,6 +859,7 @@ static unsigned int fast_classifier_post_routing(struct sk_buff *skb, bool is_v4
return NF_ACCEPT;
}
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0))
/*
* Don't process untracked connections.
*/
@ -798,6 +868,7 @@ static unsigned int fast_classifier_post_routing(struct sk_buff *skb, bool is_v4
DEBUG_TRACE("untracked connection\n");
return NF_ACCEPT;
}
#endif /*KERNEL_VERSION(4, 12, 0)*/
/*
* Unconfirmed connection may be dropped by Linux at the final step,
@ -818,21 +889,6 @@ static unsigned int fast_classifier_post_routing(struct sk_buff *skb, bool is_v4
return NF_ACCEPT;
}
/*
* Check if the acceleration of a flow could be rejected quickly.
*/
acct = nf_conn_acct_find(ct);
if (acct) {
long long packets = atomic64_read(&SFE_ACCT_COUNTER(acct)[CTINFO2DIR(ctinfo)].packets);
if ((packets > 0xff) && (packets & 0xff)) {
/*
* Connection hits slow path at least 256 times, so it must be not able to accelerate.
* But we also give it a chance to walk through ECM every 256 packets
*/
return NF_ACCEPT;
}
}
memset(&sic, 0, sizeof(sic));
/*
@ -850,7 +906,7 @@ static unsigned int fast_classifier_post_routing(struct sk_buff *skb, bool is_v4
/*
* Get addressing information, non-NAT first
*/
if (is_v4) {
if (likely(is_v4)) {
u32 dscp;
sic.src_ip.ip = (__be32)orig_tuple.src.u3.ip;
@ -926,6 +982,21 @@ static unsigned int fast_classifier_post_routing(struct sk_buff *skb, bool is_v4
sic.dest_port = orig_tuple.dst.u.udp.port;
sic.src_port_xlate = reply_tuple.dst.u.udp.port;
sic.dest_port_xlate = reply_tuple.src.u.udp.port;
/*
* Somehow, SFE is not playing nice with IPSec traffic.
* Do not accelerate for now.
*/
if (ntohs(sic.dest_port) == 4500 || ntohs(sic.dest_port) == 500) {
if (likely(is_v4))
DEBUG_TRACE("quarkysg:: IPsec bypass: %pI4:%d(%pI4:%d) to %pI4:%d(%pI4:%d)\n",
&sic.src_ip.ip, ntohs(sic.src_port), &sic.src_ip_xlate.ip, ntohs(sic.src_port_xlate),
&sic.dest_ip.ip, ntohs(sic.dest_port), &sic.dest_ip_xlate.ip, ntohs(sic.dest_port_xlate));
else
DEBUG_TRACE("quarkysg:: IPsec bypass: %pI6:%d to %pI6:%d\n",
&sic.src_ip.ip6, ntohs(sic.src_port), &sic.dest_ip.ip6, ntohs(sic.dest_port));
return NF_ACCEPT;
}
break;
default:
@ -948,8 +1019,13 @@ static unsigned int fast_classifier_post_routing(struct sk_buff *skb, bool is_v4
sic.flags |= SFE_CREATE_FLAG_REMARK_PRIORITY;
}
DEBUG_TRACE("POST_ROUTE: checking new connection: %d src_ip: %pIS dst_ip: %pIS, src_port: %d, dst_port: %d\n",
sic.protocol, &sic.src_ip, &sic.dest_ip, sic.src_port, sic.dest_port);
if (is_v4) {
DEBUG_TRACE("POST_ROUTE: checking new connection: %d src_ip: %pI4 dst_ip: %pI4, src_port: %d, dst_port: %d\n",
sic.protocol, &sic.src_ip, &sic.dest_ip, sic.src_port, sic.dest_port);
} else {
DEBUG_TRACE("POST_ROUTE: checking new connection: %d src_ip: %pI6 dst_ip: %pI6, src_port: %d, dst_port: %d\n",
sic.protocol, &sic.src_ip, &sic.dest_ip, sic.src_port, sic.dest_port);
}
/*
* If we already have this connection in our list, skip it
@ -1018,32 +1094,35 @@ static unsigned int fast_classifier_post_routing(struct sk_buff *skb, bool is_v4
* Get the net device and MAC addresses that correspond to the various source and
* destination host addresses.
*/
if (!fast_classifier_find_dev_and_mac_addr(&sic.src_ip, &src_dev, sic.src_mac, is_v4)) {
if (!fast_classifier_find_dev_and_mac_addr(NULL, &sic.src_ip, &src_dev_tmp, sic.src_mac, is_v4)) {
fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_SRC_DEV);
return NF_ACCEPT;
}
src_dev = src_dev_tmp;
if (!fast_classifier_find_dev_and_mac_addr(&sic.src_ip_xlate, &dev, sic.src_mac_xlate, is_v4)) {
if (!fast_classifier_find_dev_and_mac_addr(NULL, &sic.src_ip_xlate, &dev, sic.src_mac_xlate, is_v4)) {
fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_SRC_XLATE_DEV);
goto done1;
}
dev_put(dev);
if (!fast_classifier_find_dev_and_mac_addr(&sic.dest_ip, &dev, sic.dest_mac, is_v4)) {
if (unlikely(!is_v4))
tmp_skb = skb;
if (!fast_classifier_find_dev_and_mac_addr(tmp_skb, &sic.dest_ip, &dev, sic.dest_mac, is_v4)) {
fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_DEST_DEV);
goto done1;
}
dev_put(dev);
if (!fast_classifier_find_dev_and_mac_addr(&sic.dest_ip_xlate, &dest_dev, sic.dest_mac_xlate, is_v4)) {
if (!fast_classifier_find_dev_and_mac_addr(skb, &sic.dest_ip_xlate, &dest_dev_tmp, sic.dest_mac_xlate, is_v4)) {
fast_classifier_incr_exceptions(FAST_CL_EXCEPTION_NO_DEST_XLATE_DEV);
goto done1;
}
dest_dev = dest_dev_tmp;
/*
* Our devices may actually be part of a bridge interface. If that's
* Our devices may actually be part of a bridge interface. If that's
* the case then find the bridge interface instead.
*/
if (src_dev->priv_flags & IFF_BRIDGE_PORT) {
@ -1053,7 +1132,6 @@ static unsigned int fast_classifier_post_routing(struct sk_buff *skb, bool is_v4
DEBUG_TRACE("no bridge found for: %s\n", src_dev->name);
goto done2;
}
src_dev = src_br_dev;
}
@ -1064,7 +1142,6 @@ static unsigned int fast_classifier_post_routing(struct sk_buff *skb, bool is_v4
DEBUG_TRACE("no bridge found for: %s\n", dest_dev->name);
goto done3;
}
dest_dev = dest_br_dev;
}
@ -1082,7 +1159,7 @@ static unsigned int fast_classifier_post_routing(struct sk_buff *skb, bool is_v4
conn = kmalloc(sizeof(*conn), GFP_ATOMIC);
if (!conn) {
printk(KERN_CRIT "ERROR: no memory for sfe\n");
goto done3;
goto done4;
}
conn->hits = 0;
conn->offload_permit = 0;
@ -1096,7 +1173,7 @@ static unsigned int fast_classifier_post_routing(struct sk_buff *skb, bool is_v4
if (!p_sic) {
printk(KERN_CRIT "ERROR: no memory for sfe\n");
kfree(conn);
goto done3;
goto done4;
}
memcpy(p_sic, &sic, sizeof(sic));
@ -1111,20 +1188,18 @@ static unsigned int fast_classifier_post_routing(struct sk_buff *skb, bool is_v4
/*
* If we had bridge ports then release them too.
*/
done4:
if (dest_br_dev) {
dev_put(dest_br_dev);
}
done3:
if (src_br_dev) {
dev_put(src_br_dev);
}
done2:
dev_put(dest_dev);
dev_put(dest_dev_tmp);
done1:
dev_put(src_dev);
dev_put(src_dev_tmp);
return NF_ACCEPT;
}
@ -1198,6 +1273,7 @@ static int fast_classifier_conntrack_event(unsigned int events, struct nf_ct_eve
return NOTIFY_DONE;
}
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0))
/*
* If this is an untracked connection then we can't have any state either.
*/
@ -1205,6 +1281,7 @@ static int fast_classifier_conntrack_event(unsigned int events, struct nf_ct_eve
DEBUG_TRACE("ignoring untracked conn\n");
return NOTIFY_DONE;
}
#endif /*KERNEL_VERSION(4, 12, 0)*/
orig_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
sid.protocol = (s32)orig_tuple.dst.protonum;
@ -1267,8 +1344,13 @@ static int fast_classifier_conntrack_event(unsigned int events, struct nf_ct_eve
return NOTIFY_DONE;
}
DEBUG_TRACE("Try to clean up: proto: %d src_ip: %pIS dst_ip: %pIS, src_port: %d, dst_port: %d\n",
sid.protocol, &sid.src_ip, &sid.dest_ip, sid.src_port, sid.dest_port);
if (is_v4) {
DEBUG_TRACE("Try to clean up: proto: %d src_ip: %pI4 dst_ip: %pI4, src_port: %d, dst_port: %d\n",
sid.protocol, &sid.src_ip, &sid.dest_ip, ntohs(sid.src_port), ntohs(sid.dest_port));
} else {
DEBUG_TRACE("Try to clean up: proto: %d src_ip: %pI6 dst_ip: %pI6, src_port: %d, dst_port: %d\n",
sid.protocol, &sid.src_ip, &sid.dest_ip, ntohs(sid.src_port), ntohs(sid.dest_port));
}
spin_lock_bh(&sfe_connections_lock);
@ -1416,7 +1498,9 @@ static void fast_classifier_sync_rule(struct sfe_connection_sync *sis)
}
ct = nf_ct_tuplehash_to_ctrack(h);
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0))
NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
#endif /*KERNEL_VERSION(4, 9, 0)*/
/*
* Only update if this is not a fixed timeout
@ -1427,7 +1511,7 @@ static void fast_classifier_sync_rule(struct sfe_connection_sync *sis)
ct->timeout += sis->delta_jiffies;
#else
ct->timeout.expires += sis->delta_jiffies;
#endif
#endif /*KERNEL_VERSION(4, 9, 0)*/
spin_unlock_bh(&ct->lock);
}
@ -1456,7 +1540,7 @@ static void fast_classifier_sync_rule(struct sfe_connection_sync *sis)
if (ct->proto.tcp.seen[1].td_maxwin < sis->dest_td_max_window) {
ct->proto.tcp.seen[1].td_maxwin = sis->dest_td_max_window;
}
if ((s32)(ct->proto.tcp.seen[1].td_end - sis->dest_td_end) < 0) {
if ((s32)(ct->proto.tcp.seen[1].td_end - sis->dest_td_end) < 0) {
ct->proto.tcp.seen[1].td_end = sis->dest_td_end;
}
if ((s32)(ct->proto.tcp.seen[1].td_maxend - sis->dest_td_max_end) < 0) {
@ -1464,42 +1548,6 @@ static void fast_classifier_sync_rule(struct sfe_connection_sync *sis)
}
spin_unlock_bh(&ct->lock);
break;
case IPPROTO_UDP:
/*
* In Linux connection track, UDP flow has two timeout values:
* /proc/sys/net/netfilter/nf_conntrack_udp_timeout:
* this is for uni-direction UDP flow, normally its value is 60 seconds
* /proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream:
* this is for bi-direction UDP flow, normally its value is 180 seconds
*
* Linux will update timer of UDP flow to stream timeout once it seen packets
* in reply direction. But if flow is accelerated by NSS or SFE, Linux won't
* see any packets. So we have to do the same thing in our stats sync message.
*/
if (!test_bit(IPS_ASSURED_BIT, &ct->status) && acct) {
u_int64_t reply_pkts = atomic64_read(&SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].packets);
if (reply_pkts != 0) {
struct nf_conntrack_l4proto *l4proto;
unsigned int *timeouts;
set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
set_bit(IPS_ASSURED_BIT, &ct->status);
l4proto = __nf_ct_l4proto_find((sis->is_v6 ? AF_INET6 : AF_INET), IPPROTO_UDP);
timeouts = nf_ct_timeout_lookup(&init_net, ct, l4proto);
spin_lock_bh(&ct->lock);
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0))
ct->timeout = nfct_time_stamp + timeouts[UDP_CT_REPLIED];
#else
ct->timeout.expires = jiffies + timeouts[UDP_CT_REPLIED];
#endif
spin_unlock_bh(&ct->lock);
}
}
break;
}
/*
@ -1590,10 +1638,13 @@ static ssize_t fast_classifier_get_debug_info(struct device *dev,
size_t len = 0;
struct sfe_connection *conn;
u32 i;
#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0))
struct hlist_node *node;
#endif
spin_lock_bh(&sfe_connections_lock);
len += scnprintf(buf, PAGE_SIZE - len, "size=%d offload=%d offload_no_match=%d"
" offloaded=%d done=%d offl_dbg_msg_fail=%d done_dbg_msg_fail=%d\n",
" offloaded=%d done=%d offloaded_fail=%d done_fail=%d\n",
sfe_connections_size,
atomic_read(&offload_msgs),
atomic_read(&offload_no_match_msgs),
@ -1601,16 +1652,16 @@ static ssize_t fast_classifier_get_debug_info(struct device *dev,
atomic_read(&done_msgs),
atomic_read(&offloaded_fail_msgs),
atomic_read(&done_fail_msgs));
sfe_hash_for_each(fc_conn_ht, i, conn, hl) {
sfe_hash_for_each(fc_conn_ht, i, node, conn, hl) {
len += scnprintf(buf + len, PAGE_SIZE - len,
(conn->is_v4 ? "o=%d, p=%d [%pM]:%pI4:%u %pI4:%u:[%pM] m=%08x h=%d\n" : "o=%d, p=%d [%pM]:%pI6:%u %pI6:%u:[%pM] m=%08x h=%d\n"),
conn->offloaded,
conn->sic->protocol,
conn->sic->src_mac,
&conn->sic->src_ip,
conn->sic->src_port,
ntohs(conn->sic->src_port),
&conn->sic->dest_ip,
conn->sic->dest_port,
ntohs(conn->sic->dest_port),
conn->sic->dest_mac_xlate,
conn->sic->mark,
conn->hits);
@ -1658,7 +1709,7 @@ static ssize_t fast_classifier_get_exceptions(struct device *dev,
char *buf)
{
int idx, len;
struct fast_classifier *sc = &__fsc;
struct fast_classifier *sc = &__sc;
spin_lock_bh(&sc->lock);
for (len = 0, idx = 0; idx < FAST_CL_EXCEPTION_MAX; idx++) {
@ -1674,23 +1725,24 @@ static ssize_t fast_classifier_get_exceptions(struct device *dev,
/*
* sysfs attributes.
*/
static const struct device_attribute fast_classifier_attrs[] = {
__ATTR(offload_at_pkts, S_IWUSR | S_IRUGO, fast_classifier_get_offload_at_pkts, fast_classifier_set_offload_at_pkts),
__ATTR(debug_info, S_IRUGO, fast_classifier_get_debug_info, NULL),
__ATTR(skip_to_bridge_ingress, S_IWUSR | S_IRUGO, fast_classifier_get_skip_bridge_ingress, fast_classifier_set_skip_bridge_ingress),
__ATTR(exceptions, S_IRUGO, fast_classifier_get_exceptions, NULL),
};
static const struct device_attribute fast_classifier_offload_at_pkts_attr =
__ATTR(offload_at_pkts, S_IWUSR | S_IRUGO, fast_classifier_get_offload_at_pkts, fast_classifier_set_offload_at_pkts);
static const struct device_attribute fast_classifier_debug_info_attr =
__ATTR(debug_info, S_IRUGO, fast_classifier_get_debug_info, NULL);
static const struct device_attribute fast_classifier_skip_bridge_ingress =
__ATTR(skip_to_bridge_ingress, S_IWUSR | S_IRUGO, fast_classifier_get_skip_bridge_ingress, fast_classifier_set_skip_bridge_ingress);
static const struct device_attribute fast_classifier_exceptions_attr =
__ATTR(exceptions, S_IRUGO, fast_classifier_get_exceptions, NULL);
/*
* fast_classifier_init()
*/
static int __init fast_classifier_init(void)
{
struct fast_classifier *sc = &__fsc;
struct fast_classifier *sc = &__sc;
int result = -1;
size_t i, j;
printk(KERN_ALERT "fast-classifier: starting up\n");
printk(KERN_ALERT "fast-classifier (PBR safe v2.1.4a): starting up\n");
DEBUG_INFO("SFE CM init\n");
hash_init(fc_conn_ht);
@ -1704,13 +1756,34 @@ static int __init fast_classifier_init(void)
goto exit1;
}
for (i = 0; i < ARRAY_SIZE(fast_classifier_attrs); i++) {
result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_attrs[i].attr);
if (result) {
DEBUG_ERROR("failed to register %s : %d\n",
fast_classifier_attrs[i].attr.name, result);
goto exit2;
}
result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr);
if (result) {
DEBUG_ERROR("failed to register offload at pkgs: %d\n", result);
goto exit2;
}
result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr);
if (result) {
DEBUG_ERROR("failed to register debug dev: %d\n", result);
sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr);
goto exit2;
}
result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_skip_bridge_ingress.attr);
if (result) {
DEBUG_ERROR("failed to register skip bridge on ingress: %d\n", result);
sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr);
sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr);
goto exit2;
}
result = sysfs_create_file(sc->sys_fast_classifier, &fast_classifier_exceptions_attr.attr);
if (result) {
DEBUG_ERROR("failed to register exceptions file: %d\n", result);
sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr);
sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr);
sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_skip_bridge_ingress.attr);
goto exit2;
}
sc->dev_notifier.notifier_call = fast_classifier_device_event;
@ -1728,7 +1801,7 @@ static int __init fast_classifier_init(void)
/*
* Register our netfilter hooks.
*/
result = nf_register_hooks(fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing));
result = nf_register_net_hooks(&init_net, fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing));
if (result < 0) {
DEBUG_ERROR("can't register nf post routing hook: %d\n", result);
goto exit3;
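nf_register_hooks() was removed in kernel 4.13, which is why the commit moves to the per-network-namespace variant here. A self-contained sketch of the 4.13+ pattern (hook function and priority are illustrative, not from this commit):

static unsigned int example_post_routing_hook(void *priv,
					      struct sk_buff *skb,
					      const struct nf_hook_state *state)
{
	return NF_ACCEPT;	/* inspect/accelerate the flow here */
}

static struct nf_hook_ops example_ops[] __read_mostly = {
	{
		.hook = example_post_routing_hook,
		.pf = NFPROTO_IPV4,
		.hooknum = NF_INET_POST_ROUTING,
		.priority = NF_IP_PRI_NAT_SRC + 1,
	},
};

/* module init: */
int ret = nf_register_net_hooks(&init_net, example_ops, ARRAY_SIZE(example_ops));
/* module exit: */
nf_unregister_net_hooks(&init_net, example_ops, ARRAY_SIZE(example_ops));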
@ -1737,22 +1810,21 @@ static int __init fast_classifier_init(void)
#ifdef CONFIG_NF_CONNTRACK_EVENTS
/*
* Register a notifier hook to get fast notifications of expired connections.
* Note: when CONFIG_NF_CONNTRACK_CHAIN_EVENTS is enabled,
* nf_conntrack_register_notifier() always returns 0.
*/
#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
(void)nf_conntrack_register_notifier(&init_net, &fast_classifier_conntrack_notifier);
#else
result = nf_conntrack_register_notifier(&init_net, &fast_classifier_conntrack_notifier);
if (result < 0) {
DEBUG_ERROR("can't register nf notifier hook: %d\n", result);
goto exit4;
}
#endif
#endif
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
result = genl_register_family(&fast_classifier_gnl_family);
if (result) {
DEBUG_ERROR("failed to register genl family: %d\n", result);
goto exit5;
}
#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
result = genl_register_family_with_ops_groups(&fast_classifier_gnl_family,
fast_classifier_gnl_ops,
fast_classifier_genl_mcgrp);
@ -1788,8 +1860,8 @@ static int __init fast_classifier_init(void)
/*
* Hook the receive path in the network stack.
*/
BUG_ON(fast_nat_recv);
RCU_INIT_POINTER(fast_nat_recv, fast_classifier_recv);
BUG_ON(athrs_fast_nat_recv);
RCU_INIT_POINTER(athrs_fast_nat_recv, fast_classifier_recv);
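athrs_fast_nat_recv is the receive-path hook pointer exported by the Lean/LEDE kernel patch (older SFE trees exported fast_nat_recv, hence this rename). A hedged sketch of how the patched net/core/dev.c side typically consumes the pointer (assumption based on the usual SFE receive-path patch, which is not shown in this commit):

int (*fast_recv)(struct sk_buff *skb);

rcu_read_lock();
fast_recv = rcu_dereference(athrs_fast_nat_recv);
if (fast_recv && fast_recv(skb)) {
	rcu_read_unlock();
	return NET_RX_SUCCESS;	/* consumed by the SFE fast path */
}
rcu_read_unlock();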
/*
* Hook the shortcut sync callback.
@ -1809,17 +1881,18 @@ exit5:
exit4:
#endif
nf_unregister_hooks(fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing));
nf_unregister_net_hooks(&init_net, fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing));
exit3:
unregister_inetaddr_notifier(&sc->inet_notifier);
unregister_inet6addr_notifier(&sc->inet6_notifier);
unregister_netdevice_notifier(&sc->dev_notifier);
sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_offload_at_pkts_attr.attr);
sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_debug_info_attr.attr);
sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_skip_bridge_ingress.attr);
sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_exceptions_attr.attr);
exit2:
for (j = 0; j < i; j++) {
sysfs_remove_file(sc->sys_fast_classifier, &fast_classifier_attrs[j].attr);
}
kobject_put(sc->sys_fast_classifier);
exit1:
@ -1831,7 +1904,7 @@ exit1:
*/
static void __exit fast_classifier_exit(void)
{
struct fast_classifier *sc = &__fsc;
struct fast_classifier *sc = &__sc;
int result = -1;
DEBUG_INFO("SFE CM exit\n");
@ -1846,7 +1919,7 @@ static void __exit fast_classifier_exit(void)
/*
* Unregister our receive callback.
*/
RCU_INIT_POINTER(fast_nat_recv, NULL);
RCU_INIT_POINTER(athrs_fast_nat_recv, NULL);
/*
* Wait for all callbacks to complete.
@ -1868,14 +1941,14 @@ static void __exit fast_classifier_exit(void)
result = genl_unregister_family(&fast_classifier_gnl_family);
if (result != 0) {
printk(KERN_CRIT "Unable to unreigster genl_family\n");
printk(KERN_CRIT "Unable to unregister genl_family\n");
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
nf_conntrack_unregister_notifier(&init_net, &fast_classifier_conntrack_notifier);
#endif
nf_unregister_hooks(fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing));
nf_unregister_net_hooks(&init_net, fast_classifier_ops_post_routing, ARRAY_SIZE(fast_classifier_ops_post_routing));
unregister_inet6addr_notifier(&sc->inet6_notifier);
unregister_inetaddr_notifier(&sc->inet_notifier);

View File

@ -18,7 +18,7 @@
#include <stdio.h>
#include <arpa/inet.h>
#include "fast-classifier.h"
#include <fast-classifier.h>
static struct nl_sock *sock;
static struct nl_sock *sock_event;

View File

@ -6,10 +6,10 @@
include $(TOPDIR)/rules.mk
LUCI_TITLE:=LuCI support for Flow Offload
LUCI_DEPENDS:=+kmod-ipt-offload +pdnsd-alt +kmod-tcp-bbr @!LINUX_4_9
LUCI_DEPENDS:=+kmod-ipt-offload +pdnsd-alt +kmod-tcp-bbr @LINUX_4_19
LUCI_PKGARCH:=all
PKG_VERSION:=1.0
PKG_RELEASE:=15
PKG_RELEASE:=16
include $(TOPDIR)/feeds/luci/luci.mk

View File

@ -6,7 +6,7 @@
include $(TOPDIR)/rules.mk
LUCI_TITLE:=LuCI support for Turbo ACC (SFE)
LUCI_DEPENDS:=+kmod-fast-classifier +pdnsd-alt +kmod-tcp-bbr @LINUX_4_9
LUCI_DEPENDS:=+kmod-fast-classifier +pdnsd-alt +kmod-tcp-bbr @!LINUX_4_19
LUCI_PKGARCH:=all
PKG_VERSION:=1.0
PKG_RELEASE:=12

View File

@ -15,7 +15,7 @@ include $(TOPDIR)/rules.mk
include $(INCLUDE_DIR)/kernel.mk
PKG_NAME:=shortcut-fe
PKG_RELEASE:=9
PKG_RELEASE:=1
include $(INCLUDE_DIR)/package.mk
@ -23,17 +23,27 @@ define KernelPackage/shortcut-fe
SECTION:=kernel
CATEGORY:=Kernel modules
SUBMENU:=Network Support
DEPENDS:=@IPV6 @LINUX_4_9
DEPENDS:=
TITLE:=Kernel driver for SFE
FILES:=$(PKG_BUILD_DIR)/shortcut-fe.ko $(PKG_BUILD_DIR)/shortcut-fe-ipv6.ko
KCONFIG:=CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_SHORTCUT_FE=y
AUTOLOAD:=$(call AutoProbe,shortcut-fe shortcut-fe-ipv6)
KCONFIG:=CONFIG_NF_CONNTRACK_EVENTS=y \
CONFIG_NF_CONNTRACK_TIMEOUT=y \
CONFIG_SHORTCUT_FE=y \
CONFIG_XFRM=y
AUTOLOAD:=$(call AutoLoad,09,shortcut-fe shortcut-fe-ipv6)
endef
define KernelPackage/shortcut-fe/Description
Shortcut is an in-Linux-kernel IP packet forwarding engine.
endef
define KernelPackage/shortcut-fe/install
$(INSTALL_DIR) $(1)/etc/init.d
$(INSTALL_BIN) ./files/etc/init.d/shortcut-fe $(1)/etc/init.d
$(INSTALL_DIR) $(1)/usr/bin
$(INSTALL_BIN) ./files/usr/bin/sfe_dump $(1)/usr/bin
endef
define KernelPackage/shortcut-fe-cm
SECTION:=kernel
CATEGORY:=Kernel modules
@ -41,81 +51,31 @@ define KernelPackage/shortcut-fe-cm
DEPENDS:=+kmod-ipt-conntrack +kmod-shortcut-fe
TITLE:=Kernel driver for SFE
FILES:=$(PKG_BUILD_DIR)/shortcut-fe-cm.ko
KCONFIG:=CONFIG_NF_CONNTRACK_CHAIN_EVENTS=y CONFIG_NF_CONNTRACK_MARK=y
AUTOLOAD:=$(call AutoProbe,shortcut-fe-cm)
KCONFIG:=CONFIG_NF_CONNTRACK_CHAIN_EVENTS=y
endef
define KernelPackage/shortcut-fe-cm/Description
Simple connection manager for the Shortcut forwarding engine.
endef
define KernelPackage/fast-classifier
SECTION:=kernel
CATEGORY:=Kernel modules
SUBMENU:=Network Support
DEPENDS:=+kmod-ipt-conntrack +kmod-shortcut-fe
TITLE:=Kernel driver for FAST Classifier
FILES:=$(PKG_BUILD_DIR)/fast-classifier.ko
KCONFIG:=CONFIG_NF_CONNTRACK_CHAIN_EVENTS=y CONFIG_NF_CONNTRACK_MARK=y
AUTOLOAD:=$(call AutoLoad,z,fast-classifier)
PROVIDES:=$(PKG_NAME)
endef
define KernelPackage/fast-classifier/description
FAST Classifier connection manager for Shortcut forwarding engine.
It talks to SFE to make decisions about offloading connections.
endef
define Package/fast-classifier-example
TITLE:=Example user space program for fast-classifier
DEPENDS:=+libnl +kmod-fast-classifier
endef
define Package/fast-classifier-example/description
Example user space program that communicates with fast
classifier kernel module
endef
MAKE_OPTS:= \
ARCH="$(LINUX_KARCH)" \
CROSS_COMPILE="$(TARGET_CROSS)" \
SUBDIRS="$(PKG_BUILD_DIR)" \
EXTRA_CFLAGS="$(EXTRA_CFLAGS)"
EXTRA_CFLAGS+=-DSFE_SUPPORT_IPV6
define Build/Compile
$(MAKE) -C "$(LINUX_DIR)" \
$(MAKE_OPTS) \
+$(MAKE) $(PKG_JOBS) -C "$(LINUX_DIR)" \
$(KERNEL_MAKE_FLAGS) \
$(PKG_MAKE_FLAGS) \
SUBDIRS="$(PKG_BUILD_DIR)" \
EXTRA_CFLAGS="$(EXTRA_CFLAGS)" \
SFE_SUPPORT_IPV6=1 \
modules
$(if $(CONFIG_PACKAGE_fast-classifier-example),$(Build/Compile/fast-classifier-example))
endef
define Build/Compile/fast-classifier-example
$(TARGET_CC) -o $(PKG_BUILD_DIR)/userspace_fast_classifier \
-I $(PKG_BUILD_DIR) \
-I$(STAGING_DIR)/usr/include/libnl \
-I$(STAGING_DIR)/usr/include/libnl3 \
-lnl-genl-3 -lnl-3 \
$(PKG_BUILD_DIR)/nl_classifier_test.c
endef
ifneq ($(CONFIG_PACKAGE_kmod-shortcut-fe)$(CONFIG_PACKAGE_kmod-shortcut-fe-cm)$(CONFIG_PACKAGE_kmod-fast-classifier),)
ifneq ($(CONFIG_PACKAGE_kmod-shortcut-fe)$(CONFIG_PACKAGE_kmod-shortcut-fe-cm),)
define Build/InstallDev
$(INSTALL_DIR) $(1)/usr/include/shortcut-fe
$(CP) -rf $(PKG_BUILD_DIR)/sfe.h $(1)/usr/include/shortcut-fe
ifneq ($(CONFIG_PACKAGE_kmod-fast-classifier),)
$(INSTALL_DIR) $(1)/usr/include
$(CP) $(PKG_BUILD_DIR)/fast-classifier.h $(1)/usr/include/
endif
endef
endif
define Package/fast-classifier-example/install
$(INSTALL_DIR) $(1)/sbin
$(CP) $(PKG_BUILD_DIR)/userspace_fast_classifier $(1)/sbin/
endef
$(eval $(call KernelPackage,shortcut-fe))
$(eval $(call KernelPackage,shortcut-fe-cm))
$(eval $(call KernelPackage,fast-classifier))
#$(eval $(call BuildPackage,fast-classifier-example))

View File

@ -0,0 +1,48 @@
#!/bin/sh /etc/rc.common
#
# Copyright (c) 2014-2015 The Linux Foundation. All rights reserved.
# Permission to use, copy, modify, and/or distribute this software for
# any purpose with or without fee is hereby granted, provided that the
# above copyright notice and this permission notice appear in all copies.
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
#The SFE connection manager has a lower priority; it should be started after any other
#connection manager so it can detect the existence of a connection manager with higher priority
START=72
have_cm() {
[ -d "/sys/kernel/debug/ecm" ] && echo 1 && return
echo 0
}
#load shortcut-fe connection manager
load_sfe_cm() {
local kernel_version=$(uname -r)
#shortcut-fe-drv.ko is not needed because no other connection manager is enabled
[ -d "/sys/module/shortcut_fe_drv" ] && rmmod shortcut_fe_drv
[ -e "/lib/modules/$kernel_version/shortcut-fe-cm.ko" ] && {
[ -d /sys/module/shortcut_fe_cm ] || insmod /lib/modules/$kernel_version/shortcut-fe-cm.ko
}
[ -e "/lib/modules/$kernel_version/fast-classifier.ko" ] && {
[ -d /sys/module/fast_classifier ] || insmod /lib/modules/$kernel_version/fast-classifier.ko
}
}
start() {
[ "$(have_cm)" = "1" ] || load_sfe_cm
}
stop() {
[ -d /sys/module/shortcut_fe_cm ] && rmmod shortcut_fe_cm
[ -d /sys/module/fast_classifier ] && rmmod fast_classifier
}

View File

@ -0,0 +1,35 @@
#!/bin/sh
#
# Copyright (c) 2015 The Linux Foundation. All rights reserved.
# Permission to use, copy, modify, and/or distribute this software for
# any purpose with or without fee is hereby granted, provided that the
# above copyright notice and this permission notice appear in all copies.
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
#@sfe_dump
#@example : sfe_dump (ipv4|ipv6)
sfe_dump(){
[ -e "/dev/sfe_ipv4" ] || {
dev_num=$(cat /sys/sfe_ipv4/debug_dev)
mknod /dev/sfe_ipv4 c $dev_num 0
}
[ -e "/dev/sfe_ipv6" ] || {
dev_num=$(cat /sys/sfe_ipv6/debug_dev)
mknod /dev/sfe_ipv6 c $dev_num 0
}
cat /dev/sfe_$1
}
if [ -z "$1" ]; then
sfe_dump ipv4
sfe_dump ipv6
else
sfe_dump $1
fi
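The same dump can be read from C once sfe_dump has created the node. A minimal userspace sketch (assumes /dev/sfe_ipv4 already exists):

#include <stdio.h>

int main(void)
{
	char buf[4096];
	size_t n;
	FILE *f = fopen("/dev/sfe_ipv4", "r");

	if (!f) {
		perror("fopen /dev/sfe_ipv4");
		return 1;
	}
	/* stream the XML-ish connection dump to stdout */
	while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
		fwrite(buf, 1, n, stdout);
	fclose(f);
	return 0;
}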

View File

@ -2,13 +2,22 @@
# Makefile for Shortcut FE.
#
obj-m += shortcut-fe.o shortcut-fe-ipv6.o shortcut-fe-cm.o fast-classifier.o
obj-m += shortcut-fe.o
ifdef SFE_SUPPORT_IPV6
obj-m += shortcut-fe-ipv6.o
endif
obj-m += shortcut-fe-cm.o
shortcut-fe-objs := \
sfe_ipv4.o
ifdef SFE_SUPPORT_IPV6
shortcut-fe-ipv6-objs := \
sfe_ipv6.o
endif
shortcut-fe-cm-objs := \
sfe_cm.o

View File

@ -1,122 +0,0 @@
Shortcut Forwarding Engine
--------------------------
Welcome to "Shortcut" :-)
Here's a quick FAQ:
Q) What is Shortcut?
A) Shortcut is an in-Linux-kernel IP packet forwarding engine. It's designed
to offer very high speed IP packet forwarding based on IP connection tracking.
It's dramatically faster than the standard netfilter-based NAT forwarding path
but is designed to synchronise state back to netfilter/conntrack so that it
doesn't need to deal with all of the complexities of special cases.
Q) What versions of IP does it support?
A) The current version only supports IPv4 but will be extended to support IPv6 in
the future.
Q) What transport protocols does it support?
A) TCP and UDP. It also knows enough about ICMP to spot ICMP error messages
related to TCP and UDP and handle things accordingly.
Q) Is there a design spec for this software?
A) Not at the moment. I'll write one when I get more time. The code is
intended to be a good tutorial though - it's very heavily commented. If you
find yourself reading something and not understanding it then I take that to
mean I've probably not done a sufficiently good job of explaining what it's
doing in the comments. Let me know - I will try to fix it :-)
Q) Why was it written?
A) It was written as a demonstration of what can be done to provide high
performance forwarding inside the kernel. There were two initial motivations:
1) To provide a platform to enable research into how QoS analysis systems can
offload work and avoid huge Linux overheads.
2) To provide a tool to investigate the behaviour of various processors, SoCs
and software sets so that we can characterize and design new network processor
SoCs.
Q) How much faster is it than the Linux kernel forwarding path?
A) At the time of pushing this to github it's been tested on a QCA AP135.
This has a Scorpion (QCA Scorpion, not the QMC one :-)) SoC, QCA9550. The
SoC's processor is a MIPS74K running at 720 MHz and with a DDR2 memory
subsystem that offers a peak of 600 MT/s (16-bit transfers).
Running IPv4 NAT forwarding of UDP between the board's 2 GMAC ports and
using a SmartBits 200 as a traffic generator Linux is able to forward 70k PPS.
Once the SFE code is invoked this will increase to 350k PPS!
There's also a slightly hacky mode which causes SFE to bypass the Linux
bridge layer, but this isn't really ready for use because it doesn't have
sufficient MAC address checks or integration of statistics back to the
Ethernet bridge, but that runs at 436k PPS.
Q) Are there any diagnostics?
A) Yes, this is a research tool after all! There's a complex way to do this
that's more general purpose and a simple one - here's the simple one:
mknod /dev/sfe c 253 0
The file /dev/sfe is an XML-ish output and provides details of all the
network connections currently being offloaded. It also reports the numbers
of packets that took various "exception" paths within the code. In addition
it provides a summary of the number of connections, attempts to accelerate
connections, cancel accelerations, etc. It also reports the numbers of
packets that were forwarded and not forwarded by the engine and has some
stats on the effectiveness of the hashing algorithm it uses.
Q) How does the code interact with Linux?
A) There are four minor patches required to make this software run with
Linux. These are currently against a 3.3.8 or 3.4.0 kernel:
* (net/core/dev.c) adds a hook to allow packets to be extracted out.
* (net/netfilter/nf_conntrack_proto_tcp.c) exposes a state variable inside
netfilter that's necessary to enable TCP sequence and ACK checking within
the offload path. Note that this specific patch is against the QCA QSDK
patched version of 3.3.8 - there's a slightly braindead "performance"
patch in that kernel, courtesy of the OpenWrt community that makes the
Linux forwarding path slightly faster at the expense of losing
functionality :-(
* (net/Kconfig) adds the shortcut-fe option.
* (net/Makefile) adds the shortcut-fe build support.
Once these are applied and the module is loaded then everything else
is automatic :-) The patches are in this git repo.
Q) Are any of the pieces reused from other projects?
A) Yes! Some of the forwarding concepts are reused from the Ubicom Network
Accelerator that morphed into part of the Akronite NSS. This code has all
been substantially changed though to accommodate Linux's needs.
There are also some pieces that I borrowed from the QCA "FastNAT" software
written by Xiaoping Fan <xfan@qca.qualcomm.com>. Xiaoping's code was the
first actual demonstration within QCA that this in-kernel concept could yield
significant performance gains.
Enjoy!
Dave Hudson <dhudson@qti.qualcomm.com>

View File

@ -2,7 +2,7 @@
* sfe.h
* Shortcut forwarding engine.
*
* Copyright (c) 2013-2016 The Linux Foundation. All rights reserved.
* Copyright (c) 2013-2017 The Linux Foundation. All rights reserved.
* Permission to use, copy, modify, and/or distribute this software for
* any purpose with or without fee is hereby granted, provided that the
* above copyright notice and this permission notice appear in all copies.
@ -15,47 +15,100 @@
* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Debug output verbosity level.
* The following are debug macros used throughout the SFE.
*
* The DEBUG_LEVEL setting enables the following, based on its value,
* when the dynamic debug option is disabled.
*
* 0 = OFF
* 1 = ASSERTS / ERRORS
* 2 = 1 + WARN
* 3 = 2 + INFO
* 4 = 3 + TRACE
*/
#define DEBUG_LEVEL 0
#define DEBUG_LEVEL 2
#if (DEBUG_LEVEL < 1)
#define DEBUG_ASSERT(s, ...)
#define DEBUG_ERROR(s, ...)
#else
#define DEBUG_ERROR(s, ...) \
do { \
printk("%s[%u]: ERROR:", __FILE__, __LINE__); \
printk(s, ##__VA_ARGS__); \
} while (0)
#define DEBUG_ASSERT(c, s, ...) if (!(c)) { pr_emerg("ASSERT: %s:%d:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__); BUG(); }
#define DEBUG_ERROR(s, ...) pr_err("%s:%d:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__)
#endif
#if defined(CONFIG_DYNAMIC_DEBUG)
/*
* Compile messages for dynamic enable/disable
*/
#define DEBUG_WARN(s, ...) pr_debug("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__)
#define DEBUG_INFO(s, ...) pr_debug("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__)
#define DEBUG_TRACE(s, ...) pr_debug("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__)
#else
/*
* Statically compile messages at different levels
*/
#if (DEBUG_LEVEL < 2)
#define DEBUG_WARN(s, ...)
#else
#define DEBUG_WARN(s, ...) \
do { \
printk("%s[%u]: WARN:", __FILE__, __LINE__); \
printk(s, ##__VA_ARGS__); \
} while (0)
#define DEBUG_WARN(s, ...) pr_warn("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__)
#endif
#if (DEBUG_LEVEL < 3)
#define DEBUG_INFO(s, ...)
#else
#define DEBUG_INFO(s, ...) \
do { \
printk("%s[%u]: INFO:", __FILE__, __LINE__); \
printk(s, ##__VA_ARGS__); \
} while (0)
#define DEBUG_INFO(s, ...) pr_notice("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__)
#endif
#if (DEBUG_LEVEL < 4)
#define DEBUG_TRACE(s, ...)
#else
#define DEBUG_TRACE(s, ...) \
do { \
printk("%s[%u]: TRACE:", __FILE__, __LINE__); \
printk(s, ##__VA_ARGS__); \
} while (0)
#define DEBUG_TRACE(s, ...) pr_info("%s[%d]:" s, __FUNCTION__, __LINE__, ##__VA_ARGS__)
#endif
#endif
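With the new default of DEBUG_LEVEL 2 and CONFIG_DYNAMIC_DEBUG off, DEBUG_ASSERT/DEBUG_ERROR and DEBUG_WARN emit output while DEBUG_INFO and DEBUG_TRACE compile to nothing. A hypothetical caller, to show the graduation (not part of this commit):

static int example_validate(struct sk_buff *skb)
{
	DEBUG_ASSERT(skb, "NULL skb\n");		/* level 1: fatal check */

	if (skb->len < sizeof(struct iphdr)) {
		DEBUG_WARN("short packet: %u\n", skb->len);	/* level 2 */
		return -1;
	}
	DEBUG_INFO("packet accepted\n");		/* level 3: compiled out at 2 */
	DEBUG_TRACE("len=%u\n", skb->len);		/* level 4: compiled out at 2 */
	return 0;
}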
#ifdef CONFIG_NF_FLOW_COOKIE
typedef int (*flow_cookie_set_func_t)(u32 protocol, __be32 src_ip, __be16 src_port,
__be32 dst_ip, __be16 dst_port, u16 flow_cookie);
/*
* sfe_register_flow_cookie_cb
* register a function in SFE to let SFE use this function to configure flow cookie for a flow
*
* A hardware driver which supports flow cookie should register a callback function in SFE.
* SFE can then use this function to configure the flow cookie for a flow.
* return: 0, success; !=0, fail
*/
int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb);
/*
* sfe_unregister_flow_cookie_cb
* unregister function which is used to configure flow cookie for a flow
*
* return: 0, success; !=0, fail
*/
int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb);
typedef int (*sfe_ipv6_flow_cookie_set_func_t)(u32 protocol, __be32 src_ip[4], __be16 src_port,
__be32 dst_ip[4], __be16 dst_port, u16 flow_cookie);
/*
* sfe_ipv6_register_flow_cookie_cb
* register a function in SFE to let SFE use this function to configure flow cookie for a flow
*
* A hardware driver which supports flow cookie should register a callback function in SFE.
* SFE can then use this function to configure the flow cookie for a flow.
* return: 0, success; !=0, fail
*/
int sfe_ipv6_register_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb);
/*
* sfe_ipv6_unregister_flow_cookie_cb
* unregister function which is used to configure flow cookie for a flow
*
* return: 0, success; !=0, fail
*/
int sfe_ipv6_unregister_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb);
#endif /*CONFIG_NF_FLOW_COOKIE*/
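A hedged sketch of the intended registration flow for the IPv4 callback above (the driver function is hypothetical; its prototype matches the flow_cookie_set_func_t typedef):

/* Hypothetical NIC driver hook: program a hardware flow table entry. */
static int my_hw_set_cookie(u32 protocol, __be32 src_ip, __be16 src_port,
			    __be32 dst_ip, __be16 dst_port, u16 flow_cookie)
{
	/* write the 5-tuple -> cookie mapping into hardware here */
	return 0;
}

static int __init my_hw_init(void)
{
	return sfe_register_flow_cookie_cb(my_hw_set_cookie);
}

static void __exit my_hw_exit(void)
{
	sfe_unregister_flow_cookie_cb(my_hw_set_cookie);
}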

View File

@ -17,6 +17,38 @@
#include <linux/version.h>
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0))
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
#include <net/netfilter/nf_conntrack_timeout.h>
#else
enum udp_conntrack {
UDP_CT_UNREPLIED,
UDP_CT_REPLIED,
UDP_CT_MAX
};
static inline unsigned int *
nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct,
struct nf_conntrack_l4proto *l4proto)
{
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
struct nf_conn_timeout *timeout_ext;
unsigned int *timeouts;
timeout_ext = nf_ct_timeout_find(ct);
if (timeout_ext)
timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext);
else
timeouts = l4proto->get_timeouts(net);
return timeouts;
#else
return l4proto->get_timeouts(net);
#endif /*CONFIG_NF_CONNTRACK_TIMEOUT*/
}
#endif /*KERNEL_VERSION(3, 7, 0)*/
#endif /*KERNEL_VERSION(3, 4, 0)*/
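This backport lets pre-3.7 kernels share the nf_ct_timeout_lookup() call used by newer code. A sketch of the calling pattern, mirroring how the sync path rearms a replied-UDP conntrack timer on pre-4.9 kernels (ct is an existing struct nf_conn *):

struct nf_conntrack_l4proto *l4proto;
unsigned int *timeouts;

l4proto = __nf_ct_l4proto_find(AF_INET, IPPROTO_UDP);
timeouts = nf_ct_timeout_lookup(&init_net, ct, l4proto);

spin_lock_bh(&ct->lock);
ct->timeout.expires = jiffies + timeouts[UDP_CT_REPLIED];	/* pre-4.9 timer layout */
spin_unlock_bh(&ct->lock);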
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
#define sfe_define_post_routing_hook(FN_NAME, HOOKNUM, OPS, SKB, UNUSED, OUT, OKFN) \
static unsigned int FN_NAME(void *priv, \
@ -131,8 +163,33 @@ static inline struct net_device *sfe_dev_get_master(struct net_device *dev)
#define SFE_ACCT_COUNTER(NM) (NM)
#endif
#define sfe_hash_for_each_possible(name, obj, member, key) \
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0))
#define sfe_hash_for_each_possible(name, obj, node, member, key) \
hash_for_each_possible(name, obj, member, key)
#else
#define sfe_hash_for_each_possible(name, obj, node, member, key) \
hash_for_each_possible(name, obj, node, member, key)
#endif
#define sfe_hash_for_each(name, bkt, obj, member) \
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0))
#define sfe_hash_for_each(name, bkt, node, obj, member) \
hash_for_each(name, bkt, obj, member)
#else
#define sfe_hash_for_each(name, bkt, node, obj, member) \
hash_for_each(name, bkt, node, obj, member)
#endif
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0))
#define sfe_dst_get_neighbour(dst, daddr) dst_neigh_lookup(dst, daddr)
#else
static inline struct neighbour *
sfe_dst_get_neighbour(struct dst_entry *dst, void *daddr)
{
struct neighbour *neigh = dst_get_neighbour_noref(dst);
if (neigh)
neigh_hold(neigh);
return neigh;
}
#endif
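Both variants of the wrapper return a referenced neighbour, so callers follow one pattern regardless of kernel version; a sketch (dst and addr come from the surrounding route lookup):

struct neighbour *neigh;
u8 mac_addr[ETH_ALEN];

rcu_read_lock();
neigh = sfe_dst_get_neighbour(dst, addr);
if (neigh) {
	memcpy(mac_addr, neigh->ha, ETH_ALEN);	/* valid while the ref is held */
	neigh_release(neigh);
}
rcu_read_unlock();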

View File

@ -2,7 +2,7 @@
* sfe-cm.c
* Shortcut forwarding engine connection manager.
*
* Copyright (c) 2013-2016 The Linux Foundation. All rights reserved.
* Copyright (c) 2013-2018 The Linux Foundation. All rights reserved.
* Permission to use, copy, modify, and/or distribute this software for
* any purpose with or without fee is hereby granted, provided that the
* above copyright notice and this permission notice appear in all copies.
@ -29,10 +29,9 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <linux/netfilter/xt_dscp.h>
#include <linux/if_bridge.h>
#include <net/pkt_sched.h>
#include <linux/version.h>
#include "sfe.h"
#include "sfe_cm.h"
@ -103,7 +102,6 @@ struct sfe_cm {
static struct sfe_cm __sc;
/*
* sfe_cm_incr_exceptions()
* increase an exception counter.
@ -123,7 +121,7 @@ static inline void sfe_cm_incr_exceptions(sfe_cm_exception_t except)
*
* Returns 1 if the packet is forwarded or 0 if it isn't.
*/
static int sfe_cm_recv(struct sk_buff *skb)
int sfe_cm_recv(struct sk_buff *skb)
{
struct net_device *dev;
@ -136,16 +134,6 @@ static int sfe_cm_recv(struct sk_buff *skb)
dev = skb->dev;
#ifdef CONFIG_NET_CLS_ACT
/*
* If ingress Qdisc configured, and packet not processed by ingress Qdisc yet
* We cannot accelerate this packet.
*/
if (dev->ingress_queue && !(skb->tc_verd & TC_NCLS)) {
return 0;
}
#endif
/*
* We're only interested in IPv4 and IPv6 packets.
*/
@ -232,7 +220,11 @@ static bool sfe_cm_find_dev_and_mac_addr(sfe_ip_addr_t *addr, struct net_device
dst = (struct dst_entry *)rt;
} else {
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0))
rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, NULL, 0);
#else
rt6 = rt6_lookup(&init_net, (struct in6_addr *)addr->ip6, 0, 0, 0);
#endif /*KERNEL_VERSION(4, 17, 0)*/
if (!rt6) {
goto ret_fail;
}
@ -241,7 +233,7 @@ static bool sfe_cm_find_dev_and_mac_addr(sfe_ip_addr_t *addr, struct net_device
}
rcu_read_lock();
neigh = dst_neigh_lookup(dst, addr);
neigh = sfe_dst_get_neighbour(dst, addr);
if (unlikely(!neigh)) {
rcu_read_unlock();
dst_release(dst);
@ -297,6 +289,8 @@ static unsigned int sfe_cm_post_routing(struct sk_buff *skb, int is_v4)
struct net_device *dev;
struct net_device *src_dev;
struct net_device *dest_dev;
struct net_device *src_dev_tmp;
struct net_device *dest_dev_tmp;
struct net_device *src_br_dev = NULL;
struct net_device *dest_br_dev = NULL;
struct nf_conntrack_tuple orig_tuple;
@ -358,6 +352,7 @@ static unsigned int sfe_cm_post_routing(struct sk_buff *skb, int is_v4)
return NF_ACCEPT;
}
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0))
/*
* Don't process untracked connections.
*/
@ -366,6 +361,7 @@ static unsigned int sfe_cm_post_routing(struct sk_buff *skb, int is_v4)
DEBUG_TRACE("untracked connection\n");
return NF_ACCEPT;
}
#endif /*KERNEL_VERSION(4, 12, 0)*/
/*
* Unconfirmed connection may be dropped by Linux at the final step,
@ -537,7 +533,11 @@ static unsigned int sfe_cm_post_routing(struct sk_buff *skb, int is_v4)
* For packets de-capsulated from xfrm, we still can accelerate it
* on the direction we just received the packet.
*/
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0))
if (unlikely(skb_ext_exist(skb, SKB_EXT_SEC_PATH))) {
#else
if (unlikely(skb->sp)) {
#endif
if (sic.protocol == IPPROTO_TCP &&
!(sic.flags & SFE_CREATE_FLAG_NO_SEQ_CHECK)) {
return NF_ACCEPT;
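
The security-path test also lends itself to a small helper, since 5.0 moved skb->sp into the skb extension area. A minimal sketch, assuming CONFIG_XFRM is enabled as in the surrounding code:

static inline bool sfe_skb_has_sec_path(const struct sk_buff *skb)
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0))
	return skb_ext_exist(skb, SKB_EXT_SEC_PATH);
#else
	return skb->sp != NULL;	/* only present when CONFIG_XFRM is set */
#endif
}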
@ -564,29 +564,29 @@ static unsigned int sfe_cm_post_routing(struct sk_buff *skb, int is_v4)
* Get the net device and MAC addresses that correspond to the various source and
* destination host addresses.
*/
if (!sfe_cm_find_dev_and_mac_addr(&sic.src_ip, &src_dev, sic.src_mac, is_v4)) {
if (!sfe_cm_find_dev_and_mac_addr(&sic.src_ip, &src_dev_tmp, sic.src_mac, is_v4)) {
sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_SRC_DEV);
return NF_ACCEPT;
}
src_dev = src_dev_tmp;
if (!sfe_cm_find_dev_and_mac_addr(&sic.src_ip_xlate, &dev, sic.src_mac_xlate, is_v4)) {
sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_SRC_XLATE_DEV);
goto done1;
}
dev_put(dev);
if (!sfe_cm_find_dev_and_mac_addr(&sic.dest_ip, &dev, sic.dest_mac, is_v4)) {
sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_DEST_DEV);
goto done1;
}
dev_put(dev);
if (!sfe_cm_find_dev_and_mac_addr(&sic.dest_ip_xlate, &dest_dev, sic.dest_mac_xlate, is_v4)) {
if (!sfe_cm_find_dev_and_mac_addr(&sic.dest_ip_xlate, &dest_dev_tmp, sic.dest_mac_xlate, is_v4)) {
sfe_cm_incr_exceptions(SFE_CM_EXCEPTION_NO_DEST_XLATE_DEV);
goto done1;
}
dest_dev = dest_dev_tmp;
/*
* Our devices may actually be part of a bridge interface. If that's
@ -599,7 +599,6 @@ static unsigned int sfe_cm_post_routing(struct sk_buff *skb, int is_v4)
DEBUG_TRACE("no bridge found for: %s\n", src_dev->name);
goto done2;
}
src_dev = src_br_dev;
}
@ -610,7 +609,6 @@ static unsigned int sfe_cm_post_routing(struct sk_buff *skb, int is_v4)
DEBUG_TRACE("no bridge found for: %s\n", dest_dev->name);
goto done3;
}
dest_dev = dest_br_dev;
}
@ -619,7 +617,7 @@ static unsigned int sfe_cm_post_routing(struct sk_buff *skb, int is_v4)
sic.src_mtu = src_dev->mtu;
sic.dest_mtu = dest_dev->mtu;
sic.mark = skb->mark;
if (likely(is_v4)) {
sfe_ipv4_create_rule(&sic);
} else {
@ -632,17 +630,14 @@ static unsigned int sfe_cm_post_routing(struct sk_buff *skb, int is_v4)
if (dest_br_dev) {
dev_put(dest_br_dev);
}
done3:
if (src_br_dev) {
dev_put(src_br_dev);
}
done2:
dev_put(dest_dev);
dev_put(dest_dev_tmp);
done1:
dev_put(src_dev);
dev_put(src_dev_tmp);
return NF_ACCEPT;
}
@ -692,6 +687,7 @@ static int sfe_cm_conntrack_event(unsigned int events, struct nf_ct_event *item)
return NOTIFY_DONE;
}
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0))
/*
* If this is an untracked connection then we can't have any state either.
*/
@ -699,6 +695,7 @@ static int sfe_cm_conntrack_event(unsigned int events, struct nf_ct_event *item)
DEBUG_TRACE("ignoring untracked conn\n");
return NOTIFY_DONE;
}
#endif /*KERNEL_VERSION(4, 12, 0)*/
/*
* We're only interested in destroy events.
@ -771,7 +768,9 @@ static struct nf_ct_event_notifier sfe_cm_conntrack_notifier = {
*/
static struct nf_hook_ops sfe_cm_ops_post_routing[] __read_mostly = {
SFE_IPV4_NF_POST_ROUTING_HOOK(__sfe_cm_ipv4_post_routing_hook),
#ifdef SFE_SUPPORT_IPV6
SFE_IPV6_NF_POST_ROUTING_HOOK(__sfe_cm_ipv6_post_routing_hook),
#endif
};
/*
@ -824,18 +823,20 @@ static void sfe_cm_sync_rule(struct sfe_connection_sync *sis)
}
ct = nf_ct_tuplehash_to_ctrack(h);
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0))
NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
#endif /*KERNEL_VERSION(4, 9, 0)*/
/*
* Only update if this is not a fixed timeout
*/
if (!test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
spin_lock_bh(&ct->lock);
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0))
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0))
ct->timeout += sis->delta_jiffies;
#else
ct->timeout.expires += sis->delta_jiffies;
#endif
#endif /*KERNEL_VERSION(4, 9, 0)*/
spin_unlock_bh(&ct->lock);
}
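
4.9 turned ct->timeout from an embedded struct timer_list into a plain u32 jiffies stamp, which is what the #if above handles. The same split as a helper (a sketch; callers must hold ct->lock as the code above does):

static inline void sfe_ct_bump_timeout(struct nf_conn *ct, unsigned long delta)
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0))
	ct->timeout += delta;		/* u32 jiffies stamp since 4.9 */
#else
	ct->timeout.expires += delta;	/* timer-based timeout before 4.9 */
#endif
}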
@ -872,6 +873,7 @@ static void sfe_cm_sync_rule(struct sfe_connection_sync *sis)
}
spin_unlock_bh(&ct->lock);
break;
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0))
case IPPROTO_UDP:
/*
* In Linux connection track, UDP flow has two timeout values:
@ -888,25 +890,31 @@ static void sfe_cm_sync_rule(struct sfe_connection_sync *sis)
u_int64_t reply_pkts = atomic64_read(&SFE_ACCT_COUNTER(acct)[IP_CT_DIR_REPLY].packets);
if (reply_pkts != 0) {
struct nf_conntrack_l4proto *l4proto;
unsigned int *timeouts;
set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
set_bit(IPS_ASSURED_BIT, &ct->status);
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0))
timeouts = nf_ct_timeout_lookup(ct);
#else
struct nf_conntrack_l4proto *l4proto;
l4proto = __nf_ct_l4proto_find((sis->is_v6 ? AF_INET6 : AF_INET), IPPROTO_UDP);
timeouts = nf_ct_timeout_lookup(&init_net, ct, l4proto);
#endif /*KERNEL_VERSION(4, 19, 0)*/
spin_lock_bh(&ct->lock);
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0))
ct->timeout = nfct_time_stamp + timeouts[UDP_CT_REPLIED];
ct->timeout = jiffies + timeouts[UDP_CT_REPLIED];
#else
ct->timeout.expires = jiffies + timeouts[UDP_CT_REPLIED];
#endif
#endif /*KERNEL_VERSION(4, 9, 0)*/
spin_unlock_bh(&ct->lock);
}
}
break;
#endif /*KERNEL_VERSION(3, 4, 0)*/
}
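
The timeout-table lookup is the other moving target here: 4.19 dropped the net and l4proto arguments from nf_ct_timeout_lookup(). A consolidated sketch (sfe_ct_udp_timeouts is a hypothetical helper; note the 4.19+ variant can return NULL when no cttimeout policy is attached, so a caller should check):

static unsigned int *sfe_ct_udp_timeouts(struct nf_conn *ct, bool is_v6)
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0))
	return nf_ct_timeout_lookup(ct);	/* may be NULL */
#else
	struct nf_conntrack_l4proto *l4proto;

	l4proto = __nf_ct_l4proto_find(is_v6 ? AF_INET6 : AF_INET, IPPROTO_UDP);
	return nf_ct_timeout_lookup(&init_net, ct, l4proto);
#endif
}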
/*
@ -918,7 +926,7 @@ static void sfe_cm_sync_rule(struct sfe_connection_sync *sis)
/*
* sfe_cm_device_event()
*/
static int sfe_cm_device_event(struct notifier_block *this, unsigned long event, void *ptr)
int sfe_cm_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = SFE_DEV_EVENT_PTR(ptr);
@ -980,80 +988,11 @@ static ssize_t sfe_cm_get_exceptions(struct device *dev,
return len;
}
/*
* sfe_cm_get_stop
* dump stop
*/
static ssize_t sfe_cm_get_stop(struct device *dev,
struct device_attribute *attr,
char *buf)
{
int (*fast_recv)(struct sk_buff *skb);
rcu_read_lock();
fast_recv = rcu_dereference(fast_nat_recv);
rcu_read_unlock();
return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", fast_recv ? 0 : 1);
}
static ssize_t sfe_cm_set_stop(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
int ret;
u32 num;
int (*fast_recv)(struct sk_buff *skb);
ret = kstrtou32(buf, 0, &num);
if (ret)
return ret;
/*
* Hook/Unhook the receive path in the network stack.
*/
if (num) {
RCU_INIT_POINTER(fast_nat_recv, NULL);
} else {
rcu_read_lock();
fast_recv = rcu_dereference(fast_nat_recv);
rcu_read_unlock();
if (!fast_recv) {
BUG_ON(fast_nat_recv);
RCU_INIT_POINTER(fast_nat_recv, sfe_cm_recv);
}
}
DEBUG_TRACE("sfe_cm_stop = %d\n", num);
return count;
}
/*
* sfe_cm_get_defunct_all
* dump state of SFE
*/
static ssize_t sfe_cm_get_defunct_all(struct device *dev,
struct device_attribute *attr,
char *buf)
{
return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", 0);
}
static ssize_t sfe_cm_set_defunct_all(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
sfe_ipv4_destroy_all_rules_for_dev(NULL);
sfe_ipv6_destroy_all_rules_for_dev(NULL);
return count;
}
/*
* sysfs attributes.
*/
static const struct device_attribute sfe_attrs[] = {
__ATTR(exceptions, S_IRUGO, sfe_cm_get_exceptions, NULL),
__ATTR(stop, S_IWUSR | S_IRUGO, sfe_cm_get_stop, sfe_cm_set_stop),
__ATTR(defunct_all, S_IWUSR | S_IRUGO, sfe_cm_get_defunct_all, sfe_cm_set_defunct_all),
};
static const struct device_attribute sfe_cm_exceptions_attr =
__ATTR(exceptions, S_IRUGO, sfe_cm_get_exceptions, NULL);
/*
* sfe_cm_init()
@ -1062,7 +1001,6 @@ static int __init sfe_cm_init(void)
{
struct sfe_cm *sc = &__sc;
int result = -1;
size_t i, j;
DEBUG_INFO("SFE CM init\n");
@ -1075,13 +1013,13 @@ static int __init sfe_cm_init(void)
goto exit1;
}
for (i = 0; i < ARRAY_SIZE(sfe_attrs); i++) {
result = sysfs_create_file(sc->sys_sfe_cm, &sfe_attrs[i].attr);
if (result) {
DEBUG_ERROR("failed to register %s : %d\n",
sfe_attrs[i].attr.name, result);
goto exit2;
}
/*
* Create sys/sfe_cm/exceptions
*/
result = sysfs_create_file(sc->sys_sfe_cm, &sfe_cm_exceptions_attr.attr);
if (result) {
DEBUG_ERROR("failed to register exceptions file: %d\n", result);
goto exit2;
}
sc->dev_notifier.notifier_call = sfe_cm_device_event;
@ -1098,19 +1036,18 @@ static int __init sfe_cm_init(void)
/*
* Register our netfilter hooks.
*/
result = nf_register_hooks(sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing));
result = nf_register_net_hooks(&init_net, sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing));
if (result < 0) {
DEBUG_ERROR("can't register nf post routing hook: %d\n", result);
goto exit3;
}
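
nf_register_hooks() was removed in 4.13 in favour of the per-namespace nf_register_net_hooks(), which is the swap made above. If more call sites appear, the split could live in one helper; a sketch:

static int sfe_cm_register_post_routing_hooks(void)
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0))
	return nf_register_net_hooks(&init_net, sfe_cm_ops_post_routing,
				     ARRAY_SIZE(sfe_cm_ops_post_routing));
#else
	return nf_register_hooks(sfe_cm_ops_post_routing,
				 ARRAY_SIZE(sfe_cm_ops_post_routing));
#endif
}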
#ifdef CONFIG_NF_CONNTRACK_EVENTS
/*
* Register a notifier hook to get fast notifications of expired connections.
* Note: when CONFIG_NF_CONNTRACK_CHAIN_EVENTS is enabled, nf_conntrack_register_notifier()
* always returns 0.
*/
#ifdef CONFIG_NF_CONNTRACK_EVENTS
#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
(void)nf_conntrack_register_notifier(&init_net, &sfe_cm_conntrack_notifier);
#else
@ -1124,6 +1061,12 @@ static int __init sfe_cm_init(void)
spin_lock_init(&sc->lock);
/*
* Hook the receive path in the network stack.
*/
BUG_ON(athrs_fast_nat_recv);
RCU_INIT_POINTER(athrs_fast_nat_recv, sfe_cm_recv);
/*
* Hook the shortcut sync callback.
*/
@ -1134,7 +1077,7 @@ static int __init sfe_cm_init(void)
#ifdef CONFIG_NF_CONNTRACK_EVENTS
#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
exit4:
nf_unregister_hooks(sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing));
nf_unregister_net_hooks(&init_net, sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing));
#endif
#endif
exit3:
@ -1142,9 +1085,6 @@ exit3:
unregister_inetaddr_notifier(&sc->inet_notifier);
unregister_netdevice_notifier(&sc->dev_notifier);
exit2:
for (j = 0; j < i; j++) {
sysfs_remove_file(sc->sys_sfe_cm, &sfe_attrs[j].attr);
}
kobject_put(sc->sys_sfe_cm);
exit1:
@ -1169,7 +1109,7 @@ static void __exit sfe_cm_exit(void)
/*
* Unregister our receive callback.
*/
RCU_INIT_POINTER(fast_nat_recv, NULL);
RCU_INIT_POINTER(athrs_fast_nat_recv, NULL);
/*
* Wait for all callbacks to complete.
@ -1186,7 +1126,7 @@ static void __exit sfe_cm_exit(void)
nf_conntrack_unregister_notifier(&init_net, &sfe_cm_conntrack_notifier);
#endif
nf_unregister_hooks(sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing));
nf_unregister_net_hooks(&init_net, sfe_cm_ops_post_routing, ARRAY_SIZE(sfe_cm_ops_post_routing));
unregister_inet6addr_notifier(&sc->inet6_notifier);
unregister_inetaddr_notifier(&sc->inet_notifier);

View File

@ -147,7 +147,7 @@ struct sfe_connection_mark {
/*
* Expose the hook for the receive processing.
*/
extern int (*fast_nat_recv)(struct sk_buff *skb);
extern int (*athrs_fast_nat_recv)(struct sk_buff *skb);
/*
* Expose what should be a static flag in the TCP connection tracker.
@ -175,6 +175,7 @@ void sfe_ipv4_register_sync_rule_callback(sfe_sync_rule_callback_t callback);
void sfe_ipv4_update_rule(struct sfe_connection_create *sic);
void sfe_ipv4_mark_rule(struct sfe_connection_mark *mark);
#ifdef SFE_SUPPORT_IPV6
/*
* IPv6 APIs used by connection manager
*/
@ -185,6 +186,42 @@ void sfe_ipv6_destroy_all_rules_for_dev(struct net_device *dev);
void sfe_ipv6_register_sync_rule_callback(sfe_sync_rule_callback_t callback);
void sfe_ipv6_update_rule(struct sfe_connection_create *sic);
void sfe_ipv6_mark_rule(struct sfe_connection_mark *mark);
#else
static inline int sfe_ipv6_recv(struct net_device *dev, struct sk_buff *skb)
{
return 0;
}
static inline int sfe_ipv6_create_rule(struct sfe_connection_create *sic)
{
return 0;
}
static inline void sfe_ipv6_destroy_rule(struct sfe_connection_destroy *sid)
{
return;
}
static inline void sfe_ipv6_destroy_all_rules_for_dev(struct net_device *dev)
{
return;
}
static inline void sfe_ipv6_register_sync_rule_callback(sfe_sync_rule_callback_t callback)
{
return;
}
static inline void sfe_ipv6_update_rule(struct sfe_connection_create *sic)
{
return;
}
static inline void sfe_ipv6_mark_rule(struct sfe_connection_mark *mark)
{
return;
}
#endif
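
With these stubs, callers need no #ifdef SFE_SUPPORT_IPV6 of their own; the dispatch in sfe_cm_post_routing() stays branch-free, along the lines of:

if (likely(is_v4))
	sfe_ipv4_create_rule(&sic);
else
	sfe_ipv6_create_rule(&sic);	/* compiles to a 0-returning stub when IPv6 is off */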
/*
* sfe_ipv6_addr_equal()

View File

@ -2,7 +2,7 @@
* sfe_ipv4.c
* Shortcut forwarding engine - IPv4 edition.
*
* Copyright (c) 2013-2016 The Linux Foundation. All rights reserved.
* Copyright (c) 2013-2016, 2019, The Linux Foundation. All rights reserved.
* Permission to use, copy, modify, and/or distribute this software for
* any purpose with or without fee is hereby granted, provided that the
* above copyright notice and this permission notice appear in all copies.
@ -21,7 +21,7 @@
#include <linux/icmp.h>
#include <net/tcp.h>
#include <linux/etherdevice.h>
#include <net/checksum.h>
#include <linux/version.h>
#include "sfe.h"
#include "sfe_cm.h"
@ -38,7 +38,7 @@
*/
#define SFE_IPV4_UNALIGNED_IP_HEADER 1
#if SFE_IPV4_UNALIGNED_IP_HEADER
#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((aligned(4)))
#define SFE_IPV4_UNALIGNED_STRUCT __attribute__((packed))
#else
#define SFE_IPV4_UNALIGNED_STRUCT
#endif
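
Switching the attribute from aligned(4) to packed forces the compiler to emit byte-safe loads, which matters when the IP header lands on a 2-byte boundary behind a 14-byte Ethernet header on architectures that fault on unaligned access. Illustrative only, with hypothetical field names:

struct sfe_ipv4_ip_hdr_example {
	__u8	version_ihl;
	__u8	tos;
	__be16	tot_len;
	/* ... */
	__be32	saddr;
	__be32	daddr;
} SFE_IPV4_UNALIGNED_STRUCT;	/* packed: safe to read via an unaligned pointer */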
@ -202,6 +202,9 @@ struct sfe_ipv4_connection_match {
* Control the operations of the match.
*/
u32 flags; /* Bit flags */
#ifdef CONFIG_NF_FLOW_COOKIE
u32 flow_cookie; /* used flow cookie, for debug */
#endif
#ifdef CONFIG_XFRM
u32 flow_accel; /* The flow accelerated or not */
#endif
@ -300,6 +303,16 @@ struct sfe_ipv4_connection {
#define SFE_IPV4_CONNECTION_HASH_SIZE (1 << SFE_IPV4_CONNECTION_HASH_SHIFT)
#define SFE_IPV4_CONNECTION_HASH_MASK (SFE_IPV4_CONNECTION_HASH_SIZE - 1)
#ifdef CONFIG_NF_FLOW_COOKIE
#define SFE_FLOW_COOKIE_SIZE 2048
#define SFE_FLOW_COOKIE_MASK 0x7ff
struct sfe_flow_cookie_entry {
struct sfe_ipv4_connection_match *match;
unsigned long last_clean_time;
};
#endif
enum sfe_ipv4_exception_events {
SFE_IPV4_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE,
SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION,
@ -337,7 +350,7 @@ enum sfe_ipv4_exception_events {
SFE_IPV4_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE,
SFE_IPV4_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE,
SFE_IPV4_EXCEPTION_EVENT_UNHANDLED_PROTOCOL,
SFE_IPV4_EXCEPTION_EVENT_CSUM_ERROR,
SFE_IPV4_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR,
SFE_IPV4_EXCEPTION_EVENT_LAST
};
@ -378,7 +391,7 @@ static char *sfe_ipv4_exception_events_string[SFE_IPV4_EXCEPTION_EVENT_LAST] = {
"DATAGRAM_INCOMPLETE",
"IP_OPTIONS_INCOMPLETE",
"UNHANDLED_PROTOCOL",
"CSUM_ERROR"
"CLONED_SKB_UNSHARE_ERROR"
};
/*
@ -402,6 +415,14 @@ struct sfe_ipv4 {
/* Connection hash table */
struct sfe_ipv4_connection_match *conn_match_hash[SFE_IPV4_CONNECTION_HASH_SIZE];
/* Connection match hash table */
#ifdef CONFIG_NF_FLOW_COOKIE
struct sfe_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE];
/* flow cookie table */
flow_cookie_set_func_t flow_cookie_set_func;
/* function used to configure flow cookie in hardware */
int flow_cookie_enable;
/* Enable/disable flow cookie at runtime */
#endif
/*
* Stats recorded in a sync period. These stats will be added to
@ -759,6 +780,36 @@ static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si
cm->next = prev_head;
*hash_head = cm;
#ifdef CONFIG_NF_FLOW_COOKIE
if (!si->flow_cookie_enable)
return;
/*
* Configure the hardware to put a flow cookie in packets of this flow,
* so that we can accelerate the lookup when such a packet is received.
*/
for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];
if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) {
flow_cookie_set_func_t func;
rcu_read_lock();
func = rcu_dereference(si->flow_cookie_set_func);
if (func) {
if (!func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
cm->match_dest_ip, cm->match_dest_port, conn_match_idx)) {
entry->match = cm;
cm->flow_cookie = conn_match_idx;
}
}
rcu_read_unlock();
break;
}
}
#endif
}
/*
@ -769,6 +820,36 @@ static inline void sfe_ipv4_insert_sfe_ipv4_connection_match(struct sfe_ipv4 *si
*/
static inline void sfe_ipv4_remove_sfe_ipv4_connection_match(struct sfe_ipv4 *si, struct sfe_ipv4_connection_match *cm)
{
#ifdef CONFIG_NF_FLOW_COOKIE
if (si->flow_cookie_enable) {
/*
* Tell the hardware that we no longer need a flow cookie in packets of this flow
*/
unsigned int conn_match_idx;
for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
struct sfe_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];
if (cm == entry->match) {
flow_cookie_set_func_t func;
rcu_read_lock();
func = rcu_dereference(si->flow_cookie_set_func);
if (func) {
func(cm->match_protocol, cm->match_src_ip, cm->match_src_port,
cm->match_dest_ip, cm->match_dest_port, 0);
}
rcu_read_unlock();
cm->flow_cookie = 0;
entry->match = NULL;
entry->last_clean_time = jiffies;
break;
}
}
}
#endif
/*
* Unlink the connection match entry from the hash.
*/
@ -1142,7 +1223,14 @@ static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct ne
/*
* Look for a connection match.
*/
#ifdef CONFIG_NF_FLOW_COOKIE
cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
if (unlikely(!cm)) {
cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
}
#else
cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
#endif
if (unlikely(!cm)) {
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
si->packets_not_forwarded++;
@ -1217,6 +1305,30 @@ static int sfe_ipv4_recv_udp(struct sfe_ipv4 *si, struct sk_buff *skb, struct ne
* From this point on we're good to modify the packet.
*/
/*
* Check whether the skb was cloned and, if so, unshare it, because
* the data area is going to be written in this path and we don't want
* to change the cloned skb's data.
*/
if (unlikely(skb_cloned(skb))) {
DEBUG_TRACE("%p: skb is a cloned skb\n", skb);
skb = skb_unshare(skb, GFP_ATOMIC);
if (!skb) {
DEBUG_WARN("Failed to unshare the cloned skb\n");
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
return 0;
}
/*
* Update the iph and udph pointers with the unshared skb's data area.
*/
iph = (struct sfe_ipv4_ip_hdr *)skb->data;
udph = (struct sfe_ipv4_udp_hdr *)(skb->data + ihl);
}
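
The pointer refresh at the end is essential: skb_unshare() may replace the buffer, so iph/udph would otherwise dangle. A simplified sketch of what skb_unshare() does on this path:

static inline struct sk_buff *skb_unshare_sketch(struct sk_buff *skb, gfp_t pri)
{
	if (skb_cloned(skb)) {
		struct sk_buff *nskb = skb_copy(skb, pri);	/* private header + data */

		if (likely(nskb))
			consume_skb(skb);	/* drop our reference to the clone */
		else
			kfree_skb(skb);
		skb = nskb;	/* NULL on allocation failure */
	}
	return skb;
}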
/*
* Update DSCP
*/
@ -1502,7 +1614,14 @@ static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct ne
/*
* Look for a connection match.
*/
#ifdef CONFIG_NF_FLOW_COOKIE
cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
if (unlikely(!cm)) {
cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
}
#else
cm = sfe_ipv4_find_sfe_ipv4_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
#endif
if (unlikely(!cm)) {
/*
* We didn't get a connection but as TCP is connection-oriented that
@ -1766,6 +1885,30 @@ static int sfe_ipv4_recv_tcp(struct sfe_ipv4 *si, struct sk_buff *skb, struct ne
* From this point on we're good to modify the packet.
*/
/*
* Check whether the skb was cloned and, if so, unshare it, because
* the data area is going to be written in this path and we don't want
* to change the cloned skb's data.
*/
if (unlikely(skb_cloned(skb))) {
DEBUG_TRACE("%p: skb is a cloned skb\n", skb);
skb = skb_unshare(skb, GFP_ATOMIC);
if (!skb) {
DEBUG_WARN("Failed to unshare the cloned skb\n");
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
return 0;
}
/*
* Update the iph and tcph pointers with the unshared skb's data area.
*/
iph = (struct sfe_ipv4_ip_hdr *)skb->data;
tcph = (struct sfe_ipv4_tcp_hdr *)(skb->data + ihl);
}
/*
* Update DSCP
*/
@ -2225,16 +2368,6 @@ int sfe_ipv4_recv(struct net_device *dev, struct sk_buff *skb)
flush_on_find = true;
}
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) {
spin_lock_bh(&si->lock);
si->exception_events[SFE_IPV4_EXCEPTION_EVENT_CSUM_ERROR]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
DEBUG_TRACE("checksum of ipv4 header is invalid\n");
return 0;
}
protocol = iph->protocol;
if (IPPROTO_UDP == protocol) {
return sfe_ipv4_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
@ -2445,6 +2578,9 @@ int sfe_ipv4_create_rule(struct sfe_connection_create *sic)
original_cm->dscp = sic->src_dscp << SFE_IPV4_DSCP_SHIFT;
original_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK;
}
#ifdef CONFIG_NF_FLOW_COOKIE
original_cm->flow_cookie = 0;
#endif
#ifdef CONFIG_XFRM
original_cm->flow_accel = sic->original_accel;
#endif
@ -2501,6 +2637,9 @@ int sfe_ipv4_create_rule(struct sfe_connection_create *sic)
reply_cm->dscp = sic->dest_dscp << SFE_IPV4_DSCP_SHIFT;
reply_cm->flags |= SFE_IPV4_CONNECTION_MATCH_FLAG_DSCP_REMARK;
}
#ifdef CONFIG_NF_FLOW_COOKIE
reply_cm->flow_cookie = 0;
#endif
#ifdef CONFIG_XFRM
reply_cm->flow_accel = sic->reply_accel;
#endif
@ -2719,9 +2858,17 @@ another_round:
/*
* sfe_ipv4_periodic_sync()
*/
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0))
static void sfe_ipv4_periodic_sync(struct timer_list *arg)
#else
static void sfe_ipv4_periodic_sync(unsigned long arg)
#endif /*KERNEL_VERSION(4, 15, 0)*/
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0))
struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg->cust_data;
#else
struct sfe_ipv4 *si = (struct sfe_ipv4 *)arg;
#endif /*KERNEL_VERSION(4, 15, 0)*/
u64 now_jiffies;
int quota;
sfe_sync_rule_callback_t sync_rule_callback;
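
The cust_data field used above comes from this commit's timer.h patch (further down). The mainline 4.15+ idiom avoids patching struct timer_list altogether by recovering the container with from_timer(); a sketch, assuming the field is named timer as the mod_timer(&si->timer, ...) call below suggests:

static void sfe_ipv4_periodic_sync(struct timer_list *t)
{
	struct sfe_ipv4 *si = from_timer(si, t, timer);	/* container_of() under the hood */

	/* ... sync work ... */
	mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
}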
@ -2889,6 +3036,9 @@ static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si,
u64 dest_rx_bytes;
u64 last_sync_jiffies;
u32 mark, src_priority, dest_priority, src_dscp, dest_dscp;
#ifdef CONFIG_NF_FLOW_COOKIE
int src_flow_cookie, dst_flow_cookie;
#endif
spin_lock_bh(&si->lock);
@ -2936,7 +3086,10 @@ static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si,
dest_rx_bytes = reply_cm->rx_byte_count64;
last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
mark = c->mark;
#ifdef CONFIG_NF_FLOW_COOKIE
src_flow_cookie = original_cm->flow_cookie;
dst_flow_cookie = reply_cm->flow_cookie;
#endif
spin_unlock_bh(&si->lock);
bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
@ -2951,6 +3104,9 @@ static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si,
"dest_port=\"%u\" dest_port_xlate=\"%u\" "
"dest_priority=\"%u\" dest_dscp=\"%u\" "
"dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
#ifdef CONFIG_NF_FLOW_COOKIE
"src_flow_cookie=\"%d\" dst_flow_cookie=\"%d\" "
#endif
"last_sync=\"%llu\" "
"mark=\"%08x\" />\n",
protocol,
@ -2964,6 +3120,9 @@ static bool sfe_ipv4_debug_dev_read_connections_connection(struct sfe_ipv4 *si,
ntohs(dest_port), ntohs(dest_port_xlate),
dest_priority, dest_dscp,
dest_rx_packets, dest_rx_bytes,
#ifdef CONFIG_NF_FLOW_COOKIE
src_flow_cookie, dst_flow_cookie,
#endif
last_sync_jiffies, mark);
if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
@ -3270,6 +3429,74 @@ static struct file_operations sfe_ipv4_debug_dev_fops = {
.release = sfe_ipv4_debug_dev_release
};
#ifdef CONFIG_NF_FLOW_COOKIE
/*
* sfe_register_flow_cookie_cb
* register a callback with SFE so that SFE can use it to configure the flow cookie for a flow
*
* A hardware driver which supports flow cookies should register a callback function with SFE.
* SFE then uses this function to configure the flow cookie for each flow.
* return: 0, success; !=0, fail
*/
int sfe_register_flow_cookie_cb(flow_cookie_set_func_t cb)
{
struct sfe_ipv4 *si = &__si;
BUG_ON(!cb);
if (si->flow_cookie_set_func) {
return -1;
}
rcu_assign_pointer(si->flow_cookie_set_func, cb);
return 0;
}
/*
* sfe_unregister_flow_cookie_cb
* unregister the function used to configure the flow cookie for a flow
*
* return: 0, success; !=0, fail
*/
int sfe_unregister_flow_cookie_cb(flow_cookie_set_func_t cb)
{
struct sfe_ipv4 *si = &__si;
RCU_INIT_POINTER(si->flow_cookie_set_func, NULL);
return 0;
}
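
For reference, a hypothetical driver-side callback; the argument types are inferred from the call sites above (the real flow_cookie_set_func_t typedef is not part of this diff), and 0 must be returned on success:

static int example_flow_cookie_set(u32 protocol,
				   __be32 src_ip, __be16 src_port,
				   __be32 dest_ip, __be16 dest_port,
				   u16 flow_cookie)
{
	/* Program the NIC flow table so matching packets carry flow_cookie;
	 * a flow_cookie of 0 clears the hardware entry. */
	return 0;
}

/* Registered once from the driver's probe path:
 *	sfe_register_flow_cookie_cb(example_flow_cookie_set);
 */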
/*
* sfe_ipv4_get_flow_cookie()
*/
static ssize_t sfe_ipv4_get_flow_cookie(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct sfe_ipv4 *si = &__si;
return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", si->flow_cookie_enable);
}
/*
* sfe_ipv4_set_flow_cookie()
*/
static ssize_t sfe_ipv4_set_flow_cookie(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t size)
{
struct sfe_ipv4 *si = &__si;
if (kstrtoint(buf, 0, &si->flow_cookie_enable))
return -EINVAL;
return size;
}
/*
* sysfs attributes.
*/
static const struct device_attribute sfe_ipv4_flow_cookie_attr =
__ATTR(flow_cookie_enable, S_IWUSR | S_IRUGO, sfe_ipv4_get_flow_cookie, sfe_ipv4_set_flow_cookie);
#endif /*CONFIG_NF_FLOW_COOKIE*/
/*
* sfe_ipv4_init()
*/
@ -3298,13 +3525,21 @@ static int __init sfe_ipv4_init(void)
goto exit2;
}
#ifdef CONFIG_NF_FLOW_COOKIE
result = sysfs_create_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr);
if (result) {
DEBUG_ERROR("failed to register flow cookie enable file: %d\n", result);
goto exit3;
}
#endif /* CONFIG_NF_FLOW_COOKIE */
/*
* Register our debug char device.
*/
result = register_chrdev(0, "sfe_ipv4", &sfe_ipv4_debug_dev_fops);
if (result < 0) {
DEBUG_ERROR("Failed to register chrdev: %d\n", result);
goto exit3;
goto exit4;
}
si->debug_dev = result;
@ -3312,14 +3547,24 @@ static int __init sfe_ipv4_init(void)
/*
* Create a timer to handle periodic statistics.
*/
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0))
timer_setup(&si->timer, sfe_ipv4_periodic_sync, 0);
si->timer.cust_data = (unsigned long)si;
#else
setup_timer(&si->timer, sfe_ipv4_periodic_sync, (unsigned long)si);
#endif /*KERNEL_VERSION(4, 15, 0)*/
mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
spin_lock_init(&si->lock);
return 0;
exit4:
#ifdef CONFIG_NF_FLOW_COOKIE
sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr);
exit3:
#endif /* CONFIG_NF_FLOW_COOKIE */
sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
exit2:
@ -3347,6 +3592,9 @@ static void __exit sfe_ipv4_exit(void)
unregister_chrdev(si->debug_dev, "sfe_ipv4");
#ifdef CONFIG_NF_FLOW_COOKIE
sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_flow_cookie_attr.attr);
#endif /* CONFIG_NF_FLOW_COOKIE */
sysfs_remove_file(si->sys_sfe_ipv4, &sfe_ipv4_debug_dev_attr.attr);
kobject_put(si->sys_sfe_ipv4);
@ -3363,6 +3611,10 @@ EXPORT_SYMBOL(sfe_ipv4_destroy_all_rules_for_dev);
EXPORT_SYMBOL(sfe_ipv4_register_sync_rule_callback);
EXPORT_SYMBOL(sfe_ipv4_mark_rule);
EXPORT_SYMBOL(sfe_ipv4_update_rule);
#ifdef CONFIG_NF_FLOW_COOKIE
EXPORT_SYMBOL(sfe_register_flow_cookie_cb);
EXPORT_SYMBOL(sfe_unregister_flow_cookie_cb);
#endif
MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv4 edition");
MODULE_LICENSE("Dual BSD/GPL");

View File

@ -2,7 +2,7 @@
* sfe_ipv6.c
* Shortcut forwarding engine - IPv6 support.
*
* Copyright (c) 2015-2016 The Linux Foundation. All rights reserved.
* Copyright (c) 2015-2016, 2019, The Linux Foundation. All rights reserved.
* Permission to use, copy, modify, and/or distribute this software for
* any purpose with or without fee is hereby granted, provided that the
* above copyright notice and this permission notice appear in all copies.
@ -21,6 +21,7 @@
#include <linux/icmp.h>
#include <net/tcp.h>
#include <linux/etherdevice.h>
#include <linux/version.h>
#include "sfe.h"
#include "sfe_cm.h"
@ -37,7 +38,7 @@
*/
#define SFE_IPV6_UNALIGNED_IP_HEADER 1
#if SFE_IPV6_UNALIGNED_IP_HEADER
#define SFE_IPV6_UNALIGNED_STRUCT __attribute__((aligned(4)))
#define SFE_IPV6_UNALIGNED_STRUCT __attribute__((packed))
#else
#define SFE_IPV6_UNALIGNED_STRUCT
#endif
@ -231,6 +232,9 @@ struct sfe_ipv6_connection_match {
* Control the operations of the match.
*/
u32 flags; /* Bit flags */
#ifdef CONFIG_NF_FLOW_COOKIE
u32 flow_cookie; /* used flow cookie, for debug */
#endif
#ifdef CONFIG_XFRM
u32 flow_accel; /* The flow accelerated or not */
#endif
@ -324,6 +328,16 @@ struct sfe_ipv6_connection {
#define SFE_IPV6_CONNECTION_HASH_SIZE (1 << SFE_IPV6_CONNECTION_HASH_SHIFT)
#define SFE_IPV6_CONNECTION_HASH_MASK (SFE_IPV6_CONNECTION_HASH_SIZE - 1)
#ifdef CONFIG_NF_FLOW_COOKIE
#define SFE_FLOW_COOKIE_SIZE 2048
#define SFE_FLOW_COOKIE_MASK 0x7ff
struct sfe_ipv6_flow_cookie_entry {
struct sfe_ipv6_connection_match *match;
unsigned long last_clean_time;
};
#endif
enum sfe_ipv6_exception_events {
SFE_IPV6_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE,
SFE_IPV6_EXCEPTION_EVENT_UDP_NO_CONNECTION,
@ -362,6 +376,7 @@ enum sfe_ipv6_exception_events {
SFE_IPV6_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE,
SFE_IPV6_EXCEPTION_EVENT_UNHANDLED_PROTOCOL,
SFE_IPV6_EXCEPTION_EVENT_FLOW_COOKIE_ADD_FAIL,
SFE_IPV6_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR,
SFE_IPV6_EXCEPTION_EVENT_LAST
};
@ -402,7 +417,8 @@ static char *sfe_ipv6_exception_events_string[SFE_IPV6_EXCEPTION_EVENT_LAST] = {
"DATAGRAM_INCOMPLETE",
"IP_OPTIONS_INCOMPLETE",
"UNHANDLED_PROTOCOL",
"FLOW_COOKIE_ADD_FAIL"
"FLOW_COOKIE_ADD_FAIL",
"CLONED_SKB_UNSHARE_ERROR"
};
/*
@ -426,6 +442,14 @@ struct sfe_ipv6 {
/* Connection hash table */
struct sfe_ipv6_connection_match *conn_match_hash[SFE_IPV6_CONNECTION_HASH_SIZE];
/* Connection match hash table */
#ifdef CONFIG_NF_FLOW_COOKIE
struct sfe_ipv6_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE];
/* flow cookie table */
sfe_ipv6_flow_cookie_set_func_t flow_cookie_set_func;
/* function used to configure flow cookie in hardware */
int flow_cookie_enable;
/* Enable/disable flow cookie at runtime */
#endif
/*
* Stats recorded in a sync period. These stats will be added to
@ -809,6 +833,38 @@ static inline void sfe_ipv6_insert_connection_match(struct sfe_ipv6 *si,
cm->next = prev_head;
*hash_head = cm;
#ifdef CONFIG_NF_FLOW_COOKIE
if (!si->flow_cookie_enable || !(cm->flags & (SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC | SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST)))
return;
/*
* Configure the hardware to put a flow cookie in packets of this flow,
* so that we can accelerate the lookup when such a packet is received.
*/
for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
struct sfe_ipv6_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];
if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) {
sfe_ipv6_flow_cookie_set_func_t func;
rcu_read_lock();
func = rcu_dereference(si->flow_cookie_set_func);
if (func) {
if (!func(cm->match_protocol, cm->match_src_ip->addr, cm->match_src_port,
cm->match_dest_ip->addr, cm->match_dest_port, conn_match_idx)) {
entry->match = cm;
cm->flow_cookie = conn_match_idx;
} else {
si->exception_events[SFE_IPV6_EXCEPTION_EVENT_FLOW_COOKIE_ADD_FAIL]++;
}
}
rcu_read_unlock();
break;
}
}
#endif
}
/*
@ -819,6 +875,36 @@ static inline void sfe_ipv6_insert_connection_match(struct sfe_ipv6 *si,
*/
static inline void sfe_ipv6_remove_connection_match(struct sfe_ipv6 *si, struct sfe_ipv6_connection_match *cm)
{
#ifdef CONFIG_NF_FLOW_COOKIE
if (si->flow_cookie_enable) {
/*
* Tell the hardware that we no longer need a flow cookie in packets of this flow
*/
unsigned int conn_match_idx;
for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
struct sfe_ipv6_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];
if (cm == entry->match) {
sfe_ipv6_flow_cookie_set_func_t func;
rcu_read_lock();
func = rcu_dereference(si->flow_cookie_set_func);
if (func) {
func(cm->match_protocol, cm->match_src_ip->addr, cm->match_src_port,
cm->match_dest_ip->addr, cm->match_dest_port, 0);
}
rcu_read_unlock();
cm->flow_cookie = 0;
entry->match = NULL;
entry->last_clean_time = jiffies;
break;
}
}
}
#endif
/*
* Unlink the connection match entry from the hash.
*/
@ -1196,7 +1282,14 @@ static int sfe_ipv6_recv_udp(struct sfe_ipv6 *si, struct sk_buff *skb, struct ne
/*
* Look for a connection match.
*/
#ifdef CONFIG_NF_FLOW_COOKIE
cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
if (unlikely(!cm)) {
cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
}
#else
cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
#endif
if (unlikely(!cm)) {
si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
si->packets_not_forwarded++;
@ -1270,6 +1363,30 @@ static int sfe_ipv6_recv_udp(struct sfe_ipv6 *si, struct sk_buff *skb, struct ne
* From this point on we're good to modify the packet.
*/
/*
* Check whether the skb was cloned and, if so, unshare it, because
* the data area is going to be written in this path and we don't want
* to change the cloned skb's data.
*/
if (unlikely(skb_cloned(skb))) {
DEBUG_TRACE("%p: skb is a cloned skb\n", skb);
skb = skb_unshare(skb, GFP_ATOMIC);
if (!skb) {
DEBUG_WARN("Failed to unshare the cloned skb\n");
si->exception_events[SFE_IPV6_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
return 0;
}
/*
* Update the iph and udph pointers with the unshared skb's data area.
*/
iph = (struct sfe_ipv6_ip_hdr *)skb->data;
udph = (struct sfe_ipv6_udp_hdr *)(skb->data + ihl);
}
/*
* Update DSCP
*/
@ -1535,7 +1652,14 @@ static int sfe_ipv6_recv_tcp(struct sfe_ipv6 *si, struct sk_buff *skb, struct ne
/*
* Look for a connection match.
*/
#ifdef CONFIG_NF_FLOW_COOKIE
cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
if (unlikely(!cm)) {
cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
}
#else
cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
#endif
if (unlikely(!cm)) {
/*
* We didn't get a connection but as TCP is connection-oriented that
@ -1799,6 +1923,30 @@ static int sfe_ipv6_recv_tcp(struct sfe_ipv6 *si, struct sk_buff *skb, struct ne
* From this point on we're good to modify the packet.
*/
/*
* Check whether the skb was cloned and, if so, unshare it, because
* the data area is going to be written in this path and we don't want
* to change the cloned skb's data.
*/
if (unlikely(skb_cloned(skb))) {
DEBUG_TRACE("%p: skb is a cloned skb\n", skb);
skb = skb_unshare(skb, GFP_ATOMIC);
if (!skb) {
DEBUG_WARN("Failed to unshare the cloned skb\n");
si->exception_events[SFE_IPV6_EXCEPTION_EVENT_CLONED_SKB_UNSHARE_ERROR]++;
si->packets_not_forwarded++;
spin_unlock_bh(&si->lock);
return 0;
}
/*
* Update the iph and tcph pointers with the unshared skb's data area.
*/
iph = (struct sfe_ipv6_ip_hdr *)skb->data;
tcph = (struct sfe_ipv6_tcp_hdr *)(skb->data + ihl);
}
/*
* Update DSCP
*/
@ -2444,6 +2592,9 @@ int sfe_ipv6_create_rule(struct sfe_connection_create *sic)
original_cm->dscp = sic->src_dscp << SFE_IPV6_DSCP_SHIFT;
original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK;
}
#ifdef CONFIG_NF_FLOW_COOKIE
original_cm->flow_cookie = 0;
#endif
#ifdef CONFIG_XFRM
original_cm->flow_accel = sic->original_accel;
#endif
@ -2500,6 +2651,9 @@ int sfe_ipv6_create_rule(struct sfe_connection_create *sic)
reply_cm->dscp = sic->dest_dscp << SFE_IPV6_DSCP_SHIFT;
reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK;
}
#ifdef CONFIG_NF_FLOW_COOKIE
reply_cm->flow_cookie = 0;
#endif
#ifdef CONFIG_XFRM
reply_cm->flow_accel = sic->reply_accel;
#endif
@ -2712,9 +2866,17 @@ another_round:
/*
* sfe_ipv6_periodic_sync()
*/
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0))
static void sfe_ipv6_periodic_sync(struct timer_list *arg)
#else
static void sfe_ipv6_periodic_sync(unsigned long arg)
#endif /*KERNEL_VERSION(4, 15, 0)*/
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0))
struct sfe_ipv6 *si = (struct sfe_ipv6 *)arg->cust_data;
#else
struct sfe_ipv6 *si = (struct sfe_ipv6 *)arg;
#endif /*KERNEL_VERSION(4, 15, 0)*/
u64 now_jiffies;
int quota;
sfe_sync_rule_callback_t sync_rule_callback;
@ -2880,6 +3042,9 @@ static bool sfe_ipv6_debug_dev_read_connections_connection(struct sfe_ipv6 *si,
u64 dest_rx_bytes;
u64 last_sync_jiffies;
u32 mark, src_priority, dest_priority, src_dscp, dest_dscp;
#ifdef CONFIG_NF_FLOW_COOKIE
int src_flow_cookie, dst_flow_cookie;
#endif
spin_lock_bh(&si->lock);
@ -2927,7 +3092,10 @@ static bool sfe_ipv6_debug_dev_read_connections_connection(struct sfe_ipv6 *si,
dest_rx_bytes = reply_cm->rx_byte_count64;
last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
mark = c->mark;
#ifdef CONFIG_NF_FLOW_COOKIE
src_flow_cookie = original_cm->flow_cookie;
dst_flow_cookie = reply_cm->flow_cookie;
#endif
spin_unlock_bh(&si->lock);
bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
@ -2942,6 +3110,9 @@ static bool sfe_ipv6_debug_dev_read_connections_connection(struct sfe_ipv6 *si,
"dest_port=\"%u\" dest_port_xlate=\"%u\" "
"dest_priority=\"%u\" dest_dscp=\"%u\" "
"dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
#ifdef CONFIG_NF_FLOW_COOKIE
"src_flow_cookie=\"%d\" dst_flow_cookie=\"%d\" "
#endif
"last_sync=\"%llu\" "
"mark=\"%08x\" />\n",
protocol,
@ -2955,6 +3126,9 @@ static bool sfe_ipv6_debug_dev_read_connections_connection(struct sfe_ipv6 *si,
ntohs(dest_port), ntohs(dest_port_xlate),
dest_priority, dest_dscp,
dest_rx_packets, dest_rx_bytes,
#ifdef CONFIG_NF_FLOW_COOKIE
src_flow_cookie, dst_flow_cookie,
#endif
last_sync_jiffies, mark);
if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
@ -3263,6 +3437,74 @@ static struct file_operations sfe_ipv6_debug_dev_fops = {
.release = sfe_ipv6_debug_dev_release
};
#ifdef CONFIG_NF_FLOW_COOKIE
/*
* sfe_ipv6_register_flow_cookie_cb
* register a callback with SFE so that SFE can use it to configure the flow cookie for a flow
*
* A hardware driver which supports flow cookies should register a callback function with SFE.
* SFE then uses this function to configure the flow cookie for each flow.
* return: 0, success; !=0, fail
*/
int sfe_ipv6_register_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb)
{
struct sfe_ipv6 *si = &__si6;
BUG_ON(!cb);
if (si->flow_cookie_set_func) {
return -1;
}
rcu_assign_pointer(si->flow_cookie_set_func, cb);
return 0;
}
/*
* sfe_ipv6_unregister_flow_cookie_cb
* unregister the function used to configure the flow cookie for a flow
*
* return: 0, success; !=0, fail
*/
int sfe_ipv6_unregister_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb)
{
struct sfe_ipv6 *si = &__si6;
RCU_INIT_POINTER(si->flow_cookie_set_func, NULL);
return 0;
}
/*
* sfe_ipv6_get_flow_cookie()
*/
static ssize_t sfe_ipv6_get_flow_cookie(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct sfe_ipv6 *si = &__si6;
return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", si->flow_cookie_enable);
}
/*
* sfe_ipv6_set_flow_cookie()
*/
static ssize_t sfe_ipv6_set_flow_cookie(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t size)
{
struct sfe_ipv6 *si = &__si6;
if (kstrtoint(buf, 0, &si->flow_cookie_enable))
return -EINVAL;
return size;
}
/*
* sysfs attributes.
*/
static const struct device_attribute sfe_ipv6_flow_cookie_attr =
__ATTR(flow_cookie_enable, S_IWUSR | S_IRUGO, sfe_ipv6_get_flow_cookie, sfe_ipv6_set_flow_cookie);
#endif /*CONFIG_NF_FLOW_COOKIE*/
/*
* sfe_ipv6_init()
*/
@ -3291,13 +3533,21 @@ static int __init sfe_ipv6_init(void)
goto exit2;
}
#ifdef CONFIG_NF_FLOW_COOKIE
result = sysfs_create_file(si->sys_sfe_ipv6, &sfe_ipv6_flow_cookie_attr.attr);
if (result) {
DEBUG_ERROR("failed to register flow cookie enable file: %d\n", result);
goto exit3;
}
#endif /* CONFIG_NF_FLOW_COOKIE */
/*
* Register our debug char device.
*/
result = register_chrdev(0, "sfe_ipv6", &sfe_ipv6_debug_dev_fops);
if (result < 0) {
DEBUG_ERROR("Failed to register chrdev: %d\n", result);
goto exit3;
goto exit4;
}
si->debug_dev = result;
@ -3305,14 +3555,24 @@ static int __init sfe_ipv6_init(void)
/*
* Create a timer to handle periodic statistics.
*/
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0))
timer_setup(&si->timer, sfe_ipv6_periodic_sync, 0);
si->timer.cust_data = (unsigned long)si;
#else
setup_timer(&si->timer, sfe_ipv6_periodic_sync, (unsigned long)si);
#endif /*KERNEL_VERSION(4, 15, 0)*/
mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
spin_lock_init(&si->lock);
return 0;
exit4:
#ifdef CONFIG_NF_FLOW_COOKIE
sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_flow_cookie_attr.attr);
exit3:
#endif /* CONFIG_NF_FLOW_COOKIE */
sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_debug_dev_attr.attr);
exit2:
@ -3340,6 +3600,9 @@ static void __exit sfe_ipv6_exit(void)
unregister_chrdev(si->debug_dev, "sfe_ipv6");
#ifdef CONFIG_NF_FLOW_COOKIE
sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_flow_cookie_attr.attr);
#endif /* CONFIG_NF_FLOW_COOKIE */
sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_debug_dev_attr.attr);
kobject_put(si->sys_sfe_ipv6);
@ -3355,6 +3618,10 @@ EXPORT_SYMBOL(sfe_ipv6_destroy_all_rules_for_dev);
EXPORT_SYMBOL(sfe_ipv6_register_sync_rule_callback);
EXPORT_SYMBOL(sfe_ipv6_mark_rule);
EXPORT_SYMBOL(sfe_ipv6_update_rule);
#ifdef CONFIG_NF_FLOW_COOKIE
EXPORT_SYMBOL(sfe_ipv6_register_flow_cookie_cb);
EXPORT_SYMBOL(sfe_ipv6_unregister_flow_cookie_cb);
#endif
MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv6 support");
MODULE_LICENSE("Dual BSD/GPL");

View File

@ -4354,6 +4354,7 @@ CONFIG_SERIAL_EARLYCON=y
# CONFIG_SG_POOL is not set
# CONFIG_SG_SPLIT is not set
CONFIG_SHMEM=y
# CONFIG_SHORTCUT_FE is not set
# CONFIG_SH_ETH is not set
# CONFIG_SH_TIMER_CMT is not set
# CONFIG_SH_TIMER_MTU2 is not set

View File

@ -0,0 +1,236 @@
--- a/include/linux/skbuff.h 2019-01-16 20:16:08.325745306 +0800
+++ b/include/linux/skbuff.h 2019-01-16 20:31:47.288028493 +0800
@@ -783,6 +783,9 @@ struct sk_buff {
__u8 tc_from_ingress:1;
#endif
__u8 gro_skip:1;
+#ifdef CONFIG_SHORTCUT_FE
+ __u8 fast_forwarded:1;
+#endif
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
--- a/include/linux/if_bridge.h 2019-01-16 20:51:47.871445535 +0800
+++ b/include/linux/if_bridge.h 2019-01-16 20:52:26.220269649 +0800
@@ -54,6 +54,8 @@ struct br_ip_list {
#define BR_DEFAULT_AGEING_TIME (300 * HZ)
extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
+extern void br_dev_update_stats(struct net_device *dev,
+ struct rtnl_link_stats64 *nlstats);
typedef int br_should_route_hook_t(struct sk_buff *skb);
extern br_should_route_hook_t __rcu *br_should_route_hook;
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -20,6 +20,9 @@ struct timer_list {
void (*function)(unsigned long);
unsigned long data;
u32 flags;
+#ifdef CONFIG_SHORTCUT_FE
+ unsigned long cust_data;
+#endif
#ifdef CONFIG_LOCKDEP
struct lockdep_map lockdep_map;
--- a/net/Kconfig 2019-01-16 20:36:30.266465286 +0800
+++ b/net/Kconfig 2019-01-16 20:36:41.980609067 +0800
@@ -463,3 +463,6 @@ config HAVE_CBPF_JIT
# Extended BPF JIT (eBPF)
config HAVE_EBPF_JIT
bool
+
+config SHORTCUT_FE
+ bool "Enables kernel network stack path for Shortcut Forwarding Engine"
--- a/net/core/dev.c 2019-01-16 20:38:37.274933833 +0800
+++ b/net/core/dev.c 2019-01-16 20:44:07.773594898 +0800
@@ -3001,8 +3001,17 @@ static int xmit_one(struct sk_buff *skb,
unsigned int len;
int rc;
+#ifdef CONFIG_SHORTCUT_FE
+ /* If this skb has been fast forwarded then we don't want it to
+ * go to any taps (by definition we're trying to bypass them).
+ */
+ if (!skb->fast_forwarded) {
+#endif
if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
dev_queue_xmit_nit(skb, dev);
+#ifdef CONFIG_SHORTCUT_FE
+ }
+#endif
#ifdef CONFIG_ETHERNET_PACKET_MANGLE
if (!dev->eth_mangle_tx ||
@@ -4315,6 +4324,11 @@ void netdev_rx_handler_unregister(struct
}
EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
+#ifdef CONFIG_SHORTCUT_FE
+int (*athrs_fast_nat_recv)(struct sk_buff *skb) __rcu __read_mostly;
+EXPORT_SYMBOL_GPL(athrs_fast_nat_recv);
+#endif
+
/*
* Limit the use of PFMEMALLOC reserves to those protocols that implement
* the special handling of PFMEMALLOC skbs.
@@ -4362,6 +4376,9 @@ static int __netif_receive_skb_core(stru
bool deliver_exact = false;
int ret = NET_RX_DROP;
__be16 type;
+#ifdef CONFIG_SHORTCUT_FE
+ int (*fast_recv)(struct sk_buff *skb);
+#endif
net_timestamp_check(!netdev_tstamp_prequeue, skb);
@@ -4388,6 +4405,16 @@ another_round:
goto out;
}
+#ifdef CONFIG_SHORTCUT_FE
+ fast_recv = rcu_dereference(athrs_fast_nat_recv);
+ if (fast_recv) {
+ if (fast_recv(skb)) {
+ ret = NET_RX_SUCCESS;
+ goto out;
+ }
+ }
+#endif
+
if (skb_skip_tc_classify(skb))
goto skip_classify;
--- a/net/netfilter/nf_conntrack_proto_tcp.c 2019-01-16 20:47:40.886993297 +0800
+++ b/net/netfilter/nf_conntrack_proto_tcp.c 2019-01-16 20:48:57.700570104 +0800
@@ -35,11 +35,17 @@
/* Do not check the TCP window for incoming packets */
static int nf_ct_tcp_no_window_check __read_mostly = 1;
+#ifdef CONFIG_SHORTCUT_FE
+EXPORT_SYMBOL_GPL(nf_ct_tcp_no_window_check);
+#endif
/* "Be conservative in what you do,
be liberal in what you accept from others."
If it's non-zero, we mark only out of window RST segments as INVALID. */
static int nf_ct_tcp_be_liberal __read_mostly = 0;
+#ifdef CONFIG_SHORTCUT_FE
+EXPORT_SYMBOL_GPL(nf_ct_tcp_be_liberal);
+#endif
/* If it is set to zero, we disable picking up already established
connections. */
--- a/net/bridge/br_if.c 2019-01-16 20:54:51.919367044 +0800
+++ b/net/bridge/br_if.c 2019-01-16 20:55:53.812401263 +0800
@@ -653,3 +653,26 @@ void br_port_flags_change(struct net_bri
if (mask & BR_AUTO_MASK)
nbp_update_port_count(br);
}
+
+/* Update bridge statistics for bridge packets processed by offload engines */
+void br_dev_update_stats(struct net_device *dev,
+ struct rtnl_link_stats64 *nlstats)
+{
+ struct net_bridge *br;
+ struct pcpu_sw_netstats *stats;
+
+ /* Is this a bridge? */
+ if (!(dev->priv_flags & IFF_EBRIDGE))
+ return;
+
+ br = netdev_priv(dev);
+ stats = this_cpu_ptr(br->stats);
+
+ u64_stats_update_begin(&stats->syncp);
+ stats->rx_packets += nlstats->rx_packets;
+ stats->rx_bytes += nlstats->rx_bytes;
+ stats->tx_packets += nlstats->tx_packets;
+ stats->tx_bytes += nlstats->tx_bytes;
+ u64_stats_update_end(&stats->syncp);
+}
+EXPORT_SYMBOL_GPL(br_dev_update_stats);
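
A hypothetical caller, to show the intent: an offload engine that forwards a packet on behalf of a bridge credits the bridge device so its interface counters keep moving:

static void example_credit_bridge(struct net_device *br_dev, unsigned int len)
{
	struct rtnl_link_stats64 nlstats = {
		.rx_packets = 1,
		.rx_bytes = len,
		.tx_packets = 1,
		.tx_bytes = len,
	};

	br_dev_update_stats(br_dev, &nlstats);	/* no-op unless br_dev is a bridge */
}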
--- a/net/netfilter/Kconfig 2019-01-16 21:07:34.543460920 +0800
+++ b/net/netfilter/Kconfig 2019-01-16 21:08:14.739465937 +0800
@@ -146,6 +146,14 @@ config NF_CONNTRACK_TIMEOUT
If unsure, say `N'.
+config NF_CONNTRACK_CHAIN_EVENTS
+ bool "Register multiple callbacks to ct events"
+ depends on NF_CONNTRACK_EVENTS
+ help
+ Support multiple registrations.
+
+ If unsure, say `N'.
+
config NF_CONNTRACK_TIMESTAMP
bool 'Connection tracking timestamping'
depends on NETFILTER_ADVANCED
--- a/net/netfilter/nf_conntrack_ecache.c 2019-01-16 21:12:22.183462975 +0800
+++ b/net/netfilter/nf_conntrack_ecache.c 2019-01-16 21:26:10.379462031 +0800
@@ -122,13 +125,17 @@ int nf_conntrack_eventmask_report(unsign
{
int ret = 0;
struct net *net = nf_ct_net(ct);
+#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
struct nf_ct_event_notifier *notify;
+#endif
struct nf_conntrack_ecache *e;
rcu_read_lock();
+#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
if (!notify)
goto out_unlock;
+#endif
e = nf_ct_ecache_find(ct);
if (!e)
@@ -146,7 +153,12 @@ int nf_conntrack_eventmask_report(unsign
if (!((eventmask | missed) & e->ctmask))
goto out_unlock;
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+ ret = atomic_notifier_call_chain(&net->ct.nf_conntrack_chain,
+ eventmask | missed, &item);
+#else
ret = notify->fcn(eventmask | missed, &item);
+#endif
if (unlikely(ret < 0 || missed)) {
spin_lock_bh(&ct->lock);
if (ret < 0) {
@@ -179,15 +191,19 @@ void nf_ct_deliver_cached_events(struct
{
struct net *net = nf_ct_net(ct);
unsigned long events, missed;
+#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
struct nf_ct_event_notifier *notify;
+#endif
struct nf_conntrack_ecache *e;
struct nf_ct_event item;
int ret;
rcu_read_lock();
+#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
if (notify == NULL)
goto out_unlock;
+#endif
e = nf_ct_ecache_find(ct);
if (e == NULL)
@@ -210,7 +226,13 @@ void nf_ct_deliver_cached_events(struct
item.portid = 0;
item.report = 0;
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
+ ret = atomic_notifier_call_chain(&net->ct.nf_conntrack_chain,
+ events | missed,
+ &item);
+#else
ret = notify->fcn(events | missed, &item);
+#endif
if (likely(ret == 0 && !missed))
goto out_unlock;