mirror of
https://github.com/coolsnowwolf/lede.git
synced 2025-04-18 17:33:31 +00:00
Merge pull request #4905 from AmadeusGhost/modified
kernel: revert some config
This commit is contained in:
commit
129f29e24a
@ -2,6 +2,6 @@
|
||||
# /etc/sysctl.conf can be used to customize sysctl settings
|
||||
|
||||
# disable bridge firewalling by default
|
||||
net.bridge.bridge-nf-call-arptables=0
|
||||
net.bridge.bridge-nf-call-ip6tables=0
|
||||
net.bridge.bridge-nf-call-iptables=0
|
||||
net.bridge.bridge-nf-call-arptables=1
|
||||
net.bridge.bridge-nf-call-ip6tables=1
|
||||
net.bridge.bridge-nf-call-iptables=1
|
||||
|
@ -3,7 +3,8 @@
|
||||
|
||||
net.netfilter.nf_conntrack_acct=1
|
||||
net.netfilter.nf_conntrack_checksum=0
|
||||
net.netfilter.nf_conntrack_max=16384
|
||||
net.netfilter.nf_conntrack_max=65535
|
||||
net.netfilter.nf_conntrack_tcp_timeout_established=7440
|
||||
net.netfilter.nf_conntrack_udp_timeout=60
|
||||
net.netfilter.nf_conntrack_udp_timeout_stream=180
|
||||
net.netfilter.nf_conntrack_helper=1
|
||||
|
@ -281,7 +281,7 @@ endef
|
||||
|
||||
KernelPackage/crypto-ghash/imx6=$(KernelPackage/crypto-ghash/arm-ce)
|
||||
KernelPackage/crypto-ghash/ipq40xx=$(KernelPackage/crypto-ghash/arm-ce)
|
||||
KernelPackage/crypto-ghash/mvebu/cortexa9=$(KernelPackage/crypto-ghash/arm-ce)
|
||||
KernelPackage/crypto-ghash/mvebu=$(KernelPackage/crypto-ghash/arm-ce)
|
||||
|
||||
$(eval $(call KernelPackage,crypto-ghash))
|
||||
|
||||
|
@ -0,0 +1,217 @@
|
||||
From 712639929912c5eefb09facccb48d55b3f72c9f8 Mon Sep 17 00:00:00 2001
|
||||
From: George Amanakis <gamanakis@gmail.com>
|
||||
Date: Fri, 1 Mar 2019 16:04:05 +0100
|
||||
Subject: [PATCH] sch_cake: Make the dual modes fairer
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
CAKE host fairness does not work well with TCP flows in dual-srchost and
|
||||
dual-dsthost setup. The reason is that ACKs generated by TCP flows are
|
||||
classified as sparse flows, and affect flow isolation from other hosts. Fix
|
||||
this by calculating host_load based only on the bulk flows a host
|
||||
generates. In a hash collision the host_bulk_flow_count values must be
|
||||
decremented on the old hosts and incremented on the new ones *if* the queue
|
||||
is in the bulk set.
|
||||
|
||||
Reported-by: Pete Heist <peteheist@gmail.com>
|
||||
Signed-off-by: George Amanakis <gamanakis@gmail.com>
|
||||
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
---
|
||||
net/sched/sch_cake.c | 92 ++++++++++++++++++++++++++++++--------------
|
||||
1 file changed, 63 insertions(+), 29 deletions(-)
|
||||
|
||||
--- a/net/sched/sch_cake.c
|
||||
+++ b/net/sched/sch_cake.c
|
||||
@@ -138,8 +138,8 @@ struct cake_flow {
|
||||
struct cake_host {
|
||||
u32 srchost_tag;
|
||||
u32 dsthost_tag;
|
||||
- u16 srchost_refcnt;
|
||||
- u16 dsthost_refcnt;
|
||||
+ u16 srchost_bulk_flow_count;
|
||||
+ u16 dsthost_bulk_flow_count;
|
||||
};
|
||||
|
||||
struct cake_heap_entry {
|
||||
@@ -746,8 +746,10 @@ skip_hash:
|
||||
* queue, accept the collision, update the host tags.
|
||||
*/
|
||||
q->way_collisions++;
|
||||
- q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--;
|
||||
- q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--;
|
||||
+ if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
|
||||
+ q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
|
||||
+ q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
|
||||
+ }
|
||||
allocate_src = cake_dsrc(flow_mode);
|
||||
allocate_dst = cake_ddst(flow_mode);
|
||||
found:
|
||||
@@ -767,13 +769,14 @@ found:
|
||||
}
|
||||
for (i = 0; i < CAKE_SET_WAYS;
|
||||
i++, k = (k + 1) % CAKE_SET_WAYS) {
|
||||
- if (!q->hosts[outer_hash + k].srchost_refcnt)
|
||||
+ if (!q->hosts[outer_hash + k].srchost_bulk_flow_count)
|
||||
break;
|
||||
}
|
||||
q->hosts[outer_hash + k].srchost_tag = srchost_hash;
|
||||
found_src:
|
||||
srchost_idx = outer_hash + k;
|
||||
- q->hosts[srchost_idx].srchost_refcnt++;
|
||||
+ if (q->flows[reduced_hash].set == CAKE_SET_BULK)
|
||||
+ q->hosts[srchost_idx].srchost_bulk_flow_count++;
|
||||
q->flows[reduced_hash].srchost = srchost_idx;
|
||||
}
|
||||
|
||||
@@ -789,13 +792,14 @@ found_src:
|
||||
}
|
||||
for (i = 0; i < CAKE_SET_WAYS;
|
||||
i++, k = (k + 1) % CAKE_SET_WAYS) {
|
||||
- if (!q->hosts[outer_hash + k].dsthost_refcnt)
|
||||
+ if (!q->hosts[outer_hash + k].dsthost_bulk_flow_count)
|
||||
break;
|
||||
}
|
||||
q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
|
||||
found_dst:
|
||||
dsthost_idx = outer_hash + k;
|
||||
- q->hosts[dsthost_idx].dsthost_refcnt++;
|
||||
+ if (q->flows[reduced_hash].set == CAKE_SET_BULK)
|
||||
+ q->hosts[dsthost_idx].dsthost_bulk_flow_count++;
|
||||
q->flows[reduced_hash].dsthost = dsthost_idx;
|
||||
}
|
||||
}
|
||||
@@ -1793,20 +1797,30 @@ static s32 cake_enqueue(struct sk_buff *
|
||||
b->sparse_flow_count++;
|
||||
|
||||
if (cake_dsrc(q->flow_mode))
|
||||
- host_load = max(host_load, srchost->srchost_refcnt);
|
||||
+ host_load = max(host_load, srchost->srchost_bulk_flow_count);
|
||||
|
||||
if (cake_ddst(q->flow_mode))
|
||||
- host_load = max(host_load, dsthost->dsthost_refcnt);
|
||||
+ host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
|
||||
|
||||
flow->deficit = (b->flow_quantum *
|
||||
quantum_div[host_load]) >> 16;
|
||||
} else if (flow->set == CAKE_SET_SPARSE_WAIT) {
|
||||
+ struct cake_host *srchost = &b->hosts[flow->srchost];
|
||||
+ struct cake_host *dsthost = &b->hosts[flow->dsthost];
|
||||
+
|
||||
/* this flow was empty, accounted as a sparse flow, but actually
|
||||
* in the bulk rotation.
|
||||
*/
|
||||
flow->set = CAKE_SET_BULK;
|
||||
b->sparse_flow_count--;
|
||||
b->bulk_flow_count++;
|
||||
+
|
||||
+ if (cake_dsrc(q->flow_mode))
|
||||
+ srchost->srchost_bulk_flow_count++;
|
||||
+
|
||||
+ if (cake_ddst(q->flow_mode))
|
||||
+ dsthost->dsthost_bulk_flow_count++;
|
||||
+
|
||||
}
|
||||
|
||||
if (q->buffer_used > q->buffer_max_used)
|
||||
@@ -1974,23 +1988,8 @@ retry:
|
||||
dsthost = &b->hosts[flow->dsthost];
|
||||
host_load = 1;
|
||||
|
||||
- if (cake_dsrc(q->flow_mode))
|
||||
- host_load = max(host_load, srchost->srchost_refcnt);
|
||||
-
|
||||
- if (cake_ddst(q->flow_mode))
|
||||
- host_load = max(host_load, dsthost->dsthost_refcnt);
|
||||
-
|
||||
- WARN_ON(host_load > CAKE_QUEUES);
|
||||
-
|
||||
/* flow isolation (DRR++) */
|
||||
if (flow->deficit <= 0) {
|
||||
- /* The shifted prandom_u32() is a way to apply dithering to
|
||||
- * avoid accumulating roundoff errors
|
||||
- */
|
||||
- flow->deficit += (b->flow_quantum * quantum_div[host_load] +
|
||||
- (prandom_u32() >> 16)) >> 16;
|
||||
- list_move_tail(&flow->flowchain, &b->old_flows);
|
||||
-
|
||||
/* Keep all flows with deficits out of the sparse and decaying
|
||||
* rotations. No non-empty flow can go into the decaying
|
||||
* rotation, so they can't get deficits
|
||||
@@ -1999,6 +1998,13 @@ retry:
|
||||
if (flow->head) {
|
||||
b->sparse_flow_count--;
|
||||
b->bulk_flow_count++;
|
||||
+
|
||||
+ if (cake_dsrc(q->flow_mode))
|
||||
+ srchost->srchost_bulk_flow_count++;
|
||||
+
|
||||
+ if (cake_ddst(q->flow_mode))
|
||||
+ dsthost->dsthost_bulk_flow_count++;
|
||||
+
|
||||
flow->set = CAKE_SET_BULK;
|
||||
} else {
|
||||
/* we've moved it to the bulk rotation for
|
||||
@@ -2008,6 +2014,22 @@ retry:
|
||||
flow->set = CAKE_SET_SPARSE_WAIT;
|
||||
}
|
||||
}
|
||||
+
|
||||
+ if (cake_dsrc(q->flow_mode))
|
||||
+ host_load = max(host_load, srchost->srchost_bulk_flow_count);
|
||||
+
|
||||
+ if (cake_ddst(q->flow_mode))
|
||||
+ host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
|
||||
+
|
||||
+ WARN_ON(host_load > CAKE_QUEUES);
|
||||
+
|
||||
+ /* The shifted prandom_u32() is a way to apply dithering to
|
||||
+ * avoid accumulating roundoff errors
|
||||
+ */
|
||||
+ flow->deficit += (b->flow_quantum * quantum_div[host_load] +
|
||||
+ (prandom_u32() >> 16)) >> 16;
|
||||
+ list_move_tail(&flow->flowchain, &b->old_flows);
|
||||
+
|
||||
goto retry;
|
||||
}
|
||||
|
||||
@@ -2028,6 +2050,13 @@ retry:
|
||||
&b->decaying_flows);
|
||||
if (flow->set == CAKE_SET_BULK) {
|
||||
b->bulk_flow_count--;
|
||||
+
|
||||
+ if (cake_dsrc(q->flow_mode))
|
||||
+ srchost->srchost_bulk_flow_count--;
|
||||
+
|
||||
+ if (cake_ddst(q->flow_mode))
|
||||
+ dsthost->dsthost_bulk_flow_count--;
|
||||
+
|
||||
b->decaying_flow_count++;
|
||||
} else if (flow->set == CAKE_SET_SPARSE ||
|
||||
flow->set == CAKE_SET_SPARSE_WAIT) {
|
||||
@@ -2041,14 +2070,19 @@ retry:
|
||||
if (flow->set == CAKE_SET_SPARSE ||
|
||||
flow->set == CAKE_SET_SPARSE_WAIT)
|
||||
b->sparse_flow_count--;
|
||||
- else if (flow->set == CAKE_SET_BULK)
|
||||
+ else if (flow->set == CAKE_SET_BULK) {
|
||||
b->bulk_flow_count--;
|
||||
- else
|
||||
+
|
||||
+ if (cake_dsrc(q->flow_mode))
|
||||
+ srchost->srchost_bulk_flow_count--;
|
||||
+
|
||||
+ if (cake_ddst(q->flow_mode))
|
||||
+ dsthost->dsthost_bulk_flow_count--;
|
||||
+
|
||||
+ } else
|
||||
b->decaying_flow_count--;
|
||||
|
||||
flow->set = CAKE_SET_NONE;
|
||||
- srchost->srchost_refcnt--;
|
||||
- dsthost->dsthost_refcnt--;
|
||||
}
|
||||
goto begin;
|
||||
}
|
@ -0,0 +1,118 @@
|
||||
From 0b5c7efdfc6e389ec6840579fe90bdb6f42b08dc Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
Date: Fri, 1 Mar 2019 16:04:05 +0100
|
||||
Subject: [PATCH] sch_cake: Permit use of connmarks as tin classifiers
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Add flag 'FWMARK' to enable use of firewall connmarks as tin selector.
|
||||
The connmark (skbuff->mark) needs to be in the range 1->tin_cnt, i.e.
|
||||
for diffserv3 the mark needs to be 1->3.
|
||||
|
||||
Background
|
||||
|
||||
Typically CAKE uses DSCP as the basis for tin selection. DSCP values
|
||||
are relatively easily changed as part of the egress path, usually with
|
||||
iptables & the mangle table, ingress is more challenging. CAKE is often
|
||||
used on the WAN interface of a residential gateway where passthrough of
|
||||
DSCP from the ISP is either missing or set to unhelpful values thus use
|
||||
of ingress DSCP values for tin selection isn't helpful in that
|
||||
environment.
|
||||
|
||||
An approach to solving the ingress tin selection problem is to use
|
||||
CAKE's understanding of tc filters. Naive tc filters could match on
|
||||
source/destination port numbers and force tin selection that way, but
|
||||
multiple filters don't scale particularly well as each filter must be
|
||||
traversed whether it matches or not. e.g. a simple example to map 3
|
||||
firewall marks to tins:
|
||||
|
||||
MAJOR=$( tc qdisc show dev $DEV | head -1 | awk '{print $3}' )
|
||||
tc filter add dev $DEV parent $MAJOR protocol all handle 0x01 fw action skbedit priority ${MAJOR}1
|
||||
tc filter add dev $DEV parent $MAJOR protocol all handle 0x02 fw action skbedit priority ${MAJOR}2
|
||||
tc filter add dev $DEV parent $MAJOR protocol all handle 0x03 fw action skbedit priority ${MAJOR}3
|
||||
|
||||
Another option is to use eBPF cls_act with tc filters e.g.
|
||||
|
||||
MAJOR=$( tc qdisc show dev $DEV | head -1 | awk '{print $3}' )
|
||||
tc filter add dev $DEV parent $MAJOR bpf da obj my-bpf-fwmark-to-class.o
|
||||
|
||||
This has the disadvantages of a) needing someone to write & maintain
|
||||
the bpf program, b) a bpf toolchain to compile it and c) needing to
|
||||
hardcode the major number in the bpf program so it matches the cake
|
||||
instance (or forcing the cake instance to a particular major number)
|
||||
since the major number cannot be passed to the bpf program via tc
|
||||
command line.
|
||||
|
||||
As already hinted at by the previous examples, it would be helpful
|
||||
to associate tins with something that survives the Internet path and
|
||||
ideally allows tin selection on both egress and ingress. Netfilter's
|
||||
conntrack permits setting an identifying mark on a connection which
|
||||
can also be restored to an ingress packet with tc action connmark e.g.
|
||||
|
||||
tc filter add dev eth0 parent ffff: protocol all prio 10 u32 \
|
||||
match u32 0 0 flowid 1:1 action connmark action mirred egress redirect dev ifb1
|
||||
|
||||
Since tc's connmark action has restored any connmark into skb->mark,
|
||||
any of the previous solutions are based upon it and in one form or
|
||||
another copy that mark to the skb->priority field where again CAKE
|
||||
picks this up.
|
||||
|
||||
This change cuts out at least one of the (less intuitive &
|
||||
non-scalable) middlemen and permits direct access to skb->mark.
|
||||
|
||||
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
include/uapi/linux/pkt_sched.h | 1 +
|
||||
net/sched/sch_cake.c | 34 +++++++++++++++++++++++++++-------
|
||||
2 files changed, 28 insertions(+), 7 deletions(-)
|
||||
|
||||
--- a/include/uapi/linux/pkt_sched.h
|
||||
+++ b/include/uapi/linux/pkt_sched.h
|
||||
@@ -991,6 +991,7 @@ enum {
|
||||
TCA_CAKE_INGRESS,
|
||||
TCA_CAKE_ACK_FILTER,
|
||||
TCA_CAKE_SPLIT_GSO,
|
||||
+ TCA_CAKE_FWMARK,
|
||||
__TCA_CAKE_MAX
|
||||
};
|
||||
#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1)
|
||||
--- a/net/sched/sch_cake.c
|
||||
+++ b/net/sched/sch_cake.c
|
||||
@@ -258,7 +258,8 @@ enum {
|
||||
CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
|
||||
CAKE_FLAG_INGRESS = BIT(2),
|
||||
CAKE_FLAG_WASH = BIT(3),
|
||||
- CAKE_FLAG_SPLIT_GSO = BIT(4)
|
||||
+ CAKE_FLAG_SPLIT_GSO = BIT(4),
|
||||
+ CAKE_FLAG_FWMARK = BIT(5)
|
||||
};
|
||||
|
||||
/* COBALT operates the Codel and BLUE algorithms in parallel, in order to
|
||||
@@ -2623,6 +2624,13 @@ static int cake_change(struct Qdisc *sch
|
||||
q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
|
||||
}
|
||||
|
||||
+ if (tb[TCA_CAKE_FWMARK]) {
|
||||
+ if (!!nla_get_u32(tb[TCA_CAKE_FWMARK]))
|
||||
+ q->rate_flags |= CAKE_FLAG_FWMARK;
|
||||
+ else
|
||||
+ q->rate_flags &= ~CAKE_FLAG_FWMARK;
|
||||
+ }
|
||||
+
|
||||
if (q->tins) {
|
||||
sch_tree_lock(sch);
|
||||
cake_reconfigure(sch);
|
||||
@@ -2782,6 +2790,10 @@ static int cake_dump(struct Qdisc *sch,
|
||||
!!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
|
||||
goto nla_put_failure;
|
||||
|
||||
+ if (nla_put_u32(skb, TCA_CAKE_FWMARK,
|
||||
+ !!(q->rate_flags & CAKE_FLAG_FWMARK)))
|
||||
+ goto nla_put_failure;
|
||||
+
|
||||
return nla_nest_end(skb, opts);
|
||||
|
||||
nla_put_failure:
|
@ -0,0 +1,102 @@
|
||||
From eab2fc822af38f31fd5f4e731b5d10b94904d919 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
|
||||
Date: Thu, 14 Mar 2019 23:08:22 +0100
|
||||
Subject: [PATCH] sch_cake: Interpret fwmark parameter as a bitmask
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
We initially interpreted the fwmark parameter as a flag that simply turned
|
||||
on the feature, using the whole skb->mark field as the index into the CAKE
|
||||
tin_order array. However, it is quite common for different applications to
|
||||
use different parts of the mark field for their own purposes, each using a
|
||||
different mask.
|
||||
|
||||
Support this use of subsets of the mark by interpreting the TCA_CAKE_FWMARK
|
||||
parameter as a bitmask to apply to the fwmark field when reading it. The
|
||||
result will be right-shifted by the number of unset lower bits of the mask
|
||||
before looking up the tin.
|
||||
|
||||
In the original commit message we also failed to credit Felix Resch with
|
||||
originally suggesting the fwmark feature back in 2017; so the Suggested-By
|
||||
in this commit covers the whole fwmark feature.
|
||||
|
||||
Fixes: 0b5c7efdfc6e ("sch_cake: Permit use of connmarks as tin classifiers")
|
||||
Suggested-by: Felix Resch <fuller@beif.de>
|
||||
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
---
|
||||
net/sched/sch_cake.c | 25 ++++++++++++-------------
|
||||
1 file changed, 12 insertions(+), 13 deletions(-)
|
||||
|
||||
--- a/net/sched/sch_cake.c
|
||||
+++ b/net/sched/sch_cake.c
|
||||
@@ -211,6 +211,9 @@ struct cake_sched_data {
|
||||
u8 ack_filter;
|
||||
u8 atm_mode;
|
||||
|
||||
+ u32 fwmark_mask;
|
||||
+ u16 fwmark_shft;
|
||||
+
|
||||
/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
|
||||
u16 rate_shft;
|
||||
ktime_t time_next_packet;
|
||||
@@ -258,8 +261,7 @@ enum {
|
||||
CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
|
||||
CAKE_FLAG_INGRESS = BIT(2),
|
||||
CAKE_FLAG_WASH = BIT(3),
|
||||
- CAKE_FLAG_SPLIT_GSO = BIT(4),
|
||||
- CAKE_FLAG_FWMARK = BIT(5)
|
||||
+ CAKE_FLAG_SPLIT_GSO = BIT(4)
|
||||
};
|
||||
|
||||
/* COBALT operates the Codel and BLUE algorithms in parallel, in order to
|
||||
@@ -1554,7 +1556,7 @@ static struct cake_tin_data *cake_select
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
struct cake_sched_data *q = qdisc_priv(sch);
|
||||
- u32 tin;
|
||||
+ u32 tin, mark;
|
||||
u8 dscp;
|
||||
|
||||
/* Tin selection: Default to diffserv-based selection, allow overriding
|
||||
@@ -1562,6 +1564,7 @@ static struct cake_tin_data *cake_select
|
||||
*/
|
||||
dscp = cake_handle_diffserv(skb,
|
||||
q->rate_flags & CAKE_FLAG_WASH);
|
||||
+ mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;
|
||||
|
||||
if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
|
||||
tin = 0;
|
||||
@@ -2178,6 +2181,7 @@ static const struct nla_policy cake_poli
|
||||
[TCA_CAKE_MPU] = { .type = NLA_U32 },
|
||||
[TCA_CAKE_INGRESS] = { .type = NLA_U32 },
|
||||
[TCA_CAKE_ACK_FILTER] = { .type = NLA_U32 },
|
||||
+ [TCA_CAKE_FWMARK] = { .type = NLA_U32 },
|
||||
};
|
||||
|
||||
static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
|
||||
@@ -2625,10 +2629,8 @@ static int cake_change(struct Qdisc *sch
|
||||
}
|
||||
|
||||
if (tb[TCA_CAKE_FWMARK]) {
|
||||
- if (!!nla_get_u32(tb[TCA_CAKE_FWMARK]))
|
||||
- q->rate_flags |= CAKE_FLAG_FWMARK;
|
||||
- else
|
||||
- q->rate_flags &= ~CAKE_FLAG_FWMARK;
|
||||
+ q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]);
|
||||
+ q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0;
|
||||
}
|
||||
|
||||
if (q->tins) {
|
||||
@@ -2790,8 +2792,7 @@ static int cake_dump(struct Qdisc *sch,
|
||||
!!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
|
||||
goto nla_put_failure;
|
||||
|
||||
- if (nla_put_u32(skb, TCA_CAKE_FWMARK,
|
||||
- !!(q->rate_flags & CAKE_FLAG_FWMARK)))
|
||||
+ if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask))
|
||||
goto nla_put_failure;
|
||||
|
||||
return nla_nest_end(skb, opts);
|
@ -0,0 +1,158 @@
|
||||
From d7e1738f0a0b0573ac93cf570ba3df9dee61b68e Mon Sep 17 00:00:00 2001
|
||||
From: Kevin 'ldir' Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
Date: Wed, 18 Dec 2019 14:05:13 +0000
|
||||
Subject: [PATCH 2/2] sch_cake: drop unused variable tin_quantum_prio
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Turns out tin_quantum_prio isn't used anymore and is a leftover from a
|
||||
previous implementation of diffserv tins. Since the variable isn't used
|
||||
in any calculations it can be eliminated.
|
||||
|
||||
Drop variable and places where it was set. Rename remaining variable
|
||||
and consolidate naming of intermediate variables that set it.
|
||||
|
||||
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
net/sched/sch_cake.c | 59 ++++++++++++++------------------------------
|
||||
1 file changed, 18 insertions(+), 41 deletions(-)
|
||||
|
||||
--- a/net/sched/sch_cake.c
|
||||
+++ b/net/sched/sch_cake.c
|
||||
@@ -173,8 +173,7 @@ struct cake_tin_data {
|
||||
u64 tin_rate_bps;
|
||||
u16 tin_rate_shft;
|
||||
|
||||
- u16 tin_quantum_prio;
|
||||
- u16 tin_quantum_band;
|
||||
+ u16 tin_quantum;
|
||||
s32 tin_deficit;
|
||||
u32 tin_backlog;
|
||||
u32 tin_dropped;
|
||||
@@ -1916,7 +1915,7 @@ begin:
|
||||
while (b->tin_deficit < 0 ||
|
||||
!(b->sparse_flow_count + b->bulk_flow_count)) {
|
||||
if (b->tin_deficit <= 0)
|
||||
- b->tin_deficit += b->tin_quantum_band;
|
||||
+ b->tin_deficit += b->tin_quantum;
|
||||
if (b->sparse_flow_count + b->bulk_flow_count)
|
||||
empty = false;
|
||||
|
||||
@@ -2237,8 +2236,7 @@ static int cake_config_besteffort(struct
|
||||
|
||||
cake_set_rate(b, rate, mtu,
|
||||
us_to_ns(q->target), us_to_ns(q->interval));
|
||||
- b->tin_quantum_band = 65535;
|
||||
- b->tin_quantum_prio = 65535;
|
||||
+ b->tin_quantum = 65535;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2249,8 +2247,7 @@ static int cake_config_precedence(struct
|
||||
struct cake_sched_data *q = qdisc_priv(sch);
|
||||
u32 mtu = psched_mtu(qdisc_dev(sch));
|
||||
u64 rate = q->rate_bps;
|
||||
- u32 quantum1 = 256;
|
||||
- u32 quantum2 = 256;
|
||||
+ u32 quantum = 256;
|
||||
u32 i;
|
||||
|
||||
q->tin_cnt = 8;
|
||||
@@ -2263,18 +2260,14 @@ static int cake_config_precedence(struct
|
||||
cake_set_rate(b, rate, mtu, us_to_ns(q->target),
|
||||
us_to_ns(q->interval));
|
||||
|
||||
- b->tin_quantum_prio = max_t(u16, 1U, quantum1);
|
||||
- b->tin_quantum_band = max_t(u16, 1U, quantum2);
|
||||
+ b->tin_quantum = max_t(u16, 1U, quantum);
|
||||
|
||||
/* calculate next class's parameters */
|
||||
rate *= 7;
|
||||
rate >>= 3;
|
||||
|
||||
- quantum1 *= 3;
|
||||
- quantum1 >>= 1;
|
||||
-
|
||||
- quantum2 *= 7;
|
||||
- quantum2 >>= 3;
|
||||
+ quantum *= 7;
|
||||
+ quantum >>= 3;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -2343,8 +2336,7 @@ static int cake_config_diffserv8(struct
|
||||
struct cake_sched_data *q = qdisc_priv(sch);
|
||||
u32 mtu = psched_mtu(qdisc_dev(sch));
|
||||
u64 rate = q->rate_bps;
|
||||
- u32 quantum1 = 256;
|
||||
- u32 quantum2 = 256;
|
||||
+ u32 quantum = 256;
|
||||
u32 i;
|
||||
|
||||
q->tin_cnt = 8;
|
||||
@@ -2360,18 +2352,14 @@ static int cake_config_diffserv8(struct
|
||||
cake_set_rate(b, rate, mtu, us_to_ns(q->target),
|
||||
us_to_ns(q->interval));
|
||||
|
||||
- b->tin_quantum_prio = max_t(u16, 1U, quantum1);
|
||||
- b->tin_quantum_band = max_t(u16, 1U, quantum2);
|
||||
+ b->tin_quantum = max_t(u16, 1U, quantum);
|
||||
|
||||
/* calculate next class's parameters */
|
||||
rate *= 7;
|
||||
rate >>= 3;
|
||||
|
||||
- quantum1 *= 3;
|
||||
- quantum1 >>= 1;
|
||||
-
|
||||
- quantum2 *= 7;
|
||||
- quantum2 >>= 3;
|
||||
+ quantum *= 7;
|
||||
+ quantum >>= 3;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -2410,17 +2398,11 @@ static int cake_config_diffserv4(struct
|
||||
cake_set_rate(&q->tins[3], rate >> 2, mtu,
|
||||
us_to_ns(q->target), us_to_ns(q->interval));
|
||||
|
||||
- /* priority weights */
|
||||
- q->tins[0].tin_quantum_prio = quantum;
|
||||
- q->tins[1].tin_quantum_prio = quantum >> 4;
|
||||
- q->tins[2].tin_quantum_prio = quantum << 2;
|
||||
- q->tins[3].tin_quantum_prio = quantum << 4;
|
||||
-
|
||||
/* bandwidth-sharing weights */
|
||||
- q->tins[0].tin_quantum_band = quantum;
|
||||
- q->tins[1].tin_quantum_band = quantum >> 4;
|
||||
- q->tins[2].tin_quantum_band = quantum >> 1;
|
||||
- q->tins[3].tin_quantum_band = quantum >> 2;
|
||||
+ q->tins[0].tin_quantum = quantum;
|
||||
+ q->tins[1].tin_quantum = quantum >> 4;
|
||||
+ q->tins[2].tin_quantum = quantum >> 1;
|
||||
+ q->tins[3].tin_quantum = quantum >> 2;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2451,15 +2433,10 @@ static int cake_config_diffserv3(struct
|
||||
cake_set_rate(&q->tins[2], rate >> 2, mtu,
|
||||
us_to_ns(q->target), us_to_ns(q->interval));
|
||||
|
||||
- /* priority weights */
|
||||
- q->tins[0].tin_quantum_prio = quantum;
|
||||
- q->tins[1].tin_quantum_prio = quantum >> 4;
|
||||
- q->tins[2].tin_quantum_prio = quantum << 4;
|
||||
-
|
||||
/* bandwidth-sharing weights */
|
||||
- q->tins[0].tin_quantum_band = quantum;
|
||||
- q->tins[1].tin_quantum_band = quantum >> 4;
|
||||
- q->tins[2].tin_quantum_band = quantum >> 2;
|
||||
+ q->tins[0].tin_quantum = quantum;
|
||||
+ q->tins[1].tin_quantum = quantum >> 4;
|
||||
+ q->tins[2].tin_quantum = quantum >> 2;
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,30 @@
|
||||
From b3c424eb6a1a3c485de64619418a471dee6ce849 Mon Sep 17 00:00:00 2001
|
||||
From: Victorien Molle <victorien.molle@wifirst.fr>
|
||||
Date: Mon, 2 Dec 2019 15:11:38 +0100
|
||||
Subject: [PATCH] sch_cake: Add missing NLA policy entry TCA_CAKE_SPLIT_GSO
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This field has never been checked since introduction in mainline kernel
|
||||
|
||||
Signed-off-by: Victorien Molle <victorien.molle@wifirst.fr>
|
||||
Signed-off-by: Florent Fourcot <florent.fourcot@wifirst.fr>
|
||||
Fixes: 2db6dc2662ba "sch_cake: Make gso-splitting configurable from userspace"
|
||||
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
---
|
||||
net/sched/sch_cake.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/net/sched/sch_cake.c
|
||||
+++ b/net/sched/sch_cake.c
|
||||
@@ -2180,6 +2180,7 @@ static const struct nla_policy cake_poli
|
||||
[TCA_CAKE_MPU] = { .type = NLA_U32 },
|
||||
[TCA_CAKE_INGRESS] = { .type = NLA_U32 },
|
||||
[TCA_CAKE_ACK_FILTER] = { .type = NLA_U32 },
|
||||
+ [TCA_CAKE_SPLIT_GSO] = { .type = NLA_U32 },
|
||||
[TCA_CAKE_FWMARK] = { .type = NLA_U32 },
|
||||
};
|
||||
|
@ -0,0 +1,170 @@
|
||||
From b0c19ed6088ab41dd2a727b60594b7297c15d6ce Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
|
||||
Date: Fri, 29 May 2020 14:43:44 +0200
|
||||
Subject: [PATCH] sch_cake: Take advantage of skb->hash where appropriate
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
While the other fq-based qdiscs take advantage of skb->hash and don't
|
||||
recompute it if it is already set, sch_cake does not.
|
||||
|
||||
This was a deliberate choice because sch_cake hashes various parts of the
|
||||
packet header to support its advanced flow isolation modes. However,
|
||||
foregoing the use of skb->hash entirely loses a few important benefits:
|
||||
|
||||
- When skb->hash is set by hardware, a few CPU cycles can be saved by not
|
||||
hashing again in software.
|
||||
|
||||
- Tunnel encapsulations will generally preserve the value of skb->hash from
|
||||
before the encapsulation, which allows flow-based qdiscs to distinguish
|
||||
between flows even though the outer packet header no longer has flow
|
||||
information.
|
||||
|
||||
It turns out that we can preserve these desirable properties in many cases,
|
||||
while still supporting the advanced flow isolation properties of sch_cake.
|
||||
This patch does so by reusing the skb->hash value as the flow_hash part of
|
||||
the hashing procedure in cake_hash() only in the following conditions:
|
||||
|
||||
- If the skb->hash is marked as covering the flow headers (skb->l4_hash is
|
||||
set)
|
||||
|
||||
AND
|
||||
|
||||
- NAT header rewriting is either disabled, or did not change any values
|
||||
used for hashing. The latter is important to match local-origin packets
|
||||
such as those of a tunnel endpoint.
|
||||
|
||||
The immediate motivation for fixing this was the recent patch to WireGuard
|
||||
to preserve the skb->hash on encapsulation. As such, this is also what I
|
||||
tested against; with this patch, added latency under load for competing
|
||||
flows drops from ~8 ms to sub-1ms on an RRUL test over a WireGuard tunnel
|
||||
going through a virtual link shaped to 1Gbps using sch_cake. This matches
|
||||
the results we saw with a similar setup using sch_fq_codel when testing the
|
||||
WireGuard patch.
|
||||
|
||||
Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc")
|
||||
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
---
|
||||
net/sched/sch_cake.c | 65 ++++++++++++++++++++++++++++++++++----------
|
||||
1 file changed, 51 insertions(+), 14 deletions(-)
|
||||
|
||||
--- a/net/sched/sch_cake.c
|
||||
+++ b/net/sched/sch_cake.c
|
||||
@@ -584,26 +584,48 @@ static bool cobalt_should_drop(struct co
|
||||
return drop;
|
||||
}
|
||||
|
||||
-static void cake_update_flowkeys(struct flow_keys *keys,
|
||||
+static bool cake_update_flowkeys(struct flow_keys *keys,
|
||||
const struct sk_buff *skb)
|
||||
{
|
||||
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
|
||||
struct nf_conntrack_tuple tuple = {};
|
||||
- bool rev = !skb->_nfct;
|
||||
+ bool rev = !skb->_nfct, upd = false;
|
||||
+ __be32 ip;
|
||||
|
||||
if (tc_skb_protocol(skb) != htons(ETH_P_IP))
|
||||
- return;
|
||||
+ return false;
|
||||
|
||||
if (!nf_ct_get_tuple_skb(&tuple, skb))
|
||||
- return;
|
||||
+ return false;
|
||||
|
||||
- keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
|
||||
- keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
|
||||
+ ip = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
|
||||
+ if (ip != keys->addrs.v4addrs.src) {
|
||||
+ keys->addrs.v4addrs.src = ip;
|
||||
+ upd = true;
|
||||
+ }
|
||||
+ ip = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
|
||||
+ if (ip != keys->addrs.v4addrs.dst) {
|
||||
+ keys->addrs.v4addrs.dst = ip;
|
||||
+ upd = true;
|
||||
+ }
|
||||
|
||||
if (keys->ports.ports) {
|
||||
- keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all;
|
||||
- keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all;
|
||||
+ __be16 port;
|
||||
+
|
||||
+ port = rev ? tuple.dst.u.all : tuple.src.u.all;
|
||||
+ if (port != keys->ports.src) {
|
||||
+ keys->ports.src = port;
|
||||
+ upd = true;
|
||||
+ }
|
||||
+ port = rev ? tuple.src.u.all : tuple.dst.u.all;
|
||||
+ if (port != keys->ports.dst) {
|
||||
+ keys->ports.dst = port;
|
||||
+ upd = true;
|
||||
+ }
|
||||
}
|
||||
+ return upd;
|
||||
+#else
|
||||
+ return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -624,23 +646,36 @@ static bool cake_ddst(int flow_mode)
|
||||
static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
|
||||
int flow_mode, u16 flow_override, u16 host_override)
|
||||
{
|
||||
+ bool hash_flows = (!flow_override && !!(flow_mode & CAKE_FLOW_FLOWS));
|
||||
+ bool hash_hosts = (!host_override && !!(flow_mode & CAKE_FLOW_HOSTS));
|
||||
+ bool nat_enabled = !!(flow_mode & CAKE_FLOW_NAT_FLAG);
|
||||
u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0;
|
||||
u16 reduced_hash, srchost_idx, dsthost_idx;
|
||||
struct flow_keys keys, host_keys;
|
||||
+ bool use_skbhash = skb->l4_hash;
|
||||
|
||||
if (unlikely(flow_mode == CAKE_FLOW_NONE))
|
||||
return 0;
|
||||
|
||||
- /* If both overrides are set we can skip packet dissection entirely */
|
||||
- if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) &&
|
||||
- (host_override || !(flow_mode & CAKE_FLOW_HOSTS)))
|
||||
+ /* If both overrides are set, or we can use the SKB hash and nat mode is
|
||||
+ * disabled, we can skip packet dissection entirely. If nat mode is
|
||||
+ * enabled there's another check below after doing the conntrack lookup.
|
||||
+ */
|
||||
+ if ((!hash_flows || (use_skbhash && !nat_enabled)) && !hash_hosts)
|
||||
goto skip_hash;
|
||||
|
||||
skb_flow_dissect_flow_keys(skb, &keys,
|
||||
FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
|
||||
|
||||
- if (flow_mode & CAKE_FLOW_NAT_FLAG)
|
||||
- cake_update_flowkeys(&keys, skb);
|
||||
+ /* Don't use the SKB hash if we change the lookup keys from conntrack */
|
||||
+ if (nat_enabled && cake_update_flowkeys(&keys, skb))
|
||||
+ use_skbhash = false;
|
||||
+
|
||||
+ /* If we can still use the SKB hash and don't need the host hash, we can
|
||||
+ * skip the rest of the hashing procedure
|
||||
+ */
|
||||
+ if (use_skbhash && !hash_hosts)
|
||||
+ goto skip_hash;
|
||||
|
||||
/* flow_hash_from_keys() sorts the addresses by value, so we have
|
||||
* to preserve their order in a separate data structure to treat
|
||||
@@ -679,12 +714,14 @@ static u32 cake_hash(struct cake_tin_dat
|
||||
/* This *must* be after the above switch, since as a
|
||||
* side-effect it sorts the src and dst addresses.
|
||||
*/
|
||||
- if (flow_mode & CAKE_FLOW_FLOWS)
|
||||
+ if (hash_flows && !use_skbhash)
|
||||
flow_hash = flow_hash_from_keys(&keys);
|
||||
|
||||
skip_hash:
|
||||
if (flow_override)
|
||||
flow_hash = flow_override - 1;
|
||||
+ else if (use_skbhash)
|
||||
+ flow_hash = skb->hash;
|
||||
if (host_override) {
|
||||
dsthost_hash = host_override - 1;
|
||||
srchost_hash = host_override - 1;
|
@ -0,0 +1,96 @@
|
||||
From 9208d2863ac689a563b92f2161d8d1e7127d0add Mon Sep 17 00:00:00 2001
|
||||
From: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
|
||||
Date: Thu, 25 Jun 2020 22:12:07 +0200
|
||||
Subject: [PATCH] sch_cake: don't try to reallocate or unshare skb
|
||||
unconditionally
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
cake_handle_diffserv() tries to linearize mac and network header parts of
|
||||
skb and to make it writable unconditionally. In some cases it leads to full
|
||||
skb reallocation, which reduces throughput and increases CPU load. Some
|
||||
measurements of IPv4 forward + NAPT on MIPS router with 580 MHz single-core
|
||||
CPU was conducted. It appears that on kernel 4.9 skb_try_make_writable()
|
||||
reallocates skb, if skb was allocated in ethernet driver via so-called
|
||||
'build skb' method from page cache (it was discovered by strange increase
|
||||
of kmalloc-2048 slab at first).
|
||||
|
||||
Obtain DSCP value via read-only skb_header_pointer() call, and leave
|
||||
linearization only for DSCP bleaching or ECN CE setting. And, as an
|
||||
additional optimisation, skip diffserv parsing entirely if it is not needed
|
||||
by the current configuration.
|
||||
|
||||
Fixes: c87b4ecdbe8d ("sch_cake: Make sure we can write the IP header before changing DSCP bits")
|
||||
Signed-off-by: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
|
||||
[ fix a few style issues, reflow commit message ]
|
||||
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
---
|
||||
net/sched/sch_cake.c | 41 ++++++++++++++++++++++++++++++-----------
|
||||
1 file changed, 30 insertions(+), 11 deletions(-)
|
||||
|
||||
--- a/net/sched/sch_cake.c
|
||||
+++ b/net/sched/sch_cake.c
|
||||
@@ -1553,30 +1553,49 @@ static unsigned int cake_drop(struct Qdi
|
||||
|
||||
static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
|
||||
{
|
||||
- int wlen = skb_network_offset(skb);
|
||||
+ const int offset = skb_network_offset(skb);
|
||||
+ u16 *buf, buf_;
|
||||
u8 dscp;
|
||||
|
||||
switch (tc_skb_protocol(skb)) {
|
||||
case htons(ETH_P_IP):
|
||||
- wlen += sizeof(struct iphdr);
|
||||
- if (!pskb_may_pull(skb, wlen) ||
|
||||
- skb_try_make_writable(skb, wlen))
|
||||
+ buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
|
||||
+ if (unlikely(!buf))
|
||||
return 0;
|
||||
|
||||
- dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
|
||||
- if (wash && dscp)
|
||||
+ /* ToS is in the second byte of iphdr */
|
||||
+ dscp = ipv4_get_dsfield((struct iphdr *)buf) >> 2;
|
||||
+
|
||||
+ if (wash && dscp) {
|
||||
+ const int wlen = offset + sizeof(struct iphdr);
|
||||
+
|
||||
+ if (!pskb_may_pull(skb, wlen) ||
|
||||
+ skb_try_make_writable(skb, wlen))
|
||||
+ return 0;
|
||||
+
|
||||
ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
|
||||
+ }
|
||||
+
|
||||
return dscp;
|
||||
|
||||
case htons(ETH_P_IPV6):
|
||||
- wlen += sizeof(struct ipv6hdr);
|
||||
- if (!pskb_may_pull(skb, wlen) ||
|
||||
- skb_try_make_writable(skb, wlen))
|
||||
+ buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
|
||||
+ if (unlikely(!buf))
|
||||
return 0;
|
||||
|
||||
- dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
|
||||
- if (wash && dscp)
|
||||
+ /* Traffic class is in the first and second bytes of ipv6hdr */
|
||||
+ dscp = ipv6_get_dsfield((struct ipv6hdr *)buf) >> 2;
|
||||
+
|
||||
+ if (wash && dscp) {
|
||||
+ const int wlen = offset + sizeof(struct ipv6hdr);
|
||||
+
|
||||
+ if (!pskb_may_pull(skb, wlen) ||
|
||||
+ skb_try_make_writable(skb, wlen))
|
||||
+ return 0;
|
||||
+
|
||||
ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
|
||||
+ }
|
||||
+
|
||||
return dscp;
|
||||
|
||||
case htons(ETH_P_ARP):
|
@ -0,0 +1,62 @@
|
||||
From 8c95eca0bb8c4bd2231a0d581f1ad0d50c90488c Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
|
||||
Date: Thu, 25 Jun 2020 22:12:08 +0200
|
||||
Subject: [PATCH] sch_cake: don't call diffserv parsing code when it is not
|
||||
needed
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
As a further optimisation of the diffserv parsing codepath, we can skip it
|
||||
entirely if CAKE is configured to neither use diffserv-based
|
||||
classification, nor to zero out the diffserv bits.
|
||||
|
||||
Fixes: c87b4ecdbe8d ("sch_cake: Make sure we can write the IP header before changing DSCP bits")
|
||||
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
---
|
||||
net/sched/sch_cake.c | 13 +++++++++----
|
||||
1 file changed, 9 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/net/sched/sch_cake.c
|
||||
+++ b/net/sched/sch_cake.c
|
||||
@@ -1551,7 +1551,7 @@ static unsigned int cake_drop(struct Qdi
|
||||
return idx + (tin << 16);
|
||||
}
|
||||
|
||||
-static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
|
||||
+static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash)
|
||||
{
|
||||
const int offset = skb_network_offset(skb);
|
||||
u16 *buf, buf_;
|
||||
@@ -1612,14 +1612,17 @@ static struct cake_tin_data *cake_select
|
||||
{
|
||||
struct cake_sched_data *q = qdisc_priv(sch);
|
||||
u32 tin, mark;
|
||||
+ bool wash;
|
||||
u8 dscp;
|
||||
|
||||
/* Tin selection: Default to diffserv-based selection, allow overriding
|
||||
- * using firewall marks or skb->priority.
|
||||
+ * using firewall marks or skb->priority. Call DSCP parsing early if
|
||||
+ * wash is enabled, otherwise defer to below to skip unneeded parsing.
|
||||
*/
|
||||
- dscp = cake_handle_diffserv(skb,
|
||||
- q->rate_flags & CAKE_FLAG_WASH);
|
||||
mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;
|
||||
+ wash = !!(q->rate_flags & CAKE_FLAG_WASH);
|
||||
+ if (wash)
|
||||
+ dscp = cake_handle_diffserv(skb, wash);
|
||||
|
||||
if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
|
||||
tin = 0;
|
||||
@@ -1630,6 +1633,8 @@ static struct cake_tin_data *cake_select
|
||||
tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
|
||||
|
||||
else {
|
||||
+ if (!wash)
|
||||
+ dscp = cake_handle_diffserv(skb, wash);
|
||||
tin = q->tin_index[dscp];
|
||||
|
||||
if (unlikely(tin >= q->tin_cnt))
|
@ -0,0 +1,40 @@
|
||||
From 3f608f0c41360b11b04c763f348b712f651c8bac Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
|
||||
Date: Thu, 25 Jun 2020 22:12:09 +0200
|
||||
Subject: [PATCH] sch_cake: fix a few style nits
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
I spotted a few nits when comparing the in-tree version of sch_cake with
|
||||
the out-of-tree one: A redundant error variable declaration shadowing an
|
||||
outer declaration, and an indentation alignment issue. Fix both of these.
|
||||
|
||||
Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc")
|
||||
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
---
|
||||
net/sched/sch_cake.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/net/sched/sch_cake.c
|
||||
+++ b/net/sched/sch_cake.c
|
||||
@@ -2713,7 +2713,7 @@ static int cake_init(struct Qdisc *sch,
|
||||
qdisc_watchdog_init(&q->watchdog, sch);
|
||||
|
||||
if (opt) {
|
||||
- int err = cake_change(sch, opt, extack);
|
||||
+ err = cake_change(sch, opt, extack);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
@@ -3030,7 +3030,7 @@ static int cake_dump_class_stats(struct
|
||||
PUT_STAT_S32(BLUE_TIMER_US,
|
||||
ktime_to_us(
|
||||
ktime_sub(now,
|
||||
- flow->cvars.blue_timer)));
|
||||
+ flow->cvars.blue_timer)));
|
||||
}
|
||||
if (flow->cvars.dropping) {
|
||||
PUT_STAT_S32(DROP_NEXT_US,
|
@ -0,0 +1,57 @@
|
||||
From b8392808eb3fc28e523e28cb258c81ca246deb9b Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
Date: Thu, 25 Jun 2020 22:18:00 +0200
|
||||
Subject: [PATCH] sch_cake: add RFC 8622 LE PHB support to CAKE diffserv
|
||||
handling
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Change tin mapping on diffserv3, 4 & 8 for LE PHB support, in essence
|
||||
making LE a member of the Bulk tin.
|
||||
|
||||
Bulk has the least priority and minimum of 1/16th total bandwidth in the
|
||||
face of higher priority traffic.
|
||||
|
||||
NB: Diffserv 3 & 4 swap tin 0 & 1 priorities from the default order as
|
||||
found in diffserv8, in case anyone is wondering why it looks a bit odd.
|
||||
|
||||
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
[ reword commit message slightly ]
|
||||
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
|
||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||||
---
|
||||
net/sched/sch_cake.c | 8 ++++----
|
||||
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/net/sched/sch_cake.c
|
||||
+++ b/net/sched/sch_cake.c
|
||||
@@ -312,8 +312,8 @@ static const u8 precedence[] = {
|
||||
};
|
||||
|
||||
static const u8 diffserv8[] = {
|
||||
- 2, 5, 1, 2, 4, 2, 2, 2,
|
||||
- 0, 2, 1, 2, 1, 2, 1, 2,
|
||||
+ 2, 0, 1, 2, 4, 2, 2, 2,
|
||||
+ 1, 2, 1, 2, 1, 2, 1, 2,
|
||||
5, 2, 4, 2, 4, 2, 4, 2,
|
||||
3, 2, 3, 2, 3, 2, 3, 2,
|
||||
6, 2, 3, 2, 3, 2, 3, 2,
|
||||
@@ -323,7 +323,7 @@ static const u8 diffserv8[] = {
|
||||
};
|
||||
|
||||
static const u8 diffserv4[] = {
|
||||
- 0, 2, 0, 0, 2, 0, 0, 0,
|
||||
+ 0, 1, 0, 0, 2, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0, 0, 0,
|
||||
2, 0, 2, 0, 2, 0, 2, 0,
|
||||
2, 0, 2, 0, 2, 0, 2, 0,
|
||||
@@ -334,7 +334,7 @@ static const u8 diffserv4[] = {
|
||||
};
|
||||
|
||||
static const u8 diffserv3[] = {
|
||||
- 0, 0, 0, 0, 2, 0, 0, 0,
|
||||
+ 0, 1, 0, 0, 2, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
@ -0,0 +1,114 @@
|
||||
From a00590d570212c3c633bd463cef8ec7377cc7993 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
Date: Tue, 30 Jun 2020 12:07:44 +0100
|
||||
Subject: [PATCH] sch_cake: fix IP protocol handling in the presence of VLAN
|
||||
tags
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
From: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
|
||||
|
||||
CAKE was using the return value of tc_skb_protocol() and expecting it to be
|
||||
the IP protocol type. This can fail in the presence of QinQ VLAN tags,
|
||||
making CAKE unable to handle ECN marking and diffserv parsing in this case.
|
||||
Fix this by implementing our own version of tc_skb_protocol(), which will
|
||||
use skb->protocol directly, but also parse and skip over any VLAN tags and
|
||||
return the inner protocol number instead.
|
||||
|
||||
Also fix CE marking by implementing a version of INET_ECN_set_ce() that
|
||||
uses the same parsing routine.
|
||||
|
||||
Fixes: ea82511518f4 ("sch_cake: Add NAT awareness to packet classifier")
|
||||
Fixes: b2100cc56fca ("sch_cake: Use tc_skb_protocol() helper for getting packet protocol")
|
||||
Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc")
|
||||
Signed-off-by: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
|
||||
[ squash original two patches, rewrite commit message ]
|
||||
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
|
||||
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
---
|
||||
net/sched/sch_cake.c | 52 +++++++++++++++++++++++++++++++++++++++++---
|
||||
1 file changed, 49 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/net/sched/sch_cake.c
|
||||
+++ b/net/sched/sch_cake.c
|
||||
@@ -497,6 +497,52 @@ static bool cobalt_queue_empty(struct co
|
||||
return down;
|
||||
}
|
||||
|
||||
+static __be16 cake_skb_proto(const struct sk_buff *skb)
|
||||
+{
|
||||
+ unsigned int offset = skb_mac_offset(skb) + sizeof(struct ethhdr);
|
||||
+ __be16 proto = skb->protocol;
|
||||
+ struct vlan_hdr vhdr, *vh;
|
||||
+
|
||||
+ while (proto == htons(ETH_P_8021Q) || proto == htons(ETH_P_8021AD)) {
|
||||
+ vh = skb_header_pointer(skb, offset, sizeof(vhdr), &vhdr);
|
||||
+ if (!vh)
|
||||
+ break;
|
||||
+
|
||||
+ proto = vh->h_vlan_encapsulated_proto;
|
||||
+ offset += sizeof(vhdr);
|
||||
+ }
|
||||
+
|
||||
+ return proto;
|
||||
+}
|
||||
+
|
||||
+static int cake_set_ce(struct sk_buff *skb)
|
||||
+{
|
||||
+ int wlen = skb_network_offset(skb);
|
||||
+
|
||||
+ switch (cake_skb_proto(skb)) {
|
||||
+ case htons(ETH_P_IP):
|
||||
+ wlen += sizeof(struct iphdr);
|
||||
+ if (!pskb_may_pull(skb, wlen) ||
|
||||
+ skb_try_make_writable(skb, wlen))
|
||||
+ return 0;
|
||||
+
|
||||
+ return IP_ECN_set_ce(ip_hdr(skb));
|
||||
+
|
||||
+ case htons(ETH_P_IPV6):
|
||||
+ wlen += sizeof(struct ipv6hdr);
|
||||
+ if (!pskb_may_pull(skb, wlen) ||
|
||||
+ skb_try_make_writable(skb, wlen))
|
||||
+ return 0;
|
||||
+
|
||||
+ return IP6_ECN_set_ce(skb, ipv6_hdr(skb));
|
||||
+
|
||||
+ default:
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
/* Call this with a freshly dequeued packet for possible congestion marking.
|
||||
* Returns true as an instruction to drop the packet, false for delivery.
|
||||
*/
|
||||
@@ -549,7 +595,7 @@ static bool cobalt_should_drop(struct co
|
||||
|
||||
if (next_due && vars->dropping) {
|
||||
/* Use ECN mark if possible, otherwise drop */
|
||||
- drop = !(vars->ecn_marked = INET_ECN_set_ce(skb));
|
||||
+ drop = !(vars->ecn_marked = cake_set_ce(skb));
|
||||
|
||||
vars->count++;
|
||||
if (!vars->count)
|
||||
@@ -592,7 +638,7 @@ static bool cake_update_flowkeys(struct
|
||||
bool rev = !skb->_nfct, upd = false;
|
||||
__be32 ip;
|
||||
|
||||
- if (tc_skb_protocol(skb) != htons(ETH_P_IP))
|
||||
+ if (cake_skb_proto(skb) != htons(ETH_P_IP))
|
||||
return false;
|
||||
|
||||
if (!nf_ct_get_tuple_skb(&tuple, skb))
|
||||
@@ -1557,7 +1603,7 @@ static u8 cake_handle_diffserv(struct sk
|
||||
u16 *buf, buf_;
|
||||
u8 dscp;
|
||||
|
||||
- switch (tc_skb_protocol(skb)) {
|
||||
+ switch (cake_skb_proto(skb)) {
|
||||
case htons(ETH_P_IP):
|
||||
buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
|
||||
if (unlikely(!buf))
|
Loading…
Reference in New Issue
Block a user