Mirror of https://github.com/coolsnowwolf/lede.git (synced 2025-04-16)
x86: Add net.ipv4.tcp_collapse_max_bytes option from Cloudflare and CRYPTO_XTS_AES_SYNC (#11128)

* x86: intel 10gen GPU i915 guc patch refresh
* intel-microcode: 3.20230214.1
* x86: add tcp_collapse option support
Commit: ae5505832e (parent: 3713f8b5b1)
intel-microcode Makefile:

@@ -8,13 +8,13 @@
 include $(TOPDIR)/rules.mk
 
 PKG_NAME:=intel-microcode
-PKG_VERSION:=20221108
+PKG_VERSION:=20230214
 PKG_RELEASE:=1
 
-PKG_SOURCE:=intel-microcode_3.$(PKG_VERSION).1.tar.xz
+PKG_SOURCE:=intel-microcode_3.$(PKG_VERSION).1~deb11u1.tar.xz
 PKG_SOURCE_URL:=@DEBIAN/pool/non-free/i/intel-microcode/
-PKG_HASH:=9c40fc5cbb386a4e1154f8f316422b28fccc9eaabdea707a80643f9bed3f9064
-PKG_BUILD_DIR:=$(BUILD_DIR)/intel-microcode-3.$(PKG_VERSION).1
+PKG_HASH:=1b10c7b622063029cc932cfa4115c278412e1dd01e709154e1ac7e200fe48baf
+PKG_BUILD_DIR:=$(BUILD_DIR)/intel-microcode-3.$(PKG_VERSION).1~deb11u1
 
 PKG_BUILD_DEPENDS:=iucode-tool/host
 
i915 GuC patch refresh:

@@ -1,6 +1,6 @@
 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
 +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
-@@ -23,7 +23,7 @@ static void uc_expand_default_options(st
+@@ -23,10 +23,14 @@ static void uc_expand_default_options(st
 	return;
 
 	/* Don't enable GuC/HuC on pre-Gen12 */
@@ -9,3 +9,10 @@
 	i915->params.enable_guc = 0;
 	return;
 	}
+	if(GRAPHICS_VER(i915) < 11){
+		i915->params.enable_guc = 2;
+		return;
+	}
+
+	/* Don't enable GuC/HuC on older Gen12 platforms */
+	if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) {
New patch file:

@@ -0,0 +1,164 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: "mfreemon@cloudflare.com" <mfreemon@cloudflare.com>
Date: Tue, 1 Mar 2022 17:06:02 -0600
Subject: [PATCH] Add a sysctl to skip tcp collapse processing when the receive
 buffer is full.

For context and additional information about this patch, see the
blog post at https://blog.cloudflare.com/optimizing-tcp-for-high-throughput-and-low-latency/

sysctl: net.ipv4.tcp_collapse_max_bytes

If tcp_collapse_max_bytes is non-zero, attempt to collapse the
queue to free up memory if the current amount of memory allocated
is less than tcp_collapse_max_bytes. Otherwise, the packet is
dropped without attempting to collapse the queue.

If tcp_collapse_max_bytes is zero, this feature is disabled
and the default Linux behavior is used. The default Linux
behavior is to always perform the attempt to collapse the
queue to free up memory.

When the receive queue is small, we want to collapse the
queue. There are two reasons for this: (a) the latency of
performing the collapse will be small on a small queue, and
(b) we want to avoid sending a congestion signal (via a
packet drop) to the sender when the receive queue is small.

The result is that we avoid latency spikes caused by the
time it takes to perform the collapse logic when the receive
queue is large and full, while preserving existing behavior
and performance for all other cases.
---
 include/net/netns/ipv4.h   |  1 +
 include/trace/events/tcp.h |  7 +++++++
 net/ipv4/sysctl_net_ipv4.c |  7 +++++++
 net/ipv4/tcp_input.c       | 36 ++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_ipv4.c        |  2 ++
 5 files changed, 53 insertions(+)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 6c5b2efc4f17..bf2c9b5847e4 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -182,6 +182,7 @@ struct netns_ipv4 {
 	int sysctl_udp_rmem_min;
 
 	u8 sysctl_fib_notify_on_flag_change;
+	unsigned int sysctl_tcp_collapse_max_bytes;
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
 	u8 sysctl_udp_l3mdev_accept;
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 521059d8dc0a..35fa0f23680c 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -187,6 +187,13 @@ DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust,
 	TP_ARGS(sk)
 );
 
+DEFINE_EVENT(tcp_event_sk, tcp_collapse_max_bytes_exceeded,
+
+	TP_PROTO(struct sock *sk),
+
+	TP_ARGS(sk)
+);
+
 TRACE_EVENT(tcp_retransmit_synack,
 
 	TP_PROTO(const struct sock *sk, const struct request_sock *req),
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 6f1e64d49232..a61e6b1ac0cc 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1406,6 +1406,13 @@ static struct ctl_table ipv4_net_table[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= &two,
 	},
+	{
+		.procname	= "tcp_collapse_max_bytes",
+		.data		= &init_net.ipv4.sysctl_tcp_collapse_max_bytes,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_douintvec_minmax,
+	},
 	{ }
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f3b623967436..204f33f2835c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5340,6 +5340,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 static int tcp_prune_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct net *net = sock_net(sk);
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
 
@@ -5351,6 +5352,39 @@ static int tcp_prune_queue(struct sock *sk)
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
 		return 0;
 
+	/* For context and additional information about this patch, see the
+	 * blog post at
+	 * https://blog.cloudflare.com/optimizing-tcp-for-high-throughput-and-low-latency/
+	 *
+	 * sysctl: net.ipv4.tcp_collapse_max_bytes
+	 *
+	 * If tcp_collapse_max_bytes is non-zero, attempt to collapse the
+	 * queue to free up memory if the current amount of memory allocated
+	 * is less than tcp_collapse_max_bytes. Otherwise, the packet is
+	 * dropped without attempting to collapse the queue.
+	 *
+	 * If tcp_collapse_max_bytes is zero, this feature is disabled
+	 * and the default Linux behavior is used. The default Linux
+	 * behavior is to always perform the attempt to collapse the
+	 * queue to free up memory.
+	 *
+	 * When the receive queue is small, we want to collapse the
+	 * queue. There are two reasons for this: (a) the latency of
+	 * performing the collapse will be small on a small queue, and
+	 * (b) we want to avoid sending a congestion signal (via a
+	 * packet drop) to the sender when the receive queue is small.
+	 *
+	 * The result is that we avoid latency spikes caused by the
+	 * time it takes to perform the collapse logic when the receive
+	 * queue is large and full, while preserving existing behavior
+	 * and performance for all other cases.
+	 */
+	if (net->ipv4.sysctl_tcp_collapse_max_bytes &&
+	    (atomic_read(&sk->sk_rmem_alloc) > net->ipv4.sysctl_tcp_collapse_max_bytes)) {
+		/* We are dropping the packet */
+		trace_tcp_collapse_max_bytes_exceeded(sk);
+		goto do_not_collapse;
+	}
+
 	tcp_collapse_ofo_queue(sk);
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		tcp_collapse(sk, &sk->sk_receive_queue, NULL,
@@ -5370,6 +5404,8 @@ static int tcp_prune_queue(struct sock *sk)
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
 		return 0;
 
+do_not_collapse:
+
 	/* If we are really being abused, tell the caller to silently
 	 * drop receive data on the floor. It will get retransmitted
 	 * and hopefully then we'll have sufficient space.
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0fe9461647da..4fadbf38525f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3218,6 +3218,8 @@ static int __net_init tcp_sk_init(struct net *net)
 	else
 		net->ipv4.tcp_congestion_control = &tcp_reno;
 
+	net->ipv4.sysctl_tcp_collapse_max_bytes = 0;
+
 	return 0;
 fail:
 	tcp_sk_exit(net);
-- 
2.35.1
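For illustration, the gate that the patch above inserts into tcp_prune_queue()
boils down to the following standalone sketch (names simplified; this is not
the kernel code itself):

	/* 0 disables the feature: always attempt the collapse. Otherwise,
	 * collapse only while allocated receive memory is at or below the
	 * cap; above it, drop the packet instead of paying the collapse cost.
	 */
	static int skip_collapse(unsigned int rmem_alloc, unsigned int max_bytes)
	{
		return max_bytes && rmem_alloc > max_bytes;
	}

And a minimal userspace sketch for enabling the knob at runtime, assuming the
patched kernel is running; the 6 MB value is an arbitrary example, not a
recommendation, and sysctl -w net.ipv4.tcp_collapse_max_bytes=6291456 does the
same thing:

	#include <stdio.h>

	int main(void)
	{
		/* Write a byte limit to the sysctl added by the patch above. */
		FILE *f = fopen("/proc/sys/net/ipv4/tcp_collapse_max_bytes", "w");

		if (!f) {
			perror("tcp_collapse_max_bytes");
			return 1;
		}
		fprintf(f, "%u\n", 6u * 1024 * 1024);
		return fclose(f) ? 1 : 0;
	}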
New patch file:

@@ -0,0 +1,188 @@
From 354d7a8febaa440dd3244466670315ed2805764e Mon Sep 17 00:00:00 2001
From: Ignat Korchagin <ignat@cloudflare.com>
Date: Wed, 4 Dec 2019 16:53:46 +0000
Subject: [PATCH] Add xtsproxy Crypto API module

This module implements a Crypto API AES-XTS synchronous driver, which uses
the AES-NI implementation as a backend and falls back to the generic AES
implementation when the FPU is not usable.
---
 crypto/Kconfig    |  10 ++++
 crypto/Makefile   |   1 +
 crypto/xtsproxy.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 142 insertions(+)
 create mode 100644 crypto/xtsproxy.c

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 094ef56ab7b4..9964667cef85 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -457,6 +457,16 @@ config CRYPTO_XTS
 	  key size 256, 384 or 512 bits. This implementation currently
 	  can't handle a sectorsize which is not a multiple of 16 bytes.
 
+config CRYPTO_XTS_AES_SYNC
+	tristate "XTS AES synchronous implementation"
+	depends on X86 && 64BIT
+	select CRYPTO_AES
+	select CRYPTO_AES_NI_INTEL
+	help
+	  A synchronous AES-XTS implementation, which uses AES-NI as a
+	  backend implementation and falls back to the generic implementation
+	  when the FPU is not usable.
+
 config CRYPTO_KEYWRAP
 	tristate "Key wrapping support"
 	select CRYPTO_SKCIPHER
diff --git a/crypto/Makefile b/crypto/Makefile
index b279483fba50..4f6ddcbdc6d4 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -90,6 +90,7 @@ obj-$(CONFIG_CRYPTO_PCBC) += pcbc.o
 obj-$(CONFIG_CRYPTO_CTS) += cts.o
 obj-$(CONFIG_CRYPTO_LRW) += lrw.o
 obj-$(CONFIG_CRYPTO_XTS) += xts.o
+obj-$(CONFIG_CRYPTO_XTS_AES_SYNC) += xtsproxy.o
 obj-$(CONFIG_CRYPTO_CTR) += ctr.o
 obj-$(CONFIG_CRYPTO_KEYWRAP) += keywrap.o
 obj-$(CONFIG_CRYPTO_ADIANTUM) += adiantum.o
diff --git a/crypto/xtsproxy.c b/crypto/xtsproxy.c
new file mode 100644
index 000000000000..51ecfb7b4891
--- /dev/null
+++ b/crypto/xtsproxy.c
@@ -0,0 +1,131 @@
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/aes.h>
+#include <asm/fpu/api.h>
+
+struct xtsproxy_ctx {
+	struct crypto_skcipher *xts_aesni;
+	struct crypto_skcipher *xts_generic;
+};
+
+static int xtsproxy_skcipher_init(struct crypto_skcipher *tfm)
+{
+	struct xtsproxy_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	/* AESNI based XTS implementation, requires FPU to be available */
+	ctx->xts_aesni = crypto_alloc_skcipher("__xts-aes-aesni", CRYPTO_ALG_INTERNAL, 0);
+	if (IS_ERR(ctx->xts_aesni))
+		return PTR_ERR(ctx->xts_aesni);
+
+	/* generic XTS implementation based on generic FPU-less AES */
+	/* there is also aes-aesni implementation, which falls back to aes-generic */
+	/* but we're doing FPU checks in our code, so no need to repeat those */
+	/* as we will always fallback to aes-generic in this case */
+	ctx->xts_generic = crypto_alloc_skcipher("xts(ecb(aes-generic))", 0, 0);
+	if (IS_ERR(ctx->xts_generic))
+		return PTR_ERR(ctx->xts_generic);
+
+	/* make sure we allocate enough request memory for both implementations */
+	crypto_skcipher_set_reqsize(tfm, max(crypto_skcipher_reqsize(ctx->xts_aesni), crypto_skcipher_reqsize(ctx->xts_generic)));
+
+	return 0;
+}
+
+static void xtsproxy_skcipher_exit(struct crypto_skcipher *tfm)
+{
+	struct xtsproxy_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	if (!IS_ERR_OR_NULL(ctx->xts_generic)) {
+		crypto_free_skcipher(ctx->xts_generic);
+		ctx->xts_generic = NULL;
+	}
+
+	if (!IS_ERR_OR_NULL(ctx->xts_aesni)) {
+		crypto_free_skcipher(ctx->xts_aesni);
+		ctx->xts_aesni = NULL;
+	}
+}
+
+static int xtsproxy_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keylen)
+{
+	struct xtsproxy_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = crypto_skcipher_setkey(ctx->xts_aesni, key, keylen);
+	if (err)
+		return err;
+
+	return crypto_skcipher_setkey(ctx->xts_generic, key, keylen);
+}
+
+static int xtsproxy_encrypt(struct skcipher_request *req)
+{
+	struct xtsproxy_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+
+	if (irq_fpu_usable())
+		skcipher_request_set_tfm(req, ctx->xts_aesni);
+	else
+		skcipher_request_set_tfm(req, ctx->xts_generic);
+
+	/* underlying implementations should not try to sleep */
+	req->base.flags &= ~(CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG);
+
+	return crypto_skcipher_encrypt(req);
+}
+
+static int xtsproxy_decrypt(struct skcipher_request *req)
+{
+	struct xtsproxy_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+
+	if (irq_fpu_usable())
+		skcipher_request_set_tfm(req, ctx->xts_aesni);
+	else
+		skcipher_request_set_tfm(req, ctx->xts_generic);
+
+	/* underlying implementations should not try to sleep */
+	req->base.flags &= ~(CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG);
+
+	return crypto_skcipher_decrypt(req);
+}
+
+static struct skcipher_alg xtsproxy_skcipher = {
+	.base = {
+		.cra_name		= "xts(aes)",
+		.cra_driver_name	= "xts-aes-xtsproxy",
+		/* make sure we don't use it unless requested explicitly */
+		.cra_priority		= 0,
+		/* .cra_flags		= CRYPTO_ALG_INTERNAL, */
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct xtsproxy_ctx),
+		.cra_module		= THIS_MODULE,
+	},
+	.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+	.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.init		= xtsproxy_skcipher_init,
+	.exit		= xtsproxy_skcipher_exit,
+	.setkey		= xtsproxy_setkey,
+	.encrypt	= xtsproxy_encrypt,
+	.decrypt	= xtsproxy_decrypt,
+};
+
+static int __init xtsproxy_init(void)
+{
+	return crypto_register_skcipher(&xtsproxy_skcipher);
+}
+
+static void __exit xtsproxy_fini(void)
+{
+	crypto_unregister_skcipher(&xtsproxy_skcipher);
+}
+
+module_init(xtsproxy_init);
+module_exit(xtsproxy_fini);
+
+MODULE_DESCRIPTION("XTS-AES using AESNI implementation with generic AES fallback");
+MODULE_AUTHOR("Ignat Korchagin <ignat@cloudflare.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("xts(aes)");
-- 
2.29.1
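A hedged usage sketch: once the module above is enabled (CRYPTO_XTS_AES_SYNC)
and loaded, the cipher can be exercised from userspace through AF_ALG. Because
xtsproxy registers with .cra_priority = 0, binding the generic name "xts(aes)"
would normally select a higher-priority driver, so this sketch binds the
driver name "xts-aes-xtsproxy" explicitly; the key bytes are placeholders.

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include <linux/if_alg.h>

	#ifndef SOL_ALG
	#define SOL_ALG 279
	#endif

	int main(void)
	{
		struct sockaddr_alg sa = {
			.salg_family = AF_ALG,
			.salg_type   = "skcipher",
			.salg_name   = "xts-aes-xtsproxy",
		};
		unsigned char key[64]; /* two AES-256 keys back to back */
		int i, tfmfd;

		/* placeholder key; the two halves must differ for XTS */
		for (i = 0; i < 64; i++)
			key[i] = (unsigned char)i;

		tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
		if (tfmfd < 0 || bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
			perror("xts-aes-xtsproxy not available");
			return 1;
		}
		if (setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key)) < 0) {
			perror("ALG_SET_KEY");
			return 1;
		}
		puts("xts-aes-xtsproxy: bound and keyed via AF_ALG");
		close(tfmfd);
		return 0;
	}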
i915 GuC patch refresh (second patch file, same change at different offsets):

@@ -1,6 +1,6 @@
 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
 +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
-@@ -26,7 +26,7 @@ static void uc_expand_default_options(st
+@@ -26,10 +26,14 @@ static void uc_expand_default_options(st
 	return;
 
 	/* Don't enable GuC/HuC on pre-Gen12 */
@@ -9,3 +9,11 @@
 	i915->params.enable_guc = 0;
 	return;
 	}
+	if(GRAPHICS_VER(i915) < 11){
+		i915->params.enable_guc = 2;
+		return;
+	}
+
+	/* Don't enable GuC/HuC on older Gen12 platforms */
+	if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) {
(The tcp_collapse_max_bytes and xtsproxy patches shown above are added a second time, verbatim, as another pair of new patch files.)