From ae5505832e35b83fe878cdd29c04f6fab66342ae Mon Sep 17 00:00:00 2001 From: littoy <103546957+littoy@users.noreply.github.com> Date: Thu, 20 Apr 2023 15:00:42 +0800 Subject: [PATCH] =?UTF-8?q?x86=EF=BC=9AAdd=20net.ipv4.tcp=5Fcollapse=5Fmax?= =?UTF-8?q?=5Fbytes=20option=20from=20Cloudflare=20and=20CRYPTO=5FXTS=5FAE?= =?UTF-8?q?S=5FSYNC=20(#11128)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * x86: intel 10gen GPU i915 guc patch refresh * intel-microcode: 3.20230214.1 * x86: add tcp_collapse option support --- package/firmware/intel-microcode/Makefile | 8 +- .../patches-5.15/992-enable-intel-guc.patch | 9 +- ...to-enable-disable-tcp_collapse-logic.patch | 164 +++++++++++++++ .../999-Add-xtsproxy-Crypto-API-module.patch | 188 ++++++++++++++++++ .../patches-6.1/992-enable-intel-guc.patch | 10 +- ...to-enable-disable-tcp_collapse-logic.patch | 164 +++++++++++++++ .../999-Add-xtsproxy-Crypto-API-module.patch | 188 ++++++++++++++++++ 7 files changed, 725 insertions(+), 6 deletions(-) create mode 100644 target/linux/x86/patches-5.15/998-add-a-sysctl-to-enable-disable-tcp_collapse-logic.patch create mode 100644 target/linux/x86/patches-5.15/999-Add-xtsproxy-Crypto-API-module.patch create mode 100644 target/linux/x86/patches-6.1/998-add-a-sysctl-to-enable-disable-tcp_collapse-logic.patch create mode 100644 target/linux/x86/patches-6.1/999-Add-xtsproxy-Crypto-API-module.patch diff --git a/package/firmware/intel-microcode/Makefile b/package/firmware/intel-microcode/Makefile index 58ae7ee74..634090a30 100644 --- a/package/firmware/intel-microcode/Makefile +++ b/package/firmware/intel-microcode/Makefile @@ -8,13 +8,13 @@ include $(TOPDIR)/rules.mk PKG_NAME:=intel-microcode -PKG_VERSION:=20221108 +PKG_VERSION:=20230214 PKG_RELEASE:=1 -PKG_SOURCE:=intel-microcode_3.$(PKG_VERSION).1.tar.xz +PKG_SOURCE:=intel-microcode_3.$(PKG_VERSION).1~deb11u1.tar.xz PKG_SOURCE_URL:=@DEBIAN/pool/non-free/i/intel-microcode/ -PKG_HASH:=9c40fc5cbb386a4e1154f8f316422b28fccc9eaabdea707a80643f9bed3f9064 -PKG_BUILD_DIR:=$(BUILD_DIR)/intel-microcode-3.$(PKG_VERSION).1 +PKG_HASH:=1b10c7b622063029cc932cfa4115c278412e1dd01e709154e1ac7e200fe48baf +PKG_BUILD_DIR:=$(BUILD_DIR)/intel-microcode-3.$(PKG_VERSION).1~deb11u1 PKG_BUILD_DEPENDS:=iucode-tool/host diff --git a/target/linux/x86/patches-5.15/992-enable-intel-guc.patch b/target/linux/x86/patches-5.15/992-enable-intel-guc.patch index 23834093e..08acd6d5f 100644 --- a/target/linux/x86/patches-5.15/992-enable-intel-guc.patch +++ b/target/linux/x86/patches-5.15/992-enable-intel-guc.patch @@ -1,6 +1,6 @@ --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c -@@ -23,7 +23,7 @@ static void uc_expand_default_options(st +@@ -23,10 +23,14 @@ static void uc_expand_default_options(st return; /* Don't enable GuC/HuC on pre-Gen12 */ @@ -9,3 +9,10 @@ i915->params.enable_guc = 0; return; } ++ if(GRAPHICS_VER(i915) < 11){ ++ i915->params.enable_guc = 2; ++ return; ++ } + + /* Don't enable GuC/HuC on older Gen12 platforms */ + if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) { diff --git a/target/linux/x86/patches-5.15/998-add-a-sysctl-to-enable-disable-tcp_collapse-logic.patch b/target/linux/x86/patches-5.15/998-add-a-sysctl-to-enable-disable-tcp_collapse-logic.patch new file mode 100644 index 000000000..d55b75a9f --- /dev/null +++ b/target/linux/x86/patches-5.15/998-add-a-sysctl-to-enable-disable-tcp_collapse-logic.patch @@ -0,0 +1,164 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: 
"mfreemon@cloudflare.com" +Date: Tue, 1 Mar 2022 17:06:02 -0600 +Subject: [PATCH] Add a sysctl to skip tcp collapse processing when the receive + buffer is full. + +For context and additional information about this patch, see the +blog post at https://blog.cloudflare.com/optimizing-tcp-for-high-throughput-and-low-latency/ + +sysctl: net.ipv4.tcp_collapse_max_bytes + +If tcp_collapse_max_bytes is non-zero, attempt to collapse the +queue to free up memory if the current amount of memory allocated +is less than tcp_collapse_max_bytes. Otherwise, the packet is +dropped without attempting to collapse the queue. + +If tcp_collapse_max_bytes is zero, this feature is disabled +and the default Linux behavior is used. The default Linux +behavior is to always perform the attempt to collapse the +queue to free up memory. + +When the receive queue is small, we want to collapse the +queue. There are two reasons for this: (a) the latency of +performing the collapse will be small on a small queue, and +(b) we want to avoid sending a congestion signal (via a +packet drop) to the sender when the receive queue is small. + +The result is that we avoid latency spikes caused by the +time it takes to perform the collapse logic when the receive +queue is large and full, while preserving existing behavior +and performance for all other cases. +--- + include/net/netns/ipv4.h | 1 + + include/trace/events/tcp.h | 7 +++++++ + net/ipv4/sysctl_net_ipv4.c | 7 +++++++ + net/ipv4/tcp_input.c | 36 ++++++++++++++++++++++++++++++++++++ + net/ipv4/tcp_ipv4.c | 2 ++ + 5 files changed, 53 insertions(+) + +diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h +index 6c5b2efc4f17..bf2c9b5847e4 100644 +--- a/include/net/netns/ipv4.h ++++ b/include/net/netns/ipv4.h +@@ -182,6 +182,7 @@ struct netns_ipv4 { + int sysctl_udp_rmem_min; + + u8 sysctl_fib_notify_on_flag_change; ++ unsigned int sysctl_tcp_collapse_max_bytes; + + #ifdef CONFIG_NET_L3_MASTER_DEV + u8 sysctl_udp_l3mdev_accept; +diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h +index 521059d8dc0a..35fa0f23680c 100644 +--- a/include/trace/events/tcp.h ++++ b/include/trace/events/tcp.h +@@ -187,6 +187,13 @@ DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust, + TP_ARGS(sk) + ); + ++DEFINE_EVENT(tcp_event_sk, tcp_collapse_max_bytes_exceeded, ++ ++ TP_PROTO(struct sock *sk), ++ ++ TP_ARGS(sk) ++); ++ + TRACE_EVENT(tcp_retransmit_synack, + + TP_PROTO(const struct sock *sk, const struct request_sock *req), +diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c +index 6f1e64d49232..a61e6b1ac0cc 100644 +--- a/net/ipv4/sysctl_net_ipv4.c ++++ b/net/ipv4/sysctl_net_ipv4.c +@@ -1406,6 +1406,13 @@ static struct ctl_table ipv4_net_table[] = { + .extra1 = SYSCTL_ZERO, + .extra2 = &two, + }, ++ { ++ .procname = "tcp_collapse_max_bytes", ++ .data = &init_net.ipv4.sysctl_tcp_collapse_max_bytes, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = proc_douintvec_minmax, ++ }, + { } + }; + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index f3b623967436..204f33f2835c 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -5340,6 +5340,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk) + static int tcp_prune_queue(struct sock *sk) + { + struct tcp_sock *tp = tcp_sk(sk); ++ struct net *net = sock_net(sk); + + NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED); + +@@ -5351,6 +5352,39 @@ static int tcp_prune_queue(struct sock *sk) + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) + return 0; + ++ /* For 
context and additional information about this patch, see the ++ * blog post at ++ * ++ * sysctl: net.ipv4.tcp_collapse_max_bytes ++ * ++ * If tcp_collapse_max_bytes is non-zero, attempt to collapse the ++ * queue to free up memory if the current amount of memory allocated ++ * is less than tcp_collapse_max_bytes. Otherwise, the packet is ++ * dropped without attempting to collapse the queue. ++ * ++ * If tcp_collapse_max_bytes is zero, this feature is disabled ++ * and the default Linux behavior is used. The default Linux ++ * behavior is to always perform the attempt to collapse the ++ * queue to free up memory. ++ * ++ * When the receive queue is small, we want to collapse the ++ * queue. There are two reasons for this: (a) the latency of ++ * performing the collapse will be small on a small queue, and ++ * (b) we want to avoid sending a congestion signal (via a ++ * packet drop) to the sender when the receive queue is small. ++ * ++ * The result is that we avoid latency spikes caused by the ++ * time it takes to perform the collapse logic when the receive ++ * queue is large and full, while preserving existing behavior ++ * and performance for all other cases. ++ */ ++ if (net->ipv4.sysctl_tcp_collapse_max_bytes && ++ (atomic_read(&sk->sk_rmem_alloc) > net->ipv4.sysctl_tcp_collapse_max_bytes)) { ++ /* We are dropping the packet */ ++ trace_tcp_collapse_max_bytes_exceeded(sk); ++ goto do_not_collapse; ++ } ++ + tcp_collapse_ofo_queue(sk); + if (!skb_queue_empty(&sk->sk_receive_queue)) + tcp_collapse(sk, &sk->sk_receive_queue, NULL, +@@ -5370,6 +5404,8 @@ static int tcp_prune_queue(struct sock *sk) + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) + return 0; + ++do_not_collapse: ++ + /* If we are really being abused, tell the caller to silently + * drop receive data on the floor. It will get retransmitted + * and hopefully then we'll have sufficient space. +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index 0fe9461647da..4fadbf38525f 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -3218,6 +3218,8 @@ static int __net_init tcp_sk_init(struct net *net) + else + net->ipv4.tcp_congestion_control = &tcp_reno; + ++ net->ipv4.sysctl_tcp_collapse_max_bytes = 0; ++ + return 0; + fail: + tcp_sk_exit(net); +-- +2.35.1 + diff --git a/target/linux/x86/patches-5.15/999-Add-xtsproxy-Crypto-API-module.patch b/target/linux/x86/patches-5.15/999-Add-xtsproxy-Crypto-API-module.patch new file mode 100644 index 000000000..2344bde21 --- /dev/null +++ b/target/linux/x86/patches-5.15/999-Add-xtsproxy-Crypto-API-module.patch @@ -0,0 +1,188 @@ +From 354d7a8febaa440dd3244466670315ed2805764e Mon Sep 17 00:00:00 2001 +From: Ignat Korchagin +Date: Wed, 4 Dec 2019 16:53:46 +0000 +Subject: [PATCH] Add xtsproxy Crypto API module + +This module implements a Crypto API AES-XTS synchronous driver, which uses +AES NI implementation as a backend and falls back to generic AES implementation, +when FPU is not usable. +--- + crypto/Kconfig | 10 ++++ + crypto/Makefile | 1 + + crypto/xtsproxy.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 142 insertions(+) + create mode 100644 crypto/xtsproxy.c + +diff --git a/crypto/Kconfig b/crypto/Kconfig +index 094ef56ab7b4..9964667cef85 100644 +--- a/crypto/Kconfig ++++ b/crypto/Kconfig +@@ -457,6 +457,16 @@ config CRYPTO_XTS + key size 256, 384 or 512 bits. This implementation currently + can't handle a sectorsize which is not a multiple of 16 bytes. 
+ ++config CRYPTO_XTS_AES_SYNC ++ tristate "XTS AES synchronous implementation" ++ depends on X86 && 64BIT ++ select CRYPTO_AES ++ select CRYPTO_AES_NI_INTEL ++ help ++ A synchronous AES-XTS implementaion, which uses AES NI as a ++ backend implementation and falls back to generic implementation, ++ when FPU is not usable. ++ + config CRYPTO_KEYWRAP + tristate "Key wrapping support" + select CRYPTO_SKCIPHER +diff --git a/crypto/Makefile b/crypto/Makefile +index b279483fba50..4f6ddcbdc6d4 100644 +--- a/crypto/Makefile ++++ b/crypto/Makefile +@@ -90,6 +90,7 @@ obj-$(CONFIG_CRYPTO_PCBC) += pcbc.o + obj-$(CONFIG_CRYPTO_CTS) += cts.o + obj-$(CONFIG_CRYPTO_LRW) += lrw.o + obj-$(CONFIG_CRYPTO_XTS) += xts.o ++obj-$(CONFIG_CRYPTO_XTS_AES_SYNC) += xtsproxy.o + obj-$(CONFIG_CRYPTO_CTR) += ctr.o + obj-$(CONFIG_CRYPTO_KEYWRAP) += keywrap.o + obj-$(CONFIG_CRYPTO_ADIANTUM) += adiantum.o +diff --git a/crypto/xtsproxy.c b/crypto/xtsproxy.c +new file mode 100644 +index 000000000000..51ecfb7b4891 +--- /dev/null ++++ b/crypto/xtsproxy.c +@@ -0,0 +1,131 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct xtsproxy_ctx { ++ struct crypto_skcipher *xts_aesni; ++ struct crypto_skcipher *xts_generic; ++}; ++ ++static int xtsproxy_skcipher_init(struct crypto_skcipher *tfm) ++{ ++ struct xtsproxy_ctx *ctx = crypto_skcipher_ctx(tfm); ++ ++ /* AESNI based XTS implementation, requires FPU to be available */ ++ ctx->xts_aesni = crypto_alloc_skcipher("__xts-aes-aesni", CRYPTO_ALG_INTERNAL, 0); ++ if (IS_ERR(ctx->xts_aesni)) ++ return PTR_ERR(ctx->xts_aesni); ++ ++ /* generic XTS implementation based on generic FPU-less AES */ ++ /* there is also aes-aesni implementation, which falls back to aes-generic */ ++ /* but we're doing FPU checks in our code, so no need to repeat those */ ++ /* as we will always fallback to aes-generic in this case */ ++ ctx->xts_generic = crypto_alloc_skcipher("xts(ecb(aes-generic))", 0, 0); ++ if (IS_ERR(ctx->xts_generic)) ++ return PTR_ERR(ctx->xts_generic); ++ ++ /* make sure we allocate enough request memory for both implementations */ ++ crypto_skcipher_set_reqsize(tfm, max(crypto_skcipher_reqsize(ctx->xts_aesni), crypto_skcipher_reqsize(ctx->xts_generic))); ++ ++ return 0; ++} ++ ++static void xtsproxy_skcipher_exit(struct crypto_skcipher *tfm) ++{ ++ struct xtsproxy_ctx *ctx = crypto_skcipher_ctx(tfm); ++ ++ if (!IS_ERR_OR_NULL(ctx->xts_generic)) { ++ crypto_free_skcipher(ctx->xts_generic); ++ ctx->xts_generic = NULL; ++ } ++ ++ if (!IS_ERR_OR_NULL(ctx->xts_aesni)) { ++ crypto_free_skcipher(ctx->xts_aesni); ++ ctx->xts_aesni = NULL; ++ } ++} ++ ++static int xtsproxy_setkey(struct crypto_skcipher *tfm, const u8 *key, ++ unsigned int keylen) ++{ ++ struct xtsproxy_ctx *ctx = crypto_skcipher_ctx(tfm); ++ int err; ++ ++ err = crypto_skcipher_setkey(ctx->xts_aesni, key, keylen); ++ if (err) ++ return err; ++ ++ return crypto_skcipher_setkey(ctx->xts_generic, key, keylen); ++} ++ ++static int xtsproxy_encrypt(struct skcipher_request *req) ++{ ++ struct xtsproxy_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); ++ ++ if (irq_fpu_usable()) ++ skcipher_request_set_tfm(req, ctx->xts_aesni); ++ else ++ skcipher_request_set_tfm(req, ctx->xts_generic); ++ ++ /* underlying implementations should not try to sleep */ ++ req->base.flags &= ~(CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG); ++ ++ return crypto_skcipher_encrypt(req); ++} ++ ++static int xtsproxy_decrypt(struct skcipher_request *req) ++{ ++ struct xtsproxy_ctx *ctx = 
crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); ++ ++ if (irq_fpu_usable()) ++ skcipher_request_set_tfm(req, ctx->xts_aesni); ++ else ++ skcipher_request_set_tfm(req, ctx->xts_generic); ++ ++ /* underlying implementations should not try to sleep */ ++ req->base.flags &= ~(CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG); ++ ++ return crypto_skcipher_decrypt(req); ++} ++ ++static struct skcipher_alg xtsproxy_skcipher = { ++ .base = { ++ .cra_name = "xts(aes)", ++ .cra_driver_name = "xts-aes-xtsproxy", ++ /* make sure we don't use it unless requested explicitly */ ++ .cra_priority = 0, ++ /* .cra_flags = CRYPTO_ALG_INTERNAL, */ ++ .cra_blocksize = AES_BLOCK_SIZE, ++ .cra_ctxsize = sizeof(struct xtsproxy_ctx), ++ .cra_module = THIS_MODULE, ++ }, ++ .min_keysize = 2 * AES_MIN_KEY_SIZE, ++ .max_keysize = 2 * AES_MAX_KEY_SIZE, ++ .ivsize = AES_BLOCK_SIZE, ++ .init = xtsproxy_skcipher_init, ++ .exit = xtsproxy_skcipher_exit, ++ .setkey = xtsproxy_setkey, ++ .encrypt = xtsproxy_encrypt, ++ .decrypt = xtsproxy_decrypt, ++}; ++ ++static int __init xtsproxy_init(void) ++{ ++ return crypto_register_skcipher(&xtsproxy_skcipher); ++} ++ ++static void __exit xtsproxy_fini(void) ++{ ++ crypto_unregister_skcipher(&xtsproxy_skcipher); ++} ++ ++module_init(xtsproxy_init); ++module_exit(xtsproxy_fini); ++ ++MODULE_DESCRIPTION("XTS-AES using AESNI implementation with generic AES fallback"); ++MODULE_AUTHOR("Ignat Korchagin "); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS_CRYPTO("xts(aes)"); +-- +2.29.1 + diff --git a/target/linux/x86/patches-6.1/992-enable-intel-guc.patch b/target/linux/x86/patches-6.1/992-enable-intel-guc.patch index 26ea41594..2162d3cf9 100644 --- a/target/linux/x86/patches-6.1/992-enable-intel-guc.patch +++ b/target/linux/x86/patches-6.1/992-enable-intel-guc.patch @@ -1,6 +1,6 @@ --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c -@@ -26,7 +26,7 @@ static void uc_expand_default_options(st +@@ -26,10 +26,14 @@ static void uc_expand_default_options(st return; /* Don't enable GuC/HuC on pre-Gen12 */ @@ -9,3 +9,11 @@ i915->params.enable_guc = 0; return; } ++ if(GRAPHICS_VER(i915) < 11){ ++ i915->params.enable_guc = 2; ++ return; ++ } + + /* Don't enable GuC/HuC on older Gen12 platforms */ + if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) { + diff --git a/target/linux/x86/patches-6.1/998-add-a-sysctl-to-enable-disable-tcp_collapse-logic.patch b/target/linux/x86/patches-6.1/998-add-a-sysctl-to-enable-disable-tcp_collapse-logic.patch new file mode 100644 index 000000000..d55b75a9f --- /dev/null +++ b/target/linux/x86/patches-6.1/998-add-a-sysctl-to-enable-disable-tcp_collapse-logic.patch @@ -0,0 +1,164 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: "mfreemon@cloudflare.com" +Date: Tue, 1 Mar 2022 17:06:02 -0600 +Subject: [PATCH] Add a sysctl to skip tcp collapse processing when the receive + buffer is full. + +For context and additional information about this patch, see the +blog post at https://blog.cloudflare.com/optimizing-tcp-for-high-throughput-and-low-latency/ + +sysctl: net.ipv4.tcp_collapse_max_bytes + +If tcp_collapse_max_bytes is non-zero, attempt to collapse the +queue to free up memory if the current amount of memory allocated +is less than tcp_collapse_max_bytes. Otherwise, the packet is +dropped without attempting to collapse the queue. + +If tcp_collapse_max_bytes is zero, this feature is disabled +and the default Linux behavior is used. 
The default Linux +behavior is to always perform the attempt to collapse the +queue to free up memory. + +When the receive queue is small, we want to collapse the +queue. There are two reasons for this: (a) the latency of +performing the collapse will be small on a small queue, and +(b) we want to avoid sending a congestion signal (via a +packet drop) to the sender when the receive queue is small. + +The result is that we avoid latency spikes caused by the +time it takes to perform the collapse logic when the receive +queue is large and full, while preserving existing behavior +and performance for all other cases. +--- + include/net/netns/ipv4.h | 1 + + include/trace/events/tcp.h | 7 +++++++ + net/ipv4/sysctl_net_ipv4.c | 7 +++++++ + net/ipv4/tcp_input.c | 36 ++++++++++++++++++++++++++++++++++++ + net/ipv4/tcp_ipv4.c | 2 ++ + 5 files changed, 53 insertions(+) + +diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h +index 6c5b2efc4f17..bf2c9b5847e4 100644 +--- a/include/net/netns/ipv4.h ++++ b/include/net/netns/ipv4.h +@@ -182,6 +182,7 @@ struct netns_ipv4 { + int sysctl_udp_rmem_min; + + u8 sysctl_fib_notify_on_flag_change; ++ unsigned int sysctl_tcp_collapse_max_bytes; + + #ifdef CONFIG_NET_L3_MASTER_DEV + u8 sysctl_udp_l3mdev_accept; +diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h +index 521059d8dc0a..35fa0f23680c 100644 +--- a/include/trace/events/tcp.h ++++ b/include/trace/events/tcp.h +@@ -187,6 +187,13 @@ DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust, + TP_ARGS(sk) + ); + ++DEFINE_EVENT(tcp_event_sk, tcp_collapse_max_bytes_exceeded, ++ ++ TP_PROTO(struct sock *sk), ++ ++ TP_ARGS(sk) ++); ++ + TRACE_EVENT(tcp_retransmit_synack, + + TP_PROTO(const struct sock *sk, const struct request_sock *req), +diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c +index 6f1e64d49232..a61e6b1ac0cc 100644 +--- a/net/ipv4/sysctl_net_ipv4.c ++++ b/net/ipv4/sysctl_net_ipv4.c +@@ -1406,6 +1406,13 @@ static struct ctl_table ipv4_net_table[] = { + .extra1 = SYSCTL_ZERO, + .extra2 = &two, + }, ++ { ++ .procname = "tcp_collapse_max_bytes", ++ .data = &init_net.ipv4.sysctl_tcp_collapse_max_bytes, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = proc_douintvec_minmax, ++ }, + { } + }; + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index f3b623967436..204f33f2835c 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -5340,6 +5340,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk) + static int tcp_prune_queue(struct sock *sk) + { + struct tcp_sock *tp = tcp_sk(sk); ++ struct net *net = sock_net(sk); + + NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED); + +@@ -5351,6 +5352,39 @@ static int tcp_prune_queue(struct sock *sk) + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) + return 0; + ++ /* For context and additional information about this patch, see the ++ * blog post at ++ * ++ * sysctl: net.ipv4.tcp_collapse_max_bytes ++ * ++ * If tcp_collapse_max_bytes is non-zero, attempt to collapse the ++ * queue to free up memory if the current amount of memory allocated ++ * is less than tcp_collapse_max_bytes. Otherwise, the packet is ++ * dropped without attempting to collapse the queue. ++ * ++ * If tcp_collapse_max_bytes is zero, this feature is disabled ++ * and the default Linux behavior is used. The default Linux ++ * behavior is to always perform the attempt to collapse the ++ * queue to free up memory. ++ * ++ * When the receive queue is small, we want to collapse the ++ * queue. 
There are two reasons for this: (a) the latency of ++ * performing the collapse will be small on a small queue, and ++ * (b) we want to avoid sending a congestion signal (via a ++ * packet drop) to the sender when the receive queue is small. ++ * ++ * The result is that we avoid latency spikes caused by the ++ * time it takes to perform the collapse logic when the receive ++ * queue is large and full, while preserving existing behavior ++ * and performance for all other cases. ++ */ ++ if (net->ipv4.sysctl_tcp_collapse_max_bytes && ++ (atomic_read(&sk->sk_rmem_alloc) > net->ipv4.sysctl_tcp_collapse_max_bytes)) { ++ /* We are dropping the packet */ ++ trace_tcp_collapse_max_bytes_exceeded(sk); ++ goto do_not_collapse; ++ } ++ + tcp_collapse_ofo_queue(sk); + if (!skb_queue_empty(&sk->sk_receive_queue)) + tcp_collapse(sk, &sk->sk_receive_queue, NULL, +@@ -5370,6 +5404,8 @@ static int tcp_prune_queue(struct sock *sk) + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) + return 0; + ++do_not_collapse: ++ + /* If we are really being abused, tell the caller to silently + * drop receive data on the floor. It will get retransmitted + * and hopefully then we'll have sufficient space. +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index 0fe9461647da..4fadbf38525f 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -3218,6 +3218,8 @@ static int __net_init tcp_sk_init(struct net *net) + else + net->ipv4.tcp_congestion_control = &tcp_reno; + ++ net->ipv4.sysctl_tcp_collapse_max_bytes = 0; ++ + return 0; + fail: + tcp_sk_exit(net); +-- +2.35.1 + diff --git a/target/linux/x86/patches-6.1/999-Add-xtsproxy-Crypto-API-module.patch b/target/linux/x86/patches-6.1/999-Add-xtsproxy-Crypto-API-module.patch new file mode 100644 index 000000000..2344bde21 --- /dev/null +++ b/target/linux/x86/patches-6.1/999-Add-xtsproxy-Crypto-API-module.patch @@ -0,0 +1,188 @@ +From 354d7a8febaa440dd3244466670315ed2805764e Mon Sep 17 00:00:00 2001 +From: Ignat Korchagin +Date: Wed, 4 Dec 2019 16:53:46 +0000 +Subject: [PATCH] Add xtsproxy Crypto API module + +This module implements a Crypto API AES-XTS synchronous driver, which uses +AES NI implementation as a backend and falls back to generic AES implementation, +when FPU is not usable. +--- + crypto/Kconfig | 10 ++++ + crypto/Makefile | 1 + + crypto/xtsproxy.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 142 insertions(+) + create mode 100644 crypto/xtsproxy.c + +diff --git a/crypto/Kconfig b/crypto/Kconfig +index 094ef56ab7b4..9964667cef85 100644 +--- a/crypto/Kconfig ++++ b/crypto/Kconfig +@@ -457,6 +457,16 @@ config CRYPTO_XTS + key size 256, 384 or 512 bits. This implementation currently + can't handle a sectorsize which is not a multiple of 16 bytes. + ++config CRYPTO_XTS_AES_SYNC ++ tristate "XTS AES synchronous implementation" ++ depends on X86 && 64BIT ++ select CRYPTO_AES ++ select CRYPTO_AES_NI_INTEL ++ help ++ A synchronous AES-XTS implementaion, which uses AES NI as a ++ backend implementation and falls back to generic implementation, ++ when FPU is not usable. 
++ + config CRYPTO_KEYWRAP + tristate "Key wrapping support" + select CRYPTO_SKCIPHER +diff --git a/crypto/Makefile b/crypto/Makefile +index b279483fba50..4f6ddcbdc6d4 100644 +--- a/crypto/Makefile ++++ b/crypto/Makefile +@@ -90,6 +90,7 @@ obj-$(CONFIG_CRYPTO_PCBC) += pcbc.o + obj-$(CONFIG_CRYPTO_CTS) += cts.o + obj-$(CONFIG_CRYPTO_LRW) += lrw.o + obj-$(CONFIG_CRYPTO_XTS) += xts.o ++obj-$(CONFIG_CRYPTO_XTS_AES_SYNC) += xtsproxy.o + obj-$(CONFIG_CRYPTO_CTR) += ctr.o + obj-$(CONFIG_CRYPTO_KEYWRAP) += keywrap.o + obj-$(CONFIG_CRYPTO_ADIANTUM) += adiantum.o +diff --git a/crypto/xtsproxy.c b/crypto/xtsproxy.c +new file mode 100644 +index 000000000000..51ecfb7b4891 +--- /dev/null ++++ b/crypto/xtsproxy.c +@@ -0,0 +1,131 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct xtsproxy_ctx { ++ struct crypto_skcipher *xts_aesni; ++ struct crypto_skcipher *xts_generic; ++}; ++ ++static int xtsproxy_skcipher_init(struct crypto_skcipher *tfm) ++{ ++ struct xtsproxy_ctx *ctx = crypto_skcipher_ctx(tfm); ++ ++ /* AESNI based XTS implementation, requires FPU to be available */ ++ ctx->xts_aesni = crypto_alloc_skcipher("__xts-aes-aesni", CRYPTO_ALG_INTERNAL, 0); ++ if (IS_ERR(ctx->xts_aesni)) ++ return PTR_ERR(ctx->xts_aesni); ++ ++ /* generic XTS implementation based on generic FPU-less AES */ ++ /* there is also aes-aesni implementation, which falls back to aes-generic */ ++ /* but we're doing FPU checks in our code, so no need to repeat those */ ++ /* as we will always fallback to aes-generic in this case */ ++ ctx->xts_generic = crypto_alloc_skcipher("xts(ecb(aes-generic))", 0, 0); ++ if (IS_ERR(ctx->xts_generic)) ++ return PTR_ERR(ctx->xts_generic); ++ ++ /* make sure we allocate enough request memory for both implementations */ ++ crypto_skcipher_set_reqsize(tfm, max(crypto_skcipher_reqsize(ctx->xts_aesni), crypto_skcipher_reqsize(ctx->xts_generic))); ++ ++ return 0; ++} ++ ++static void xtsproxy_skcipher_exit(struct crypto_skcipher *tfm) ++{ ++ struct xtsproxy_ctx *ctx = crypto_skcipher_ctx(tfm); ++ ++ if (!IS_ERR_OR_NULL(ctx->xts_generic)) { ++ crypto_free_skcipher(ctx->xts_generic); ++ ctx->xts_generic = NULL; ++ } ++ ++ if (!IS_ERR_OR_NULL(ctx->xts_aesni)) { ++ crypto_free_skcipher(ctx->xts_aesni); ++ ctx->xts_aesni = NULL; ++ } ++} ++ ++static int xtsproxy_setkey(struct crypto_skcipher *tfm, const u8 *key, ++ unsigned int keylen) ++{ ++ struct xtsproxy_ctx *ctx = crypto_skcipher_ctx(tfm); ++ int err; ++ ++ err = crypto_skcipher_setkey(ctx->xts_aesni, key, keylen); ++ if (err) ++ return err; ++ ++ return crypto_skcipher_setkey(ctx->xts_generic, key, keylen); ++} ++ ++static int xtsproxy_encrypt(struct skcipher_request *req) ++{ ++ struct xtsproxy_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); ++ ++ if (irq_fpu_usable()) ++ skcipher_request_set_tfm(req, ctx->xts_aesni); ++ else ++ skcipher_request_set_tfm(req, ctx->xts_generic); ++ ++ /* underlying implementations should not try to sleep */ ++ req->base.flags &= ~(CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG); ++ ++ return crypto_skcipher_encrypt(req); ++} ++ ++static int xtsproxy_decrypt(struct skcipher_request *req) ++{ ++ struct xtsproxy_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); ++ ++ if (irq_fpu_usable()) ++ skcipher_request_set_tfm(req, ctx->xts_aesni); ++ else ++ skcipher_request_set_tfm(req, ctx->xts_generic); ++ ++ /* underlying implementations should not try to sleep */ ++ req->base.flags &= ~(CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG); ++ ++ return 
crypto_skcipher_decrypt(req); ++} ++ ++static struct skcipher_alg xtsproxy_skcipher = { ++ .base = { ++ .cra_name = "xts(aes)", ++ .cra_driver_name = "xts-aes-xtsproxy", ++ /* make sure we don't use it unless requested explicitly */ ++ .cra_priority = 0, ++ /* .cra_flags = CRYPTO_ALG_INTERNAL, */ ++ .cra_blocksize = AES_BLOCK_SIZE, ++ .cra_ctxsize = sizeof(struct xtsproxy_ctx), ++ .cra_module = THIS_MODULE, ++ }, ++ .min_keysize = 2 * AES_MIN_KEY_SIZE, ++ .max_keysize = 2 * AES_MAX_KEY_SIZE, ++ .ivsize = AES_BLOCK_SIZE, ++ .init = xtsproxy_skcipher_init, ++ .exit = xtsproxy_skcipher_exit, ++ .setkey = xtsproxy_setkey, ++ .encrypt = xtsproxy_encrypt, ++ .decrypt = xtsproxy_decrypt, ++}; ++ ++static int __init xtsproxy_init(void) ++{ ++ return crypto_register_skcipher(&xtsproxy_skcipher); ++} ++ ++static void __exit xtsproxy_fini(void) ++{ ++ crypto_unregister_skcipher(&xtsproxy_skcipher); ++} ++ ++module_init(xtsproxy_init); ++module_exit(xtsproxy_fini); ++ ++MODULE_DESCRIPTION("XTS-AES using AESNI implementation with generic AES fallback"); ++MODULE_AUTHOR("Ignat Korchagin "); ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS_CRYPTO("xts(aes)"); +-- +2.29.1 +
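
Usage note (not part of the patches above): the tcp_collapse_max_bytes patches only add the knob, and tcp_sk_init() initialises it to 0, which keeps the stock "always try to collapse" behaviour. A minimal sketch of how the sysctl would be tuned on a patched kernel; the 6291456 (6 MiB) threshold is an illustrative value, not one prescribed by the patch:

  # set it at runtime (hypothetical threshold of 6 MiB)
  sysctl -w net.ipv4.tcp_collapse_max_bytes=6291456

  # persist it across reboots, e.g. via /etc/sysctl.conf or a sysctl.d drop-in
  echo 'net.ipv4.tcp_collapse_max_bytes=6291456' >> /etc/sysctl.conf

With a non-zero value, tcp_prune_queue() still collapses small receive queues, but once sk_rmem_alloc exceeds the threshold it drops the packet instead (firing the new tcp_collapse_max_bytes_exceeded tracepoint), as implemented in the tcp_input.c hunks above.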
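The xtsproxy patches register an additional "xts(aes)" skcipher whose cra_driver_name is "xts-aes-xtsproxy" with cra_priority 0, so the kernel never picks it automatically; a consumer has to request that driver name explicitly. A quick way to confirm the algorithm is registered on a kernel built with CRYPTO_XTS_AES_SYNC enabled (the grep context widths are arbitrary):

  # show the registered xtsproxy entry, including its driver name and priority 0
  grep -B2 -A10 'xts-aes-xtsproxy' /proc/crypto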