mirror of
https://github.com/coolsnowwolf/lede.git
synced 2025-04-16 14:23:38 +00:00
treewide: remove kernel 6.0 support
This commit is contained in:
parent
2e543c861a
commit
56de4ccd61
@ -1,2 +0,0 @@
|
||||
LINUX_VERSION-6.0 = .12
|
||||
LINUX_KERNEL_HASH-6.0.12 = 89b730edf8942b49e02f9894244205886c9a214d629b35b88c4ff06ee9304f01
|
@ -109,9 +109,9 @@ define KernelPackage/fs-cifs
|
||||
+kmod-crypto-ccm \
|
||||
+kmod-crypto-ecb \
|
||||
+kmod-crypto-des \
|
||||
+(LINUX_5_15||LINUX_6_0||LINUX_6_1):kmod-asn1-decoder \
|
||||
+(LINUX_5_15||LINUX_6_0||LINUX_6_1):kmod-oid-registry \
|
||||
+(LINUX_5_15||LINUX_6_0||LINUX_6_1):kmod-dnsresolver
|
||||
+(LINUX_5_15||LINUX_6_1):kmod-asn1-decoder \
|
||||
+(LINUX_5_15||LINUX_6_1):kmod-oid-registry \
|
||||
+(LINUX_5_15||LINUX_6_1):kmod-dnsresolver
|
||||
endef
|
||||
|
||||
define KernelPackage/fs-cifs/description
|
||||
@ -530,7 +530,7 @@ $(eval $(call KernelPackage,fs-ntfs))
|
||||
define KernelPackage/fs-ntfs3
|
||||
SUBMENU:=$(FS_MENU)
|
||||
TITLE:=NTFS3 Read-Write file system support
|
||||
DEPENDS:=@(LINUX_5_4||LINUX_5_10||LINUX_5_15||LINUX_6_0||LINUX_6_1) +kmod-nls-base
|
||||
DEPENDS:= +kmod-nls-base
|
||||
KCONFIG:= \
|
||||
CONFIG_NTFS3_FS \
|
||||
CONFIG_NTFS3_64BIT_CLUSTER=y \
|
||||
|
@ -142,7 +142,7 @@ $(eval $(call KernelPackage,mii))
|
||||
define KernelPackage/mdio-devres
|
||||
SUBMENU:=$(NETWORK_DEVICES_MENU)
|
||||
TITLE:=Supports MDIO device registration
|
||||
DEPENDS:=@(LINUX_5_10||LINUX_5_15||LINUX_6_0||LINUX_6_1) +kmod-libphy +(TARGET_armvirt||TARGET_bcm27xx_bcm2708||TARGET_tegra):kmod-of-mdio
|
||||
DEPENDS:=@(LINUX_5_10||LINUX_5_15||LINUX_6_1) +kmod-libphy +(TARGET_armvirt||TARGET_bcm27xx_bcm2708||TARGET_tegra):kmod-of-mdio
|
||||
KCONFIG:=CONFIG_MDIO_DEVRES
|
||||
HIDDEN:=1
|
||||
FILES:=$(LINUX_DIR)/drivers/net/phy/mdio_devres.ko
|
||||
@ -597,7 +597,7 @@ $(eval $(call KernelPackage,8139cp))
|
||||
define KernelPackage/r8169
|
||||
SUBMENU:=$(NETWORK_DEVICES_MENU)
|
||||
TITLE:=RealTek RTL-8169 PCI Gigabit Ethernet Adapter kernel support
|
||||
DEPENDS:=@PCI_SUPPORT +kmod-mii +r8169-firmware +kmod-phy-realtek +(LINUX_5_10||LINUX_5_15||LINUX_6_0||LINUX_6_1):kmod-mdio-devres
|
||||
DEPENDS:=@PCI_SUPPORT +kmod-mii +r8169-firmware +kmod-phy-realtek +(LINUX_5_10||LINUX_5_15||LINUX_6_1):kmod-mdio-devres
|
||||
KCONFIG:= \
|
||||
CONFIG_R8169 \
|
||||
CONFIG_R8169_NAPI=y \
|
||||
@ -723,7 +723,7 @@ $(eval $(call KernelPackage,igbvf))
|
||||
define KernelPackage/ixgbe
|
||||
SUBMENU:=$(NETWORK_DEVICES_MENU)
|
||||
TITLE:=Intel(R) 82598/82599 PCI-Express 10 Gigabit Ethernet support
|
||||
DEPENDS:=@PCI_SUPPORT +kmod-mdio +kmod-ptp +kmod-hwmon-core +kmod-libphy +(LINUX_5_10||LINUX_5_15||LINUX_6_0||LINUX_6_1):kmod-mdio-devres
|
||||
DEPENDS:=@PCI_SUPPORT +kmod-mdio +kmod-ptp +kmod-hwmon-core +kmod-libphy +(LINUX_5_10||LINUX_5_15||LINUX_6_1):kmod-mdio-devres
|
||||
KCONFIG:=CONFIG_IXGBE \
|
||||
CONFIG_IXGBE_VXLAN=n \
|
||||
CONFIG_IXGBE_HWMON=y \
|
||||
|
@ -1356,7 +1356,7 @@ define KernelPackage/qrtr
|
||||
SUBMENU:=$(NETWORK_SUPPORT_MENU)
|
||||
TITLE:=Qualcomm IPC Router support
|
||||
HIDDEN:=1
|
||||
DEPENDS:=@(LINUX_5_15||LINUX_6_0||LINUX_6_1)
|
||||
DEPENDS:=@(LINUX_5_15||LINUX_6_1)
|
||||
KCONFIG:=CONFIG_QRTR
|
||||
FILES:= \
|
||||
$(LINUX_DIR)/net/qrtr/qrtr.ko \
|
||||
|
@ -1136,8 +1136,8 @@ $(eval $(call KernelPackage,keys-trusted))
|
||||
define KernelPackage/tpm
|
||||
SUBMENU:=$(OTHER_MENU)
|
||||
TITLE:=TPM Hardware Support
|
||||
DEPENDS:= +kmod-random-core +(LINUX_5_15||LINUX_6_0||LINUX_6_1):kmod-asn1-decoder \
|
||||
+(LINUX_5_15||LINUX_6_0||LINUX_6_1):kmod-asn1-encoder +(LINUX_5_15||LINUX_6_0||LINUX_6_1):kmod-oid-registry
|
||||
DEPENDS:= +kmod-random-core +(LINUX_5_15||LINUX_6_1):kmod-asn1-decoder \
|
||||
+(LINUX_5_15||LINUX_6_1):kmod-asn1-encoder +(LINUX_5_15||LINUX_6_1):kmod-oid-registry
|
||||
KCONFIG:= CONFIG_TCG_TPM
|
||||
FILES:= $(LINUX_DIR)/drivers/char/tpm/tpm.ko
|
||||
AUTOLOAD:=$(call AutoLoad,10,tpm,1)
|
||||
@ -1283,7 +1283,7 @@ $(eval $(call KernelPackage,qcom-qmi-helpers))
|
||||
define KernelPackage/mhi
|
||||
SUBMENU:=$(OTHER_MENU)
|
||||
TITLE:=Modem Host Interface (MHI) bus
|
||||
DEPENDS:=@(LINUX_5_15||LINUX_6_0||LINUX_6_1)
|
||||
DEPENDS:=@(LINUX_5_15||LINUX_6_1)
|
||||
KCONFIG:=CONFIG_MHI_BUS \
|
||||
CONFIG_MHI_BUS_DEBUG=y \
|
||||
CONFIG_MHI_BUS_PCI_GENERIC=n \
|
||||
|
@ -1138,7 +1138,8 @@ $(eval $(call KernelPackage,usb-net-aqc111))
|
||||
|
||||
define KernelPackage/usb-net-asix
|
||||
TITLE:=Kernel module for USB-to-Ethernet Asix convertors
|
||||
DEPENDS:=+kmod-libphy +(LINUX_5_15||LINUX_6_0||LINUX_6_1):kmod-mdio-devres
|
||||
DEPENDS:=+(LINUX_5_4||LINUX_5_10):kmod-libphy \
|
||||
+(LINUX_5_15||LINUX_6_1):kmod-mdio-devres +LINUX_6_1:kmod-phylink
|
||||
KCONFIG:=CONFIG_USB_NET_AX8817X
|
||||
FILES:= \
|
||||
$(LINUX_DIR)/drivers/$(USBNET_DIR)/asix.ko \
|
||||
|
@ -246,7 +246,7 @@ define KernelPackage/drm
|
||||
TITLE:=Direct Rendering Manager (DRM) support
|
||||
HIDDEN:=1
|
||||
DEPENDS:=+kmod-dma-buf +kmod-i2c-core +kmod-i2c-algo-bit +kmod-backlight \
|
||||
+(LINUX_5_15||LINUX_6_0||LINUX_6_1):kmod-fb
|
||||
+(LINUX_5_15||LINUX_6_1):kmod-fb
|
||||
KCONFIG:= \
|
||||
CONFIG_DRM \
|
||||
CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=y \
|
||||
@ -268,7 +268,7 @@ $(eval $(call KernelPackage,drm))
|
||||
define KernelPackage/drm-buddy
|
||||
SUBMENU:=$(VIDEO_MENU)
|
||||
TITLE:=A page based buddy allocator
|
||||
DEPENDS:=@TARGET_x86 @DISPLAY_SUPPORT +kmod-drm @(LINUX_6_0||LINUX_6_1)
|
||||
DEPENDS:=@TARGET_x86 @DISPLAY_SUPPORT +kmod-drm @LINUX_6_1
|
||||
KCONFIG:=CONFIG_DRM_BUDDY
|
||||
FILES:= $(LINUX_DIR)/drivers/gpu/drm/drm_buddy.ko
|
||||
AUTOLOAD:=$(call AutoProbe,drm_buddy)
|
||||
@ -313,7 +313,7 @@ $(eval $(call KernelPackage,drm-kms-helper))
|
||||
define KernelPackage/drm-display-helper
|
||||
SUBMENU:=$(VIDEO_MENU)
|
||||
TITLE:=DRM helpers for display adapters drivers
|
||||
DEPENDS:=@DISPLAY_SUPPORT +kmod-drm +TARGET_x86:kmod-drm-buddy @(LINUX_6_0||LINUX_6_1)
|
||||
DEPENDS:=@DISPLAY_SUPPORT +kmod-drm +TARGET_x86:kmod-drm-buddy @LINUX_6_1
|
||||
KCONFIG:=CONFIG_DRM_DISPLAY_HELPER
|
||||
FILES:=$(LINUX_DIR)/drivers/gpu/drm/display/drm_display_helper.ko
|
||||
AUTOLOAD:=$(call AutoProbe,drm_display_helper)
|
||||
@ -330,7 +330,7 @@ define KernelPackage/drm-amdgpu
|
||||
TITLE:=AMDGPU DRM support
|
||||
DEPENDS:=@TARGET_x86 @DISPLAY_SUPPORT +kmod-backlight +kmod-drm-ttm \
|
||||
+kmod-drm-kms-helper +kmod-i2c-algo-bit +amdgpu-firmware \
|
||||
+(LINUX_6_0||LINUX_6_1):kmod-drm-display-helper
|
||||
+LINUX_6_1:kmod-drm-display-helper
|
||||
KCONFIG:=CONFIG_DRM_AMDGPU \
|
||||
CONFIG_DRM_AMDGPU_SI=y \
|
||||
CONFIG_DRM_AMDGPU_CIK=y \
|
||||
@ -1105,7 +1105,7 @@ define KernelPackage/drm-i915
|
||||
SUBMENU:=$(VIDEO_MENU)
|
||||
TITLE:=Intel GPU drm support
|
||||
DEPENDS:=@TARGET_x86 +kmod-drm-ttm +kmod-drm-kms-helper +i915-firmware \
|
||||
+(LINUX_6_0||LINUX_6_1):kmod-drm-display-helper
|
||||
+LINUX_6_1:kmod-drm-display-helper
|
||||
KCONFIG:= \
|
||||
CONFIG_INTEL_GTT \
|
||||
CONFIG_DRM_I915 \
|
||||
|
@ -1,75 +0,0 @@
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -1386,16 +1386,6 @@ config BOOT_CONFIG_EMBED_FILE
|
||||
This bootconfig will be used if there is no initrd or no other
|
||||
bootconfig in the initrd.
|
||||
|
||||
-config INITRAMFS_PRESERVE_MTIME
|
||||
- bool "Preserve cpio archive mtimes in initramfs"
|
||||
- default y
|
||||
- help
|
||||
- Each entry in an initramfs cpio archive carries an mtime value. When
|
||||
- enabled, extracted cpio items take this mtime, with directory mtime
|
||||
- setting deferred until after creation of any child entries.
|
||||
-
|
||||
- If unsure, say Y.
|
||||
-
|
||||
choice
|
||||
prompt "Compiler optimization level"
|
||||
default CC_OPTIMIZE_FOR_PERFORMANCE
|
||||
--- a/init/initramfs.c
|
||||
+++ b/init/initramfs.c
|
||||
@@ -127,17 +127,15 @@ static void __init free_hash(void)
|
||||
}
|
||||
}
|
||||
|
||||
-#ifdef CONFIG_INITRAMFS_PRESERVE_MTIME
|
||||
-static void __init do_utime(char *filename, time64_t mtime)
|
||||
+static long __init do_utime(char *filename, time64_t mtime)
|
||||
{
|
||||
- struct timespec64 t[2] = { { .tv_sec = mtime }, { .tv_sec = mtime } };
|
||||
- init_utimes(filename, t);
|
||||
-}
|
||||
+ struct timespec64 t[2];
|
||||
|
||||
-static void __init do_utime_path(const struct path *path, time64_t mtime)
|
||||
-{
|
||||
- struct timespec64 t[2] = { { .tv_sec = mtime }, { .tv_sec = mtime } };
|
||||
- vfs_utimes(path, t);
|
||||
+ t[0].tv_sec = mtime;
|
||||
+ t[0].tv_nsec = 0;
|
||||
+ t[1].tv_sec = mtime;
|
||||
+ t[1].tv_nsec = 0;
|
||||
+ return init_utimes(filename, t);
|
||||
}
|
||||
|
||||
static __initdata LIST_HEAD(dir_list);
|
||||
@@ -170,12 +168,6 @@ static void __init dir_utime(void)
|
||||
kfree(de);
|
||||
}
|
||||
}
|
||||
-#else
|
||||
-static void __init do_utime(char *filename, time64_t mtime) {}
|
||||
-static void __init do_utime_path(const struct path *path, time64_t mtime) {}
|
||||
-static void __init dir_add(const char *name, time64_t mtime) {}
|
||||
-static void __init dir_utime(void) {}
|
||||
-#endif
|
||||
|
||||
static __initdata time64_t mtime;
|
||||
|
||||
@@ -407,10 +399,14 @@ static int __init do_name(void)
|
||||
static int __init do_copy(void)
|
||||
{
|
||||
if (byte_count >= body_len) {
|
||||
+ struct timespec64 t[2] = { };
|
||||
if (xwrite(wfile, victim, body_len, &wfile_pos) != body_len)
|
||||
error("write error");
|
||||
|
||||
- do_utime_path(&wfile->f_path, mtime);
|
||||
+ t[0].tv_sec = mtime;
|
||||
+ t[1].tv_sec = mtime;
|
||||
+ vfs_utimes(&wfile->f_path, t);
|
||||
+
|
||||
fput(wfile);
|
||||
if (csum_present && io_csum != hdr_csum)
|
||||
error("bad data checksum");
|
@ -1,122 +0,0 @@
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -2133,8 +2133,6 @@ struct net_device {
|
||||
|
||||
/* Protocol-specific pointers */
|
||||
|
||||
- struct in_device __rcu *ip_ptr;
|
||||
- struct inet6_dev __rcu *ip6_ptr;
|
||||
#if IS_ENABLED(CONFIG_VLAN_8021Q)
|
||||
struct vlan_info __rcu *vlan_info;
|
||||
#endif
|
||||
@@ -2147,18 +2145,16 @@ struct net_device {
|
||||
#if IS_ENABLED(CONFIG_ATALK)
|
||||
void *atalk_ptr;
|
||||
#endif
|
||||
+ struct in_device __rcu *ip_ptr;
|
||||
#if IS_ENABLED(CONFIG_DECNET)
|
||||
struct dn_dev __rcu *dn_ptr;
|
||||
#endif
|
||||
+ struct inet6_dev __rcu *ip6_ptr;
|
||||
#if IS_ENABLED(CONFIG_AX25)
|
||||
void *ax25_ptr;
|
||||
#endif
|
||||
-#if IS_ENABLED(CONFIG_CFG80211)
|
||||
struct wireless_dev *ieee80211_ptr;
|
||||
-#endif
|
||||
-#if IS_ENABLED(CONFIG_IEEE802154) || IS_ENABLED(CONFIG_6LOWPAN)
|
||||
struct wpan_dev *ieee802154_ptr;
|
||||
-#endif
|
||||
#if IS_ENABLED(CONFIG_MPLS_ROUTING)
|
||||
struct mpls_dev __rcu *mpls_ptr;
|
||||
#endif
|
||||
--- a/include/net/cfg80211.h
|
||||
+++ b/include/net/cfg80211.h
|
||||
@@ -8379,9 +8379,7 @@ int cfg80211_register_netdevice(struct n
|
||||
*/
|
||||
static inline void cfg80211_unregister_netdevice(struct net_device *dev)
|
||||
{
|
||||
-#if IS_ENABLED(CONFIG_CFG80211)
|
||||
cfg80211_unregister_wdev(dev->ieee80211_ptr);
|
||||
-#endif
|
||||
}
|
||||
|
||||
/**
|
||||
--- a/include/net/cfg802154.h
|
||||
+++ b/include/net/cfg802154.h
|
||||
@@ -373,7 +373,6 @@ struct wpan_dev {
|
||||
|
||||
#define to_phy(_dev) container_of(_dev, struct wpan_phy, dev)
|
||||
|
||||
-#if IS_ENABLED(CONFIG_IEEE802154) || IS_ENABLED(CONFIG_6LOWPAN)
|
||||
static inline int
|
||||
wpan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
|
||||
const struct ieee802154_addr *daddr,
|
||||
@@ -384,7 +383,6 @@ wpan_dev_hard_header(struct sk_buff *skb
|
||||
|
||||
return wpan_dev->header_ops->create(skb, dev, daddr, saddr, len);
|
||||
}
|
||||
-#endif
|
||||
|
||||
struct wpan_phy *
|
||||
wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size);
|
||||
--- a/net/batman-adv/hard-interface.c
|
||||
+++ b/net/batman-adv/hard-interface.c
|
||||
@@ -308,11 +308,9 @@ static bool batadv_is_cfg80211_netdev(st
|
||||
if (!net_device)
|
||||
return false;
|
||||
|
||||
-#if IS_ENABLED(CONFIG_CFG80211)
|
||||
/* cfg80211 drivers have to set ieee80211_ptr */
|
||||
if (net_device->ieee80211_ptr)
|
||||
return true;
|
||||
-#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
--- a/net/core/net-sysfs.c
|
||||
+++ b/net/core/net-sysfs.c
|
||||
@@ -747,6 +747,7 @@ static const struct attribute_group nets
|
||||
.attrs = netstat_attrs,
|
||||
};
|
||||
|
||||
+#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211)
|
||||
static struct attribute *wireless_attrs[] = {
|
||||
NULL
|
||||
};
|
||||
@@ -755,19 +756,7 @@ static const struct attribute_group wire
|
||||
.name = "wireless",
|
||||
.attrs = wireless_attrs,
|
||||
};
|
||||
-
|
||||
-static bool wireless_group_needed(struct net_device *ndev)
|
||||
-{
|
||||
-#if IS_ENABLED(CONFIG_CFG80211)
|
||||
- if (ndev->ieee80211_ptr)
|
||||
- return true;
|
||||
#endif
|
||||
-#if IS_ENABLED(CONFIG_WIRELESS_EXT)
|
||||
- if (ndev->wireless_handlers)
|
||||
- return true;
|
||||
-#endif
|
||||
- return false;
|
||||
-}
|
||||
|
||||
#else /* CONFIG_SYSFS */
|
||||
#define net_class_groups NULL
|
||||
@@ -2008,8 +1997,14 @@ int netdev_register_kobject(struct net_d
|
||||
|
||||
*groups++ = &netstat_group;
|
||||
|
||||
- if (wireless_group_needed(ndev))
|
||||
+#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211)
|
||||
+ if (ndev->ieee80211_ptr)
|
||||
+ *groups++ = &wireless_group;
|
||||
+#if IS_ENABLED(CONFIG_WIRELESS_EXT)
|
||||
+ else if (ndev->wireless_handlers)
|
||||
*groups++ = &wireless_group;
|
||||
+#endif
|
||||
+#endif
|
||||
#endif /* CONFIG_SYSFS */
|
||||
|
||||
error = device_add(dev);
|
@ -1,21 +0,0 @@
|
||||
From 173019b66dcc9d68ad9333aa744dad1e369b5aa8 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Sun, 9 Jul 2017 00:26:53 +0200
|
||||
Subject: [PATCH 34/34] kernel: add compile fix for linux 4.9 on x86
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
Makefile | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/Makefile
|
||||
+++ b/Makefile
|
||||
@@ -537,7 +537,7 @@ KBUILD_LDFLAGS_MODULE :=
|
||||
KBUILD_LDFLAGS :=
|
||||
CLANG_FLAGS :=
|
||||
|
||||
-export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC HOSTPKG_CONFIG
|
||||
+export ARCH SRCARCH SUBARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC HOSTPKG_CONFIG
|
||||
export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL
|
||||
export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
|
||||
export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD
|
@ -1,143 +0,0 @@
|
||||
From e3264035bdac67898d685423ffb2f3a9c3a5964a Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Wed, 4 Aug 2021 01:31:34 -0600
|
||||
Subject: [PATCH 01/14] mm: x86, arm64: add arch_has_hw_pte_young()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Some architectures automatically set the accessed bit in PTEs, e.g.,
|
||||
x86 and arm64 v8.2. On architectures that do not have this capability,
|
||||
clearing the accessed bit in a PTE usually triggers a page fault
|
||||
following the TLB miss of this PTE (to emulate the accessed bit).
|
||||
|
||||
Being aware of this capability can help make better decisions, e.g.,
|
||||
whether to spread the work out over a period of time to reduce bursty
|
||||
page faults when trying to clear the accessed bit in many PTEs.
|
||||
|
||||
Note that theoretically this capability can be unreliable, e.g.,
|
||||
hotplugged CPUs might be different from builtin ones. Therefore it
|
||||
should not be used in architecture-independent code that involves
|
||||
correctness, e.g., to determine whether TLB flushes are required (in
|
||||
combination with the accessed bit).
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Reviewed-by: Barry Song <baohua@kernel.org>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Acked-by: Will Deacon <will@kernel.org>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: Ib49b44fb56df3333a2ff1fcc496fb1980b976e7a
|
||||
---
|
||||
arch/arm64/include/asm/pgtable.h | 15 ++-------------
|
||||
arch/x86/include/asm/pgtable.h | 6 +++---
|
||||
include/linux/pgtable.h | 13 +++++++++++++
|
||||
mm/memory.c | 14 +-------------
|
||||
4 files changed, 19 insertions(+), 29 deletions(-)
|
||||
|
||||
--- a/arch/arm64/include/asm/pgtable.h
|
||||
+++ b/arch/arm64/include/asm/pgtable.h
|
||||
@@ -1082,24 +1082,13 @@ static inline void update_mmu_cache(stru
|
||||
* page after fork() + CoW for pfn mappings. We don't always have a
|
||||
* hardware-managed access flag on arm64.
|
||||
*/
|
||||
-static inline bool arch_faults_on_old_pte(void)
|
||||
-{
|
||||
- /* The register read below requires a stable CPU to make any sense */
|
||||
- cant_migrate();
|
||||
-
|
||||
- return !cpu_has_hw_af();
|
||||
-}
|
||||
-#define arch_faults_on_old_pte arch_faults_on_old_pte
|
||||
+#define arch_has_hw_pte_young cpu_has_hw_af
|
||||
|
||||
/*
|
||||
* Experimentally, it's cheap to set the access flag in hardware and we
|
||||
* benefit from prefaulting mappings as 'old' to start with.
|
||||
*/
|
||||
-static inline bool arch_wants_old_prefaulted_pte(void)
|
||||
-{
|
||||
- return !arch_faults_on_old_pte();
|
||||
-}
|
||||
-#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte
|
||||
+#define arch_wants_old_prefaulted_pte cpu_has_hw_af
|
||||
|
||||
static inline bool pud_sect_supported(void)
|
||||
{
|
||||
--- a/arch/x86/include/asm/pgtable.h
|
||||
+++ b/arch/x86/include/asm/pgtable.h
|
||||
@@ -1431,10 +1431,10 @@ static inline bool arch_has_pfn_modify_c
|
||||
return boot_cpu_has_bug(X86_BUG_L1TF);
|
||||
}
|
||||
|
||||
-#define arch_faults_on_old_pte arch_faults_on_old_pte
|
||||
-static inline bool arch_faults_on_old_pte(void)
|
||||
+#define arch_has_hw_pte_young arch_has_hw_pte_young
|
||||
+static inline bool arch_has_hw_pte_young(void)
|
||||
{
|
||||
- return false;
|
||||
+ return true;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PAGE_TABLE_CHECK
|
||||
--- a/include/linux/pgtable.h
|
||||
+++ b/include/linux/pgtable.h
|
||||
@@ -260,6 +260,19 @@ static inline int pmdp_clear_flush_young
|
||||
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
#endif
|
||||
|
||||
+#ifndef arch_has_hw_pte_young
|
||||
+/*
|
||||
+ * Return whether the accessed bit is supported on the local CPU.
|
||||
+ *
|
||||
+ * This stub assumes accessing through an old PTE triggers a page fault.
|
||||
+ * Architectures that automatically set the access bit should overwrite it.
|
||||
+ */
|
||||
+static inline bool arch_has_hw_pte_young(void)
|
||||
+{
|
||||
+ return false;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
|
||||
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
|
||||
unsigned long address,
|
||||
--- a/mm/memory.c
|
||||
+++ b/mm/memory.c
|
||||
@@ -125,18 +125,6 @@ int randomize_va_space __read_mostly =
|
||||
2;
|
||||
#endif
|
||||
|
||||
-#ifndef arch_faults_on_old_pte
|
||||
-static inline bool arch_faults_on_old_pte(void)
|
||||
-{
|
||||
- /*
|
||||
- * Those arches which don't have hw access flag feature need to
|
||||
- * implement their own helper. By default, "true" means pagefault
|
||||
- * will be hit on old pte.
|
||||
- */
|
||||
- return true;
|
||||
-}
|
||||
-#endif
|
||||
-
|
||||
#ifndef arch_wants_old_prefaulted_pte
|
||||
static inline bool arch_wants_old_prefaulted_pte(void)
|
||||
{
|
||||
@@ -2872,7 +2860,7 @@ static inline bool __wp_page_copy_user(s
|
||||
* On architectures with software "accessed" bits, we would
|
||||
* take a double page fault, so mark it accessed here.
|
||||
*/
|
||||
- if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) {
|
||||
+ if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) {
|
||||
pte_t entry;
|
||||
|
||||
vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
|
@ -1,132 +0,0 @@
|
||||
From 0c0016e6f53b52166fe4da61c81fa6b27f4650cd Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Sat, 26 Sep 2020 21:17:18 -0600
|
||||
Subject: [PATCH 02/14] mm: x86: add CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Some architectures support the accessed bit in non-leaf PMD entries,
|
||||
e.g., x86 sets the accessed bit in a non-leaf PMD entry when using it
|
||||
as part of linear address translation [1]. Page table walkers that
|
||||
clear the accessed bit may use this capability to reduce their search
|
||||
space.
|
||||
|
||||
Note that:
|
||||
1. Although an inline function is preferable, this capability is added
|
||||
as a configuration option for consistency with the existing macros.
|
||||
2. Due to the little interest in other varieties, this capability was
|
||||
only tested on Intel and AMD CPUs.
|
||||
|
||||
Thanks to the following developers for their efforts [2][3].
|
||||
Randy Dunlap <rdunlap@infradead.org>
|
||||
Stephen Rothwell <sfr@canb.auug.org.au>
|
||||
|
||||
[1]: Intel 64 and IA-32 Architectures Software Developer's Manual
|
||||
Volume 3 (June 2021), section 4.8
|
||||
[2] https://lore.kernel.org/r/bfdcc7c8-922f-61a9-aa15-7e7250f04af7@infradead.org/
|
||||
[3] https://lore.kernel.org/r/20220413151513.5a0d7a7e@canb.auug.org.au/
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Reviewed-by: Barry Song <baohua@kernel.org>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I1a17be3ae926f721f7b17ea1539e5c39e8c4f9a8
|
||||
---
|
||||
arch/Kconfig | 8 ++++++++
|
||||
arch/x86/Kconfig | 1 +
|
||||
arch/x86/include/asm/pgtable.h | 3 ++-
|
||||
arch/x86/mm/pgtable.c | 5 ++++-
|
||||
include/linux/pgtable.h | 4 ++--
|
||||
5 files changed, 17 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/arch/Kconfig
|
||||
+++ b/arch/Kconfig
|
||||
@@ -1418,6 +1418,14 @@ config DYNAMIC_SIGFRAME
|
||||
config HAVE_ARCH_NODE_DEV_GROUP
|
||||
bool
|
||||
|
||||
+config ARCH_HAS_NONLEAF_PMD_YOUNG
|
||||
+ bool
|
||||
+ help
|
||||
+ Architectures that select this option are capable of setting the
|
||||
+ accessed bit in non-leaf PMD entries when using them as part of linear
|
||||
+ address translations. Page table walkers that clear the accessed bit
|
||||
+ may use this capability to reduce their search space.
|
||||
+
|
||||
source "kernel/gcov/Kconfig"
|
||||
|
||||
source "scripts/gcc-plugins/Kconfig"
|
||||
--- a/arch/x86/Kconfig
|
||||
+++ b/arch/x86/Kconfig
|
||||
@@ -85,6 +85,7 @@ config X86
|
||||
select ARCH_HAS_PMEM_API if X86_64
|
||||
select ARCH_HAS_PTE_DEVMAP if X86_64
|
||||
select ARCH_HAS_PTE_SPECIAL
|
||||
+ select ARCH_HAS_NONLEAF_PMD_YOUNG if PGTABLE_LEVELS > 2
|
||||
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
|
||||
select ARCH_HAS_COPY_MC if X86_64
|
||||
select ARCH_HAS_SET_MEMORY
|
||||
--- a/arch/x86/include/asm/pgtable.h
|
||||
+++ b/arch/x86/include/asm/pgtable.h
|
||||
@@ -815,7 +815,8 @@ static inline unsigned long pmd_page_vad
|
||||
|
||||
static inline int pmd_bad(pmd_t pmd)
|
||||
{
|
||||
- return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
|
||||
+ return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) !=
|
||||
+ (_KERNPG_TABLE & ~_PAGE_ACCESSED);
|
||||
}
|
||||
|
||||
static inline unsigned long pages_to_mb(unsigned long npg)
|
||||
--- a/arch/x86/mm/pgtable.c
|
||||
+++ b/arch/x86/mm/pgtable.c
|
||||
@@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_
|
||||
return ret;
|
||||
}
|
||||
|
||||
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
|
||||
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
||||
unsigned long addr, pmd_t *pmdp)
|
||||
{
|
||||
@@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_
|
||||
|
||||
return ret;
|
||||
}
|
||||
+#endif
|
||||
+
|
||||
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
int pudp_test_and_clear_young(struct vm_area_struct *vma,
|
||||
unsigned long addr, pud_t *pudp)
|
||||
{
|
||||
--- a/include/linux/pgtable.h
|
||||
+++ b/include/linux/pgtable.h
|
||||
@@ -213,7 +213,7 @@ static inline int ptep_test_and_clear_yo
|
||||
#endif
|
||||
|
||||
#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
|
||||
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
|
||||
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
||||
unsigned long address,
|
||||
pmd_t *pmdp)
|
||||
@@ -234,7 +234,7 @@ static inline int pmdp_test_and_clear_yo
|
||||
BUILD_BUG();
|
||||
return 0;
|
||||
}
|
||||
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */
|
||||
#endif
|
||||
|
||||
#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
|
@ -1,254 +0,0 @@
|
||||
From d8e0edcddc441574410a047ede56f79c849a6d37 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Sun, 27 Sep 2020 20:49:08 -0600
|
||||
Subject: [PATCH 03/14] mm/vmscan.c: refactor shrink_node()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This patch refactors shrink_node() to improve readability for the
|
||||
upcoming changes to mm/vmscan.c.
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Reviewed-by: Barry Song <baohua@kernel.org>
|
||||
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: Iae734b5b4030205b7db6e8c841f747b6f6ae1a04
|
||||
---
|
||||
mm/vmscan.c | 198 +++++++++++++++++++++++++++-------------------------
|
||||
1 file changed, 104 insertions(+), 94 deletions(-)
|
||||
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -2740,6 +2740,109 @@ enum scan_balance {
|
||||
SCAN_FILE,
|
||||
};
|
||||
|
||||
+static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc)
|
||||
+{
|
||||
+ unsigned long file;
|
||||
+ struct lruvec *target_lruvec;
|
||||
+
|
||||
+ target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
|
||||
+
|
||||
+ /*
|
||||
+ * Flush the memory cgroup stats, so that we read accurate per-memcg
|
||||
+ * lruvec stats for heuristics.
|
||||
+ */
|
||||
+ mem_cgroup_flush_stats();
|
||||
+
|
||||
+ /*
|
||||
+ * Determine the scan balance between anon and file LRUs.
|
||||
+ */
|
||||
+ spin_lock_irq(&target_lruvec->lru_lock);
|
||||
+ sc->anon_cost = target_lruvec->anon_cost;
|
||||
+ sc->file_cost = target_lruvec->file_cost;
|
||||
+ spin_unlock_irq(&target_lruvec->lru_lock);
|
||||
+
|
||||
+ /*
|
||||
+ * Target desirable inactive:active list ratios for the anon
|
||||
+ * and file LRU lists.
|
||||
+ */
|
||||
+ if (!sc->force_deactivate) {
|
||||
+ unsigned long refaults;
|
||||
+
|
||||
+ refaults = lruvec_page_state(target_lruvec,
|
||||
+ WORKINGSET_ACTIVATE_ANON);
|
||||
+ if (refaults != target_lruvec->refaults[0] ||
|
||||
+ inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
|
||||
+ sc->may_deactivate |= DEACTIVATE_ANON;
|
||||
+ else
|
||||
+ sc->may_deactivate &= ~DEACTIVATE_ANON;
|
||||
+
|
||||
+ /*
|
||||
+ * When refaults are being observed, it means a new
|
||||
+ * workingset is being established. Deactivate to get
|
||||
+ * rid of any stale active pages quickly.
|
||||
+ */
|
||||
+ refaults = lruvec_page_state(target_lruvec,
|
||||
+ WORKINGSET_ACTIVATE_FILE);
|
||||
+ if (refaults != target_lruvec->refaults[1] ||
|
||||
+ inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
|
||||
+ sc->may_deactivate |= DEACTIVATE_FILE;
|
||||
+ else
|
||||
+ sc->may_deactivate &= ~DEACTIVATE_FILE;
|
||||
+ } else
|
||||
+ sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
|
||||
+
|
||||
+ /*
|
||||
+ * If we have plenty of inactive file pages that aren't
|
||||
+ * thrashing, try to reclaim those first before touching
|
||||
+ * anonymous pages.
|
||||
+ */
|
||||
+ file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
|
||||
+ if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
|
||||
+ sc->cache_trim_mode = 1;
|
||||
+ else
|
||||
+ sc->cache_trim_mode = 0;
|
||||
+
|
||||
+ /*
|
||||
+ * Prevent the reclaimer from falling into the cache trap: as
|
||||
+ * cache pages start out inactive, every cache fault will tip
|
||||
+ * the scan balance towards the file LRU. And as the file LRU
|
||||
+ * shrinks, so does the window for rotation from references.
|
||||
+ * This means we have a runaway feedback loop where a tiny
|
||||
+ * thrashing file LRU becomes infinitely more attractive than
|
||||
+ * anon pages. Try to detect this based on file LRU size.
|
||||
+ */
|
||||
+ if (!cgroup_reclaim(sc)) {
|
||||
+ unsigned long total_high_wmark = 0;
|
||||
+ unsigned long free, anon;
|
||||
+ int z;
|
||||
+
|
||||
+ free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
|
||||
+ file = node_page_state(pgdat, NR_ACTIVE_FILE) +
|
||||
+ node_page_state(pgdat, NR_INACTIVE_FILE);
|
||||
+
|
||||
+ for (z = 0; z < MAX_NR_ZONES; z++) {
|
||||
+ struct zone *zone = &pgdat->node_zones[z];
|
||||
+
|
||||
+ if (!managed_zone(zone))
|
||||
+ continue;
|
||||
+
|
||||
+ total_high_wmark += high_wmark_pages(zone);
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Consider anon: if that's low too, this isn't a
|
||||
+ * runaway file reclaim problem, but rather just
|
||||
+ * extreme pressure. Reclaim as per usual then.
|
||||
+ */
|
||||
+ anon = node_page_state(pgdat, NR_INACTIVE_ANON);
|
||||
+
|
||||
+ sc->file_is_tiny =
|
||||
+ file + free <= total_high_wmark &&
|
||||
+ !(sc->may_deactivate & DEACTIVATE_ANON) &&
|
||||
+ anon >> sc->priority;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Determine how aggressively the anon and file LRU lists should be
|
||||
* scanned.
|
||||
@@ -3207,109 +3310,16 @@ static void shrink_node(pg_data_t *pgdat
|
||||
unsigned long nr_reclaimed, nr_scanned;
|
||||
struct lruvec *target_lruvec;
|
||||
bool reclaimable = false;
|
||||
- unsigned long file;
|
||||
|
||||
target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
|
||||
|
||||
again:
|
||||
- /*
|
||||
- * Flush the memory cgroup stats, so that we read accurate per-memcg
|
||||
- * lruvec stats for heuristics.
|
||||
- */
|
||||
- mem_cgroup_flush_stats();
|
||||
-
|
||||
memset(&sc->nr, 0, sizeof(sc->nr));
|
||||
|
||||
nr_reclaimed = sc->nr_reclaimed;
|
||||
nr_scanned = sc->nr_scanned;
|
||||
|
||||
- /*
|
||||
- * Determine the scan balance between anon and file LRUs.
|
||||
- */
|
||||
- spin_lock_irq(&target_lruvec->lru_lock);
|
||||
- sc->anon_cost = target_lruvec->anon_cost;
|
||||
- sc->file_cost = target_lruvec->file_cost;
|
||||
- spin_unlock_irq(&target_lruvec->lru_lock);
|
||||
-
|
||||
- /*
|
||||
- * Target desirable inactive:active list ratios for the anon
|
||||
- * and file LRU lists.
|
||||
- */
|
||||
- if (!sc->force_deactivate) {
|
||||
- unsigned long refaults;
|
||||
-
|
||||
- refaults = lruvec_page_state(target_lruvec,
|
||||
- WORKINGSET_ACTIVATE_ANON);
|
||||
- if (refaults != target_lruvec->refaults[0] ||
|
||||
- inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
|
||||
- sc->may_deactivate |= DEACTIVATE_ANON;
|
||||
- else
|
||||
- sc->may_deactivate &= ~DEACTIVATE_ANON;
|
||||
-
|
||||
- /*
|
||||
- * When refaults are being observed, it means a new
|
||||
- * workingset is being established. Deactivate to get
|
||||
- * rid of any stale active pages quickly.
|
||||
- */
|
||||
- refaults = lruvec_page_state(target_lruvec,
|
||||
- WORKINGSET_ACTIVATE_FILE);
|
||||
- if (refaults != target_lruvec->refaults[1] ||
|
||||
- inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
|
||||
- sc->may_deactivate |= DEACTIVATE_FILE;
|
||||
- else
|
||||
- sc->may_deactivate &= ~DEACTIVATE_FILE;
|
||||
- } else
|
||||
- sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
|
||||
-
|
||||
- /*
|
||||
- * If we have plenty of inactive file pages that aren't
|
||||
- * thrashing, try to reclaim those first before touching
|
||||
- * anonymous pages.
|
||||
- */
|
||||
- file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
|
||||
- if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
|
||||
- sc->cache_trim_mode = 1;
|
||||
- else
|
||||
- sc->cache_trim_mode = 0;
|
||||
-
|
||||
- /*
|
||||
- * Prevent the reclaimer from falling into the cache trap: as
|
||||
- * cache pages start out inactive, every cache fault will tip
|
||||
- * the scan balance towards the file LRU. And as the file LRU
|
||||
- * shrinks, so does the window for rotation from references.
|
||||
- * This means we have a runaway feedback loop where a tiny
|
||||
- * thrashing file LRU becomes infinitely more attractive than
|
||||
- * anon pages. Try to detect this based on file LRU size.
|
||||
- */
|
||||
- if (!cgroup_reclaim(sc)) {
|
||||
- unsigned long total_high_wmark = 0;
|
||||
- unsigned long free, anon;
|
||||
- int z;
|
||||
-
|
||||
- free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
|
||||
- file = node_page_state(pgdat, NR_ACTIVE_FILE) +
|
||||
- node_page_state(pgdat, NR_INACTIVE_FILE);
|
||||
-
|
||||
- for (z = 0; z < MAX_NR_ZONES; z++) {
|
||||
- struct zone *zone = &pgdat->node_zones[z];
|
||||
- if (!managed_zone(zone))
|
||||
- continue;
|
||||
-
|
||||
- total_high_wmark += high_wmark_pages(zone);
|
||||
- }
|
||||
-
|
||||
- /*
|
||||
- * Consider anon: if that's low too, this isn't a
|
||||
- * runaway file reclaim problem, but rather just
|
||||
- * extreme pressure. Reclaim as per usual then.
|
||||
- */
|
||||
- anon = node_page_state(pgdat, NR_INACTIVE_ANON);
|
||||
-
|
||||
- sc->file_is_tiny =
|
||||
- file + free <= total_high_wmark &&
|
||||
- !(sc->may_deactivate & DEACTIVATE_ANON) &&
|
||||
- anon >> sc->priority;
|
||||
- }
|
||||
+ prepare_scan_count(pgdat, sc);
|
||||
|
||||
shrink_node_memcgs(pgdat, sc);
|
||||
|
@ -1,59 +0,0 @@
|
||||
From bc14d2c7c6d0fb8c79ad0fc5eab488b977cbcccf Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Sun, 6 Mar 2022 20:22:40 -0700
|
||||
Subject: [PATCH 04/14] Revert "include/linux/mm_inline.h: fold
|
||||
__update_lru_size() into its sole caller"
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This patch undoes the following refactor:
|
||||
commit 289ccba18af4 ("include/linux/mm_inline.h: fold __update_lru_size() into its sole caller")
|
||||
|
||||
The upcoming changes to include/linux/mm_inline.h will reuse
|
||||
__update_lru_size().
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I6155c407d50199a43b179c7f45904d4b7c052118
|
||||
---
|
||||
include/linux/mm_inline.h | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/linux/mm_inline.h
|
||||
+++ b/include/linux/mm_inline.h
|
||||
@@ -34,7 +34,7 @@ static inline int page_is_file_lru(struc
|
||||
return folio_is_file_lru(page_folio(page));
|
||||
}
|
||||
|
||||
-static __always_inline void update_lru_size(struct lruvec *lruvec,
|
||||
+static __always_inline void __update_lru_size(struct lruvec *lruvec,
|
||||
enum lru_list lru, enum zone_type zid,
|
||||
long nr_pages)
|
||||
{
|
||||
@@ -43,6 +43,13 @@ static __always_inline void update_lru_s
|
||||
__mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
|
||||
__mod_zone_page_state(&pgdat->node_zones[zid],
|
||||
NR_ZONE_LRU_BASE + lru, nr_pages);
|
||||
+}
|
||||
+
|
||||
+static __always_inline void update_lru_size(struct lruvec *lruvec,
|
||||
+ enum lru_list lru, enum zone_type zid,
|
||||
+ long nr_pages)
|
||||
+{
|
||||
+ __update_lru_size(lruvec, lru, zid, nr_pages);
|
||||
#ifdef CONFIG_MEMCG
|
||||
mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages);
|
||||
#endif
|
@ -1,777 +0,0 @@
|
||||
From 8c6beb4548c216da9dae5e1a7612a108396e3f9e Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Mon, 25 Jan 2021 21:12:33 -0700
|
||||
Subject: [PATCH 05/14] mm: multi-gen LRU: groundwork
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Evictable pages are divided into multiple generations for each lruvec.
|
||||
The youngest generation number is stored in lrugen->max_seq for both
|
||||
anon and file types as they are aged on an equal footing. The oldest
|
||||
generation numbers are stored in lrugen->min_seq[] separately for anon
|
||||
and file types as clean file pages can be evicted regardless of swap
|
||||
constraints. These three variables are monotonically increasing.
|
||||
|
||||
Generation numbers are truncated into order_base_2(MAX_NR_GENS+1) bits
|
||||
in order to fit into the gen counter in folio->flags. Each truncated
|
||||
generation number is an index to lrugen->lists[]. The sliding window
|
||||
technique is used to track at least MIN_NR_GENS and at most
|
||||
MAX_NR_GENS generations. The gen counter stores a value within [1,
|
||||
MAX_NR_GENS] while a page is on one of lrugen->lists[]. Otherwise it
|
||||
stores 0.
|
||||
|
||||
There are two conceptually independent procedures: "the aging", which
|
||||
produces young generations, and "the eviction", which consumes old
|
||||
generations. They form a closed-loop system, i.e., "the page reclaim".
|
||||
Both procedures can be invoked from userspace for the purposes of
|
||||
working set estimation and proactive reclaim. These techniques are
|
||||
commonly used to optimize job scheduling (bin packing) in data
|
||||
centers [1][2].
|
||||
|
||||
To avoid confusion, the terms "hot" and "cold" will be applied to the
|
||||
multi-gen LRU, as a new convention; the terms "active" and "inactive"
|
||||
will be applied to the active/inactive LRU, as usual.
|
||||
|
||||
The protection of hot pages and the selection of cold pages are based
|
||||
on page access channels and patterns. There are two access channels:
|
||||
one through page tables and the other through file descriptors. The
|
||||
protection of the former channel is by design stronger because:
|
||||
1. The uncertainty in determining the access patterns of the former
|
||||
channel is higher due to the approximation of the accessed bit.
|
||||
2. The cost of evicting the former channel is higher due to the TLB
|
||||
flushes required and the likelihood of encountering the dirty bit.
|
||||
3. The penalty of underprotecting the former channel is higher because
|
||||
applications usually do not prepare themselves for major page
|
||||
faults like they do for blocked I/O. E.g., GUI applications
|
||||
commonly use dedicated I/O threads to avoid blocking rendering
|
||||
threads.
|
||||
There are also two access patterns: one with temporal locality and the
|
||||
other without. For the reasons listed above, the former channel is
|
||||
assumed to follow the former pattern unless VM_SEQ_READ or
|
||||
VM_RAND_READ is present; the latter channel is assumed to follow the
|
||||
latter pattern unless outlying refaults have been observed [3][4].
|
||||
|
||||
The next patch will address the "outlying refaults". Three macros,
|
||||
i.e., LRU_REFS_WIDTH, LRU_REFS_PGOFF and LRU_REFS_MASK, used later are
|
||||
added in this patch to make the entire patchset less diffy.
|
||||
|
||||
A page is added to the youngest generation on faulting. The aging
|
||||
needs to check the accessed bit at least twice before handing this
|
||||
page over to the eviction. The first check takes care of the accessed
|
||||
bit set on the initial fault; the second check makes sure this page
|
||||
has not been used since then. This protocol, AKA second chance,
|
||||
requires a minimum of two generations, hence MIN_NR_GENS.
|
||||
|
||||
[1] https://dl.acm.org/doi/10.1145/3297858.3304053
|
||||
[2] https://dl.acm.org/doi/10.1145/3503222.3507731
|
||||
[3] https://lwn.net/Articles/495543/
|
||||
[4] https://lwn.net/Articles/815342/
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I71de7cd15b8dfa6f9fdd838023474693c4fee0a7
|
||||
---
|
||||
fs/fuse/dev.c | 3 +-
|
||||
include/linux/mm_inline.h | 175 ++++++++++++++++++++++++++++++
|
||||
include/linux/mmzone.h | 102 +++++++++++++++++
|
||||
include/linux/page-flags-layout.h | 13 ++-
|
||||
include/linux/page-flags.h | 4 +-
|
||||
include/linux/sched.h | 4 +
|
||||
kernel/bounds.c | 5 +
|
||||
mm/Kconfig | 8 ++
|
||||
mm/huge_memory.c | 3 +-
|
||||
mm/memcontrol.c | 2 +
|
||||
mm/memory.c | 25 +++++
|
||||
mm/mm_init.c | 6 +-
|
||||
mm/mmzone.c | 2 +
|
||||
mm/swap.c | 11 +-
|
||||
mm/vmscan.c | 75 +++++++++++++
|
||||
15 files changed, 424 insertions(+), 14 deletions(-)
|
||||
|
||||
--- a/fs/fuse/dev.c
|
||||
+++ b/fs/fuse/dev.c
|
||||
@@ -776,7 +776,8 @@ static int fuse_check_page(struct page *
|
||||
1 << PG_active |
|
||||
1 << PG_workingset |
|
||||
1 << PG_reclaim |
|
||||
- 1 << PG_waiters))) {
|
||||
+ 1 << PG_waiters |
|
||||
+ LRU_GEN_MASK | LRU_REFS_MASK))) {
|
||||
dump_page(page, "fuse: trying to steal weird page");
|
||||
return 1;
|
||||
}
|
||||
--- a/include/linux/mm_inline.h
|
||||
+++ b/include/linux/mm_inline.h
|
||||
@@ -40,6 +40,9 @@ static __always_inline void __update_lru
|
||||
{
|
||||
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
|
||||
|
||||
+ lockdep_assert_held(&lruvec->lru_lock);
|
||||
+ WARN_ON_ONCE(nr_pages != (int)nr_pages);
|
||||
+
|
||||
__mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
|
||||
__mod_zone_page_state(&pgdat->node_zones[zid],
|
||||
NR_ZONE_LRU_BASE + lru, nr_pages);
|
||||
@@ -101,11 +104,177 @@ static __always_inline enum lru_list fol
|
||||
return lru;
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+
|
||||
+static inline bool lru_gen_enabled(void)
|
||||
+{
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static inline bool lru_gen_in_fault(void)
|
||||
+{
|
||||
+ return current->in_lru_fault;
|
||||
+}
|
||||
+
|
||||
+static inline int lru_gen_from_seq(unsigned long seq)
|
||||
+{
|
||||
+ return seq % MAX_NR_GENS;
|
||||
+}
|
||||
+
|
||||
+static inline int folio_lru_gen(struct folio *folio)
|
||||
+{
|
||||
+ unsigned long flags = READ_ONCE(folio->flags);
|
||||
+
|
||||
+ return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
|
||||
+}
|
||||
+
|
||||
+static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen)
|
||||
+{
|
||||
+ unsigned long max_seq = lruvec->lrugen.max_seq;
|
||||
+
|
||||
+ VM_WARN_ON_ONCE(gen >= MAX_NR_GENS);
|
||||
+
|
||||
+ /* see the comment on MIN_NR_GENS */
|
||||
+ return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1);
|
||||
+}
|
||||
+
|
||||
+static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *folio,
|
||||
+ int old_gen, int new_gen)
|
||||
+{
|
||||
+ int type = folio_is_file_lru(folio);
|
||||
+ int zone = folio_zonenum(folio);
|
||||
+ int delta = folio_nr_pages(folio);
|
||||
+ enum lru_list lru = type * LRU_INACTIVE_FILE;
|
||||
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
+
|
||||
+ VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS);
|
||||
+ VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS);
|
||||
+ VM_WARN_ON_ONCE(old_gen == -1 && new_gen == -1);
|
||||
+
|
||||
+ if (old_gen >= 0)
|
||||
+ WRITE_ONCE(lrugen->nr_pages[old_gen][type][zone],
|
||||
+ lrugen->nr_pages[old_gen][type][zone] - delta);
|
||||
+ if (new_gen >= 0)
|
||||
+ WRITE_ONCE(lrugen->nr_pages[new_gen][type][zone],
|
||||
+ lrugen->nr_pages[new_gen][type][zone] + delta);
|
||||
+
|
||||
+ /* addition */
|
||||
+ if (old_gen < 0) {
|
||||
+ if (lru_gen_is_active(lruvec, new_gen))
|
||||
+ lru += LRU_ACTIVE;
|
||||
+ __update_lru_size(lruvec, lru, zone, delta);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /* deletion */
|
||||
+ if (new_gen < 0) {
|
||||
+ if (lru_gen_is_active(lruvec, old_gen))
|
||||
+ lru += LRU_ACTIVE;
|
||||
+ __update_lru_size(lruvec, lru, zone, -delta);
|
||||
+ return;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
|
||||
+{
|
||||
+ unsigned long seq;
|
||||
+ unsigned long flags;
|
||||
+ int gen = folio_lru_gen(folio);
|
||||
+ int type = folio_is_file_lru(folio);
|
||||
+ int zone = folio_zonenum(folio);
|
||||
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
+
|
||||
+ VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
|
||||
+
|
||||
+ if (folio_test_unevictable(folio))
|
||||
+ return false;
|
||||
+ /*
|
||||
+ * There are three common cases for this page:
|
||||
+ * 1. If it's hot, e.g., freshly faulted in or previously hot and
|
||||
+ * migrated, add it to the youngest generation.
|
||||
+ * 2. If it's cold but can't be evicted immediately, i.e., an anon page
|
||||
+ * not in swapcache or a dirty page pending writeback, add it to the
|
||||
+ * second oldest generation.
|
||||
+ * 3. Everything else (clean, cold) is added to the oldest generation.
|
||||
+ */
|
||||
+ if (folio_test_active(folio))
|
||||
+ seq = lrugen->max_seq;
|
||||
+ else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) ||
|
||||
+ (folio_test_reclaim(folio) &&
|
||||
+ (folio_test_dirty(folio) || folio_test_writeback(folio))))
|
||||
+ seq = lrugen->min_seq[type] + 1;
|
||||
+ else
|
||||
+ seq = lrugen->min_seq[type];
|
||||
+
|
||||
+ gen = lru_gen_from_seq(seq);
|
||||
+ flags = (gen + 1UL) << LRU_GEN_PGOFF;
|
||||
+ /* see the comment on MIN_NR_GENS about PG_active */
|
||||
+ set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags);
|
||||
+
|
||||
+ lru_gen_update_size(lruvec, folio, -1, gen);
|
||||
+ /* for folio_rotate_reclaimable() */
|
||||
+ if (reclaiming)
|
||||
+ list_add_tail(&folio->lru, &lrugen->lists[gen][type][zone]);
|
||||
+ else
|
||||
+ list_add(&folio->lru, &lrugen->lists[gen][type][zone]);
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
|
||||
+{
|
||||
+ unsigned long flags;
|
||||
+ int gen = folio_lru_gen(folio);
|
||||
+
|
||||
+ if (gen < 0)
|
||||
+ return false;
|
||||
+
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
|
||||
+
|
||||
+ /* for folio_migrate_flags() */
|
||||
+ flags = !reclaiming && lru_gen_is_active(lruvec, gen) ? BIT(PG_active) : 0;
|
||||
+ flags = set_mask_bits(&folio->flags, LRU_GEN_MASK, flags);
|
||||
+ gen = ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
|
||||
+
|
||||
+ lru_gen_update_size(lruvec, folio, gen, -1);
|
||||
+ list_del(&folio->lru);
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+#else /* !CONFIG_LRU_GEN */
|
||||
+
|
||||
+static inline bool lru_gen_enabled(void)
|
||||
+{
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static inline bool lru_gen_in_fault(void)
|
||||
+{
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
|
||||
+{
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
|
||||
+{
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+#endif /* CONFIG_LRU_GEN */
|
||||
+
|
||||
static __always_inline
|
||||
void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio)
|
||||
{
|
||||
enum lru_list lru = folio_lru_list(folio);
|
||||
|
||||
+ if (lru_gen_add_folio(lruvec, folio, false))
|
||||
+ return;
|
||||
+
|
||||
update_lru_size(lruvec, lru, folio_zonenum(folio),
|
||||
folio_nr_pages(folio));
|
||||
if (lru != LRU_UNEVICTABLE)
|
||||
@@ -123,6 +292,9 @@ void lruvec_add_folio_tail(struct lruvec
|
||||
{
|
||||
enum lru_list lru = folio_lru_list(folio);
|
||||
|
||||
+ if (lru_gen_add_folio(lruvec, folio, true))
|
||||
+ return;
|
||||
+
|
||||
update_lru_size(lruvec, lru, folio_zonenum(folio),
|
||||
folio_nr_pages(folio));
|
||||
/* This is not expected to be used on LRU_UNEVICTABLE */
|
||||
@@ -140,6 +312,9 @@ void lruvec_del_folio(struct lruvec *lru
|
||||
{
|
||||
enum lru_list lru = folio_lru_list(folio);
|
||||
|
||||
+ if (lru_gen_del_folio(lruvec, folio, false))
|
||||
+ return;
|
||||
+
|
||||
if (lru != LRU_UNEVICTABLE)
|
||||
list_del(&folio->lru);
|
||||
update_lru_size(lruvec, lru, folio_zonenum(folio),
|
||||
--- a/include/linux/mmzone.h
|
||||
+++ b/include/linux/mmzone.h
|
||||
@@ -314,6 +314,102 @@ enum lruvec_flags {
|
||||
*/
|
||||
};
|
||||
|
||||
+#endif /* !__GENERATING_BOUNDS_H */
|
||||
+
|
||||
+/*
|
||||
+ * Evictable pages are divided into multiple generations. The youngest and the
|
||||
+ * oldest generation numbers, max_seq and min_seq, are monotonically increasing.
|
||||
+ * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An
|
||||
+ * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the
|
||||
+ * corresponding generation. The gen counter in folio->flags stores gen+1 while
|
||||
+ * a page is on one of lrugen->lists[]. Otherwise it stores 0.
|
||||
+ *
|
||||
+ * A page is added to the youngest generation on faulting. The aging needs to
|
||||
+ * check the accessed bit at least twice before handing this page over to the
|
||||
+ * eviction. The first check takes care of the accessed bit set on the initial
|
||||
+ * fault; the second check makes sure this page hasn't been used since then.
|
||||
+ * This process, AKA second chance, requires a minimum of two generations,
|
||||
+ * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive
|
||||
+ * LRU, e.g., /proc/vmstat, these two generations are considered active; the
|
||||
+ * rest of generations, if they exist, are considered inactive. See
|
||||
+ * lru_gen_is_active().
|
||||
+ *
|
||||
+ * PG_active is always cleared while a page is on one of lrugen->lists[] so that
|
||||
+ * the aging needs not to worry about it. And it's set again when a page
|
||||
+ * considered active is isolated for non-reclaiming purposes, e.g., migration.
|
||||
+ * See lru_gen_add_folio() and lru_gen_del_folio().
|
||||
+ *
|
||||
+ * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice the
|
||||
+ * number of categories of the active/inactive LRU when keeping track of
|
||||
+ * accesses through page tables. This requires order_base_2(MAX_NR_GENS+1) bits
|
||||
+ * in folio->flags.
|
||||
+ */
|
||||
+#define MIN_NR_GENS 2U
|
||||
+#define MAX_NR_GENS 4U
|
||||
+
|
||||
+#ifndef __GENERATING_BOUNDS_H
|
||||
+
|
||||
+struct lruvec;
|
||||
+
|
||||
+#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
|
||||
+#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
|
||||
+
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+
|
||||
+enum {
|
||||
+ LRU_GEN_ANON,
|
||||
+ LRU_GEN_FILE,
|
||||
+};
|
||||
+
|
||||
+/*
|
||||
+ * The youngest generation number is stored in max_seq for both anon and file
|
||||
+ * types as they are aged on an equal footing. The oldest generation numbers are
|
||||
+ * stored in min_seq[] separately for anon and file types as clean file pages
|
||||
+ * can be evicted regardless of swap constraints.
|
||||
+ *
|
||||
+ * Normally anon and file min_seq are in sync. But if swapping is constrained,
|
||||
+ * e.g., out of swap space, file min_seq is allowed to advance and leave anon
|
||||
+ * min_seq behind.
|
||||
+ *
|
||||
+ * The number of pages in each generation is eventually consistent and therefore
|
||||
+ * can be transiently negative.
|
||||
+ */
|
||||
+struct lru_gen_struct {
|
||||
+ /* the aging increments the youngest generation number */
|
||||
+ unsigned long max_seq;
|
||||
+ /* the eviction increments the oldest generation numbers */
|
||||
+ unsigned long min_seq[ANON_AND_FILE];
|
||||
+ /* the multi-gen LRU lists, lazily sorted on eviction */
|
||||
+ struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
|
||||
+ /* the multi-gen LRU sizes, eventually consistent */
|
||||
+ long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
|
||||
+};
|
||||
+
|
||||
+void lru_gen_init_lruvec(struct lruvec *lruvec);
|
||||
+
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+void lru_gen_init_memcg(struct mem_cgroup *memcg);
|
||||
+void lru_gen_exit_memcg(struct mem_cgroup *memcg);
|
||||
+#endif
|
||||
+
|
||||
+#else /* !CONFIG_LRU_GEN */
|
||||
+
|
||||
+static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+#endif /* CONFIG_LRU_GEN */
|
||||
+
|
||||
struct lruvec {
|
||||
struct list_head lists[NR_LRU_LISTS];
|
||||
/* per lruvec lru_lock for memcg */
|
||||
@@ -331,6 +427,10 @@ struct lruvec {
|
||||
unsigned long refaults[ANON_AND_FILE];
|
||||
/* Various lruvec state flags (enum lruvec_flags) */
|
||||
unsigned long flags;
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+ /* evictable pages divided into generations */
|
||||
+ struct lru_gen_struct lrugen;
|
||||
+#endif
|
||||
#ifdef CONFIG_MEMCG
|
||||
struct pglist_data *pgdat;
|
||||
#endif
|
||||
@@ -746,6 +846,8 @@ static inline bool zone_is_empty(struct
|
||||
#define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH)
|
||||
#define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH)
|
||||
#define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH)
|
||||
+#define LRU_GEN_PGOFF (KASAN_TAG_PGOFF - LRU_GEN_WIDTH)
|
||||
+#define LRU_REFS_PGOFF (LRU_GEN_PGOFF - LRU_REFS_WIDTH)
|
||||
|
||||
/*
|
||||
* Define the bit shifts to access each section. For non-existent
|
||||
--- a/include/linux/page-flags-layout.h
|
||||
+++ b/include/linux/page-flags-layout.h
|
||||
@@ -55,7 +55,8 @@
|
||||
#define SECTIONS_WIDTH 0
|
||||
#endif
|
||||
|
||||
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
|
||||
+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_SHIFT \
|
||||
+ <= BITS_PER_LONG - NR_PAGEFLAGS
|
||||
#define NODES_WIDTH NODES_SHIFT
|
||||
#elif defined(CONFIG_SPARSEMEM_VMEMMAP)
|
||||
#error "Vmemmap: No space for nodes field in page flags"
|
||||
@@ -89,8 +90,8 @@
|
||||
#define LAST_CPUPID_SHIFT 0
|
||||
#endif
|
||||
|
||||
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT \
|
||||
- <= BITS_PER_LONG - NR_PAGEFLAGS
|
||||
+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
|
||||
+ KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
|
||||
#define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT
|
||||
#else
|
||||
#define LAST_CPUPID_WIDTH 0
|
||||
@@ -100,10 +101,12 @@
|
||||
#define LAST_CPUPID_NOT_IN_PAGE_FLAGS
|
||||
#endif
|
||||
|
||||
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH \
|
||||
- > BITS_PER_LONG - NR_PAGEFLAGS
|
||||
+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
|
||||
+ KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
|
||||
#error "Not enough bits in page flags"
|
||||
#endif
|
||||
|
||||
+#define LRU_REFS_WIDTH 0
|
||||
+
|
||||
#endif
|
||||
#endif /* _LINUX_PAGE_FLAGS_LAYOUT */
|
||||
--- a/include/linux/page-flags.h
|
||||
+++ b/include/linux/page-flags.h
|
||||
@@ -1058,7 +1058,7 @@ static __always_inline void __ClearPageA
|
||||
1UL << PG_private | 1UL << PG_private_2 | \
|
||||
1UL << PG_writeback | 1UL << PG_reserved | \
|
||||
1UL << PG_slab | 1UL << PG_active | \
|
||||
- 1UL << PG_unevictable | __PG_MLOCKED)
|
||||
+ 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK)
|
||||
|
||||
/*
|
||||
* Flags checked when a page is prepped for return by the page allocator.
|
||||
@@ -1069,7 +1069,7 @@ static __always_inline void __ClearPageA
|
||||
* alloc-free cycle to prevent from reusing the page.
|
||||
*/
|
||||
#define PAGE_FLAGS_CHECK_AT_PREP \
|
||||
- (PAGEFLAGS_MASK & ~__PG_HWPOISON)
|
||||
+ ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK)
|
||||
|
||||
#define PAGE_FLAGS_PRIVATE \
|
||||
(1UL << PG_private | 1UL << PG_private_2)
|
||||
--- a/include/linux/sched.h
|
||||
+++ b/include/linux/sched.h
|
||||
@@ -914,6 +914,10 @@ struct task_struct {
|
||||
#ifdef CONFIG_MEMCG
|
||||
unsigned in_user_fault:1;
|
||||
#endif
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+ /* whether the LRU algorithm may apply to this access */
|
||||
+ unsigned in_lru_fault:1;
|
||||
+#endif
|
||||
#ifdef CONFIG_COMPAT_BRK
|
||||
unsigned brk_randomized:1;
|
||||
#endif
|
||||
--- a/kernel/bounds.c
|
||||
+++ b/kernel/bounds.c
|
||||
@@ -22,6 +22,11 @@ int main(void)
|
||||
DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
|
||||
#endif
|
||||
DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+ DEFINE(LRU_GEN_WIDTH, order_base_2(MAX_NR_GENS + 1));
|
||||
+#else
|
||||
+ DEFINE(LRU_GEN_WIDTH, 0);
|
||||
+#endif
|
||||
/* End of constants */
|
||||
|
||||
return 0;
|
||||
--- a/mm/Kconfig
|
||||
+++ b/mm/Kconfig
|
||||
@@ -1124,6 +1124,14 @@ config PTE_MARKER_UFFD_WP
|
||||
purposes. It is required to enable userfaultfd write protection on
|
||||
file-backed memory types like shmem and hugetlbfs.
|
||||
|
||||
+config LRU_GEN
|
||||
+ bool "Multi-Gen LRU"
|
||||
+ depends on MMU
|
||||
+ # make sure folio->flags has enough spare bits
|
||||
+ depends on 64BIT || !SPARSEMEM || SPARSEMEM_VMEMMAP
|
||||
+ help
|
||||
+ A high performance LRU implementation to overcommit memory.
|
||||
+
|
||||
source "mm/damon/Kconfig"
|
||||
|
||||
endmenu
|
||||
--- a/mm/huge_memory.c
|
||||
+++ b/mm/huge_memory.c
|
||||
@@ -2438,7 +2438,8 @@ static void __split_huge_page_tail(struc
|
||||
#ifdef CONFIG_64BIT
|
||||
(1L << PG_arch_2) |
|
||||
#endif
|
||||
- (1L << PG_dirty)));
|
||||
+ (1L << PG_dirty) |
|
||||
+ LRU_GEN_MASK | LRU_REFS_MASK));
|
||||
|
||||
/* ->mapping in first tail page is compound_mapcount */
|
||||
VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
|
||||
--- a/mm/memcontrol.c
|
||||
+++ b/mm/memcontrol.c
|
||||
@@ -5170,6 +5170,7 @@ static void __mem_cgroup_free(struct mem
|
||||
|
||||
static void mem_cgroup_free(struct mem_cgroup *memcg)
|
||||
{
|
||||
+ lru_gen_exit_memcg(memcg);
|
||||
memcg_wb_domain_exit(memcg);
|
||||
__mem_cgroup_free(memcg);
|
||||
}
|
||||
@@ -5228,6 +5229,7 @@ static struct mem_cgroup *mem_cgroup_all
|
||||
memcg->deferred_split_queue.split_queue_len = 0;
|
||||
#endif
|
||||
idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
|
||||
+ lru_gen_init_memcg(memcg);
|
||||
return memcg;
|
||||
fail:
|
||||
mem_cgroup_id_remove(memcg);
|
||||
--- a/mm/memory.c
|
||||
+++ b/mm/memory.c
|
||||
@@ -5110,6 +5110,27 @@ static inline void mm_account_fault(stru
|
||||
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+static void lru_gen_enter_fault(struct vm_area_struct *vma)
|
||||
+{
|
||||
+ /* the LRU algorithm doesn't apply to sequential or random reads */
|
||||
+ current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ));
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_exit_fault(void)
|
||||
+{
|
||||
+ current->in_lru_fault = false;
|
||||
+}
|
||||
+#else
|
||||
+static void lru_gen_enter_fault(struct vm_area_struct *vma)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_exit_fault(void)
|
||||
+{
|
||||
+}
|
||||
+#endif /* CONFIG_LRU_GEN */
|
||||
+
|
||||
/*
|
||||
* By the time we get here, we already hold the mm semaphore
|
||||
*
|
||||
@@ -5141,11 +5162,15 @@ vm_fault_t handle_mm_fault(struct vm_are
|
||||
if (flags & FAULT_FLAG_USER)
|
||||
mem_cgroup_enter_user_fault();
|
||||
|
||||
+ lru_gen_enter_fault(vma);
|
||||
+
|
||||
if (unlikely(is_vm_hugetlb_page(vma)))
|
||||
ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
|
||||
else
|
||||
ret = __handle_mm_fault(vma, address, flags);
|
||||
|
||||
+ lru_gen_exit_fault();
|
||||
+
|
||||
if (flags & FAULT_FLAG_USER) {
|
||||
mem_cgroup_exit_user_fault();
|
||||
/*
|
||||
--- a/mm/mm_init.c
|
||||
+++ b/mm/mm_init.c
|
||||
@@ -65,14 +65,16 @@ void __init mminit_verify_pageflags_layo
|
||||
|
||||
shift = 8 * sizeof(unsigned long);
|
||||
width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH
|
||||
- - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH;
|
||||
+ - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH - LRU_GEN_WIDTH - LRU_REFS_WIDTH;
|
||||
mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths",
|
||||
- "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Flags %d\n",
|
||||
+ "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Gen %d Tier %d Flags %d\n",
|
||||
SECTIONS_WIDTH,
|
||||
NODES_WIDTH,
|
||||
ZONES_WIDTH,
|
||||
LAST_CPUPID_WIDTH,
|
||||
KASAN_TAG_WIDTH,
|
||||
+ LRU_GEN_WIDTH,
|
||||
+ LRU_REFS_WIDTH,
|
||||
NR_PAGEFLAGS);
|
||||
mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts",
|
||||
"Section %d Node %d Zone %d Lastcpupid %d Kasantag %d\n",
|
||||
--- a/mm/mmzone.c
|
||||
+++ b/mm/mmzone.c
|
||||
@@ -88,6 +88,8 @@ void lruvec_init(struct lruvec *lruvec)
|
||||
* Poison its list head, so that any operations on it would crash.
|
||||
*/
|
||||
list_del(&lruvec->lists[LRU_UNEVICTABLE]);
|
||||
+
|
||||
+ lru_gen_init_lruvec(lruvec);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS)
|
||||
--- a/mm/swap.c
|
||||
+++ b/mm/swap.c
|
||||
@@ -484,6 +484,11 @@ void folio_add_lru(struct folio *folio)
|
||||
folio_test_unevictable(folio), folio);
|
||||
VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
|
||||
|
||||
+ /* see the comment in lru_gen_add_folio() */
|
||||
+ if (lru_gen_enabled() && !folio_test_unevictable(folio) &&
|
||||
+ lru_gen_in_fault() && !(current->flags & PF_MEMALLOC))
|
||||
+ folio_set_active(folio);
|
||||
+
|
||||
folio_get(folio);
|
||||
local_lock(&cpu_fbatches.lock);
|
||||
fbatch = this_cpu_ptr(&cpu_fbatches.lru_add);
|
||||
@@ -575,7 +580,7 @@ static void lru_deactivate_file_fn(struc
|
||||
|
||||
static void lru_deactivate_fn(struct lruvec *lruvec, struct folio *folio)
|
||||
{
|
||||
- if (folio_test_active(folio) && !folio_test_unevictable(folio)) {
|
||||
+ if (!folio_test_unevictable(folio) && (folio_test_active(folio) || lru_gen_enabled())) {
|
||||
long nr_pages = folio_nr_pages(folio);
|
||||
|
||||
lruvec_del_folio(lruvec, folio);
|
||||
@@ -688,8 +693,8 @@ void deactivate_page(struct page *page)
|
||||
{
|
||||
struct folio *folio = page_folio(page);
|
||||
|
||||
- if (folio_test_lru(folio) && folio_test_active(folio) &&
|
||||
- !folio_test_unevictable(folio)) {
|
||||
+ if (folio_test_lru(folio) && !folio_test_unevictable(folio) &&
|
||||
+ (folio_test_active(folio) || lru_gen_enabled())) {
|
||||
struct folio_batch *fbatch;
|
||||
|
||||
folio_get(folio);
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -3062,6 +3062,81 @@ static bool can_age_anon_pages(struct pg
|
||||
return can_demote(pgdat->node_id, sc);
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+
|
||||
+/******************************************************************************
|
||||
+ * shorthand helpers
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+#define for_each_gen_type_zone(gen, type, zone) \
|
||||
+ for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \
|
||||
+ for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \
|
||||
+ for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
|
||||
+
|
||||
+static struct lruvec __maybe_unused *get_lruvec(struct mem_cgroup *memcg, int nid)
|
||||
+{
|
||||
+ struct pglist_data *pgdat = NODE_DATA(nid);
|
||||
+
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+ if (memcg) {
|
||||
+ struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec;
|
||||
+
|
||||
+ /* for hotadd_new_pgdat() */
|
||||
+ if (!lruvec->pgdat)
|
||||
+ lruvec->pgdat = pgdat;
|
||||
+
|
||||
+ return lruvec;
|
||||
+ }
|
||||
+#endif
|
||||
+ VM_WARN_ON_ONCE(!mem_cgroup_disabled());
|
||||
+
|
||||
+ return pgdat ? &pgdat->__lruvec : NULL;
|
||||
+}
|
||||
+
|
||||
+/******************************************************************************
|
||||
+ * initialization
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+void lru_gen_init_lruvec(struct lruvec *lruvec)
|
||||
+{
|
||||
+ int gen, type, zone;
|
||||
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
+
|
||||
+ lrugen->max_seq = MIN_NR_GENS + 1;
|
||||
+
|
||||
+ for_each_gen_type_zone(gen, type, zone)
|
||||
+ INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
|
||||
+}
|
||||
+
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+void lru_gen_init_memcg(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+void lru_gen_exit_memcg(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+ int nid;
|
||||
+
|
||||
+ for_each_node(nid) {
|
||||
+ struct lruvec *lruvec = get_lruvec(memcg, nid);
|
||||
+
|
||||
+ VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0,
|
||||
+ sizeof(lruvec->lrugen.nr_pages)));
|
||||
+ }
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+static int __init init_lru_gen(void)
|
||||
+{
|
||||
+ BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
|
||||
+ BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
|
||||
+
|
||||
+ return 0;
|
||||
+};
|
||||
+late_initcall(init_lru_gen);
|
||||
+
|
||||
+#endif /* CONFIG_LRU_GEN */
|
||||
+
|
||||
static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
{
|
||||
unsigned long nr[NR_LRU_LISTS];
|
File diff suppressed because it is too large
Load Diff
@ -1,476 +0,0 @@
|
||||
From 93fa87bdef9e7fa9977355c4712c000f31639231 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Thu, 27 Jan 2022 20:43:22 -0700
|
||||
Subject: [PATCH 07/14] mm: multi-gen LRU: exploit locality in rmap
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Searching the rmap for PTEs mapping each page on an LRU list (to test
|
||||
and clear the accessed bit) can be expensive because pages from
|
||||
different VMAs (PA space) are not cache friendly to the rmap (VA
|
||||
space). For workloads mostly using mapped pages, searching the rmap
|
||||
can incur the highest CPU cost in the reclaim path.
|
||||
|
||||
This patch exploits spatial locality to reduce the trips into the
|
||||
rmap. When shrink_page_list() walks the rmap and finds a young PTE, a
|
||||
new function lru_gen_look_around() scans at most BITS_PER_LONG-1
|
||||
adjacent PTEs. On finding another young PTE, it clears the accessed
|
||||
bit and updates the gen counter of the page mapped by this PTE to
|
||||
(max_seq%MAX_NR_GENS)+1.
|
||||
|
||||
Server benchmark results:
|
||||
Single workload:
|
||||
fio (buffered I/O): no change
|
||||
|
||||
Single workload:
|
||||
memcached (anon): +[3, 5]%
|
||||
Ops/sec KB/sec
|
||||
patch1-6: 1106168.46 43025.04
|
||||
patch1-7: 1147696.57 44640.29
|
||||
|
||||
Configurations:
|
||||
no change
|
||||
|
||||
Client benchmark results:
|
||||
kswapd profiles:
|
||||
patch1-6
|
||||
39.03% lzo1x_1_do_compress (real work)
|
||||
18.47% page_vma_mapped_walk (overhead)
|
||||
6.74% _raw_spin_unlock_irq
|
||||
3.97% do_raw_spin_lock
|
||||
2.49% ptep_clear_flush
|
||||
2.48% anon_vma_interval_tree_iter_first
|
||||
1.92% folio_referenced_one
|
||||
1.88% __zram_bvec_write
|
||||
1.48% memmove
|
||||
1.31% vma_interval_tree_iter_next
|
||||
|
||||
patch1-7
|
||||
48.16% lzo1x_1_do_compress (real work)
|
||||
8.20% page_vma_mapped_walk (overhead)
|
||||
7.06% _raw_spin_unlock_irq
|
||||
2.92% ptep_clear_flush
|
||||
2.53% __zram_bvec_write
|
||||
2.11% do_raw_spin_lock
|
||||
2.02% memmove
|
||||
1.93% lru_gen_look_around
|
||||
1.56% free_unref_page_list
|
||||
1.40% memset
|
||||
|
||||
Configurations:
|
||||
no change
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Acked-by: Barry Song <baohua@kernel.org>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I4b9ca0fd20f566ce554e703f14cee3fe0048c2fd
|
||||
---
|
||||
include/linux/memcontrol.h | 31 +++++++
|
||||
include/linux/mm.h | 5 +
|
||||
include/linux/mmzone.h | 6 ++
|
||||
mm/internal.h | 1 +
|
||||
mm/memcontrol.c | 1 +
|
||||
mm/rmap.c | 6 ++
|
||||
mm/swap.c | 4 +-
|
||||
mm/vmscan.c | 184 +++++++++++++++++++++++++++++++++++++
|
||||
8 files changed, 236 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/include/linux/memcontrol.h
|
||||
+++ b/include/linux/memcontrol.h
|
||||
@@ -444,6 +444,7 @@ static inline struct obj_cgroup *__folio
|
||||
* - LRU isolation
|
||||
* - lock_page_memcg()
|
||||
* - exclusive reference
|
||||
+ * - mem_cgroup_trylock_pages()
|
||||
*
|
||||
* For a kmem folio a caller should hold an rcu read lock to protect memcg
|
||||
* associated with a kmem folio from being released.
|
||||
@@ -505,6 +506,7 @@ static inline struct mem_cgroup *folio_m
|
||||
* - LRU isolation
|
||||
* - lock_page_memcg()
|
||||
* - exclusive reference
|
||||
+ * - mem_cgroup_trylock_pages()
|
||||
*
|
||||
* For a kmem page a caller should hold an rcu read lock to protect memcg
|
||||
* associated with a kmem page from being released.
|
||||
@@ -959,6 +961,23 @@ void unlock_page_memcg(struct page *page
|
||||
|
||||
void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val);
|
||||
|
||||
+/* try to stabilize folio_memcg() for all the pages in a memcg */
|
||||
+static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+ rcu_read_lock();
|
||||
+
|
||||
+ if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account))
|
||||
+ return true;
|
||||
+
|
||||
+ rcu_read_unlock();
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static inline void mem_cgroup_unlock_pages(void)
|
||||
+{
|
||||
+ rcu_read_unlock();
|
||||
+}
|
||||
+
|
||||
/* idx can be of type enum memcg_stat_item or node_stat_item */
|
||||
static inline void mod_memcg_state(struct mem_cgroup *memcg,
|
||||
int idx, int val)
|
||||
@@ -1433,6 +1452,18 @@ static inline void folio_memcg_unlock(st
|
||||
{
|
||||
}
|
||||
|
||||
+static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+ /* to match folio_memcg_rcu() */
|
||||
+ rcu_read_lock();
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static inline void mem_cgroup_unlock_pages(void)
|
||||
+{
|
||||
+ rcu_read_unlock();
|
||||
+}
|
||||
+
|
||||
static inline void mem_cgroup_handle_over_high(void)
|
||||
{
|
||||
}
|
||||
--- a/include/linux/mm.h
|
||||
+++ b/include/linux/mm.h
|
||||
@@ -1465,6 +1465,11 @@ static inline unsigned long folio_pfn(st
|
||||
return page_to_pfn(&folio->page);
|
||||
}
|
||||
|
||||
+static inline struct folio *pfn_folio(unsigned long pfn)
|
||||
+{
|
||||
+ return page_folio(pfn_to_page(pfn));
|
||||
+}
|
||||
+
|
||||
static inline atomic_t *folio_pincount_ptr(struct folio *folio)
|
||||
{
|
||||
return &folio_page(folio, 1)->compound_pincount;
|
||||
--- a/include/linux/mmzone.h
|
||||
+++ b/include/linux/mmzone.h
|
||||
@@ -372,6 +372,7 @@ enum lruvec_flags {
|
||||
#ifndef __GENERATING_BOUNDS_H
|
||||
|
||||
struct lruvec;
|
||||
+struct page_vma_mapped_walk;
|
||||
|
||||
#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
|
||||
#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
|
||||
@@ -427,6 +428,7 @@ struct lru_gen_struct {
|
||||
};
|
||||
|
||||
void lru_gen_init_lruvec(struct lruvec *lruvec);
|
||||
+void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
void lru_gen_init_memcg(struct mem_cgroup *memcg);
|
||||
@@ -439,6 +441,10 @@ static inline void lru_gen_init_lruvec(s
|
||||
{
|
||||
}
|
||||
|
||||
+static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
#ifdef CONFIG_MEMCG
|
||||
static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
|
||||
{
|
||||
--- a/mm/internal.h
|
||||
+++ b/mm/internal.h
|
||||
@@ -83,6 +83,7 @@ vm_fault_t do_swap_page(struct vm_fault
|
||||
void folio_rotate_reclaimable(struct folio *folio);
|
||||
bool __folio_end_writeback(struct folio *folio);
|
||||
void deactivate_file_folio(struct folio *folio);
|
||||
+void folio_activate(struct folio *folio);
|
||||
|
||||
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
|
||||
unsigned long floor, unsigned long ceiling);
|
||||
--- a/mm/memcontrol.c
|
||||
+++ b/mm/memcontrol.c
|
||||
@@ -2789,6 +2789,7 @@ static void commit_charge(struct folio *
|
||||
* - LRU isolation
|
||||
* - lock_page_memcg()
|
||||
* - exclusive reference
|
||||
+ * - mem_cgroup_trylock_pages()
|
||||
*/
|
||||
folio->memcg_data = (unsigned long)memcg;
|
||||
}
|
||||
--- a/mm/rmap.c
|
||||
+++ b/mm/rmap.c
|
||||
@@ -833,6 +833,12 @@ static bool folio_referenced_one(struct
|
||||
}
|
||||
|
||||
if (pvmw.pte) {
|
||||
+ if (lru_gen_enabled() && pte_young(*pvmw.pte) &&
|
||||
+ !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) {
|
||||
+ lru_gen_look_around(&pvmw);
|
||||
+ referenced++;
|
||||
+ }
|
||||
+
|
||||
if (ptep_clear_flush_young_notify(vma, address,
|
||||
pvmw.pte)) {
|
||||
/*
|
||||
--- a/mm/swap.c
|
||||
+++ b/mm/swap.c
|
||||
@@ -366,7 +366,7 @@ static void folio_activate_drain(int cpu
|
||||
folio_batch_move_lru(fbatch, folio_activate_fn);
|
||||
}
|
||||
|
||||
-static void folio_activate(struct folio *folio)
|
||||
+void folio_activate(struct folio *folio)
|
||||
{
|
||||
if (folio_test_lru(folio) && !folio_test_active(folio) &&
|
||||
!folio_test_unevictable(folio)) {
|
||||
@@ -385,7 +385,7 @@ static inline void folio_activate_drain(
|
||||
{
|
||||
}
|
||||
|
||||
-static void folio_activate(struct folio *folio)
|
||||
+void folio_activate(struct folio *folio)
|
||||
{
|
||||
struct lruvec *lruvec;
|
||||
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -1635,6 +1635,11 @@ retry:
|
||||
if (!sc->may_unmap && folio_mapped(folio))
|
||||
goto keep_locked;
|
||||
|
||||
+ /* folio_update_gen() tried to promote this page? */
|
||||
+ if (lru_gen_enabled() && !ignore_references &&
|
||||
+ folio_mapped(folio) && folio_test_referenced(folio))
|
||||
+ goto keep_locked;
|
||||
+
|
||||
/*
|
||||
* The number of dirty pages determines if a node is marked
|
||||
* reclaim_congested. kswapd will stall and start writing
|
||||
@@ -3231,6 +3236,29 @@ static bool positive_ctrl_err(struct ctr
|
||||
* the aging
|
||||
******************************************************************************/
|
||||
|
||||
+/* promote pages accessed through page tables */
|
||||
+static int folio_update_gen(struct folio *folio, int gen)
|
||||
+{
|
||||
+ unsigned long new_flags, old_flags = READ_ONCE(folio->flags);
|
||||
+
|
||||
+ VM_WARN_ON_ONCE(gen >= MAX_NR_GENS);
|
||||
+ VM_WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
+
|
||||
+ do {
|
||||
+ /* lru_gen_del_folio() has isolated this page? */
|
||||
+ if (!(old_flags & LRU_GEN_MASK)) {
|
||||
+ /* for shrink_page_list() */
|
||||
+ new_flags = old_flags | BIT(PG_referenced);
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
|
||||
+ new_flags |= (gen + 1UL) << LRU_GEN_PGOFF;
|
||||
+ } while (!try_cmpxchg(&folio->flags, &old_flags, new_flags));
|
||||
+
|
||||
+ return ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
|
||||
+}
|
||||
+
|
||||
/* protect pages accessed multiple times through file descriptors */
|
||||
static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
|
||||
{
|
||||
@@ -3242,6 +3270,11 @@ static int folio_inc_gen(struct lruvec *
|
||||
VM_WARN_ON_ONCE_FOLIO(!(old_flags & LRU_GEN_MASK), folio);
|
||||
|
||||
do {
|
||||
+ new_gen = ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
|
||||
+ /* folio_update_gen() has promoted this page? */
|
||||
+ if (new_gen >= 0 && new_gen != old_gen)
|
||||
+ return new_gen;
|
||||
+
|
||||
new_gen = (old_gen + 1) % MAX_NR_GENS;
|
||||
|
||||
new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
|
||||
@@ -3256,6 +3289,43 @@ static int folio_inc_gen(struct lruvec *
|
||||
return new_gen;
|
||||
}
|
||||
|
||||
+static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr)
|
||||
+{
|
||||
+ unsigned long pfn = pte_pfn(pte);
|
||||
+
|
||||
+ VM_WARN_ON_ONCE(addr < vma->vm_start || addr >= vma->vm_end);
|
||||
+
|
||||
+ if (!pte_present(pte) || is_zero_pfn(pfn))
|
||||
+ return -1;
|
||||
+
|
||||
+ if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte)))
|
||||
+ return -1;
|
||||
+
|
||||
+ if (WARN_ON_ONCE(!pfn_valid(pfn)))
|
||||
+ return -1;
|
||||
+
|
||||
+ return pfn;
|
||||
+}
|
||||
+
|
||||
+static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
|
||||
+ struct pglist_data *pgdat)
|
||||
+{
|
||||
+ struct folio *folio;
|
||||
+
|
||||
+ /* try to avoid unnecessary memory loads */
|
||||
+ if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
|
||||
+ return NULL;
|
||||
+
|
||||
+ folio = pfn_folio(pfn);
|
||||
+ if (folio_nid(folio) != pgdat->node_id)
|
||||
+ return NULL;
|
||||
+
|
||||
+ if (folio_memcg_rcu(folio) != memcg)
|
||||
+ return NULL;
|
||||
+
|
||||
+ return folio;
|
||||
+}
|
||||
+
|
||||
static void inc_min_seq(struct lruvec *lruvec, int type)
|
||||
{
|
||||
struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
@@ -3455,6 +3525,114 @@ static void lru_gen_age_node(struct pgli
|
||||
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * This function exploits spatial locality when shrink_page_list() walks the
|
||||
+ * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages.
|
||||
+ */
|
||||
+void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
+{
|
||||
+ int i;
|
||||
+ pte_t *pte;
|
||||
+ unsigned long start;
|
||||
+ unsigned long end;
|
||||
+ unsigned long addr;
|
||||
+ unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {};
|
||||
+ struct folio *folio = pfn_folio(pvmw->pfn);
|
||||
+ struct mem_cgroup *memcg = folio_memcg(folio);
|
||||
+ struct pglist_data *pgdat = folio_pgdat(folio);
|
||||
+ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
|
||||
+ DEFINE_MAX_SEQ(lruvec);
|
||||
+ int old_gen, new_gen = lru_gen_from_seq(max_seq);
|
||||
+
|
||||
+ lockdep_assert_held(pvmw->ptl);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_lru(folio), folio);
|
||||
+
|
||||
+ if (spin_is_contended(pvmw->ptl))
|
||||
+ return;
|
||||
+
|
||||
+ start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start);
|
||||
+ end = min(pvmw->address | ~PMD_MASK, pvmw->vma->vm_end - 1) + 1;
|
||||
+
|
||||
+ if (end - start > MIN_LRU_BATCH * PAGE_SIZE) {
|
||||
+ if (pvmw->address - start < MIN_LRU_BATCH * PAGE_SIZE / 2)
|
||||
+ end = start + MIN_LRU_BATCH * PAGE_SIZE;
|
||||
+ else if (end - pvmw->address < MIN_LRU_BATCH * PAGE_SIZE / 2)
|
||||
+ start = end - MIN_LRU_BATCH * PAGE_SIZE;
|
||||
+ else {
|
||||
+ start = pvmw->address - MIN_LRU_BATCH * PAGE_SIZE / 2;
|
||||
+ end = pvmw->address + MIN_LRU_BATCH * PAGE_SIZE / 2;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ pte = pvmw->pte - (pvmw->address - start) / PAGE_SIZE;
|
||||
+
|
||||
+ rcu_read_lock();
|
||||
+ arch_enter_lazy_mmu_mode();
|
||||
+
|
||||
+ for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) {
|
||||
+ unsigned long pfn;
|
||||
+
|
||||
+ pfn = get_pte_pfn(pte[i], pvmw->vma, addr);
|
||||
+ if (pfn == -1)
|
||||
+ continue;
|
||||
+
|
||||
+ if (!pte_young(pte[i]))
|
||||
+ continue;
|
||||
+
|
||||
+ folio = get_pfn_folio(pfn, memcg, pgdat);
|
||||
+ if (!folio)
|
||||
+ continue;
|
||||
+
|
||||
+ if (!ptep_test_and_clear_young(pvmw->vma, addr, pte + i))
|
||||
+ VM_WARN_ON_ONCE(true);
|
||||
+
|
||||
+ if (pte_dirty(pte[i]) && !folio_test_dirty(folio) &&
|
||||
+ !(folio_test_anon(folio) && folio_test_swapbacked(folio) &&
|
||||
+ !folio_test_swapcache(folio)))
|
||||
+ folio_mark_dirty(folio);
|
||||
+
|
||||
+ old_gen = folio_lru_gen(folio);
|
||||
+ if (old_gen < 0)
|
||||
+ folio_set_referenced(folio);
|
||||
+ else if (old_gen != new_gen)
|
||||
+ __set_bit(i, bitmap);
|
||||
+ }
|
||||
+
|
||||
+ arch_leave_lazy_mmu_mode();
|
||||
+ rcu_read_unlock();
|
||||
+
|
||||
+ if (bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) {
|
||||
+ for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
|
||||
+ folio = pfn_folio(pte_pfn(pte[i]));
|
||||
+ folio_activate(folio);
|
||||
+ }
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /* folio_update_gen() requires stable folio_memcg() */
|
||||
+ if (!mem_cgroup_trylock_pages(memcg))
|
||||
+ return;
|
||||
+
|
||||
+ spin_lock_irq(&lruvec->lru_lock);
|
||||
+ new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq);
|
||||
+
|
||||
+ for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
|
||||
+ folio = pfn_folio(pte_pfn(pte[i]));
|
||||
+ if (folio_memcg_rcu(folio) != memcg)
|
||||
+ continue;
|
||||
+
|
||||
+ old_gen = folio_update_gen(folio, new_gen);
|
||||
+ if (old_gen < 0 || old_gen == new_gen)
|
||||
+ continue;
|
||||
+
|
||||
+ lru_gen_update_size(lruvec, folio, old_gen, new_gen);
|
||||
+ }
|
||||
+
|
||||
+ spin_unlock_irq(&lruvec->lru_lock);
|
||||
+
|
||||
+ mem_cgroup_unlock_pages();
|
||||
+}
|
||||
+
|
||||
/******************************************************************************
|
||||
* the eviction
|
||||
******************************************************************************/
|
||||
@@ -3491,6 +3669,12 @@ static bool sort_folio(struct lruvec *lr
|
||||
return true;
|
||||
}
|
||||
|
||||
+ /* promoted */
|
||||
+ if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
|
||||
+ list_move(&folio->lru, &lrugen->lists[gen][type][zone]);
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
/* protected */
|
||||
if (tier > tier_idx) {
|
||||
int hist = lru_hist_from_seq(lrugen->min_seq[type]);
|
File diff suppressed because it is too large
Load Diff
@ -1,290 +0,0 @@
|
||||
From 6b9670b94ba2b49b289b997121062500e32fc3e4 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Thu, 27 Jan 2022 19:59:54 -0700
|
||||
Subject: [PATCH 09/14] mm: multi-gen LRU: optimize multiple memcgs
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
When multiple memcgs are available, it is possible to use generations
|
||||
as a frame of reference to make better choices and improve overall
|
||||
performance under global memory pressure. This patch adds a basic
|
||||
optimization to select memcgs that can drop single-use unmapped clean
|
||||
pages first. Doing so reduces the chance of going into the aging path
|
||||
or swapping, which can be costly.
|
||||
|
||||
A typical example that benefits from this optimization is a server
|
||||
running mixed types of workloads, e.g., heavy anon workload in one
|
||||
memcg and heavy buffered I/O workload in the other.
|
||||
|
||||
Though this optimization can be applied to both kswapd and direct
|
||||
reclaim, it is only added to kswapd to keep the patchset manageable.
|
||||
Later improvements may cover the direct reclaim path.
|
||||
|
||||
While ensuring certain fairness to all eligible memcgs, proportional
|
||||
scans of individual memcgs also require proper backoff to avoid
|
||||
overshooting their aggregate reclaim target by too much. Otherwise it
|
||||
can cause high direct reclaim latency. The conditions for backoff are:
|
||||
1. At low priorities, for direct reclaim, if aging fairness or direct
|
||||
reclaim latency is at risk, i.e., aging one memcg multiple times or
|
||||
swapping after the target is met.
|
||||
2. At high priorities, for global reclaim, if per-zone free pages are
|
||||
above respective watermarks.
|
||||
|
||||
Server benchmark results:
|
||||
Mixed workloads:
|
||||
fio (buffered I/O): +[19, 21]%
|
||||
IOPS BW
|
||||
patch1-8: 1880k 7343MiB/s
|
||||
patch1-9: 2252k 8796MiB/s
|
||||
|
||||
memcached (anon): +[119, 123]%
|
||||
Ops/sec KB/sec
|
||||
patch1-8: 862768.65 33514.68
|
||||
patch1-9: 1911022.12 74234.54
|
||||
|
||||
Mixed workloads:
|
||||
fio (buffered I/O): +[75, 77]%
|
||||
IOPS BW
|
||||
5.19-rc1: 1279k 4996MiB/s
|
||||
patch1-9: 2252k 8796MiB/s
|
||||
|
||||
memcached (anon): +[13, 15]%
|
||||
Ops/sec KB/sec
|
||||
5.19-rc1: 1673524.04 65008.87
|
||||
patch1-9: 1911022.12 74234.54
|
||||
|
||||
Configurations:
|
||||
(changes since patch 6)
|
||||
|
||||
cat mixed.sh
|
||||
modprobe brd rd_nr=2 rd_size=56623104
|
||||
|
||||
swapoff -a
|
||||
mkswap /dev/ram0
|
||||
swapon /dev/ram0
|
||||
|
||||
mkfs.ext4 /dev/ram1
|
||||
mount -t ext4 /dev/ram1 /mnt
|
||||
|
||||
memtier_benchmark -S /var/run/memcached/memcached.sock \
|
||||
-P memcache_binary -n allkeys --key-minimum=1 \
|
||||
--key-maximum=50000000 --key-pattern=P:P -c 1 -t 36 \
|
||||
--ratio 1:0 --pipeline 8 -d 2000
|
||||
|
||||
fio -name=mglru --numjobs=36 --directory=/mnt --size=1408m \
|
||||
--buffered=1 --ioengine=io_uring --iodepth=128 \
|
||||
--iodepth_batch_submit=32 --iodepth_batch_complete=32 \
|
||||
--rw=randread --random_distribution=random --norandommap \
|
||||
--time_based --ramp_time=10m --runtime=90m --group_reporting &
|
||||
pid=$!
|
||||
|
||||
sleep 200
|
||||
|
||||
memtier_benchmark -S /var/run/memcached/memcached.sock \
|
||||
-P memcache_binary -n allkeys --key-minimum=1 \
|
||||
--key-maximum=50000000 --key-pattern=R:R -c 1 -t 36 \
|
||||
--ratio 0:1 --pipeline 8 --randomize --distinct-client-seed
|
||||
|
||||
kill -INT $pid
|
||||
wait
|
||||
|
||||
Client benchmark results:
|
||||
no change (CONFIG_MEMCG=n)
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I7e00e0c733437e534ac98031cf8154a681becc00
|
||||
---
|
||||
mm/vmscan.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++-----
|
||||
1 file changed, 95 insertions(+), 9 deletions(-)
|
||||
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -131,6 +131,12 @@ struct scan_control {
|
||||
/* Always discard instead of demoting to lower tier memory */
|
||||
unsigned int no_demotion:1;
|
||||
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+ /* help kswapd make better choices among multiple memcgs */
|
||||
+ unsigned int memcgs_need_aging:1;
|
||||
+ unsigned long last_reclaimed;
|
||||
+#endif
|
||||
+
|
||||
/* Allocation order */
|
||||
s8 order;
|
||||
|
||||
@@ -4441,6 +4447,19 @@ static void lru_gen_age_node(struct pgli
|
||||
|
||||
VM_WARN_ON_ONCE(!current_is_kswapd());
|
||||
|
||||
+ sc->last_reclaimed = sc->nr_reclaimed;
|
||||
+
|
||||
+ /*
|
||||
+ * To reduce the chance of going into the aging path, which can be
|
||||
+ * costly, optimistically skip it if the flag below was cleared in the
|
||||
+ * eviction path. This improves the overall performance when multiple
|
||||
+ * memcgs are available.
|
||||
+ */
|
||||
+ if (!sc->memcgs_need_aging) {
|
||||
+ sc->memcgs_need_aging = true;
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
set_mm_walk(pgdat);
|
||||
|
||||
memcg = mem_cgroup_iter(NULL, NULL, NULL);
|
||||
@@ -4852,7 +4871,8 @@ static int isolate_folios(struct lruvec
|
||||
return scanned;
|
||||
}
|
||||
|
||||
-static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
|
||||
+static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
|
||||
+ bool *need_swapping)
|
||||
{
|
||||
int type;
|
||||
int scanned;
|
||||
@@ -4915,6 +4935,9 @@ static int evict_folios(struct lruvec *l
|
||||
|
||||
sc->nr_reclaimed += reclaimed;
|
||||
|
||||
+ if (need_swapping && type == LRU_GEN_ANON)
|
||||
+ *need_swapping = true;
|
||||
+
|
||||
return scanned;
|
||||
}
|
||||
|
||||
@@ -4924,9 +4947,8 @@ static int evict_folios(struct lruvec *l
|
||||
* reclaim.
|
||||
*/
|
||||
static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
|
||||
- bool can_swap)
|
||||
+ bool can_swap, bool *need_aging)
|
||||
{
|
||||
- bool need_aging;
|
||||
unsigned long nr_to_scan;
|
||||
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
DEFINE_MAX_SEQ(lruvec);
|
||||
@@ -4936,8 +4958,8 @@ static unsigned long get_nr_to_scan(stru
|
||||
(mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
|
||||
return 0;
|
||||
|
||||
- need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
|
||||
- if (!need_aging)
|
||||
+ *need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
|
||||
+ if (!*need_aging)
|
||||
return nr_to_scan;
|
||||
|
||||
/* skip the aging path at the default priority */
|
||||
@@ -4954,10 +4976,67 @@ done:
|
||||
return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
|
||||
}
|
||||
|
||||
+static bool should_abort_scan(struct lruvec *lruvec, unsigned long seq,
|
||||
+ struct scan_control *sc, bool need_swapping)
|
||||
+{
|
||||
+ int i;
|
||||
+ DEFINE_MAX_SEQ(lruvec);
|
||||
+
|
||||
+ if (!current_is_kswapd()) {
|
||||
+ /* age each memcg at most once to ensure fairness */
|
||||
+ if (max_seq - seq > 1)
|
||||
+ return true;
|
||||
+
|
||||
+ /* over-swapping can increase allocation latency */
|
||||
+ if (sc->nr_reclaimed >= sc->nr_to_reclaim && need_swapping)
|
||||
+ return true;
|
||||
+
|
||||
+ /* give this thread a chance to exit and free its memory */
|
||||
+ if (fatal_signal_pending(current)) {
|
||||
+ sc->nr_reclaimed += MIN_LRU_BATCH;
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ if (cgroup_reclaim(sc))
|
||||
+ return false;
|
||||
+ } else if (sc->nr_reclaimed - sc->last_reclaimed < sc->nr_to_reclaim)
|
||||
+ return false;
|
||||
+
|
||||
+ /* keep scanning at low priorities to ensure fairness */
|
||||
+ if (sc->priority > DEF_PRIORITY - 2)
|
||||
+ return false;
|
||||
+
|
||||
+ /*
|
||||
+ * A minimum amount of work was done under global memory pressure. For
|
||||
+ * kswapd, it may be overshooting. For direct reclaim, the allocation
|
||||
+ * may succeed if all suitable zones are somewhat safe. In either case,
|
||||
+ * it's better to stop now, and restart later if necessary.
|
||||
+ */
|
||||
+ for (i = 0; i <= sc->reclaim_idx; i++) {
|
||||
+ unsigned long wmark;
|
||||
+ struct zone *zone = lruvec_pgdat(lruvec)->node_zones + i;
|
||||
+
|
||||
+ if (!managed_zone(zone))
|
||||
+ continue;
|
||||
+
|
||||
+ wmark = current_is_kswapd() ? high_wmark_pages(zone) : low_wmark_pages(zone);
|
||||
+ if (wmark > zone_page_state(zone, NR_FREE_PAGES))
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ sc->nr_reclaimed += MIN_LRU_BATCH;
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
{
|
||||
struct blk_plug plug;
|
||||
+ bool need_aging = false;
|
||||
+ bool need_swapping = false;
|
||||
unsigned long scanned = 0;
|
||||
+ unsigned long reclaimed = sc->nr_reclaimed;
|
||||
+ DEFINE_MAX_SEQ(lruvec);
|
||||
|
||||
lru_add_drain();
|
||||
|
||||
@@ -4977,21 +5056,28 @@ static void lru_gen_shrink_lruvec(struct
|
||||
else
|
||||
swappiness = 0;
|
||||
|
||||
- nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
|
||||
+ nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness, &need_aging);
|
||||
if (!nr_to_scan)
|
||||
- break;
|
||||
+ goto done;
|
||||
|
||||
- delta = evict_folios(lruvec, sc, swappiness);
|
||||
+ delta = evict_folios(lruvec, sc, swappiness, &need_swapping);
|
||||
if (!delta)
|
||||
- break;
|
||||
+ goto done;
|
||||
|
||||
scanned += delta;
|
||||
if (scanned >= nr_to_scan)
|
||||
break;
|
||||
|
||||
+ if (should_abort_scan(lruvec, max_seq, sc, need_swapping))
|
||||
+ break;
|
||||
+
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
+ /* see the comment in lru_gen_age_node() */
|
||||
+ if (sc->nr_reclaimed - reclaimed >= MIN_LRU_BATCH && !need_aging)
|
||||
+ sc->memcgs_need_aging = false;
|
||||
+done:
|
||||
clear_mm_walk();
|
||||
|
||||
blk_finish_plug(&plug);
|
@ -1,475 +0,0 @@
|
||||
From ef61bb3622ee0f36e055dfd5006badff08f5ce61 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Thu, 27 Jan 2022 19:52:09 -0700
|
||||
Subject: [PATCH 10/14] mm: multi-gen LRU: kill switch
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Add /sys/kernel/mm/lru_gen/enabled as a kill switch. Components that
|
||||
can be disabled include:
|
||||
0x0001: the multi-gen LRU core
|
||||
0x0002: walking page table, when arch_has_hw_pte_young() returns
|
||||
true
|
||||
0x0004: clearing the accessed bit in non-leaf PMD entries, when
|
||||
CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y
|
||||
[yYnN]: apply to all the components above
|
||||
E.g.,
|
||||
echo y >/sys/kernel/mm/lru_gen/enabled
|
||||
cat /sys/kernel/mm/lru_gen/enabled
|
||||
0x0007
|
||||
echo 5 >/sys/kernel/mm/lru_gen/enabled
|
||||
cat /sys/kernel/mm/lru_gen/enabled
|
||||
0x0005
|
||||
|
||||
NB: the page table walks happen on the scale of seconds under heavy
|
||||
memory pressure, in which case the mmap_lock contention is a lesser
|
||||
concern, compared with the LRU lock contention and the I/O congestion.
|
||||
So far the only well-known case of the mmap_lock contention happens on
|
||||
Android, due to Scudo [1] which allocates several thousand VMAs for
|
||||
merely a few hundred MBs. The SPF and the Maple Tree also have
|
||||
provided their own assessments [2][3]. However, if walking page tables
|
||||
does worsen the mmap_lock contention, the kill switch can be used to
|
||||
disable it. In this case the multi-gen LRU will suffer a minor
|
||||
performance degradation, as shown previously.
|
||||
|
||||
Clearing the accessed bit in non-leaf PMD entries can also be
|
||||
disabled, since this behavior was not tested on x86 varieties other
|
||||
than Intel and AMD.
|
||||
|
||||
[1] https://source.android.com/devices/tech/debug/scudo
|
||||
[2] https://lore.kernel.org/r/20220128131006.67712-1-michel@lespinasse.org/
|
||||
[3] https://lore.kernel.org/r/20220426150616.3937571-1-Liam.Howlett@oracle.com/
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I4c909618e8fed7fb1337f6624bbe542ec920a515
|
||||
---
|
||||
include/linux/cgroup.h | 15 ++-
|
||||
include/linux/mm_inline.h | 15 ++-
|
||||
include/linux/mmzone.h | 9 ++
|
||||
kernel/cgroup/cgroup-internal.h | 1 -
|
||||
mm/Kconfig | 6 +
|
||||
mm/vmscan.c | 228 +++++++++++++++++++++++++++++++-
|
||||
6 files changed, 265 insertions(+), 9 deletions(-)
|
||||
|
||||
--- a/include/linux/cgroup.h
|
||||
+++ b/include/linux/cgroup.h
|
||||
@@ -432,6 +432,18 @@ static inline void cgroup_put(struct cgr
|
||||
css_put(&cgrp->self);
|
||||
}
|
||||
|
||||
+extern struct mutex cgroup_mutex;
|
||||
+
|
||||
+static inline void cgroup_lock(void)
|
||||
+{
|
||||
+ mutex_lock(&cgroup_mutex);
|
||||
+}
|
||||
+
|
||||
+static inline void cgroup_unlock(void)
|
||||
+{
|
||||
+ mutex_unlock(&cgroup_mutex);
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* task_css_set_check - obtain a task's css_set with extra access conditions
|
||||
* @task: the task to obtain css_set for
|
||||
@@ -446,7 +458,6 @@ static inline void cgroup_put(struct cgr
|
||||
* as locks used during the cgroup_subsys::attach() methods.
|
||||
*/
|
||||
#ifdef CONFIG_PROVE_RCU
|
||||
-extern struct mutex cgroup_mutex;
|
||||
extern spinlock_t css_set_lock;
|
||||
#define task_css_set_check(task, __c) \
|
||||
rcu_dereference_check((task)->cgroups, \
|
||||
@@ -708,6 +719,8 @@ struct cgroup;
|
||||
static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; }
|
||||
static inline void css_get(struct cgroup_subsys_state *css) {}
|
||||
static inline void css_put(struct cgroup_subsys_state *css) {}
|
||||
+static inline void cgroup_lock(void) {}
|
||||
+static inline void cgroup_unlock(void) {}
|
||||
static inline int cgroup_attach_task_all(struct task_struct *from,
|
||||
struct task_struct *t) { return 0; }
|
||||
static inline int cgroupstats_build(struct cgroupstats *stats,
|
||||
--- a/include/linux/mm_inline.h
|
||||
+++ b/include/linux/mm_inline.h
|
||||
@@ -106,10 +106,21 @@ static __always_inline enum lru_list fol
|
||||
|
||||
#ifdef CONFIG_LRU_GEN
|
||||
|
||||
+#ifdef CONFIG_LRU_GEN_ENABLED
|
||||
static inline bool lru_gen_enabled(void)
|
||||
{
|
||||
- return true;
|
||||
+ DECLARE_STATIC_KEY_TRUE(lru_gen_caps[NR_LRU_GEN_CAPS]);
|
||||
+
|
||||
+ return static_branch_likely(&lru_gen_caps[LRU_GEN_CORE]);
|
||||
+}
|
||||
+#else
|
||||
+static inline bool lru_gen_enabled(void)
|
||||
+{
|
||||
+ DECLARE_STATIC_KEY_FALSE(lru_gen_caps[NR_LRU_GEN_CAPS]);
|
||||
+
|
||||
+ return static_branch_unlikely(&lru_gen_caps[LRU_GEN_CORE]);
|
||||
}
|
||||
+#endif
|
||||
|
||||
static inline bool lru_gen_in_fault(void)
|
||||
{
|
||||
@@ -222,7 +233,7 @@ static inline bool lru_gen_add_folio(str
|
||||
|
||||
VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
|
||||
|
||||
- if (folio_test_unevictable(folio))
|
||||
+ if (folio_test_unevictable(folio) || !lrugen->enabled)
|
||||
return false;
|
||||
/*
|
||||
* There are three common cases for this page:
|
||||
--- a/include/linux/mmzone.h
|
||||
+++ b/include/linux/mmzone.h
|
||||
@@ -384,6 +384,13 @@ enum {
|
||||
LRU_GEN_FILE,
|
||||
};
|
||||
|
||||
+enum {
|
||||
+ LRU_GEN_CORE,
|
||||
+ LRU_GEN_MM_WALK,
|
||||
+ LRU_GEN_NONLEAF_YOUNG,
|
||||
+ NR_LRU_GEN_CAPS
|
||||
+};
|
||||
+
|
||||
#define MIN_LRU_BATCH BITS_PER_LONG
|
||||
#define MAX_LRU_BATCH (MIN_LRU_BATCH * 64)
|
||||
|
||||
@@ -425,6 +432,8 @@ struct lru_gen_struct {
|
||||
/* can be modified without holding the LRU lock */
|
||||
atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
|
||||
atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
|
||||
+ /* whether the multi-gen LRU is enabled */
|
||||
+ bool enabled;
|
||||
};
|
||||
|
||||
enum {
|
||||
--- a/kernel/cgroup/cgroup-internal.h
|
||||
+++ b/kernel/cgroup/cgroup-internal.h
|
||||
@@ -164,7 +164,6 @@ struct cgroup_mgctx {
|
||||
#define DEFINE_CGROUP_MGCTX(name) \
|
||||
struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name)
|
||||
|
||||
-extern struct mutex cgroup_mutex;
|
||||
extern spinlock_t css_set_lock;
|
||||
extern struct cgroup_subsys *cgroup_subsys[];
|
||||
extern struct list_head cgroup_roots;
|
||||
--- a/mm/Kconfig
|
||||
+++ b/mm/Kconfig
|
||||
@@ -1133,6 +1133,12 @@ config LRU_GEN
|
||||
help
|
||||
A high performance LRU implementation to overcommit memory.
|
||||
|
||||
+config LRU_GEN_ENABLED
|
||||
+ bool "Enable by default"
|
||||
+ depends on LRU_GEN
|
||||
+ help
|
||||
+ This option enables the multi-gen LRU by default.
|
||||
+
|
||||
config LRU_GEN_STATS
|
||||
bool "Full stats for debugging"
|
||||
depends on LRU_GEN
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -51,6 +51,7 @@
|
||||
#include <linux/psi.h>
|
||||
#include <linux/pagewalk.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
+#include <linux/ctype.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/div64.h>
|
||||
@@ -3082,6 +3083,14 @@ static bool can_age_anon_pages(struct pg
|
||||
|
||||
#ifdef CONFIG_LRU_GEN
|
||||
|
||||
+#ifdef CONFIG_LRU_GEN_ENABLED
|
||||
+DEFINE_STATIC_KEY_ARRAY_TRUE(lru_gen_caps, NR_LRU_GEN_CAPS);
|
||||
+#define get_cap(cap) static_branch_likely(&lru_gen_caps[cap])
|
||||
+#else
|
||||
+DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS);
|
||||
+#define get_cap(cap) static_branch_unlikely(&lru_gen_caps[cap])
|
||||
+#endif
|
||||
+
|
||||
/******************************************************************************
|
||||
* shorthand helpers
|
||||
******************************************************************************/
|
||||
@@ -3958,7 +3967,8 @@ static void walk_pmd_range_locked(pud_t
|
||||
goto next;
|
||||
|
||||
if (!pmd_trans_huge(pmd[i])) {
|
||||
- if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG))
|
||||
+ if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) &&
|
||||
+ get_cap(LRU_GEN_NONLEAF_YOUNG))
|
||||
pmdp_test_and_clear_young(vma, addr, pmd + i);
|
||||
goto next;
|
||||
}
|
||||
@@ -4056,10 +4066,12 @@ restart:
|
||||
walk->mm_stats[MM_NONLEAF_TOTAL]++;
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
|
||||
- if (!pmd_young(val))
|
||||
- continue;
|
||||
+ if (get_cap(LRU_GEN_NONLEAF_YOUNG)) {
|
||||
+ if (!pmd_young(val))
|
||||
+ continue;
|
||||
|
||||
- walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
|
||||
+ walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
|
||||
+ }
|
||||
#endif
|
||||
if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
|
||||
continue;
|
||||
@@ -4321,7 +4333,7 @@ static bool try_to_inc_max_seq(struct lr
|
||||
* handful of PTEs. Spreading the work out over a period of time usually
|
||||
* is less efficient, but it avoids bursty page faults.
|
||||
*/
|
||||
- if (!arch_has_hw_pte_young()) {
|
||||
+ if (!(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
|
||||
success = iterate_mm_list_nowalk(lruvec, max_seq);
|
||||
goto done;
|
||||
}
|
||||
@@ -5084,6 +5096,208 @@ done:
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
+ * state change
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+static bool __maybe_unused state_is_valid(struct lruvec *lruvec)
|
||||
+{
|
||||
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
+
|
||||
+ if (lrugen->enabled) {
|
||||
+ enum lru_list lru;
|
||||
+
|
||||
+ for_each_evictable_lru(lru) {
|
||||
+ if (!list_empty(&lruvec->lists[lru]))
|
||||
+ return false;
|
||||
+ }
|
||||
+ } else {
|
||||
+ int gen, type, zone;
|
||||
+
|
||||
+ for_each_gen_type_zone(gen, type, zone) {
|
||||
+ if (!list_empty(&lrugen->lists[gen][type][zone]))
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static bool fill_evictable(struct lruvec *lruvec)
|
||||
+{
|
||||
+ enum lru_list lru;
|
||||
+ int remaining = MAX_LRU_BATCH;
|
||||
+
|
||||
+ for_each_evictable_lru(lru) {
|
||||
+ int type = is_file_lru(lru);
|
||||
+ bool active = is_active_lru(lru);
|
||||
+ struct list_head *head = &lruvec->lists[lru];
|
||||
+
|
||||
+ while (!list_empty(head)) {
|
||||
+ bool success;
|
||||
+ struct folio *folio = lru_to_folio(head);
|
||||
+
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio) != active, folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_lru_gen(folio) != -1, folio);
|
||||
+
|
||||
+ lruvec_del_folio(lruvec, folio);
|
||||
+ success = lru_gen_add_folio(lruvec, folio, false);
|
||||
+ VM_WARN_ON_ONCE(!success);
|
||||
+
|
||||
+ if (!--remaining)
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static bool drain_evictable(struct lruvec *lruvec)
|
||||
+{
|
||||
+ int gen, type, zone;
|
||||
+ int remaining = MAX_LRU_BATCH;
|
||||
+
|
||||
+ for_each_gen_type_zone(gen, type, zone) {
|
||||
+ struct list_head *head = &lruvec->lrugen.lists[gen][type][zone];
|
||||
+
|
||||
+ while (!list_empty(head)) {
|
||||
+ bool success;
|
||||
+ struct folio *folio = lru_to_folio(head);
|
||||
+
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
|
||||
+
|
||||
+ success = lru_gen_del_folio(lruvec, folio, false);
|
||||
+ VM_WARN_ON_ONCE(!success);
|
||||
+ lruvec_add_folio(lruvec, folio);
|
||||
+
|
||||
+ if (!--remaining)
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_change_state(bool enabled)
|
||||
+{
|
||||
+ static DEFINE_MUTEX(state_mutex);
|
||||
+
|
||||
+ struct mem_cgroup *memcg;
|
||||
+
|
||||
+ cgroup_lock();
|
||||
+ cpus_read_lock();
|
||||
+ get_online_mems();
|
||||
+ mutex_lock(&state_mutex);
|
||||
+
|
||||
+ if (enabled == lru_gen_enabled())
|
||||
+ goto unlock;
|
||||
+
|
||||
+ if (enabled)
|
||||
+ static_branch_enable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]);
|
||||
+ else
|
||||
+ static_branch_disable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]);
|
||||
+
|
||||
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
|
||||
+ do {
|
||||
+ int nid;
|
||||
+
|
||||
+ for_each_node(nid) {
|
||||
+ struct lruvec *lruvec = get_lruvec(memcg, nid);
|
||||
+
|
||||
+ if (!lruvec)
|
||||
+ continue;
|
||||
+
|
||||
+ spin_lock_irq(&lruvec->lru_lock);
|
||||
+
|
||||
+ VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
|
||||
+ VM_WARN_ON_ONCE(!state_is_valid(lruvec));
|
||||
+
|
||||
+ lruvec->lrugen.enabled = enabled;
|
||||
+
|
||||
+ while (!(enabled ? fill_evictable(lruvec) : drain_evictable(lruvec))) {
|
||||
+ spin_unlock_irq(&lruvec->lru_lock);
|
||||
+ cond_resched();
|
||||
+ spin_lock_irq(&lruvec->lru_lock);
|
||||
+ }
|
||||
+
|
||||
+ spin_unlock_irq(&lruvec->lru_lock);
|
||||
+ }
|
||||
+
|
||||
+ cond_resched();
|
||||
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
|
||||
+unlock:
|
||||
+ mutex_unlock(&state_mutex);
|
||||
+ put_online_mems();
|
||||
+ cpus_read_unlock();
|
||||
+ cgroup_unlock();
|
||||
+}
|
||||
+
|
||||
+/******************************************************************************
|
||||
+ * sysfs interface
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
|
||||
+{
|
||||
+ unsigned int caps = 0;
|
||||
+
|
||||
+ if (get_cap(LRU_GEN_CORE))
|
||||
+ caps |= BIT(LRU_GEN_CORE);
|
||||
+
|
||||
+ if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))
|
||||
+ caps |= BIT(LRU_GEN_MM_WALK);
|
||||
+
|
||||
+ if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG))
|
||||
+ caps |= BIT(LRU_GEN_NONLEAF_YOUNG);
|
||||
+
|
||||
+ return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);
|
||||
+}
|
||||
+
|
||||
+static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
+ const char *buf, size_t len)
|
||||
+{
|
||||
+ int i;
|
||||
+ unsigned int caps;
|
||||
+
|
||||
+ if (tolower(*buf) == 'n')
|
||||
+ caps = 0;
|
||||
+ else if (tolower(*buf) == 'y')
|
||||
+ caps = -1;
|
||||
+ else if (kstrtouint(buf, 0, &caps))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ for (i = 0; i < NR_LRU_GEN_CAPS; i++) {
|
||||
+ bool enabled = caps & BIT(i);
|
||||
+
|
||||
+ if (i == LRU_GEN_CORE)
|
||||
+ lru_gen_change_state(enabled);
|
||||
+ else if (enabled)
|
||||
+ static_branch_enable(&lru_gen_caps[i]);
|
||||
+ else
|
||||
+ static_branch_disable(&lru_gen_caps[i]);
|
||||
+ }
|
||||
+
|
||||
+ return len;
|
||||
+}
|
||||
+
|
||||
+static struct kobj_attribute lru_gen_enabled_attr = __ATTR(
|
||||
+ enabled, 0644, show_enabled, store_enabled
|
||||
+);
|
||||
+
|
||||
+static struct attribute *lru_gen_attrs[] = {
|
||||
+ &lru_gen_enabled_attr.attr,
|
||||
+ NULL
|
||||
+};
|
||||
+
|
||||
+static struct attribute_group lru_gen_attr_group = {
|
||||
+ .name = "lru_gen",
|
||||
+ .attrs = lru_gen_attrs,
|
||||
+};
|
||||
+
|
||||
+/******************************************************************************
|
||||
* initialization
|
||||
******************************************************************************/
|
||||
|
||||
@@ -5093,6 +5307,7 @@ void lru_gen_init_lruvec(struct lruvec *
|
||||
struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
|
||||
lrugen->max_seq = MIN_NR_GENS + 1;
|
||||
+ lrugen->enabled = lru_gen_enabled();
|
||||
|
||||
for_each_gen_type_zone(gen, type, zone)
|
||||
INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
|
||||
@@ -5132,6 +5347,9 @@ static int __init init_lru_gen(void)
|
||||
BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
|
||||
BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
|
||||
|
||||
+ if (sysfs_create_group(mm_kobj, &lru_gen_attr_group))
|
||||
+ pr_err("lru_gen: failed to create sysfs group\n");
|
||||
+
|
||||
return 0;
|
||||
};
|
||||
late_initcall(init_lru_gen);
|
@ -1,202 +0,0 @@
|
||||
From 9d92c76fb8ac09ff195024139575d8c4db66b672 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Thu, 27 Jan 2022 20:08:50 -0700
|
||||
Subject: [PATCH 11/14] mm: multi-gen LRU: thrashing prevention
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Add /sys/kernel/mm/lru_gen/min_ttl_ms for thrashing prevention, as
|
||||
requested by many desktop users [1].
|
||||
|
||||
When set to value N, it prevents the working set of N milliseconds
|
||||
from getting evicted. The OOM killer is triggered if this working set
|
||||
cannot be kept in memory. Based on the average human detectable lag
|
||||
(~100ms), N=1000 usually eliminates intolerable lags due to thrashing.
|
||||
Larger values like N=3000 make lags less noticeable at the risk of
|
||||
premature OOM kills.
|
||||
|
||||
Compared with the size-based approach [2], this time-based approach
|
||||
has the following advantages:
|
||||
1. It is easier to configure because it is agnostic to applications
|
||||
and memory sizes.
|
||||
2. It is more reliable because it is directly wired to the OOM killer.
|
||||
|
||||
[1] https://lore.kernel.org/r/Ydza%2FzXKY9ATRoh6@google.com/
|
||||
[2] https://lore.kernel.org/r/20101028191523.GA14972@google.com/
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I007499d7e47374b59fd620e8c3962940bc9f788e
|
||||
---
|
||||
include/linux/mmzone.h | 2 ++
|
||||
mm/vmscan.c | 74 ++++++++++++++++++++++++++++++++++++++++--
|
||||
2 files changed, 73 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/linux/mmzone.h
|
||||
+++ b/include/linux/mmzone.h
|
||||
@@ -419,6 +419,8 @@ struct lru_gen_struct {
|
||||
unsigned long max_seq;
|
||||
/* the eviction increments the oldest generation numbers */
|
||||
unsigned long min_seq[ANON_AND_FILE];
|
||||
+ /* the birth time of each generation in jiffies */
|
||||
+ unsigned long timestamps[MAX_NR_GENS];
|
||||
/* the multi-gen LRU lists, lazily sorted on eviction */
|
||||
struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
|
||||
/* the multi-gen LRU sizes, eventually consistent */
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -4305,6 +4305,7 @@ static void inc_max_seq(struct lruvec *l
|
||||
for (type = 0; type < ANON_AND_FILE; type++)
|
||||
reset_ctrl_pos(lruvec, type, false);
|
||||
|
||||
+ WRITE_ONCE(lrugen->timestamps[next], jiffies);
|
||||
/* make sure preceding modifications appear */
|
||||
smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1);
|
||||
|
||||
@@ -4432,7 +4433,7 @@ static bool should_run_aging(struct lruv
|
||||
return false;
|
||||
}
|
||||
|
||||
-static void age_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
+static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl)
|
||||
{
|
||||
bool need_aging;
|
||||
unsigned long nr_to_scan;
|
||||
@@ -4446,16 +4447,36 @@ static void age_lruvec(struct lruvec *lr
|
||||
mem_cgroup_calculate_protection(NULL, memcg);
|
||||
|
||||
if (mem_cgroup_below_min(memcg))
|
||||
- return;
|
||||
+ return false;
|
||||
|
||||
need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan);
|
||||
+
|
||||
+ if (min_ttl) {
|
||||
+ int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
|
||||
+ unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
|
||||
+
|
||||
+ if (time_is_after_jiffies(birth + min_ttl))
|
||||
+ return false;
|
||||
+
|
||||
+ /* the size is likely too small to be helpful */
|
||||
+ if (!nr_to_scan && sc->priority != DEF_PRIORITY)
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
if (need_aging)
|
||||
try_to_inc_max_seq(lruvec, max_seq, sc, swappiness);
|
||||
+
|
||||
+ return true;
|
||||
}
|
||||
|
||||
+/* to protect the working set of the last N jiffies */
|
||||
+static unsigned long lru_gen_min_ttl __read_mostly;
|
||||
+
|
||||
static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
+ bool success = false;
|
||||
+ unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl);
|
||||
|
||||
VM_WARN_ON_ONCE(!current_is_kswapd());
|
||||
|
||||
@@ -4478,12 +4499,32 @@ static void lru_gen_age_node(struct pgli
|
||||
do {
|
||||
struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
|
||||
|
||||
- age_lruvec(lruvec, sc);
|
||||
+ if (age_lruvec(lruvec, sc, min_ttl))
|
||||
+ success = true;
|
||||
|
||||
cond_resched();
|
||||
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
|
||||
|
||||
clear_mm_walk();
|
||||
+
|
||||
+ /* check the order to exclude compaction-induced reclaim */
|
||||
+ if (success || !min_ttl || sc->order)
|
||||
+ return;
|
||||
+
|
||||
+ /*
|
||||
+ * The main goal is to OOM kill if every generation from all memcgs is
|
||||
+ * younger than min_ttl. However, another possibility is all memcgs are
|
||||
+ * either below min or empty.
|
||||
+ */
|
||||
+ if (mutex_trylock(&oom_lock)) {
|
||||
+ struct oom_control oc = {
|
||||
+ .gfp_mask = sc->gfp_mask,
|
||||
+ };
|
||||
+
|
||||
+ out_of_memory(&oc);
|
||||
+
|
||||
+ mutex_unlock(&oom_lock);
|
||||
+ }
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -5240,6 +5281,28 @@ unlock:
|
||||
* sysfs interface
|
||||
******************************************************************************/
|
||||
|
||||
+static ssize_t show_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
|
||||
+{
|
||||
+ return sprintf(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl)));
|
||||
+}
|
||||
+
|
||||
+static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
+ const char *buf, size_t len)
|
||||
+{
|
||||
+ unsigned int msecs;
|
||||
+
|
||||
+ if (kstrtouint(buf, 0, &msecs))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ WRITE_ONCE(lru_gen_min_ttl, msecs_to_jiffies(msecs));
|
||||
+
|
||||
+ return len;
|
||||
+}
|
||||
+
|
||||
+static struct kobj_attribute lru_gen_min_ttl_attr = __ATTR(
|
||||
+ min_ttl_ms, 0644, show_min_ttl, store_min_ttl
|
||||
+);
|
||||
+
|
||||
static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
unsigned int caps = 0;
|
||||
@@ -5288,6 +5351,7 @@ static struct kobj_attribute lru_gen_ena
|
||||
);
|
||||
|
||||
static struct attribute *lru_gen_attrs[] = {
|
||||
+ &lru_gen_min_ttl_attr.attr,
|
||||
&lru_gen_enabled_attr.attr,
|
||||
NULL
|
||||
};
|
||||
@@ -5303,12 +5367,16 @@ static struct attribute_group lru_gen_at
|
||||
|
||||
void lru_gen_init_lruvec(struct lruvec *lruvec)
|
||||
{
|
||||
+ int i;
|
||||
int gen, type, zone;
|
||||
struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
|
||||
lrugen->max_seq = MIN_NR_GENS + 1;
|
||||
lrugen->enabled = lru_gen_enabled();
|
||||
|
||||
+ for (i = 0; i <= MIN_NR_GENS + 1; i++)
|
||||
+ lrugen->timestamps[i] = jiffies;
|
||||
+
|
||||
for_each_gen_type_zone(gen, type, zone)
|
||||
INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
|
||||
|
@ -1,557 +0,0 @@
|
||||
From d1e0e5fcdea16d4ceead496a0ea2fdbb6bc5bfe4 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Thu, 27 Jan 2022 20:12:41 -0700
|
||||
Subject: [PATCH 12/14] mm: multi-gen LRU: debugfs interface
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Add /sys/kernel/debug/lru_gen for working set estimation and proactive
|
||||
reclaim. These techniques are commonly used to optimize job scheduling
|
||||
(bin packing) in data centers [1][2].
|
||||
|
||||
Compared with the page table-based approach and the PFN-based
|
||||
approach, this lruvec-based approach has the following advantages:
|
||||
1. It offers better choices because it is aware of memcgs, NUMA nodes,
|
||||
shared mappings and unmapped page cache.
|
||||
2. It is more scalable because it is O(nr_hot_pages), whereas the
|
||||
PFN-based approach is O(nr_total_pages).
|
||||
|
||||
Add /sys/kernel/debug/lru_gen_full for debugging.
|
||||
|
||||
[1] https://dl.acm.org/doi/10.1145/3297858.3304053
|
||||
[2] https://dl.acm.org/doi/10.1145/3503222.3507731
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Reviewed-by: Qi Zheng <zhengqi.arch@bytedance.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I7bb06f14e0a94901a076cc3767d0855d4f1ea3ab
|
||||
---
|
||||
include/linux/nodemask.h | 1 +
|
||||
mm/vmscan.c | 411 ++++++++++++++++++++++++++++++++++++++-
|
||||
2 files changed, 402 insertions(+), 10 deletions(-)
|
||||
|
||||
--- a/include/linux/nodemask.h
|
||||
+++ b/include/linux/nodemask.h
|
||||
@@ -493,6 +493,7 @@ static inline int num_node_state(enum no
|
||||
#define first_online_node 0
|
||||
#define first_memory_node 0
|
||||
#define next_online_node(nid) (MAX_NUMNODES)
|
||||
+#define next_memory_node(nid) (MAX_NUMNODES)
|
||||
#define nr_node_ids 1U
|
||||
#define nr_online_nodes 1U
|
||||
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -52,6 +52,7 @@
|
||||
#include <linux/pagewalk.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
#include <linux/ctype.h>
|
||||
+#include <linux/debugfs.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/div64.h>
|
||||
@@ -4209,12 +4210,40 @@ static void clear_mm_walk(void)
|
||||
kfree(walk);
|
||||
}
|
||||
|
||||
-static void inc_min_seq(struct lruvec *lruvec, int type)
|
||||
+static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
|
||||
{
|
||||
+ int zone;
|
||||
+ int remaining = MAX_LRU_BATCH;
|
||||
struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
+ int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
|
||||
+
|
||||
+ if (type == LRU_GEN_ANON && !can_swap)
|
||||
+ goto done;
|
||||
+
|
||||
+ /* prevent cold/hot inversion if force_scan is true */
|
||||
+ for (zone = 0; zone < MAX_NR_ZONES; zone++) {
|
||||
+ struct list_head *head = &lrugen->lists[old_gen][type][zone];
|
||||
+
|
||||
+ while (!list_empty(head)) {
|
||||
+ struct folio *folio = lru_to_folio(head);
|
||||
+
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio);
|
||||
+ VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
|
||||
|
||||
+ new_gen = folio_inc_gen(lruvec, folio, false);
|
||||
+ list_move_tail(&folio->lru, &lrugen->lists[new_gen][type][zone]);
|
||||
+
|
||||
+ if (!--remaining)
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+done:
|
||||
reset_ctrl_pos(lruvec, type, true);
|
||||
WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1);
|
||||
+
|
||||
+ return true;
|
||||
}
|
||||
|
||||
static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
|
||||
@@ -4260,7 +4289,7 @@ next:
|
||||
return success;
|
||||
}
|
||||
|
||||
-static void inc_max_seq(struct lruvec *lruvec, bool can_swap)
|
||||
+static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan)
|
||||
{
|
||||
int prev, next;
|
||||
int type, zone;
|
||||
@@ -4274,9 +4303,13 @@ static void inc_max_seq(struct lruvec *l
|
||||
if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
|
||||
continue;
|
||||
|
||||
- VM_WARN_ON_ONCE(type == LRU_GEN_FILE || can_swap);
|
||||
+ VM_WARN_ON_ONCE(!force_scan && (type == LRU_GEN_FILE || can_swap));
|
||||
|
||||
- inc_min_seq(lruvec, type);
|
||||
+ while (!inc_min_seq(lruvec, type, can_swap)) {
|
||||
+ spin_unlock_irq(&lruvec->lru_lock);
|
||||
+ cond_resched();
|
||||
+ spin_lock_irq(&lruvec->lru_lock);
|
||||
+ }
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -4313,7 +4346,7 @@ static void inc_max_seq(struct lruvec *l
|
||||
}
|
||||
|
||||
static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
|
||||
- struct scan_control *sc, bool can_swap)
|
||||
+ struct scan_control *sc, bool can_swap, bool force_scan)
|
||||
{
|
||||
bool success;
|
||||
struct lru_gen_mm_walk *walk;
|
||||
@@ -4334,7 +4367,7 @@ static bool try_to_inc_max_seq(struct lr
|
||||
* handful of PTEs. Spreading the work out over a period of time usually
|
||||
* is less efficient, but it avoids bursty page faults.
|
||||
*/
|
||||
- if (!(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
|
||||
+ if (!force_scan && !(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
|
||||
success = iterate_mm_list_nowalk(lruvec, max_seq);
|
||||
goto done;
|
||||
}
|
||||
@@ -4348,7 +4381,7 @@ static bool try_to_inc_max_seq(struct lr
|
||||
walk->lruvec = lruvec;
|
||||
walk->max_seq = max_seq;
|
||||
walk->can_swap = can_swap;
|
||||
- walk->force_scan = false;
|
||||
+ walk->force_scan = force_scan;
|
||||
|
||||
do {
|
||||
success = iterate_mm_list(lruvec, walk, &mm);
|
||||
@@ -4368,7 +4401,7 @@ done:
|
||||
|
||||
VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq));
|
||||
|
||||
- inc_max_seq(lruvec, can_swap);
|
||||
+ inc_max_seq(lruvec, can_swap, force_scan);
|
||||
/* either this sees any waiters or they will see updated max_seq */
|
||||
if (wq_has_sleeper(&lruvec->mm_state.wait))
|
||||
wake_up_all(&lruvec->mm_state.wait);
|
||||
@@ -4464,7 +4497,7 @@ static bool age_lruvec(struct lruvec *lr
|
||||
}
|
||||
|
||||
if (need_aging)
|
||||
- try_to_inc_max_seq(lruvec, max_seq, sc, swappiness);
|
||||
+ try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false);
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -5023,7 +5056,7 @@ static unsigned long get_nr_to_scan(stru
|
||||
if (current_is_kswapd())
|
||||
return 0;
|
||||
|
||||
- if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap))
|
||||
+ if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false))
|
||||
return nr_to_scan;
|
||||
done:
|
||||
return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
|
||||
@@ -5362,6 +5395,361 @@ static struct attribute_group lru_gen_at
|
||||
};
|
||||
|
||||
/******************************************************************************
|
||||
+ * debugfs interface
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos)
|
||||
+{
|
||||
+ struct mem_cgroup *memcg;
|
||||
+ loff_t nr_to_skip = *pos;
|
||||
+
|
||||
+ m->private = kvmalloc(PATH_MAX, GFP_KERNEL);
|
||||
+ if (!m->private)
|
||||
+ return ERR_PTR(-ENOMEM);
|
||||
+
|
||||
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
|
||||
+ do {
|
||||
+ int nid;
|
||||
+
|
||||
+ for_each_node_state(nid, N_MEMORY) {
|
||||
+ if (!nr_to_skip--)
|
||||
+ return get_lruvec(memcg, nid);
|
||||
+ }
|
||||
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
|
||||
+
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_seq_stop(struct seq_file *m, void *v)
|
||||
+{
|
||||
+ if (!IS_ERR_OR_NULL(v))
|
||||
+ mem_cgroup_iter_break(NULL, lruvec_memcg(v));
|
||||
+
|
||||
+ kvfree(m->private);
|
||||
+ m->private = NULL;
|
||||
+}
|
||||
+
|
||||
+static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos)
|
||||
+{
|
||||
+ int nid = lruvec_pgdat(v)->node_id;
|
||||
+ struct mem_cgroup *memcg = lruvec_memcg(v);
|
||||
+
|
||||
+ ++*pos;
|
||||
+
|
||||
+ nid = next_memory_node(nid);
|
||||
+ if (nid == MAX_NUMNODES) {
|
||||
+ memcg = mem_cgroup_iter(NULL, memcg, NULL);
|
||||
+ if (!memcg)
|
||||
+ return NULL;
|
||||
+
|
||||
+ nid = first_memory_node;
|
||||
+ }
|
||||
+
|
||||
+ return get_lruvec(memcg, nid);
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
|
||||
+ unsigned long max_seq, unsigned long *min_seq,
|
||||
+ unsigned long seq)
|
||||
+{
|
||||
+ int i;
|
||||
+ int type, tier;
|
||||
+ int hist = lru_hist_from_seq(seq);
|
||||
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
+
|
||||
+ for (tier = 0; tier < MAX_NR_TIERS; tier++) {
|
||||
+ seq_printf(m, " %10d", tier);
|
||||
+ for (type = 0; type < ANON_AND_FILE; type++) {
|
||||
+ const char *s = " ";
|
||||
+ unsigned long n[3] = {};
|
||||
+
|
||||
+ if (seq == max_seq) {
|
||||
+ s = "RT ";
|
||||
+ n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]);
|
||||
+ n[1] = READ_ONCE(lrugen->avg_total[type][tier]);
|
||||
+ } else if (seq == min_seq[type] || NR_HIST_GENS > 1) {
|
||||
+ s = "rep";
|
||||
+ n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]);
|
||||
+ n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]);
|
||||
+ if (tier)
|
||||
+ n[2] = READ_ONCE(lrugen->protected[hist][type][tier - 1]);
|
||||
+ }
|
||||
+
|
||||
+ for (i = 0; i < 3; i++)
|
||||
+ seq_printf(m, " %10lu%c", n[i], s[i]);
|
||||
+ }
|
||||
+ seq_putc(m, '\n');
|
||||
+ }
|
||||
+
|
||||
+ seq_puts(m, " ");
|
||||
+ for (i = 0; i < NR_MM_STATS; i++) {
|
||||
+ const char *s = " ";
|
||||
+ unsigned long n = 0;
|
||||
+
|
||||
+ if (seq == max_seq && NR_HIST_GENS == 1) {
|
||||
+ s = "LOYNFA";
|
||||
+ n = READ_ONCE(lruvec->mm_state.stats[hist][i]);
|
||||
+ } else if (seq != max_seq && NR_HIST_GENS > 1) {
|
||||
+ s = "loynfa";
|
||||
+ n = READ_ONCE(lruvec->mm_state.stats[hist][i]);
|
||||
+ }
|
||||
+
|
||||
+ seq_printf(m, " %10lu%c", n, s[i]);
|
||||
+ }
|
||||
+ seq_putc(m, '\n');
|
||||
+}
|
||||
+
|
||||
+static int lru_gen_seq_show(struct seq_file *m, void *v)
|
||||
+{
|
||||
+ unsigned long seq;
|
||||
+ bool full = !debugfs_real_fops(m->file)->write;
|
||||
+ struct lruvec *lruvec = v;
|
||||
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
|
||||
+ int nid = lruvec_pgdat(lruvec)->node_id;
|
||||
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
+ DEFINE_MAX_SEQ(lruvec);
|
||||
+ DEFINE_MIN_SEQ(lruvec);
|
||||
+
|
||||
+ if (nid == first_memory_node) {
|
||||
+ const char *path = memcg ? m->private : "";
|
||||
+
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+ if (memcg)
|
||||
+ cgroup_path(memcg->css.cgroup, m->private, PATH_MAX);
|
||||
+#endif
|
||||
+ seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), path);
|
||||
+ }
|
||||
+
|
||||
+ seq_printf(m, " node %5d\n", nid);
|
||||
+
|
||||
+ if (!full)
|
||||
+ seq = min_seq[LRU_GEN_ANON];
|
||||
+ else if (max_seq >= MAX_NR_GENS)
|
||||
+ seq = max_seq - MAX_NR_GENS + 1;
|
||||
+ else
|
||||
+ seq = 0;
|
||||
+
|
||||
+ for (; seq <= max_seq; seq++) {
|
||||
+ int type, zone;
|
||||
+ int gen = lru_gen_from_seq(seq);
|
||||
+ unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
|
||||
+
|
||||
+ seq_printf(m, " %10lu %10u", seq, jiffies_to_msecs(jiffies - birth));
|
||||
+
|
||||
+ for (type = 0; type < ANON_AND_FILE; type++) {
|
||||
+ unsigned long size = 0;
|
||||
+ char mark = full && seq < min_seq[type] ? 'x' : ' ';
|
||||
+
|
||||
+ for (zone = 0; zone < MAX_NR_ZONES; zone++)
|
||||
+ size += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
|
||||
+
|
||||
+ seq_printf(m, " %10lu%c", size, mark);
|
||||
+ }
|
||||
+
|
||||
+ seq_putc(m, '\n');
|
||||
+
|
||||
+ if (full)
|
||||
+ lru_gen_seq_show_full(m, lruvec, max_seq, min_seq, seq);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static const struct seq_operations lru_gen_seq_ops = {
|
||||
+ .start = lru_gen_seq_start,
|
||||
+ .stop = lru_gen_seq_stop,
|
||||
+ .next = lru_gen_seq_next,
|
||||
+ .show = lru_gen_seq_show,
|
||||
+};
|
||||
+
|
||||
+static int run_aging(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc,
|
||||
+ bool can_swap, bool force_scan)
|
||||
+{
|
||||
+ DEFINE_MAX_SEQ(lruvec);
|
||||
+ DEFINE_MIN_SEQ(lruvec);
|
||||
+
|
||||
+ if (seq < max_seq)
|
||||
+ return 0;
|
||||
+
|
||||
+ if (seq > max_seq)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ if (!force_scan && min_seq[!can_swap] + MAX_NR_GENS - 1 <= max_seq)
|
||||
+ return -ERANGE;
|
||||
+
|
||||
+ try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, force_scan);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc,
|
||||
+ int swappiness, unsigned long nr_to_reclaim)
|
||||
+{
|
||||
+ DEFINE_MAX_SEQ(lruvec);
|
||||
+
|
||||
+ if (seq + MIN_NR_GENS > max_seq)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ sc->nr_reclaimed = 0;
|
||||
+
|
||||
+ while (!signal_pending(current)) {
|
||||
+ DEFINE_MIN_SEQ(lruvec);
|
||||
+
|
||||
+ if (seq < min_seq[!swappiness])
|
||||
+ return 0;
|
||||
+
|
||||
+ if (sc->nr_reclaimed >= nr_to_reclaim)
|
||||
+ return 0;
|
||||
+
|
||||
+ if (!evict_folios(lruvec, sc, swappiness, NULL))
|
||||
+ return 0;
|
||||
+
|
||||
+ cond_resched();
|
||||
+ }
|
||||
+
|
||||
+ return -EINTR;
|
||||
+}
|
||||
+
|
||||
+static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq,
|
||||
+ struct scan_control *sc, int swappiness, unsigned long opt)
|
||||
+{
|
||||
+ struct lruvec *lruvec;
|
||||
+ int err = -EINVAL;
|
||||
+ struct mem_cgroup *memcg = NULL;
|
||||
+
|
||||
+ if (nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ if (!mem_cgroup_disabled()) {
|
||||
+ rcu_read_lock();
|
||||
+ memcg = mem_cgroup_from_id(memcg_id);
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+ if (memcg && !css_tryget(&memcg->css))
|
||||
+ memcg = NULL;
|
||||
+#endif
|
||||
+ rcu_read_unlock();
|
||||
+
|
||||
+ if (!memcg)
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ if (memcg_id != mem_cgroup_id(memcg))
|
||||
+ goto done;
|
||||
+
|
||||
+ lruvec = get_lruvec(memcg, nid);
|
||||
+
|
||||
+ if (swappiness < 0)
|
||||
+ swappiness = get_swappiness(lruvec, sc);
|
||||
+ else if (swappiness > 200)
|
||||
+ goto done;
|
||||
+
|
||||
+ switch (cmd) {
|
||||
+ case '+':
|
||||
+ err = run_aging(lruvec, seq, sc, swappiness, opt);
|
||||
+ break;
|
||||
+ case '-':
|
||||
+ err = run_eviction(lruvec, seq, sc, swappiness, opt);
|
||||
+ break;
|
||||
+ }
|
||||
+done:
|
||||
+ mem_cgroup_put(memcg);
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
|
||||
+ size_t len, loff_t *pos)
|
||||
+{
|
||||
+ void *buf;
|
||||
+ char *cur, *next;
|
||||
+ unsigned int flags;
|
||||
+ struct blk_plug plug;
|
||||
+ int err = -EINVAL;
|
||||
+ struct scan_control sc = {
|
||||
+ .may_writepage = true,
|
||||
+ .may_unmap = true,
|
||||
+ .may_swap = true,
|
||||
+ .reclaim_idx = MAX_NR_ZONES - 1,
|
||||
+ .gfp_mask = GFP_KERNEL,
|
||||
+ };
|
||||
+
|
||||
+ buf = kvmalloc(len + 1, GFP_KERNEL);
|
||||
+ if (!buf)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ if (copy_from_user(buf, src, len)) {
|
||||
+ kvfree(buf);
|
||||
+ return -EFAULT;
|
||||
+ }
|
||||
+
|
||||
+ set_task_reclaim_state(current, &sc.reclaim_state);
|
||||
+ flags = memalloc_noreclaim_save();
|
||||
+ blk_start_plug(&plug);
|
||||
+ if (!set_mm_walk(NULL)) {
|
||||
+ err = -ENOMEM;
|
||||
+ goto done;
|
||||
+ }
|
||||
+
|
||||
+ next = buf;
|
||||
+ next[len] = '\0';
|
||||
+
|
||||
+ while ((cur = strsep(&next, ",;\n"))) {
|
||||
+ int n;
|
||||
+ int end;
|
||||
+ char cmd;
|
||||
+ unsigned int memcg_id;
|
||||
+ unsigned int nid;
|
||||
+ unsigned long seq;
|
||||
+ unsigned int swappiness = -1;
|
||||
+ unsigned long opt = -1;
|
||||
+
|
||||
+ cur = skip_spaces(cur);
|
||||
+ if (!*cur)
|
||||
+ continue;
|
||||
+
|
||||
+ n = sscanf(cur, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id, &nid,
|
||||
+ &seq, &end, &swappiness, &end, &opt, &end);
|
||||
+ if (n < 4 || cur[end]) {
|
||||
+ err = -EINVAL;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ err = run_cmd(cmd, memcg_id, nid, seq, &sc, swappiness, opt);
|
||||
+ if (err)
|
||||
+ break;
|
||||
+ }
|
||||
+done:
|
||||
+ clear_mm_walk();
|
||||
+ blk_finish_plug(&plug);
|
||||
+ memalloc_noreclaim_restore(flags);
|
||||
+ set_task_reclaim_state(current, NULL);
|
||||
+
|
||||
+ kvfree(buf);
|
||||
+
|
||||
+ return err ? : len;
|
||||
+}
|
||||
+
|
||||
+static int lru_gen_seq_open(struct inode *inode, struct file *file)
|
||||
+{
|
||||
+ return seq_open(file, &lru_gen_seq_ops);
|
||||
+}
|
||||
+
|
||||
+static const struct file_operations lru_gen_rw_fops = {
|
||||
+ .open = lru_gen_seq_open,
|
||||
+ .read = seq_read,
|
||||
+ .write = lru_gen_seq_write,
|
||||
+ .llseek = seq_lseek,
|
||||
+ .release = seq_release,
|
||||
+};
|
||||
+
|
||||
+static const struct file_operations lru_gen_ro_fops = {
|
||||
+ .open = lru_gen_seq_open,
|
||||
+ .read = seq_read,
|
||||
+ .llseek = seq_lseek,
|
||||
+ .release = seq_release,
|
||||
+};
|
||||
+
|
||||
+/******************************************************************************
|
||||
* initialization
|
||||
******************************************************************************/
|
||||
|
||||
@@ -5418,6 +5806,9 @@ static int __init init_lru_gen(void)
|
||||
if (sysfs_create_group(mm_kobj, &lru_gen_attr_group))
|
||||
pr_err("lru_gen: failed to create sysfs group\n");
|
||||
|
||||
+ debugfs_create_file("lru_gen", 0644, NULL, NULL, &lru_gen_rw_fops);
|
||||
+ debugfs_create_file("lru_gen_full", 0444, NULL, NULL, &lru_gen_ro_fops);
|
||||
+
|
||||
return 0;
|
||||
};
|
||||
late_initcall(init_lru_gen);
|
@ -1,253 +0,0 @@
|
||||
From 22199c9b30ffcc332be643577709a2af960e6786 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Sun, 23 Jan 2022 16:44:43 -0700
|
||||
Subject: [PATCH 13/14] mm: multi-gen LRU: admin guide
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Add an admin guide.
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I1902178bcbb5adfa0a748c4d284a6456059bdd7e
|
||||
---
|
||||
Documentation/admin-guide/mm/index.rst | 1 +
|
||||
Documentation/admin-guide/mm/multigen_lru.rst | 162 ++++++++++++++++++
|
||||
mm/Kconfig | 3 +-
|
||||
mm/vmscan.c | 4 +
|
||||
4 files changed, 169 insertions(+), 1 deletion(-)
|
||||
create mode 100644 Documentation/admin-guide/mm/multigen_lru.rst
|
||||
|
||||
--- a/Documentation/admin-guide/mm/index.rst
|
||||
+++ b/Documentation/admin-guide/mm/index.rst
|
||||
@@ -32,6 +32,7 @@ the Linux memory management.
|
||||
idle_page_tracking
|
||||
ksm
|
||||
memory-hotplug
|
||||
+ multigen_lru
|
||||
nommu-mmap
|
||||
numa_memory_policy
|
||||
numaperf
|
||||
--- /dev/null
|
||||
+++ b/Documentation/admin-guide/mm/multigen_lru.rst
|
||||
@@ -0,0 +1,162 @@
|
||||
+.. SPDX-License-Identifier: GPL-2.0
|
||||
+
|
||||
+=============
|
||||
+Multi-Gen LRU
|
||||
+=============
|
||||
+The multi-gen LRU is an alternative LRU implementation that optimizes
|
||||
+page reclaim and improves performance under memory pressure. Page
|
||||
+reclaim decides the kernel's caching policy and ability to overcommit
|
||||
+memory. It directly impacts the kswapd CPU usage and RAM efficiency.
|
||||
+
|
||||
+Quick start
|
||||
+===========
|
||||
+Build the kernel with the following configurations.
|
||||
+
|
||||
+* ``CONFIG_LRU_GEN=y``
|
||||
+* ``CONFIG_LRU_GEN_ENABLED=y``
|
||||
+
|
||||
+All set!
|
||||
+
|
||||
+Runtime options
|
||||
+===============
|
||||
+``/sys/kernel/mm/lru_gen/`` contains stable ABIs described in the
|
||||
+following subsections.
|
||||
+
|
||||
+Kill switch
|
||||
+-----------
|
||||
+``enabled`` accepts different values to enable or disable the
|
||||
+following components. Its default value depends on
|
||||
+``CONFIG_LRU_GEN_ENABLED``. All the components should be enabled
|
||||
+unless some of them have unforeseen side effects. Writing to
|
||||
+``enabled`` has no effect when a component is not supported by the
|
||||
+hardware, and valid values will be accepted even when the main switch
|
||||
+is off.
|
||||
+
|
||||
+====== ===============================================================
|
||||
+Values Components
|
||||
+====== ===============================================================
|
||||
+0x0001 The main switch for the multi-gen LRU.
|
||||
+0x0002 Clearing the accessed bit in leaf page table entries in large
|
||||
+ batches, when MMU sets it (e.g., on x86). This behavior can
|
||||
+ theoretically worsen lock contention (mmap_lock). If it is
|
||||
+ disabled, the multi-gen LRU will suffer a minor performance
|
||||
+ degradation for workloads that contiguously map hot pages,
|
||||
+ whose accessed bits can be otherwise cleared by fewer larger
|
||||
+ batches.
|
||||
+0x0004 Clearing the accessed bit in non-leaf page table entries as
|
||||
+ well, when MMU sets it (e.g., on x86). This behavior was not
|
||||
+ verified on x86 varieties other than Intel and AMD. If it is
|
||||
+ disabled, the multi-gen LRU will suffer a negligible
|
||||
+ performance degradation.
|
||||
+[yYnN] Apply to all the components above.
|
||||
+====== ===============================================================
|
||||
+
|
||||
+E.g.,
|
||||
+::
|
||||
+
|
||||
+ echo y >/sys/kernel/mm/lru_gen/enabled
|
||||
+ cat /sys/kernel/mm/lru_gen/enabled
|
||||
+ 0x0007
|
||||
+ echo 5 >/sys/kernel/mm/lru_gen/enabled
|
||||
+ cat /sys/kernel/mm/lru_gen/enabled
|
||||
+ 0x0005
|
||||
+
|
||||
+Thrashing prevention
|
||||
+--------------------
|
||||
+Personal computers are more sensitive to thrashing because it can
|
||||
+cause janks (lags when rendering UI) and negatively impact user
|
||||
+experience. The multi-gen LRU offers thrashing prevention to the
|
||||
+majority of laptop and desktop users who do not have ``oomd``.
|
||||
+
|
||||
+Users can write ``N`` to ``min_ttl_ms`` to prevent the working set of
|
||||
+``N`` milliseconds from getting evicted. The OOM killer is triggered
|
||||
+if this working set cannot be kept in memory. In other words, this
|
||||
+option works as an adjustable pressure relief valve, and when open, it
|
||||
+terminates applications that are hopefully not being used.
|
||||
+
|
||||
+Based on the average human detectable lag (~100ms), ``N=1000`` usually
|
||||
+eliminates intolerable janks due to thrashing. Larger values like
|
||||
+``N=3000`` make janks less noticeable at the risk of premature OOM
|
||||
+kills.
|
||||
+
|
||||
+The default value ``0`` means disabled.
|
||||
+
|
||||
+Experimental features
|
||||
+=====================
|
||||
+``/sys/kernel/debug/lru_gen`` accepts commands described in the
|
||||
+following subsections. Multiple command lines are supported, as is
|
||||
+concatenation with delimiters ``,`` and ``;``.
|
||||
+
|
||||
+``/sys/kernel/debug/lru_gen_full`` provides additional stats for
|
||||
+debugging. ``CONFIG_LRU_GEN_STATS=y`` keeps historical stats from
|
||||
+evicted generations in this file.
|
||||
+
|
||||
+Working set estimation
|
||||
+----------------------
|
||||
+Working set estimation measures how much memory an application needs
|
||||
+in a given time interval, and it is usually done with little impact on
|
||||
+the performance of the application. E.g., data centers want to
|
||||
+optimize job scheduling (bin packing) to improve memory utilization.
|
||||
+When a new job comes in, the job scheduler needs to find out whether
|
||||
+each server it manages can allocate a certain amount of memory for
|
||||
+this new job before it can pick a candidate. To do so, the job
|
||||
+scheduler needs to estimate the working sets of the existing jobs.
|
||||
+
|
||||
+When it is read, ``lru_gen`` returns a histogram of numbers of pages
|
||||
+accessed over different time intervals for each memcg and node.
|
||||
+``MAX_NR_GENS`` decides the number of bins for each histogram. The
|
||||
+histograms are noncumulative.
|
||||
+::
|
||||
+
|
||||
+ memcg memcg_id memcg_path
|
||||
+ node node_id
|
||||
+ min_gen_nr age_in_ms nr_anon_pages nr_file_pages
|
||||
+ ...
|
||||
+ max_gen_nr age_in_ms nr_anon_pages nr_file_pages
|
||||
+
|
||||
+Each bin contains an estimated number of pages that have been accessed
|
||||
+within ``age_in_ms``. E.g., ``min_gen_nr`` contains the coldest pages
|
||||
+and ``max_gen_nr`` contains the hottest pages, since ``age_in_ms`` of
|
||||
+the former is the largest and that of the latter is the smallest.
|
||||
+
|
||||
+Users can write the following command to ``lru_gen`` to create a new
|
||||
+generation ``max_gen_nr+1``:
|
||||
+
|
||||
+ ``+ memcg_id node_id max_gen_nr [can_swap [force_scan]]``
|
||||
+
|
||||
+``can_swap`` defaults to the swap setting and, if it is set to ``1``,
|
||||
+it forces the scan of anon pages when swap is off, and vice versa.
|
||||
+``force_scan`` defaults to ``1`` and, if it is set to ``0``, it
|
||||
+employs heuristics to reduce the overhead, which is likely to reduce
|
||||
+the coverage as well.
|
||||
+
|
||||
+A typical use case is that a job scheduler runs this command at a
|
||||
+certain time interval to create new generations, and it ranks the
|
||||
+servers it manages based on the sizes of their cold pages defined by
|
||||
+this time interval.
|
||||
+
|
||||
+Proactive reclaim
|
||||
+-----------------
|
||||
+Proactive reclaim induces page reclaim when there is no memory
|
||||
+pressure. It usually targets cold pages only. E.g., when a new job
|
||||
+comes in, the job scheduler wants to proactively reclaim cold pages on
|
||||
+the server it selected, to improve the chance of successfully landing
|
||||
+this new job.
|
||||
+
|
||||
+Users can write the following command to ``lru_gen`` to evict
|
||||
+generations less than or equal to ``min_gen_nr``.
|
||||
+
|
||||
+ ``- memcg_id node_id min_gen_nr [swappiness [nr_to_reclaim]]``
|
||||
+
|
||||
+``min_gen_nr`` should be less than ``max_gen_nr-1``, since
|
||||
+``max_gen_nr`` and ``max_gen_nr-1`` are not fully aged (equivalent to
|
||||
+the active list) and therefore cannot be evicted. ``swappiness``
|
||||
+overrides the default value in ``/proc/sys/vm/swappiness``.
|
||||
+``nr_to_reclaim`` limits the number of pages to evict.
|
||||
+
|
||||
+A typical use case is that a job scheduler runs this command before it
|
||||
+tries to land a new job on a server. If it fails to materialize enough
|
||||
+cold pages because of the overestimation, it retries on the next
|
||||
+server according to the ranking result obtained from the working set
|
||||
+estimation step. This less forceful approach limits the impacts on the
|
||||
+existing jobs.
|
||||
--- a/mm/Kconfig
|
||||
+++ b/mm/Kconfig
|
||||
@@ -1131,7 +1131,8 @@ config LRU_GEN
|
||||
# make sure folio->flags has enough spare bits
|
||||
depends on 64BIT || !SPARSEMEM || SPARSEMEM_VMEMMAP
|
||||
help
|
||||
- A high performance LRU implementation to overcommit memory.
|
||||
+ A high performance LRU implementation to overcommit memory. See
|
||||
+ Documentation/admin-guide/mm/multigen_lru.rst for details.
|
||||
|
||||
config LRU_GEN_ENABLED
|
||||
bool "Enable by default"
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -5319,6 +5319,7 @@ static ssize_t show_min_ttl(struct kobje
|
||||
return sprintf(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl)));
|
||||
}
|
||||
|
||||
+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
|
||||
static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
@@ -5352,6 +5353,7 @@ static ssize_t show_enabled(struct kobje
|
||||
return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);
|
||||
}
|
||||
|
||||
+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
|
||||
static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
@@ -5499,6 +5501,7 @@ static void lru_gen_seq_show_full(struct
|
||||
seq_putc(m, '\n');
|
||||
}
|
||||
|
||||
+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
|
||||
static int lru_gen_seq_show(struct seq_file *m, void *v)
|
||||
{
|
||||
unsigned long seq;
|
||||
@@ -5657,6 +5660,7 @@ done:
|
||||
return err;
|
||||
}
|
||||
|
||||
+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */
|
||||
static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
|
||||
size_t len, loff_t *pos)
|
||||
{
|
@ -1,202 +0,0 @@
|
||||
From bd82a74f6b5c0a75ef61be5e9be34319bb17328f Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Sun, 6 Mar 2022 20:35:00 -0700
|
||||
Subject: [PATCH 14/14] mm: multi-gen LRU: design doc
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Add a design doc.
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Acked-by: Brian Geffon <bgeffon@google.com>
|
||||
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
|
||||
Acked-by: Steven Barrett <steven@liquorix.net>
|
||||
Acked-by: Suleiman Souhlal <suleiman@google.com>
|
||||
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
|
||||
Tested-by: Donald Carr <d@chaos-reins.com>
|
||||
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
|
||||
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
|
||||
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
|
||||
Change-Id: I958afcabf5abc37b3e58f72638d35a349c31b98d
|
||||
---
|
||||
Documentation/mm/index.rst | 1 +
|
||||
Documentation/mm/multigen_lru.rst | 159 ++++++++++++++++++++++++++++++
|
||||
2 files changed, 160 insertions(+)
|
||||
create mode 100644 Documentation/mm/multigen_lru.rst
|
||||
|
||||
--- a/Documentation/mm/index.rst
|
||||
+++ b/Documentation/mm/index.rst
|
||||
@@ -51,6 +51,7 @@ above structured documentation, or delet
|
||||
ksm
|
||||
memory-model
|
||||
mmu_notifier
|
||||
+ multigen_lru
|
||||
numa
|
||||
overcommit-accounting
|
||||
page_migration
|
||||
--- /dev/null
|
||||
+++ b/Documentation/mm/multigen_lru.rst
|
||||
@@ -0,0 +1,159 @@
|
||||
+.. SPDX-License-Identifier: GPL-2.0
|
||||
+
|
||||
+=============
|
||||
+Multi-Gen LRU
|
||||
+=============
|
||||
+The multi-gen LRU is an alternative LRU implementation that optimizes
|
||||
+page reclaim and improves performance under memory pressure. Page
|
||||
+reclaim decides the kernel's caching policy and ability to overcommit
|
||||
+memory. It directly impacts the kswapd CPU usage and RAM efficiency.
|
||||
+
|
||||
+Design overview
|
||||
+===============
|
||||
+Objectives
|
||||
+----------
|
||||
+The design objectives are:
|
||||
+
|
||||
+* Good representation of access recency
|
||||
+* Try to profit from spatial locality
|
||||
+* Fast paths to make obvious choices
|
||||
+* Simple self-correcting heuristics
|
||||
+
|
||||
+The representation of access recency is at the core of all LRU
|
||||
+implementations. In the multi-gen LRU, each generation represents a
|
||||
+group of pages with similar access recency. Generations establish a
|
||||
+(time-based) common frame of reference and therefore help make better
|
||||
+choices, e.g., between different memcgs on a computer or different
|
||||
+computers in a data center (for job scheduling).
|
||||
+
|
||||
+Exploiting spatial locality improves efficiency when gathering the
|
||||
+accessed bit. A rmap walk targets a single page and does not try to
|
||||
+profit from discovering a young PTE. A page table walk can sweep all
|
||||
+the young PTEs in an address space, but the address space can be too
|
||||
+sparse to make a profit. The key is to optimize both methods and use
|
||||
+them in combination.
|
||||
+
|
||||
+Fast paths reduce code complexity and runtime overhead. Unmapped pages
|
||||
+do not require TLB flushes; clean pages do not require writeback.
|
||||
+These facts are only helpful when other conditions, e.g., access
|
||||
+recency, are similar. With generations as a common frame of reference,
|
||||
+additional factors stand out. But obvious choices might not be good
|
||||
+choices; thus self-correction is necessary.
|
||||
+
|
||||
+The benefits of simple self-correcting heuristics are self-evident.
|
||||
+Again, with generations as a common frame of reference, this becomes
|
||||
+attainable. Specifically, pages in the same generation can be
|
||||
+categorized based on additional factors, and a feedback loop can
|
||||
+statistically compare the refault percentages across those categories
|
||||
+and infer which of them are better choices.
|
||||
+
|
||||
+Assumptions
|
||||
+-----------
|
||||
+The protection of hot pages and the selection of cold pages are based
|
||||
+on page access channels and patterns. There are two access channels:
|
||||
+
|
||||
+* Accesses through page tables
|
||||
+* Accesses through file descriptors
|
||||
+
|
||||
+The protection of the former channel is by design stronger because:
|
||||
+
|
||||
+1. The uncertainty in determining the access patterns of the former
|
||||
+ channel is higher due to the approximation of the accessed bit.
|
||||
+2. The cost of evicting the former channel is higher due to the TLB
|
||||
+ flushes required and the likelihood of encountering the dirty bit.
|
||||
+3. The penalty of underprotecting the former channel is higher because
|
||||
+ applications usually do not prepare themselves for major page
|
||||
+ faults like they do for blocked I/O. E.g., GUI applications
|
||||
+ commonly use dedicated I/O threads to avoid blocking rendering
|
||||
+ threads.
|
||||
+
|
||||
+There are also two access patterns:
|
||||
+
|
||||
+* Accesses exhibiting temporal locality
|
||||
+* Accesses not exhibiting temporal locality
|
||||
+
|
||||
+For the reasons listed above, the former channel is assumed to follow
|
||||
+the former pattern unless ``VM_SEQ_READ`` or ``VM_RAND_READ`` is
|
||||
+present, and the latter channel is assumed to follow the latter
|
||||
+pattern unless outlying refaults have been observed.
|
||||
+
|
||||
+Workflow overview
|
||||
+=================
|
||||
+Evictable pages are divided into multiple generations for each
|
||||
+``lruvec``. The youngest generation number is stored in
|
||||
+``lrugen->max_seq`` for both anon and file types as they are aged on
|
||||
+an equal footing. The oldest generation numbers are stored in
|
||||
+``lrugen->min_seq[]`` separately for anon and file types as clean file
|
||||
+pages can be evicted regardless of swap constraints. These three
|
||||
+variables are monotonically increasing.
|
||||
+
|
||||
+Generation numbers are truncated into ``order_base_2(MAX_NR_GENS+1)``
|
||||
+bits in order to fit into the gen counter in ``folio->flags``. Each
|
||||
+truncated generation number is an index to ``lrugen->lists[]``. The
|
||||
+sliding window technique is used to track at least ``MIN_NR_GENS`` and
|
||||
+at most ``MAX_NR_GENS`` generations. The gen counter stores a value
|
||||
+within ``[1, MAX_NR_GENS]`` while a page is on one of
|
||||
+``lrugen->lists[]``; otherwise it stores zero.
|
||||
+
|
||||
+Each generation is divided into multiple tiers. A page accessed ``N``
|
||||
+times through file descriptors is in tier ``order_base_2(N)``. Unlike
|
||||
+generations, tiers do not have dedicated ``lrugen->lists[]``. In
|
||||
+contrast to moving across generations, which requires the LRU lock,
|
||||
+moving across tiers only involves atomic operations on
|
||||
+``folio->flags`` and therefore has a negligible cost. A feedback loop
|
||||
+modeled after the PID controller monitors refaults over all the tiers
|
||||
+from anon and file types and decides which tiers from which types to
|
||||
+evict or protect.
|
||||
+
|
||||
+There are two conceptually independent procedures: the aging and the
|
||||
+eviction. They form a closed-loop system, i.e., the page reclaim.
|
||||
+
|
||||
+Aging
|
||||
+-----
|
||||
+The aging produces young generations. Given an ``lruvec``, it
|
||||
+increments ``max_seq`` when ``max_seq-min_seq+1`` approaches
|
||||
+``MIN_NR_GENS``. The aging promotes hot pages to the youngest
|
||||
+generation when it finds them accessed through page tables; the
|
||||
+demotion of cold pages happens consequently when it increments
|
||||
+``max_seq``. The aging uses page table walks and rmap walks to find
|
||||
+young PTEs. For the former, it iterates ``lruvec_memcg()->mm_list``
|
||||
+and calls ``walk_page_range()`` with each ``mm_struct`` on this list
|
||||
+to scan PTEs, and after each iteration, it increments ``max_seq``. For
|
||||
+the latter, when the eviction walks the rmap and finds a young PTE,
|
||||
+the aging scans the adjacent PTEs. For both, on finding a young PTE,
|
||||
+the aging clears the accessed bit and updates the gen counter of the
|
||||
+page mapped by this PTE to ``(max_seq%MAX_NR_GENS)+1``.
|
||||
+
|
||||
+Eviction
|
||||
+--------
|
||||
+The eviction consumes old generations. Given an ``lruvec``, it
|
||||
+increments ``min_seq`` when ``lrugen->lists[]`` indexed by
|
||||
+``min_seq%MAX_NR_GENS`` becomes empty. To select a type and a tier to
|
||||
+evict from, it first compares ``min_seq[]`` to select the older type.
|
||||
+If both types are equally old, it selects the one whose first tier has
|
||||
+a lower refault percentage. The first tier contains single-use
|
||||
+unmapped clean pages, which are the best bet. The eviction sorts a
|
||||
+page according to its gen counter if the aging has found this page
|
||||
+accessed through page tables and updated its gen counter. It also
|
||||
+moves a page to the next generation, i.e., ``min_seq+1``, if this page
|
||||
+was accessed multiple times through file descriptors and the feedback
|
||||
+loop has detected outlying refaults from the tier this page is in. To
|
||||
+this end, the feedback loop uses the first tier as the baseline, for
|
||||
+the reason stated earlier.
|
||||
+
|
||||
+Summary
|
||||
+-------
|
||||
+The multi-gen LRU can be disassembled into the following parts:
|
||||
+
|
||||
+* Generations
|
||||
+* Rmap walks
|
||||
+* Page table walks
|
||||
+* Bloom filters
|
||||
+* PID controller
|
||||
+
|
||||
+The aging and the eviction form a producer-consumer model;
|
||||
+specifically, the latter drives the former by the sliding window over
|
||||
+generations. Within the aging, rmap walks drive page table walks by
|
||||
+inserting hot densely populated page tables into the Bloom filters.
|
||||
+Within the eviction, the PID controller uses refaults as the feedback
|
||||
+to select types to evict and tiers to protect.
|
File diff suppressed because it is too large
Load Diff
@ -1,174 +0,0 @@
|
||||
From a779a482fb9b9f8fcdf8b2519c789b4b9bb5dd05 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Fri, 7 Jul 2017 16:56:48 +0200
|
||||
Subject: build: add a hack for removing non-essential module info
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
include/linux/module.h | 13 ++++++++-----
|
||||
include/linux/moduleparam.h | 15 ++++++++++++---
|
||||
init/Kconfig | 7 +++++++
|
||||
kernel/module.c | 5 ++++-
|
||||
scripts/mod/modpost.c | 12 ++++++++++++
|
||||
5 files changed, 43 insertions(+), 9 deletions(-)
|
||||
|
||||
--- a/include/linux/module.h
|
||||
+++ b/include/linux/module.h
|
||||
@@ -164,6 +164,7 @@ extern void cleanup_module(void);
|
||||
|
||||
/* Generic info of form tag = "info" */
|
||||
#define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info)
|
||||
+#define MODULE_INFO_STRIP(tag, info) __MODULE_INFO_STRIP(tag, tag, info)
|
||||
|
||||
/* For userspace: you can also call me... */
|
||||
#define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias)
|
||||
@@ -233,12 +234,12 @@ extern void cleanup_module(void);
|
||||
* Author(s), use "Name <email>" or just "Name", for multiple
|
||||
* authors use multiple MODULE_AUTHOR() statements/lines.
|
||||
*/
|
||||
-#define MODULE_AUTHOR(_author) MODULE_INFO(author, _author)
|
||||
+#define MODULE_AUTHOR(_author) MODULE_INFO_STRIP(author, _author)
|
||||
|
||||
/* What your module does. */
|
||||
-#define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description)
|
||||
+#define MODULE_DESCRIPTION(_description) MODULE_INFO_STRIP(description, _description)
|
||||
|
||||
-#ifdef MODULE
|
||||
+#if defined(MODULE) && !defined(CONFIG_MODULE_STRIPPED)
|
||||
/* Creates an alias so file2alias.c can find device table. */
|
||||
#define MODULE_DEVICE_TABLE(type, name) \
|
||||
extern typeof(name) __mod_##type##__##name##_device_table \
|
||||
@@ -265,7 +266,9 @@ extern typeof(name) __mod_##type##__##na
|
||||
*/
|
||||
|
||||
#if defined(MODULE) || !defined(CONFIG_SYSFS)
|
||||
-#define MODULE_VERSION(_version) MODULE_INFO(version, _version)
|
||||
+#define MODULE_VERSION(_version) MODULE_INFO_STRIP(version, _version)
|
||||
+#elif defined(CONFIG_MODULE_STRIPPED)
|
||||
+#define MODULE_VERSION(_version) __MODULE_INFO_DISABLED(version)
|
||||
#else
|
||||
#define MODULE_VERSION(_version) \
|
||||
MODULE_INFO(version, _version); \
|
||||
@@ -288,7 +291,7 @@ extern typeof(name) __mod_##type##__##na
|
||||
/* Optional firmware file (or files) needed by the module
|
||||
* format is simply firmware file name. Multiple firmware
|
||||
* files require multiple MODULE_FIRMWARE() specifiers */
|
||||
-#define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware)
|
||||
+#define MODULE_FIRMWARE(_firmware) MODULE_INFO_STRIP(firmware, _firmware)
|
||||
|
||||
#define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, __stringify(ns))
|
||||
|
||||
--- a/include/linux/moduleparam.h
|
||||
+++ b/include/linux/moduleparam.h
|
||||
@@ -20,6 +20,16 @@
|
||||
/* Chosen so that structs with an unsigned long line up. */
|
||||
#define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long))
|
||||
|
||||
+/* This struct is here for syntactic coherency, it is not used */
|
||||
+#define __MODULE_INFO_DISABLED(name) \
|
||||
+ struct __UNIQUE_ID(name) {}
|
||||
+
|
||||
+#ifdef CONFIG_MODULE_STRIPPED
|
||||
+#define __MODULE_INFO_STRIP(tag, name, info) __MODULE_INFO_DISABLED(name)
|
||||
+#else
|
||||
+#define __MODULE_INFO_STRIP(tag, name, info) __MODULE_INFO(tag, name, info)
|
||||
+#endif
|
||||
+
|
||||
#define __MODULE_INFO(tag, name, info) \
|
||||
static const char __UNIQUE_ID(name)[] \
|
||||
__used __section(".modinfo") __aligned(1) \
|
||||
@@ -31,7 +41,7 @@
|
||||
/* One for each parameter, describing how to use it. Some files do
|
||||
multiple of these per line, so can't just use MODULE_INFO. */
|
||||
#define MODULE_PARM_DESC(_parm, desc) \
|
||||
- __MODULE_INFO(parm, _parm, #_parm ":" desc)
|
||||
+ __MODULE_INFO_STRIP(parm, _parm, #_parm ":" desc)
|
||||
|
||||
struct kernel_param;
|
||||
|
||||
--- a/kernel/module/Kconfig
|
||||
+++ b/kernel/module/Kconfig
|
||||
@@ -286,6 +286,13 @@ config UNUSED_KSYMS_WHITELIST
|
||||
one per line. The path can be absolute, or relative to the kernel
|
||||
source tree.
|
||||
|
||||
+config MODULE_STRIPPED
|
||||
+ bool "Reduce module size"
|
||||
+ depends on MODULES
|
||||
+ help
|
||||
+ Remove module parameter descriptions, author info, version, aliases,
|
||||
+ device tables, etc.
|
||||
+
|
||||
config MODULES_TREE_LOOKUP
|
||||
def_bool y
|
||||
depends on PERF_EVENTS || TRACING || CFI_CLANG
|
||||
--- a/kernel/module/main.c
|
||||
+++ b/kernel/module/main.c
|
||||
@@ -1954,9 +1954,11 @@ static int setup_load_info(struct load_i
|
||||
|
||||
static int check_modinfo(struct module *mod, struct load_info *info, int flags)
|
||||
{
|
||||
- const char *modmagic = get_modinfo(info, "vermagic");
|
||||
int err;
|
||||
|
||||
+#ifndef CONFIG_MODULE_STRIPPED
|
||||
+ const char *modmagic = get_modinfo(info, "vermagic");
|
||||
+
|
||||
if (flags & MODULE_INIT_IGNORE_VERMAGIC)
|
||||
modmagic = NULL;
|
||||
|
||||
@@ -1977,6 +1979,7 @@ static int check_modinfo(struct module *
|
||||
mod->name);
|
||||
add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
|
||||
}
|
||||
+#endif
|
||||
|
||||
check_modinfo_retpoline(mod, info);
|
||||
|
||||
--- a/scripts/mod/modpost.c
|
||||
+++ b/scripts/mod/modpost.c
|
||||
@@ -1817,7 +1817,9 @@ static void read_symbols(const char *mod
|
||||
symname = remove_dot(info.strtab + sym->st_name);
|
||||
|
||||
handle_symbol(mod, &info, sym, symname);
|
||||
+#ifndef CONFIG_MODULE_STRIPPED
|
||||
handle_moddevtable(mod, &info, sym, symname);
|
||||
+#endif
|
||||
}
|
||||
|
||||
for (sym = info.symtab_start; sym < info.symtab_stop; sym++) {
|
||||
@@ -1980,8 +1982,10 @@ static void add_header(struct buffer *b,
|
||||
buf_printf(b, "BUILD_SALT;\n");
|
||||
buf_printf(b, "BUILD_LTO_INFO;\n");
|
||||
buf_printf(b, "\n");
|
||||
+#ifndef CONFIG_MODULE_STRIPPED
|
||||
buf_printf(b, "MODULE_INFO(vermagic, VERMAGIC_STRING);\n");
|
||||
buf_printf(b, "MODULE_INFO(name, KBUILD_MODNAME);\n");
|
||||
+#endif
|
||||
buf_printf(b, "\n");
|
||||
buf_printf(b, "__visible struct module __this_module\n");
|
||||
buf_printf(b, "__section(\".gnu.linkonce.this_module\") = {\n");
|
||||
@@ -2101,11 +2105,13 @@ static void add_depends(struct buffer *b
|
||||
|
||||
static void add_srcversion(struct buffer *b, struct module *mod)
|
||||
{
|
||||
+#ifndef CONFIG_MODULE_STRIPPED
|
||||
if (mod->srcversion[0]) {
|
||||
buf_printf(b, "\n");
|
||||
buf_printf(b, "MODULE_INFO(srcversion, \"%s\");\n",
|
||||
mod->srcversion);
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
static void write_buf(struct buffer *b, const char *fname)
|
||||
@@ -2191,7 +2197,9 @@ static void write_mod_c_file(struct modu
|
||||
add_exported_symbols(&buf, mod);
|
||||
add_versions(&buf, mod);
|
||||
add_depends(&buf, mod);
|
||||
+#ifndef CONFIG_MODULE_STRIPPED
|
||||
add_moddevtable(&buf, mod);
|
||||
+#endif
|
||||
add_srcversion(&buf, mod);
|
||||
|
||||
ret = snprintf(fname, sizeof(fname), "%s.mod.c", mod->name);
|
@ -1,11 +0,0 @@
|
||||
--- a/scripts/kconfig/conf.c
|
||||
+++ b/scripts/kconfig/conf.c
|
||||
@@ -432,6 +432,8 @@ static int conf_sym(struct menu *menu)
|
||||
break;
|
||||
continue;
|
||||
case 0:
|
||||
+ if (!sym_has_value(sym) && !tty_stdio && getenv("FAIL_ON_UNCONFIGURED"))
|
||||
+ exit(1);
|
||||
newval = oldval;
|
||||
break;
|
||||
case '?':
|
File diff suppressed because it is too large
Load Diff
@ -1,22 +0,0 @@
|
||||
From e44fc2af1ddc452b6659d08c16973d65c73b7d0a Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
Date: Wed, 5 Feb 2020 18:36:43 +0000
|
||||
Subject: [PATCH] file2alias: build on macos
|
||||
|
||||
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
---
|
||||
scripts/mod/file2alias.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
--- a/scripts/mod/file2alias.c
|
||||
+++ b/scripts/mod/file2alias.c
|
||||
@@ -38,6 +38,9 @@ typedef struct {
|
||||
__u8 b[16];
|
||||
} guid_t;
|
||||
|
||||
+#ifdef __APPLE__
|
||||
+#define uuid_t compat_uuid_t
|
||||
+#endif
|
||||
/* backwards compatibility, don't use in new code */
|
||||
typedef struct {
|
||||
__u8 b[16];
|
@ -1,83 +0,0 @@
|
||||
From 48232d3d931c95953ce2ddfe7da7bb164aef6a73 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Fri, 7 Jul 2017 17:03:16 +0200
|
||||
Subject: fix portability of some include files in tools/ used on the host
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
tools/include/tools/be_byteshift.h | 4 ++++
|
||||
tools/include/tools/le_byteshift.h | 4 ++++
|
||||
tools/include/tools/linux_types.h | 22 ++++++++++++++++++++++
|
||||
3 files changed, 30 insertions(+)
|
||||
create mode 100644 tools/include/tools/linux_types.h
|
||||
|
||||
--- a/tools/include/tools/be_byteshift.h
|
||||
+++ b/tools/include/tools/be_byteshift.h
|
||||
@@ -2,6 +2,10 @@
|
||||
#ifndef _TOOLS_BE_BYTESHIFT_H
|
||||
#define _TOOLS_BE_BYTESHIFT_H
|
||||
|
||||
+#ifndef __linux__
|
||||
+#include "linux_types.h"
|
||||
+#endif
|
||||
+
|
||||
#include <stdint.h>
|
||||
|
||||
static inline uint16_t __get_unaligned_be16(const uint8_t *p)
|
||||
--- a/tools/include/tools/le_byteshift.h
|
||||
+++ b/tools/include/tools/le_byteshift.h
|
||||
@@ -2,6 +2,10 @@
|
||||
#ifndef _TOOLS_LE_BYTESHIFT_H
|
||||
#define _TOOLS_LE_BYTESHIFT_H
|
||||
|
||||
+#ifndef __linux__
|
||||
+#include "linux_types.h"
|
||||
+#endif
|
||||
+
|
||||
#include <stdint.h>
|
||||
|
||||
static inline uint16_t __get_unaligned_le16(const uint8_t *p)
|
||||
--- /dev/null
|
||||
+++ b/tools/include/tools/linux_types.h
|
||||
@@ -0,0 +1,26 @@
|
||||
+#ifndef __LINUX_TYPES_H
|
||||
+#define __LINUX_TYPES_H
|
||||
+
|
||||
+#include <stdint.h>
|
||||
+
|
||||
+typedef int8_t __s8;
|
||||
+typedef uint8_t __u8;
|
||||
+typedef uint8_t __be8;
|
||||
+typedef uint8_t __le8;
|
||||
+
|
||||
+typedef int16_t __s16;
|
||||
+typedef uint16_t __u16;
|
||||
+typedef uint16_t __be16;
|
||||
+typedef uint16_t __le16;
|
||||
+
|
||||
+typedef int32_t __s32;
|
||||
+typedef uint32_t __u32;
|
||||
+typedef uint32_t __be32;
|
||||
+typedef uint32_t __le32;
|
||||
+
|
||||
+typedef int64_t __s64;
|
||||
+typedef uint64_t __u64;
|
||||
+typedef uint64_t __be64;
|
||||
+typedef uint64_t __le64;
|
||||
+
|
||||
+#endif
|
||||
--- a/tools/include/linux/types.h
|
||||
+++ b/tools/include/linux/types.h
|
||||
@@ -10,8 +10,12 @@
|
||||
#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
|
||||
#endif
|
||||
|
||||
+#ifndef __linux__
|
||||
+#include <tools/linux_types.h>
|
||||
+#else
|
||||
#include <asm/types.h>
|
||||
#include <asm/posix_types.h>
|
||||
+#endif
|
||||
|
||||
struct page;
|
||||
struct kmem_cache;
|
@ -1,24 +0,0 @@
|
||||
From be9be95ff10e16a5b4ad36f903978d0cc5747024 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Fri, 7 Jul 2017 17:04:08 +0200
|
||||
Subject: kernel: fix linux/spi/spidev.h portability issues with musl
|
||||
|
||||
Felix will try to get this define included into musl
|
||||
|
||||
lede-commit: 795e7cf60de19e7a076a46874fab7bb88b43bbff
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
include/uapi/linux/spi/spidev.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/include/uapi/linux/spi/spidev.h
|
||||
+++ b/include/uapi/linux/spi/spidev.h
|
||||
@@ -93,7 +93,7 @@ struct spi_ioc_transfer {
|
||||
|
||||
/* not all platforms use <asm-generic/ioctl.h> or _IOC_TYPECHECK() ... */
|
||||
#define SPI_MSGSIZE(N) \
|
||||
- ((((N)*(sizeof (struct spi_ioc_transfer))) < (1 << _IOC_SIZEBITS)) \
|
||||
+ ((((N)*(sizeof (struct spi_ioc_transfer))) < (1 << 13)) \
|
||||
? ((N)*(sizeof (struct spi_ioc_transfer))) : 0)
|
||||
#define SPI_IOC_MESSAGE(N) _IOW(SPI_IOC_MAGIC, 0, char[SPI_MSGSIZE(N)])
|
||||
|
@ -1,123 +0,0 @@
|
||||
From e3d8676f5722b7622685581e06e8f53e6138e3ab Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Sat, 15 Jul 2017 23:42:36 +0200
|
||||
Subject: use -ffunction-sections, -fdata-sections and --gc-sections
|
||||
|
||||
In combination with kernel symbol export stripping this significantly reduces
|
||||
the kernel image size. Used on both ARM and MIPS architectures.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
Signed-off-by: Jonas Gorski <jogo@openwrt.org>
|
||||
Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
|
||||
---
|
||||
--- a/arch/arm/Kconfig
|
||||
+++ b/arch/arm/Kconfig
|
||||
@@ -121,6 +121,7 @@ config ARM
|
||||
select HAVE_UID16
|
||||
select HAVE_VIRT_CPU_ACCOUNTING_GEN
|
||||
select IRQ_FORCED_THREADING
|
||||
+ select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
|
||||
select MODULES_USE_ELF_REL
|
||||
select NEED_DMA_MAP_STATE
|
||||
select OF_EARLY_FLATTREE if OF
|
||||
--- a/arch/arm/boot/compressed/Makefile
|
||||
+++ b/arch/arm/boot/compressed/Makefile
|
||||
@@ -91,6 +91,7 @@ endif
|
||||
ifeq ($(CONFIG_USE_OF),y)
|
||||
OBJS += $(libfdt_objs) fdt_check_mem_start.o
|
||||
endif
|
||||
+KBUILD_CFLAGS_KERNEL := $(patsubst -f%-sections,,$(KBUILD_CFLAGS_KERNEL))
|
||||
|
||||
OBJS += lib1funcs.o ashldi3.o bswapsdi2.o
|
||||
|
||||
--- a/arch/arm/kernel/vmlinux.lds.S
|
||||
+++ b/arch/arm/kernel/vmlinux.lds.S
|
||||
@@ -75,7 +75,7 @@ SECTIONS
|
||||
. = ALIGN(4);
|
||||
__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
|
||||
__start___ex_table = .;
|
||||
- ARM_MMU_KEEP(*(__ex_table))
|
||||
+ KEEP(*(__ex_table))
|
||||
__stop___ex_table = .;
|
||||
}
|
||||
|
||||
@@ -100,24 +100,24 @@ SECTIONS
|
||||
}
|
||||
.init.arch.info : {
|
||||
__arch_info_begin = .;
|
||||
- *(.arch.info.init)
|
||||
+ KEEP(*(.arch.info.init))
|
||||
__arch_info_end = .;
|
||||
}
|
||||
.init.tagtable : {
|
||||
__tagtable_begin = .;
|
||||
- *(.taglist.init)
|
||||
+ KEEP(*(.taglist.init))
|
||||
__tagtable_end = .;
|
||||
}
|
||||
#ifdef CONFIG_SMP_ON_UP
|
||||
.init.smpalt : {
|
||||
__smpalt_begin = .;
|
||||
- *(.alt.smp.init)
|
||||
+ KEEP(*(.alt.smp.init))
|
||||
__smpalt_end = .;
|
||||
}
|
||||
#endif
|
||||
.init.pv_table : {
|
||||
__pv_table_begin = .;
|
||||
- *(.pv_table)
|
||||
+ KEEP(*(.pv_table))
|
||||
__pv_table_end = .;
|
||||
}
|
||||
|
||||
--- a/arch/arm/include/asm/vmlinux.lds.h
|
||||
+++ b/arch/arm/include/asm/vmlinux.lds.h
|
||||
@@ -42,13 +42,13 @@
|
||||
#define PROC_INFO \
|
||||
. = ALIGN(4); \
|
||||
__proc_info_begin = .; \
|
||||
- *(.proc.info.init) \
|
||||
+ KEEP(*(.proc.info.init)) \
|
||||
__proc_info_end = .;
|
||||
|
||||
#define IDMAP_TEXT \
|
||||
ALIGN_FUNCTION(); \
|
||||
__idmap_text_start = .; \
|
||||
- *(.idmap.text) \
|
||||
+ KEEP(*(.idmap.text)) \
|
||||
__idmap_text_end = .; \
|
||||
|
||||
#define ARM_DISCARD \
|
||||
@@ -109,12 +109,12 @@
|
||||
. = ALIGN(8); \
|
||||
.ARM.unwind_idx : { \
|
||||
__start_unwind_idx = .; \
|
||||
- *(.ARM.exidx*) \
|
||||
+ KEEP(*(.ARM.exidx*)) \
|
||||
__stop_unwind_idx = .; \
|
||||
} \
|
||||
.ARM.unwind_tab : { \
|
||||
__start_unwind_tab = .; \
|
||||
- *(.ARM.extab*) \
|
||||
+ KEEP(*(.ARM.extab*)) \
|
||||
__stop_unwind_tab = .; \
|
||||
}
|
||||
|
||||
@@ -126,7 +126,7 @@
|
||||
__vectors_lma = .; \
|
||||
OVERLAY 0xffff0000 : NOCROSSREFS AT(__vectors_lma) { \
|
||||
.vectors { \
|
||||
- *(.vectors) \
|
||||
+ KEEP(*(.vectors)) \
|
||||
} \
|
||||
.vectors.bhb.loop8 { \
|
||||
*(.vectors.bhb.loop8) \
|
||||
@@ -144,7 +144,7 @@
|
||||
\
|
||||
__stubs_lma = .; \
|
||||
.stubs ADDR(.vectors) + 0x1000 : AT(__stubs_lma) { \
|
||||
- *(.stubs) \
|
||||
+ KEEP(*(.stubs)) \
|
||||
} \
|
||||
ARM_LMA(__stubs, .stubs); \
|
||||
. = __stubs_lma + SIZEOF(.stubs); \
|
@ -1,102 +0,0 @@
|
||||
From b14784e7883390c20ed3ff904892255404a5914b Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Fri, 7 Jul 2017 17:05:53 +0200
|
||||
Subject: add an optional config option for stripping all unnecessary symbol exports from the kernel image
|
||||
|
||||
lede-commit: bb5a40c64b7c4f4848509fa0a6625055fc9e66cc
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
include/asm-generic/vmlinux.lds.h | 18 +++++++++++++++---
|
||||
include/linux/export.h | 9 ++++++++-
|
||||
scripts/Makefile.build | 2 +-
|
||||
3 files changed, 24 insertions(+), 5 deletions(-)
|
||||
|
||||
--- a/include/asm-generic/vmlinux.lds.h
|
||||
+++ b/include/asm-generic/vmlinux.lds.h
|
||||
@@ -81,6 +81,16 @@
|
||||
#define RO_EXCEPTION_TABLE
|
||||
#endif
|
||||
|
||||
+#ifndef SYMTAB_KEEP
|
||||
+#define SYMTAB_KEEP KEEP(*(SORT(___ksymtab+*)))
|
||||
+#define SYMTAB_KEEP_GPL KEEP(*(SORT(___ksymtab_gpl+*)))
|
||||
+#endif
|
||||
+
|
||||
+#ifndef SYMTAB_DISCARD
|
||||
+#define SYMTAB_DISCARD
|
||||
+#define SYMTAB_DISCARD_GPL
|
||||
+#endif
|
||||
+
|
||||
/* Align . to a 8 byte boundary equals to maximum function alignment. */
|
||||
#define ALIGN_FUNCTION() . = ALIGN(8)
|
||||
|
||||
@@ -479,14 +489,14 @@
|
||||
/* Kernel symbol table: Normal symbols */ \
|
||||
__ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \
|
||||
__start___ksymtab = .; \
|
||||
- KEEP(*(SORT(___ksymtab+*))) \
|
||||
+ SYMTAB_KEEP \
|
||||
__stop___ksymtab = .; \
|
||||
} \
|
||||
\
|
||||
/* Kernel symbol table: GPL-only symbols */ \
|
||||
__ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) { \
|
||||
__start___ksymtab_gpl = .; \
|
||||
- KEEP(*(SORT(___ksymtab_gpl+*))) \
|
||||
+ SYMTAB_KEEP_GPL \
|
||||
__stop___ksymtab_gpl = .; \
|
||||
} \
|
||||
\
|
||||
@@ -506,7 +516,7 @@
|
||||
\
|
||||
/* Kernel symbol table: strings */ \
|
||||
__ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \
|
||||
- *(__ksymtab_strings) \
|
||||
+ *(__ksymtab_strings+*) \
|
||||
} \
|
||||
\
|
||||
/* __*init sections */ \
|
||||
@@ -1023,6 +1033,8 @@
|
||||
|
||||
#define COMMON_DISCARDS \
|
||||
SANITIZER_DISCARDS \
|
||||
+ SYMTAB_DISCARD \
|
||||
+ SYMTAB_DISCARD_GPL \
|
||||
*(.discard) \
|
||||
*(.discard.*) \
|
||||
*(.modinfo) \
|
||||
--- a/include/linux/export.h
|
||||
+++ b/include/linux/export.h
|
||||
@@ -72,6 +72,12 @@ struct kernel_symbol {
|
||||
|
||||
#else
|
||||
|
||||
+#ifdef MODULE
|
||||
+#define __EXPORT_SUFFIX(sym)
|
||||
+#else
|
||||
+#define __EXPORT_SUFFIX(sym) "+" #sym
|
||||
+#endif
|
||||
+
|
||||
/*
|
||||
* For every exported symbol, do the following:
|
||||
*
|
||||
@@ -87,7 +93,7 @@ struct kernel_symbol {
|
||||
extern typeof(sym) sym; \
|
||||
extern const char __kstrtab_##sym[]; \
|
||||
extern const char __kstrtabns_##sym[]; \
|
||||
- asm(" .section \"__ksymtab_strings\",\"aMS\",%progbits,1 \n" \
|
||||
+ asm(" .section \"__ksymtab_strings" __EXPORT_SUFFIX(sym) "\",\"aMS\",%progbits,1 \n" \
|
||||
"__kstrtab_" #sym ": \n" \
|
||||
" .asciz \"" #sym "\" \n" \
|
||||
"__kstrtabns_" #sym ": \n" \
|
||||
--- a/scripts/Makefile.build
|
||||
+++ b/scripts/Makefile.build
|
||||
@@ -328,7 +328,7 @@ targets += $(real-dtb-y) $(lib-y) $(alwa
|
||||
# Linker scripts preprocessor (.lds.S -> .lds)
|
||||
# ---------------------------------------------------------------------------
|
||||
quiet_cmd_cpp_lds_S = LDS $@
|
||||
- cmd_cpp_lds_S = $(CPP) $(cpp_flags) -P -U$(ARCH) \
|
||||
+ cmd_cpp_lds_S = $(CPP) $(EXTRA_LDSFLAGS) $(cpp_flags) -P -U$(ARCH) \
|
||||
-D__ASSEMBLY__ -DLINKER_SCRIPT -o $@ $<
|
||||
|
||||
$(obj)/%.lds: $(src)/%.lds.S FORCE
|
@ -1,34 +0,0 @@
|
||||
From b3d00b452467f621317953d9e4c6f9ae8dcfd271 Mon Sep 17 00:00:00 2001
|
||||
From: Imre Kaloz <kaloz@openwrt.org>
|
||||
Date: Fri, 7 Jul 2017 17:06:55 +0200
|
||||
Subject: use the openwrt lzma options for now
|
||||
|
||||
lede-commit: 548de949f392049420a6a1feeef118b30ab8ea8c
|
||||
Signed-off-by: Imre Kaloz <kaloz@openwrt.org>
|
||||
---
|
||||
lib/decompress.c | 1 +
|
||||
scripts/Makefile.lib | 2 +-
|
||||
usr/gen_initramfs_list.sh | 10 +++++-----
|
||||
3 files changed, 7 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/lib/decompress.c
|
||||
+++ b/lib/decompress.c
|
||||
@@ -53,6 +53,7 @@ static const struct compress_format comp
|
||||
{ {0x1f, 0x9e}, "gzip", gunzip },
|
||||
{ {0x42, 0x5a}, "bzip2", bunzip2 },
|
||||
{ {0x5d, 0x00}, "lzma", unlzma },
|
||||
+ { {0x6d, 0x00}, "lzma-openwrt", unlzma },
|
||||
{ {0xfd, 0x37}, "xz", unxz },
|
||||
{ {0x89, 0x4c}, "lzo", unlzo },
|
||||
{ {0x02, 0x21}, "lz4", unlz4 },
|
||||
--- a/scripts/Makefile.lib
|
||||
+++ b/scripts/Makefile.lib
|
||||
@@ -421,7 +421,7 @@ quiet_cmd_bzip2_with_size = BZIP2 $@
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
quiet_cmd_lzma = LZMA $@
|
||||
- cmd_lzma = cat $(real-prereqs) | $(LZMA) -9 > $@
|
||||
+ cmd_lzma = { cat $(real-prereqs) | $(LZMA) e -d20 -lc1 -lp2 -pb2 -eos -si -so; $(size_append); } > $@
|
||||
|
||||
quiet_cmd_lzma_with_size = LZMA $@
|
||||
cmd_lzma_with_size = { cat $(real-prereqs) | $(LZMA) -9; $(size_append); } > $@
|
@ -1,11 +0,0 @@
|
||||
--- a/net/ipv4/Kconfig
|
||||
+++ b/net/ipv4/Kconfig
|
||||
@@ -315,7 +315,7 @@ config NET_IPVTI
|
||||
on top.
|
||||
|
||||
config NET_UDP_TUNNEL
|
||||
- tristate
|
||||
+ tristate "IP: UDP tunneling support"
|
||||
select NET_IP_TUNNEL
|
||||
default n
|
||||
|
@ -1,27 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Subject: hack: net: remove bogus netfilter dependencies
|
||||
|
||||
lede-commit: 589d2a377dee27d206fc3725325309cf649e4df6
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
net/netfilter/Kconfig | 2 --
|
||||
1 file changed, 2 deletions(-)
|
||||
|
||||
--- a/net/netfilter/Kconfig
|
||||
+++ b/net/netfilter/Kconfig
|
||||
@@ -253,7 +253,6 @@ config NF_CONNTRACK_FTP
|
||||
|
||||
config NF_CONNTRACK_H323
|
||||
tristate "H.323 protocol support"
|
||||
- depends on IPV6 || IPV6=n
|
||||
depends on NETFILTER_ADVANCED
|
||||
help
|
||||
H.323 is a VoIP signalling protocol from ITU-T. As one of the most
|
||||
@@ -1118,7 +1117,6 @@ config NETFILTER_XT_TARGET_SECMARK
|
||||
|
||||
config NETFILTER_XT_TARGET_TCPMSS
|
||||
tristate '"TCPMSS" target support'
|
||||
- depends on IPV6 || IPV6=n
|
||||
default m if NETFILTER_ADVANCED=n
|
||||
help
|
||||
This option adds a `TCPMSS' target, which allows you to alter the
|
@ -1,199 +0,0 @@
|
||||
From da3c50704f14132f4adf80d48e9a4cd5d46e54c9 Mon Sep 17 00:00:00 2001
|
||||
From: John Crispin <john@phrozen.org>
|
||||
Date: Fri, 7 Jul 2017 17:09:21 +0200
|
||||
Subject: kconfig: owrt specific dependencies
|
||||
|
||||
Signed-off-by: John Crispin <john@phrozen.org>
|
||||
---
|
||||
crypto/Kconfig | 10 +++++-----
|
||||
drivers/bcma/Kconfig | 1 +
|
||||
drivers/ssb/Kconfig | 3 ++-
|
||||
lib/Kconfig | 8 ++++----
|
||||
net/netfilter/Kconfig | 2 +-
|
||||
net/wireless/Kconfig | 17 ++++++++++-------
|
||||
sound/core/Kconfig | 4 ++--
|
||||
7 files changed, 25 insertions(+), 20 deletions(-)
|
||||
|
||||
--- a/crypto/Kconfig
|
||||
+++ b/crypto/Kconfig
|
||||
@@ -55,7 +55,7 @@ config CRYPTO_FIPS_VERSION
|
||||
By default the KERNELRELEASE value is used.
|
||||
|
||||
config CRYPTO_ALGAPI
|
||||
- tristate
|
||||
+ tristate "ALGAPI"
|
||||
select CRYPTO_ALGAPI2
|
||||
help
|
||||
This option provides the API for cryptographic algorithms.
|
||||
@@ -64,7 +64,7 @@ config CRYPTO_ALGAPI2
|
||||
tristate
|
||||
|
||||
config CRYPTO_AEAD
|
||||
- tristate
|
||||
+ tristate "AEAD"
|
||||
select CRYPTO_AEAD2
|
||||
select CRYPTO_ALGAPI
|
||||
|
||||
@@ -75,7 +75,7 @@ config CRYPTO_AEAD2
|
||||
select CRYPTO_RNG2
|
||||
|
||||
config CRYPTO_SKCIPHER
|
||||
- tristate
|
||||
+ tristate "SKCIPHER"
|
||||
select CRYPTO_SKCIPHER2
|
||||
select CRYPTO_ALGAPI
|
||||
|
||||
@@ -85,7 +85,7 @@ config CRYPTO_SKCIPHER2
|
||||
select CRYPTO_RNG2
|
||||
|
||||
config CRYPTO_HASH
|
||||
- tristate
|
||||
+ tristate "HASH"
|
||||
select CRYPTO_HASH2
|
||||
select CRYPTO_ALGAPI
|
||||
|
||||
@@ -94,7 +94,7 @@ config CRYPTO_HASH2
|
||||
select CRYPTO_ALGAPI2
|
||||
|
||||
config CRYPTO_RNG
|
||||
- tristate
|
||||
+ tristate "RNG"
|
||||
select CRYPTO_RNG2
|
||||
select CRYPTO_ALGAPI
|
||||
|
||||
--- a/drivers/bcma/Kconfig
|
||||
+++ b/drivers/bcma/Kconfig
|
||||
@@ -16,6 +16,7 @@ if BCMA
|
||||
# Support for Block-I/O. SELECT this from the driver that needs it.
|
||||
config BCMA_BLOCKIO
|
||||
bool
|
||||
+ default y
|
||||
|
||||
config BCMA_HOST_PCI_POSSIBLE
|
||||
bool
|
||||
--- a/drivers/ssb/Kconfig
|
||||
+++ b/drivers/ssb/Kconfig
|
||||
@@ -29,6 +29,7 @@ config SSB_SPROM
|
||||
config SSB_BLOCKIO
|
||||
bool
|
||||
depends on SSB
|
||||
+ default y
|
||||
|
||||
config SSB_PCIHOST_POSSIBLE
|
||||
bool
|
||||
@@ -49,7 +50,7 @@ config SSB_PCIHOST
|
||||
config SSB_B43_PCI_BRIDGE
|
||||
bool
|
||||
depends on SSB_PCIHOST
|
||||
- default n
|
||||
+ default y
|
||||
|
||||
config SSB_PCMCIAHOST_POSSIBLE
|
||||
bool
|
||||
--- a/lib/Kconfig
|
||||
+++ b/lib/Kconfig
|
||||
@@ -456,16 +456,16 @@ config BCH_CONST_T
|
||||
# Textsearch support is select'ed if needed
|
||||
#
|
||||
config TEXTSEARCH
|
||||
- bool
|
||||
+ bool "Textsearch support"
|
||||
|
||||
config TEXTSEARCH_KMP
|
||||
- tristate
|
||||
+ tristate "Textsearch KMP"
|
||||
|
||||
config TEXTSEARCH_BM
|
||||
- tristate
|
||||
+ tristate "Textsearch BM"
|
||||
|
||||
config TEXTSEARCH_FSM
|
||||
- tristate
|
||||
+ tristate "Textsearch FSM"
|
||||
|
||||
config BTREE
|
||||
bool
|
||||
--- a/net/netfilter/Kconfig
|
||||
+++ b/net/netfilter/Kconfig
|
||||
@@ -22,7 +22,7 @@ config NETFILTER_SKIP_EGRESS
|
||||
def_bool NETFILTER_EGRESS && (NET_CLS_ACT || IFB)
|
||||
|
||||
config NETFILTER_NETLINK
|
||||
- tristate
|
||||
+ tristate "Netfilter NFNETLINK interface"
|
||||
|
||||
config NETFILTER_FAMILY_BRIDGE
|
||||
bool
|
||||
--- a/net/wireless/Kconfig
|
||||
+++ b/net/wireless/Kconfig
|
||||
@@ -1,6 +1,6 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config WIRELESS_EXT
|
||||
- bool
|
||||
+ bool "Wireless extensions"
|
||||
|
||||
config WEXT_CORE
|
||||
def_bool y
|
||||
@@ -12,10 +12,10 @@ config WEXT_PROC
|
||||
depends on WEXT_CORE
|
||||
|
||||
config WEXT_SPY
|
||||
- bool
|
||||
+ bool "WEXT_SPY"
|
||||
|
||||
config WEXT_PRIV
|
||||
- bool
|
||||
+ bool "WEXT_PRIV"
|
||||
|
||||
config CFG80211
|
||||
tristate "cfg80211 - wireless configuration API"
|
||||
@@ -204,7 +204,7 @@ config CFG80211_WEXT_EXPORT
|
||||
endif # CFG80211
|
||||
|
||||
config LIB80211
|
||||
- tristate
|
||||
+ tristate "LIB80211"
|
||||
default n
|
||||
help
|
||||
This options enables a library of common routines used
|
||||
@@ -213,17 +213,17 @@ config LIB80211
|
||||
Drivers should select this themselves if needed.
|
||||
|
||||
config LIB80211_CRYPT_WEP
|
||||
- tristate
|
||||
+ tristate "LIB80211_CRYPT_WEP"
|
||||
select CRYPTO_LIB_ARC4
|
||||
|
||||
config LIB80211_CRYPT_CCMP
|
||||
- tristate
|
||||
+ tristate "LIB80211_CRYPT_CCMP"
|
||||
select CRYPTO
|
||||
select CRYPTO_AES
|
||||
select CRYPTO_CCM
|
||||
|
||||
config LIB80211_CRYPT_TKIP
|
||||
- tristate
|
||||
+ tristate "LIB80211_CRYPT_TKIP"
|
||||
select CRYPTO_LIB_ARC4
|
||||
|
||||
config LIB80211_DEBUG
|
||||
--- a/sound/core/Kconfig
|
||||
+++ b/sound/core/Kconfig
|
||||
@@ -17,7 +17,7 @@ config SND_DMAENGINE_PCM
|
||||
tristate
|
||||
|
||||
config SND_HWDEP
|
||||
- tristate
|
||||
+ tristate "Sound hardware support"
|
||||
|
||||
config SND_SEQ_DEVICE
|
||||
tristate
|
||||
@@ -27,7 +27,7 @@ config SND_RAWMIDI
|
||||
select SND_SEQ_DEVICE if SND_SEQUENCER != n
|
||||
|
||||
config SND_COMPRESS_OFFLOAD
|
||||
- tristate
|
||||
+ tristate "Compression offloading support"
|
||||
|
||||
config SND_JACK
|
||||
bool
|
@ -1,23 +0,0 @@
|
||||
From 8c817e33be829c7249c2cfd59ff48ad5fac6a31d Mon Sep 17 00:00:00 2001
|
||||
From: Sungbo Eo <mans0n@gorani.run>
|
||||
Date: Fri, 7 Jul 2017 17:09:21 +0200
|
||||
Subject: [PATCH] kconfig: solidify SATA_PMP config
|
||||
|
||||
SATA_PMP option in kernel config file disappears for every kernel_oldconfig refresh.
|
||||
To prevent this, SATA_HOST is now selected automatically when SATA_PMP is enabled.
|
||||
This patch can be dropped if SATA_MV is ever re-added into the config.
|
||||
---
|
||||
drivers/ata/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/ata/Kconfig
|
||||
+++ b/drivers/ata/Kconfig
|
||||
@@ -112,7 +112,7 @@ config SATA_ZPODD
|
||||
|
||||
config SATA_PMP
|
||||
bool "SATA Port Multiplier support"
|
||||
- depends on SATA_HOST
|
||||
+ select SATA_HOST
|
||||
default y
|
||||
help
|
||||
This option adds support for SATA Port Multipliers
|
@ -1,22 +0,0 @@
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -1954,7 +1954,7 @@ config PADATA
|
||||
bool
|
||||
|
||||
config ASN1
|
||||
- tristate
|
||||
+ tristate "ASN1"
|
||||
help
|
||||
Build a simple ASN.1 grammar compiler that produces a bytecode output
|
||||
that can be interpreted by the ASN.1 stream decoder and used to
|
||||
--- a/lib/Kconfig
|
||||
+++ b/lib/Kconfig
|
||||
@@ -627,7 +627,7 @@ config LIBFDT
|
||||
bool
|
||||
|
||||
config OID_REGISTRY
|
||||
- tristate
|
||||
+ tristate "OID"
|
||||
help
|
||||
Enable fast lookup object identifier registry.
|
||||
|
@ -1,144 +0,0 @@
|
||||
From 811d9e2268a62b830cfe93cd8bc929afcb8b198b Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Sat, 15 Jul 2017 21:12:38 +0200
|
||||
Subject: kernel: move regmap bloat out of the kernel image if it is only being used in modules
|
||||
|
||||
lede-commit: 96f39119815028073583e4fca3a9c5fe9141e998
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
drivers/base/regmap/Kconfig | 15 ++++++++++-----
|
||||
drivers/base/regmap/Makefile | 12 ++++++++----
|
||||
drivers/base/regmap/regmap.c | 3 +++
|
||||
include/linux/regmap.h | 2 +-
|
||||
4 files changed, 22 insertions(+), 10 deletions(-)
|
||||
|
||||
--- a/drivers/base/regmap/Kconfig
|
||||
+++ b/drivers/base/regmap/Kconfig
|
||||
@@ -4,10 +4,9 @@
|
||||
# subsystems should select the appropriate symbols.
|
||||
|
||||
config REGMAP
|
||||
- default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ || REGMAP_SOUNDWIRE || REGMAP_SOUNDWIRE_MBQ || REGMAP_SCCB || REGMAP_I3C || REGMAP_SPI_AVMM || REGMAP_MDIO)
|
||||
select IRQ_DOMAIN if REGMAP_IRQ
|
||||
select MDIO_BUS if REGMAP_MDIO
|
||||
- bool
|
||||
+ tristate
|
||||
|
||||
config REGCACHE_COMPRESSED
|
||||
select LZO_COMPRESS
|
||||
@@ -15,53 +14,67 @@ config REGCACHE_COMPRESSED
|
||||
bool
|
||||
|
||||
config REGMAP_AC97
|
||||
+ select REGMAP
|
||||
tristate
|
||||
|
||||
config REGMAP_I2C
|
||||
+ select REGMAP
|
||||
tristate
|
||||
depends on I2C
|
||||
|
||||
config REGMAP_SLIMBUS
|
||||
+ select REGMAP
|
||||
tristate
|
||||
depends on SLIMBUS
|
||||
|
||||
config REGMAP_SPI
|
||||
+ select REGMAP
|
||||
tristate
|
||||
depends on SPI
|
||||
|
||||
config REGMAP_SPMI
|
||||
+ select REGMAP
|
||||
tristate
|
||||
depends on SPMI
|
||||
|
||||
config REGMAP_W1
|
||||
+ select REGMAP
|
||||
tristate
|
||||
depends on W1
|
||||
|
||||
config REGMAP_MDIO
|
||||
+ select REGMAP
|
||||
tristate
|
||||
|
||||
config REGMAP_MMIO
|
||||
+ select REGMAP
|
||||
tristate
|
||||
|
||||
config REGMAP_IRQ
|
||||
+ select REGMAP
|
||||
bool
|
||||
|
||||
config REGMAP_SOUNDWIRE
|
||||
+ select REGMAP
|
||||
tristate
|
||||
depends on SOUNDWIRE
|
||||
|
||||
config REGMAP_SOUNDWIRE_MBQ
|
||||
+ select REGMAP
|
||||
tristate
|
||||
depends on SOUNDWIRE
|
||||
|
||||
config REGMAP_SCCB
|
||||
+ select REGMAP
|
||||
tristate
|
||||
depends on I2C
|
||||
|
||||
config REGMAP_I3C
|
||||
+ select REGMAP
|
||||
tristate
|
||||
depends on I3C
|
||||
|
||||
config REGMAP_SPI_AVMM
|
||||
+ select REGMAP
|
||||
tristate
|
||||
depends on SPI
|
||||
--- a/drivers/base/regmap/Makefile
|
||||
+++ b/drivers/base/regmap/Makefile
|
||||
@@ -2,10 +2,14 @@
|
||||
# For include/trace/define_trace.h to include trace.h
|
||||
CFLAGS_regmap.o := -I$(src)
|
||||
|
||||
-obj-$(CONFIG_REGMAP) += regmap.o regcache.o
|
||||
-obj-$(CONFIG_REGMAP) += regcache-rbtree.o regcache-flat.o
|
||||
-obj-$(CONFIG_REGCACHE_COMPRESSED) += regcache-lzo.o
|
||||
-obj-$(CONFIG_DEBUG_FS) += regmap-debugfs.o
|
||||
+regmap-core-objs = regmap.o regcache.o regcache-rbtree.o regcache-flat.o
|
||||
+ifdef CONFIG_DEBUG_FS
|
||||
+regmap-core-objs += regmap-debugfs.o
|
||||
+endif
|
||||
+ifdef CONFIG_REGCACHE_COMPRESSED
|
||||
+regmap-core-objs += regcache-lzo.o
|
||||
+endif
|
||||
+obj-$(CONFIG_REGMAP) += regmap-core.o
|
||||
obj-$(CONFIG_REGMAP_AC97) += regmap-ac97.o
|
||||
obj-$(CONFIG_REGMAP_I2C) += regmap-i2c.o
|
||||
obj-$(CONFIG_REGMAP_SLIMBUS) += regmap-slimbus.o
|
||||
--- a/drivers/base/regmap/regmap.c
|
||||
+++ b/drivers/base/regmap/regmap.c
|
||||
@@ -9,6 +9,7 @@
|
||||
#include <linux/device.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/export.h>
|
||||
+#include <linux/module.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/property.h>
|
||||
@@ -3384,3 +3385,5 @@ static int __init regmap_initcall(void)
|
||||
return 0;
|
||||
}
|
||||
postcore_initcall(regmap_initcall);
|
||||
+
|
||||
+MODULE_LICENSE("GPL");
|
||||
--- a/include/linux/regmap.h
|
||||
+++ b/include/linux/regmap.h
|
||||
@@ -180,7 +180,7 @@ struct reg_sequence {
|
||||
__ret ?: __tmp; \
|
||||
})
|
||||
|
||||
-#ifdef CONFIG_REGMAP
|
||||
+#if IS_REACHABLE(CONFIG_REGMAP)
|
||||
|
||||
enum regmap_endian {
|
||||
/* Unspecified -> 0 -> Backwards compatible default */
|
@ -1,52 +0,0 @@
|
||||
From fd1799b0bf5efa46dd3e6dfbbf3955564807e508 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Fri, 7 Jul 2017 17:12:51 +0200
|
||||
Subject: kernel: prevent cryptomgr from pulling in useless extra dependencies for tests that are not run
|
||||
|
||||
Reduces kernel size after LZMA by about 5k on MIPS
|
||||
|
||||
lede-commit: 044c316167e076479a344c59905e5b435b84a77f
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
crypto/Kconfig | 13 ++++++-------
|
||||
crypto/algboss.c | 4 ++++
|
||||
2 files changed, 10 insertions(+), 7 deletions(-)
|
||||
|
||||
--- a/crypto/Kconfig
|
||||
+++ b/crypto/Kconfig
|
||||
@@ -142,13 +142,13 @@ config CRYPTO_MANAGER
|
||||
cbc(aes).
|
||||
|
||||
config CRYPTO_MANAGER2
|
||||
- def_tristate CRYPTO_MANAGER || (CRYPTO_MANAGER!=n && CRYPTO_ALGAPI=y)
|
||||
- select CRYPTO_AEAD2
|
||||
- select CRYPTO_HASH2
|
||||
- select CRYPTO_SKCIPHER2
|
||||
- select CRYPTO_AKCIPHER2
|
||||
- select CRYPTO_KPP2
|
||||
- select CRYPTO_ACOMP2
|
||||
+ def_tristate CRYPTO_MANAGER || (CRYPTO_MANAGER!=n && CRYPTO_ALGAPI=y && !CRYPTO_MANAGER_DISABLE_TESTS)
|
||||
+ select CRYPTO_AEAD2 if !CRYPTO_MANAGER_DISABLE_TESTS
|
||||
+ select CRYPTO_HASH2 if !CRYPTO_MANAGER_DISABLE_TESTS
|
||||
+ select CRYPTO_SKCIPHER2 if !CRYPTO_MANAGER_DISABLE_TESTS
|
||||
+ select CRYPTO_AKCIPHER2 if !CRYPTO_MANAGER_DISABLE_TESTS
|
||||
+ select CRYPTO_KPP2 if !CRYPTO_MANAGER_DISABLE_TESTS
|
||||
+ select CRYPTO_ACOMP2 if !CRYPTO_MANAGER_DISABLE_TESTS
|
||||
|
||||
config CRYPTO_USER
|
||||
tristate "Userspace cryptographic algorithm configuration"
|
||||
--- a/crypto/algboss.c
|
||||
+++ b/crypto/algboss.c
|
||||
@@ -211,8 +211,12 @@ static int cryptomgr_schedule_test(struc
|
||||
type = alg->cra_flags;
|
||||
|
||||
/* Do not test internal algorithms. */
|
||||
+#ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
|
||||
+ type |= CRYPTO_ALG_TESTED;
|
||||
+#else
|
||||
if (type & CRYPTO_ALG_INTERNAL)
|
||||
type |= CRYPTO_ALG_TESTED;
|
||||
+#endif
|
||||
|
||||
param->type = type;
|
||||
|
@ -1,15 +0,0 @@
|
||||
This makes it possible to select CONFIG_CRYPTO_LIB_ARC4 directly. We
|
||||
need this to be able to compile this into the kernel and make use of it
|
||||
from backports.
|
||||
|
||||
--- a/lib/crypto/Kconfig
|
||||
+++ b/lib/crypto/Kconfig
|
||||
@@ -6,7 +6,7 @@ config CRYPTO_LIB_AES
|
||||
tristate
|
||||
|
||||
config CRYPTO_LIB_ARC4
|
||||
- tristate
|
||||
+ tristate "ARC4 cipher library"
|
||||
|
||||
config CRYPTO_ARCH_HAVE_LIB_BLAKE2S
|
||||
bool
|
@ -1,84 +0,0 @@
|
||||
From 236c1acdfef5958010ac9814a9872e0a46fd78ee Mon Sep 17 00:00:00 2001
|
||||
From: John Crispin <john@phrozen.org>
|
||||
Date: Fri, 7 Jul 2017 17:13:44 +0200
|
||||
Subject: rfkill: add fake rfkill support
|
||||
|
||||
allow building of modules depending on RFKILL even if RFKILL is not enabled.
|
||||
|
||||
Signed-off-by: John Crispin <john@phrozen.org>
|
||||
---
|
||||
include/linux/rfkill.h | 2 +-
|
||||
net/Makefile | 2 +-
|
||||
net/rfkill/Kconfig | 14 +++++++++-----
|
||||
net/rfkill/Makefile | 2 +-
|
||||
4 files changed, 12 insertions(+), 8 deletions(-)
|
||||
|
||||
--- a/include/linux/rfkill.h
|
||||
+++ b/include/linux/rfkill.h
|
||||
@@ -64,7 +64,7 @@ struct rfkill_ops {
|
||||
int (*set_block)(void *data, bool blocked);
|
||||
};
|
||||
|
||||
-#if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE)
|
||||
+#if defined(CONFIG_RFKILL_FULL) || defined(CONFIG_RFKILL_FULL_MODULE)
|
||||
/**
|
||||
* rfkill_alloc - Allocate rfkill structure
|
||||
* @name: name of the struct -- the string is not copied internally
|
||||
--- a/net/Makefile
|
||||
+++ b/net/Makefile
|
||||
@@ -52,7 +52,7 @@ obj-$(CONFIG_TIPC) += tipc/
|
||||
obj-$(CONFIG_NETLABEL) += netlabel/
|
||||
obj-$(CONFIG_IUCV) += iucv/
|
||||
obj-$(CONFIG_SMC) += smc/
|
||||
-obj-$(CONFIG_RFKILL) += rfkill/
|
||||
+obj-$(CONFIG_RFKILL_FULL) += rfkill/
|
||||
obj-$(CONFIG_NET_9P) += 9p/
|
||||
obj-$(CONFIG_CAIF) += caif/
|
||||
obj-$(CONFIG_DCB) += dcb/
|
||||
--- a/net/rfkill/Kconfig
|
||||
+++ b/net/rfkill/Kconfig
|
||||
@@ -2,7 +2,11 @@
|
||||
#
|
||||
# RF switch subsystem configuration
|
||||
#
|
||||
-menuconfig RFKILL
|
||||
+config RFKILL
|
||||
+ bool
|
||||
+ default y
|
||||
+
|
||||
+menuconfig RFKILL_FULL
|
||||
tristate "RF switch subsystem support"
|
||||
help
|
||||
Say Y here if you want to have control over RF switches
|
||||
@@ -14,19 +18,19 @@ menuconfig RFKILL
|
||||
# LED trigger support
|
||||
config RFKILL_LEDS
|
||||
bool
|
||||
- depends on RFKILL
|
||||
+ depends on RFKILL_FULL
|
||||
depends on LEDS_TRIGGERS = y || RFKILL = LEDS_TRIGGERS
|
||||
default y
|
||||
|
||||
config RFKILL_INPUT
|
||||
bool "RF switch input support" if EXPERT
|
||||
- depends on RFKILL
|
||||
+ depends on RFKILL_FULL
|
||||
depends on INPUT = y || RFKILL = INPUT
|
||||
default y if !EXPERT
|
||||
|
||||
config RFKILL_GPIO
|
||||
tristate "GPIO RFKILL driver"
|
||||
- depends on RFKILL
|
||||
+ depends on RFKILL_FULL
|
||||
depends on GPIOLIB || COMPILE_TEST
|
||||
default n
|
||||
help
|
||||
--- a/net/rfkill/Makefile
|
||||
+++ b/net/rfkill/Makefile
|
||||
@@ -5,5 +5,5 @@
|
||||
|
||||
rfkill-y += core.o
|
||||
rfkill-$(CONFIG_RFKILL_INPUT) += input.o
|
||||
-obj-$(CONFIG_RFKILL) += rfkill.o
|
||||
+obj-$(CONFIG_RFKILL_FULL) += rfkill.o
|
||||
obj-$(CONFIG_RFKILL_GPIO) += rfkill-gpio.o
|
@ -1,64 +0,0 @@
|
||||
From: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
|
||||
Date: Fri, 7 Jun 2013 18:35:22 -0500
|
||||
Subject: MIPS: r4k_cache: use more efficient cache blast
|
||||
|
||||
Optimize the compiler output for larger cache blast cases that are
|
||||
common for DMA-based networking.
|
||||
|
||||
Signed-off-by: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
--- a/arch/mips/include/asm/r4kcache.h
|
||||
+++ b/arch/mips/include/asm/r4kcache.h
|
||||
@@ -286,14 +286,46 @@ static inline void prot##extra##blast_##
|
||||
unsigned long end) \
|
||||
{ \
|
||||
unsigned long lsize = cpu_##desc##_line_size(); \
|
||||
+ unsigned long lsize_2 = lsize * 2; \
|
||||
+ unsigned long lsize_3 = lsize * 3; \
|
||||
+ unsigned long lsize_4 = lsize * 4; \
|
||||
+ unsigned long lsize_5 = lsize * 5; \
|
||||
+ unsigned long lsize_6 = lsize * 6; \
|
||||
+ unsigned long lsize_7 = lsize * 7; \
|
||||
+ unsigned long lsize_8 = lsize * 8; \
|
||||
unsigned long addr = start & ~(lsize - 1); \
|
||||
- unsigned long aend = (end - 1) & ~(lsize - 1); \
|
||||
+ unsigned long aend = (end + lsize - 1) & ~(lsize - 1); \
|
||||
+ int lines = (aend - addr) / lsize; \
|
||||
\
|
||||
- while (1) { \
|
||||
+ while (lines >= 8) { \
|
||||
+ prot##cache_op(hitop, addr); \
|
||||
+ prot##cache_op(hitop, addr + lsize); \
|
||||
+ prot##cache_op(hitop, addr + lsize_2); \
|
||||
+ prot##cache_op(hitop, addr + lsize_3); \
|
||||
+ prot##cache_op(hitop, addr + lsize_4); \
|
||||
+ prot##cache_op(hitop, addr + lsize_5); \
|
||||
+ prot##cache_op(hitop, addr + lsize_6); \
|
||||
+ prot##cache_op(hitop, addr + lsize_7); \
|
||||
+ addr += lsize_8; \
|
||||
+ lines -= 8; \
|
||||
+ } \
|
||||
+ \
|
||||
+ if (lines & 0x4) { \
|
||||
+ prot##cache_op(hitop, addr); \
|
||||
+ prot##cache_op(hitop, addr + lsize); \
|
||||
+ prot##cache_op(hitop, addr + lsize_2); \
|
||||
+ prot##cache_op(hitop, addr + lsize_3); \
|
||||
+ addr += lsize_4; \
|
||||
+ } \
|
||||
+ \
|
||||
+ if (lines & 0x2) { \
|
||||
+ prot##cache_op(hitop, addr); \
|
||||
+ prot##cache_op(hitop, addr + lsize); \
|
||||
+ addr += lsize_2; \
|
||||
+ } \
|
||||
+ \
|
||||
+ if (lines & 0x1) { \
|
||||
prot##cache_op(hitop, addr); \
|
||||
- if (addr == aend) \
|
||||
- break; \
|
||||
- addr += lsize; \
|
||||
} \
|
||||
}
|
||||
|
@ -1,38 +0,0 @@
|
||||
From: John Crispin <john@phrozen.org>
|
||||
Subject: hack: kernel: add generic image_cmdline hack to MIPS targets
|
||||
|
||||
lede-commit: d59f5b3a987a48508257a0ddbaeadc7909f9f976
|
||||
Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
|
||||
---
|
||||
arch/mips/Kconfig | 4 ++++
|
||||
arch/mips/kernel/head.S | 6 ++++++
|
||||
2 files changed, 10 insertions(+)
|
||||
|
||||
--- a/arch/mips/Kconfig
|
||||
+++ b/arch/mips/Kconfig
|
||||
@@ -1112,6 +1112,10 @@ config MIPS_MSC
|
||||
config SYNC_R4K
|
||||
bool
|
||||
|
||||
+config IMAGE_CMDLINE_HACK
|
||||
+ bool "OpenWrt specific image command line hack"
|
||||
+ default n
|
||||
+
|
||||
config NO_IOPORT_MAP
|
||||
def_bool n
|
||||
|
||||
--- a/arch/mips/kernel/head.S
|
||||
+++ b/arch/mips/kernel/head.S
|
||||
@@ -79,6 +79,12 @@ FEXPORT(__kernel_entry)
|
||||
j kernel_entry
|
||||
#endif /* CONFIG_BOOT_RAW */
|
||||
|
||||
+#ifdef CONFIG_IMAGE_CMDLINE_HACK
|
||||
+ .ascii "CMDLINE:"
|
||||
+EXPORT(__image_cmdline)
|
||||
+ .fill 0x400
|
||||
+#endif /* CONFIG_IMAGE_CMDLINE_HACK */
|
||||
+
|
||||
__REF
|
||||
|
||||
NESTED(kernel_entry, 16, sp) # kernel entry point
|
@ -1,38 +0,0 @@
|
||||
From 107c0964cb8db7ca28ac5199426414fdab3c274d Mon Sep 17 00:00:00 2001
|
||||
From: "Alexandros C. Couloumbis" <alex@ozo.com>
|
||||
Date: Fri, 7 Jul 2017 17:14:51 +0200
|
||||
Subject: hack: arch: powerpc: drop register save/restore library from modules
|
||||
|
||||
Upstream GCC uses a libgcc function for saving/restoring registers. This
|
||||
makes the code bigger, and upstream kernels need to carry that function
|
||||
for every single kernel module. Our GCC is patched to avoid those
|
||||
references, so we can drop the extra bloat for modules.
|
||||
|
||||
lede-commit: e8e1084654f50904e6bf77b70b2de3f137d7b3ec
|
||||
Signed-off-by: Alexandros C. Couloumbis <alex@ozo.com>
|
||||
---
|
||||
arch/powerpc/Makefile | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
--- a/arch/powerpc/Makefile
|
||||
+++ b/arch/powerpc/Makefile
|
||||
@@ -42,19 +42,6 @@ machine-$(CONFIG_PPC64) += 64
|
||||
machine-$(CONFIG_CPU_LITTLE_ENDIAN) += le
|
||||
UTS_MACHINE := $(subst $(space),,$(machine-y))
|
||||
|
||||
-# XXX This needs to be before we override LD below
|
||||
-ifdef CONFIG_PPC32
|
||||
-KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
|
||||
-else
|
||||
-ifeq ($(call ld-ifversion, -ge, 22500, y),y)
|
||||
-# Have the linker provide sfpr if possible.
|
||||
-# There is a corresponding test in arch/powerpc/lib/Makefile
|
||||
-KBUILD_LDFLAGS_MODULE += --save-restore-funcs
|
||||
-else
|
||||
-KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
|
||||
-endif
|
||||
-endif
|
||||
-
|
||||
ifdef CONFIG_CPU_LITTLE_ENDIAN
|
||||
KBUILD_CFLAGS += -mlittle-endian
|
||||
KBUILD_LDFLAGS += -EL
|
@ -1,196 +0,0 @@
|
||||
--- a/block/blk.h
|
||||
+++ b/block/blk.h
|
||||
@@ -407,6 +407,8 @@ void blk_free_ext_minor(unsigned int min
|
||||
#define ADDPART_FLAG_NONE 0
|
||||
#define ADDPART_FLAG_RAID 1
|
||||
#define ADDPART_FLAG_WHOLEDISK 2
|
||||
+#define ADDPART_FLAG_READONLY 4
|
||||
+#define ADDPART_FLAG_ROOTDEV 8
|
||||
int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
|
||||
sector_t length);
|
||||
int bdev_del_partition(struct gendisk *disk, int partno);
|
||||
--- a/block/partitions/Kconfig
|
||||
+++ b/block/partitions/Kconfig
|
||||
@@ -103,6 +103,13 @@ config ATARI_PARTITION
|
||||
Say Y here if you would like to use hard disks under Linux which
|
||||
were partitioned under the Atari OS.
|
||||
|
||||
+config FIT_PARTITION
|
||||
+ bool "Flattened-Image-Tree (FIT) partition support" if PARTITION_ADVANCED
|
||||
+ default n
|
||||
+ help
|
||||
+ Say Y here if your system needs to mount the filesystem part of
|
||||
+ a Flattened-Image-Tree (FIT) image commonly used with Das U-Boot.
|
||||
+
|
||||
config IBM_PARTITION
|
||||
bool "IBM disk label and partition support"
|
||||
depends on PARTITION_ADVANCED && S390
|
||||
--- a/block/partitions/Makefile
|
||||
+++ b/block/partitions/Makefile
|
||||
@@ -8,6 +8,7 @@ obj-$(CONFIG_ACORN_PARTITION) += acorn.o
|
||||
obj-$(CONFIG_AMIGA_PARTITION) += amiga.o
|
||||
obj-$(CONFIG_ATARI_PARTITION) += atari.o
|
||||
obj-$(CONFIG_AIX_PARTITION) += aix.o
|
||||
+obj-$(CONFIG_FIT_PARTITION) += fit.o
|
||||
obj-$(CONFIG_CMDLINE_PARTITION) += cmdline.o
|
||||
obj-$(CONFIG_MAC_PARTITION) += mac.o
|
||||
obj-$(CONFIG_LDM_PARTITION) += ldm.o
|
||||
--- a/block/partitions/check.h
|
||||
+++ b/block/partitions/check.h
|
||||
@@ -57,6 +57,7 @@ int amiga_partition(struct parsed_partit
|
||||
int atari_partition(struct parsed_partitions *state);
|
||||
int cmdline_partition(struct parsed_partitions *state);
|
||||
int efi_partition(struct parsed_partitions *state);
|
||||
+int fit_partition(struct parsed_partitions *state);
|
||||
int ibm_partition(struct parsed_partitions *);
|
||||
int karma_partition(struct parsed_partitions *state);
|
||||
int ldm_partition(struct parsed_partitions *state);
|
||||
@@ -67,3 +68,5 @@ int sgi_partition(struct parsed_partitio
|
||||
int sun_partition(struct parsed_partitions *state);
|
||||
int sysv68_partition(struct parsed_partitions *state);
|
||||
int ultrix_partition(struct parsed_partitions *state);
|
||||
+
|
||||
+int parse_fit_partitions(struct parsed_partitions *state, u64 start_sector, u64 nr_sectors, int *slot, int add_remain);
|
||||
--- a/block/partitions/core.c
|
||||
+++ b/block/partitions/core.c
|
||||
@@ -10,6 +10,10 @@
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/raid/detect.h>
|
||||
+#ifdef CONFIG_FIT_PARTITION
|
||||
+#include <linux/root_dev.h>
|
||||
+#endif
|
||||
+
|
||||
#include "check.h"
|
||||
|
||||
static int (*check_part[])(struct parsed_partitions *) = {
|
||||
@@ -46,6 +50,9 @@ static int (*check_part[])(struct parsed
|
||||
#ifdef CONFIG_EFI_PARTITION
|
||||
efi_partition, /* this must come before msdos */
|
||||
#endif
|
||||
+#ifdef CONFIG_FIT_PARTITION
|
||||
+ fit_partition,
|
||||
+#endif
|
||||
#ifdef CONFIG_SGI_PARTITION
|
||||
sgi_partition,
|
||||
#endif
|
||||
@@ -398,6 +405,11 @@ static struct block_device *add_partitio
|
||||
goto out_del;
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_FIT_PARTITION
|
||||
+ if (flags & ADDPART_FLAG_READONLY)
|
||||
+ bdev->bd_read_only = true;
|
||||
+#endif
|
||||
+
|
||||
/* everything is up and running, commence */
|
||||
err = xa_insert(&disk->part_tbl, partno, bdev, GFP_KERNEL);
|
||||
if (err)
|
||||
@@ -585,6 +597,11 @@ static bool blk_add_partition(struct gen
|
||||
(state->parts[p].flags & ADDPART_FLAG_RAID))
|
||||
md_autodetect_dev(part->bd_dev);
|
||||
|
||||
+#ifdef CONFIG_FIT_PARTITION
|
||||
+ if ((state->parts[p].flags & ADDPART_FLAG_ROOTDEV) && ROOT_DEV == 0)
|
||||
+ ROOT_DEV = part->bd_dev;
|
||||
+#endif
|
||||
+
|
||||
return true;
|
||||
}
|
||||
|
||||
--- a/drivers/mtd/ubi/block.c
|
||||
+++ b/drivers/mtd/ubi/block.c
|
||||
@@ -433,6 +433,9 @@ int ubiblock_create(struct ubi_volume_in
|
||||
}
|
||||
gd->flags |= GENHD_FL_NO_PART;
|
||||
gd->private_data = dev;
|
||||
+#ifdef CONFIG_FIT_PARTITION
|
||||
+ gd->flags |= GENHD_FL_EXT_DEVT;
|
||||
+#endif
|
||||
sprintf(gd->disk_name, "ubiblock%d_%d", dev->ubi_num, dev->vol_id);
|
||||
set_capacity(gd, disk_capacity);
|
||||
dev->gd = gd;
|
||||
--- a/drivers/mtd/mtd_blkdevs.c
|
||||
+++ b/drivers/mtd/mtd_blkdevs.c
|
||||
@@ -346,6 +346,9 @@ int add_mtd_blktrans_dev(struct mtd_blkt
|
||||
gd->first_minor = (new->devnum) << tr->part_bits;
|
||||
gd->minors = 1 << tr->part_bits;
|
||||
gd->fops = &mtd_block_ops;
|
||||
+#ifdef CONFIG_FIT_PARTITION
|
||||
+ gd->flags |= GENHD_FL_EXT_DEVT;
|
||||
+#endif
|
||||
|
||||
if (tr->part_bits) {
|
||||
if (new->devnum < 26)
|
||||
--- a/block/partitions/efi.c
|
||||
+++ b/block/partitions/efi.c
|
||||
@@ -716,6 +716,9 @@ int efi_partition(struct parsed_partitio
|
||||
gpt_entry *ptes = NULL;
|
||||
u32 i;
|
||||
unsigned ssz = queue_logical_block_size(state->disk->queue) / 512;
|
||||
+#ifdef CONFIG_FIT_PARTITION
|
||||
+ u32 extra_slot = 64;
|
||||
+#endif
|
||||
|
||||
if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) {
|
||||
kfree(gpt);
|
||||
@@ -749,6 +752,11 @@ int efi_partition(struct parsed_partitio
|
||||
ARRAY_SIZE(ptes[i].partition_name));
|
||||
utf16_le_to_7bit(ptes[i].partition_name, label_max, info->volname);
|
||||
state->parts[i + 1].has_info = true;
|
||||
+#ifdef CONFIG_FIT_PARTITION
|
||||
+ /* If this is a U-Boot FIT volume it may have subpartitions */
|
||||
+ if (!efi_guidcmp(ptes[i].partition_type_guid, PARTITION_LINUX_FIT_GUID))
|
||||
+ (void) parse_fit_partitions(state, start * ssz, size * ssz, &extra_slot, 1);
|
||||
+#endif
|
||||
}
|
||||
kfree(ptes);
|
||||
kfree(gpt);
|
||||
--- a/block/partitions/efi.h
|
||||
+++ b/block/partitions/efi.h
|
||||
@@ -51,6 +51,9 @@
|
||||
#define PARTITION_LINUX_LVM_GUID \
|
||||
EFI_GUID( 0xe6d6d379, 0xf507, 0x44c2, \
|
||||
0xa2, 0x3c, 0x23, 0x8f, 0x2a, 0x3d, 0xf9, 0x28)
|
||||
+#define PARTITION_LINUX_FIT_GUID \
|
||||
+ EFI_GUID( 0xcae9be83, 0xb15f, 0x49cc, \
|
||||
+ 0x86, 0x3f, 0x08, 0x1b, 0x74, 0x4a, 0x2d, 0x93)
|
||||
|
||||
typedef struct _gpt_header {
|
||||
__le64 signature;
|
||||
--- a/block/partitions/msdos.c
|
||||
+++ b/block/partitions/msdos.c
|
||||
@@ -564,6 +564,15 @@ static void parse_minix(struct parsed_pa
|
||||
#endif /* CONFIG_MINIX_SUBPARTITION */
|
||||
}
|
||||
|
||||
+static void parse_fit_mbr(struct parsed_partitions *state,
|
||||
+ sector_t offset, sector_t size, int origin)
|
||||
+{
|
||||
+#ifdef CONFIG_FIT_PARTITION
|
||||
+ u32 extra_slot = 64;
|
||||
+ (void) parse_fit_partitions(state, offset, size, &extra_slot, 1);
|
||||
+#endif /* CONFIG_FIT_PARTITION */
|
||||
+}
|
||||
+
|
||||
static struct {
|
||||
unsigned char id;
|
||||
void (*parse)(struct parsed_partitions *, sector_t, sector_t, int);
|
||||
@@ -575,6 +584,7 @@ static struct {
|
||||
{UNIXWARE_PARTITION, parse_unixware},
|
||||
{SOLARIS_X86_PARTITION, parse_solaris_x86},
|
||||
{NEW_SOLARIS_X86_PARTITION, parse_solaris_x86},
|
||||
+ {FIT_PARTITION, parse_fit_mbr},
|
||||
{0, NULL},
|
||||
};
|
||||
|
||||
--- a/include/linux/msdos_partition.h
|
||||
+++ b/include/linux/msdos_partition.h
|
||||
@@ -31,6 +31,7 @@ enum msdos_sys_ind {
|
||||
LINUX_LVM_PARTITION = 0x8e,
|
||||
LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */
|
||||
|
||||
+ FIT_PARTITION = 0x2e, /* U-Boot uImage.FIT */
|
||||
SOLARIS_X86_PARTITION = 0x82, /* also Linux swap partitions */
|
||||
NEW_SOLARIS_X86_PARTITION = 0xbf,
|
||||
|
@ -1,39 +0,0 @@
|
||||
From: Gabor Juhos <juhosg@openwrt.org>
|
||||
Subject: kernel/3.1[02]: move MTD root device setup code to mtdcore
|
||||
|
||||
The current code only allows the root device to be set automatically
|
||||
on MTD partitions. Move the code to the MTD
|
||||
core so that it can be used with all MTD devices.
|
||||
|
||||
Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
|
||||
---
|
||||
drivers/mtd/mtdcore.c | 10 ++++++++++
|
||||
1 file changed, 10 insertions(+)
|
||||
|
||||
--- a/drivers/mtd/mtdcore.c
|
||||
+++ b/drivers/mtd/mtdcore.c
|
||||
@@ -27,6 +27,7 @@
|
||||
#include <linux/reboot.h>
|
||||
#include <linux/leds.h>
|
||||
#include <linux/debugfs.h>
|
||||
+#include <linux/root_dev.h>
|
||||
#include <linux/nvmem-provider.h>
|
||||
|
||||
#include <linux/mtd/mtd.h>
|
||||
@@ -748,6 +749,16 @@ int add_mtd_device(struct mtd_info *mtd)
|
||||
of this try_ nonsense, and no bitching about it
|
||||
either. :) */
|
||||
__module_get(THIS_MODULE);
|
||||
+
|
||||
+ if (!strcmp(mtd->name, "rootfs") &&
|
||||
+ IS_ENABLED(CONFIG_MTD_ROOTFS_ROOT_DEV) &&
|
||||
+ ROOT_DEV == 0) {
|
||||
+ unsigned int index = mtd->index;
|
||||
+ pr_notice("mtd: device %d (%s) set to be root filesystem\n",
|
||||
+ mtd->index, mtd->name);
|
||||
+ ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, index);
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
|
||||
fail_nvmem_add:
|
@ -1,120 +0,0 @@
|
||||
From 6fa9e3678eb002246df1280322b6a024853950a5 Mon Sep 17 00:00:00 2001
|
||||
From: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
Date: Mon, 11 Oct 2021 00:53:14 +0200
|
||||
Subject: [PATCH] drivers: mtd: parsers: add nvmem support to cmdlinepart
|
||||
|
||||
Assuming cmdlinepart is only a one-level-deep partition scheme and that
|
||||
static partitions are also defined in DTS, we can assign an of_node to
|
||||
partitions declared from bootargs. cmdlinepart has priority over the
|
||||
fixed-partitions parser, so in this specific case the parser doesn't
|
||||
assign an of_node. Fix this by searching for a defined of_node using
|
||||
logic similar to the fixed-partitions parser; if a partition is found with the same
|
||||
label, check that it has the same offset and size and return the DT
|
||||
of_node so that NVMEM cells can be used correctly.
|
||||
|
||||
Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
|
||||
---
|
||||
drivers/mtd/parsers/cmdlinepart.c | 71 +++++++++++++++++++++++++++++++
|
||||
1 file changed, 71 insertions(+)
|
||||
|
||||
--- a/drivers/mtd/parsers/cmdlinepart.c
|
||||
+++ b/drivers/mtd/parsers/cmdlinepart.c
|
||||
@@ -43,6 +43,7 @@
|
||||
#include <linux/mtd/partitions.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/err.h>
|
||||
+#include <linux/of.h>
|
||||
|
||||
/* debug macro */
|
||||
#if 0
|
||||
@@ -323,6 +324,68 @@ static int mtdpart_setup_real(char *s)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int search_fixed_partition(struct mtd_info *master,
|
||||
+ struct mtd_partition *target_part,
|
||||
+ struct mtd_partition *fixed_part)
|
||||
+{
|
||||
+ struct device_node *mtd_node;
|
||||
+ struct device_node *ofpart_node;
|
||||
+ struct device_node *pp;
|
||||
+ struct mtd_partition part;
|
||||
+ const char *partname;
|
||||
+
|
||||
+ mtd_node = mtd_get_of_node(master);
|
||||
+ if (!mtd_node)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ ofpart_node = of_get_child_by_name(mtd_node, "partitions");
|
||||
+
|
||||
+ for_each_child_of_node(ofpart_node, pp) {
|
||||
+ const __be32 *reg;
|
||||
+ int len;
|
||||
+ int a_cells, s_cells;
|
||||
+
|
||||
+ reg = of_get_property(pp, "reg", &len);
|
||||
+ if (!reg) {
|
||||
+ pr_debug("%s: ofpart partition %pOF (%pOF) missing reg property.\n",
|
||||
+ master->name, pp,
|
||||
+ mtd_node);
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ a_cells = of_n_addr_cells(pp);
|
||||
+ s_cells = of_n_size_cells(pp);
|
||||
+ if (len / 4 != a_cells + s_cells) {
|
||||
+ pr_debug("%s: ofpart partition %pOF (%pOF) error parsing reg property.\n",
|
||||
+ master->name, pp,
|
||||
+ mtd_node);
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ part.offset = of_read_number(reg, a_cells);
|
||||
+ part.size = of_read_number(reg + a_cells, s_cells);
|
||||
+ part.of_node = pp;
|
||||
+
|
||||
+ partname = of_get_property(pp, "label", &len);
|
||||
+ if (!partname)
|
||||
+ partname = of_get_property(pp, "name", &len);
|
||||
+ part.name = partname;
|
||||
+
|
||||
+ if (!strncmp(target_part->name, part.name, len)) {
|
||||
+ if (part.offset != target_part->offset)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ if (part.size != target_part->size)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ memcpy(fixed_part, &part, sizeof(struct mtd_partition));
|
||||
+ return 0;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return -EINVAL;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Main function to be called from the MTD mapping driver/device to
|
||||
* obtain the partitioning information. At this point the command line
|
||||
@@ -338,6 +401,7 @@ static int parse_cmdline_partitions(stru
|
||||
int i, err;
|
||||
struct cmdline_mtd_partition *part;
|
||||
const char *mtd_id = master->name;
|
||||
+ struct mtd_partition fixed_part;
|
||||
|
||||
/* parse command line */
|
||||
if (!cmdline_parsed) {
|
||||
@@ -382,6 +446,13 @@ static int parse_cmdline_partitions(stru
|
||||
sizeof(*part->parts) * (part->num_parts - i));
|
||||
i--;
|
||||
}
|
||||
+
|
||||
+ err = search_fixed_partition(master, &part->parts[i], &fixed_part);
|
||||
+ if (!err) {
|
||||
+ part->parts[i].of_node = fixed_part.of_node;
|
||||
+ pr_info("Found partition defined in DT for %s. Assigning OF node to support nvmem.",
|
||||
+ part->parts[i].name);
|
||||
+ }
|
||||
}
|
||||
|
||||
*pparts = kmemdup(part->parts, sizeof(*part->parts) * part->num_parts,
|
@ -1,23 +0,0 @@
|
||||
--- a/drivers/mtd/nand/Kconfig
|
||||
+++ b/drivers/mtd/nand/Kconfig
|
||||
@@ -61,6 +61,10 @@ config MTD_NAND_ECC_MEDIATEK
|
||||
help
|
||||
This enables support for the hardware ECC engine from Mediatek.
|
||||
|
||||
+config MTD_NAND_MTK_BMT
|
||||
+ bool "Support MediaTek NAND Bad-block Management Table"
|
||||
+ default n
|
||||
+
|
||||
endmenu
|
||||
|
||||
endmenu
|
||||
--- a/drivers/mtd/nand/Makefile
|
||||
+++ b/drivers/mtd/nand/Makefile
|
||||
@@ -3,6 +3,7 @@
|
||||
nandcore-objs := core.o bbt.o
|
||||
obj-$(CONFIG_MTD_NAND_CORE) += nandcore.o
|
||||
obj-$(CONFIG_MTD_NAND_ECC_MEDIATEK) += ecc-mtk.o
|
||||
+obj-$(CONFIG_MTD_NAND_MTK_BMT) += mtk_bmt.o mtk_bmt_v2.o mtk_bmt_bbt.o mtk_bmt_nmbm.o
|
||||
|
||||
obj-y += onenand/
|
||||
obj-y += raw/
|
File diff suppressed because it is too large
Load Diff
@ -1,41 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Fri, 7 Jul 2017 17:18:54 +0200
|
||||
Subject: bridge: only accept EAP locally
|
||||
|
||||
When bridging, do not forward EAP frames to other ports, only deliver
|
||||
them locally, regardless of the state.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
[add disable_eap_hack sysfs attribute]
|
||||
Signed-off-by: Etienne Champetier <champetier.etienne@gmail.com>
|
||||
---
|
||||
|
||||
--- a/net/bridge/br_input.c
|
||||
+++ b/net/bridge/br_input.c
|
||||
@@ -133,10 +133,14 @@ int br_handle_frame_finish(struct net *n
|
||||
}
|
||||
}
|
||||
|
||||
+ BR_INPUT_SKB_CB(skb)->brdev = br->dev;
|
||||
+
|
||||
+ if (skb->protocol == htons(ETH_P_PAE) && !br->disable_eap_hack)
|
||||
+ return br_pass_frame_up(skb);
|
||||
+
|
||||
if (state == BR_STATE_LEARNING)
|
||||
goto drop;
|
||||
|
||||
- BR_INPUT_SKB_CB(skb)->brdev = br->dev;
|
||||
BR_INPUT_SKB_CB(skb)->src_port_isolated = !!(p->flags & BR_ISOLATED);
|
||||
|
||||
if (IS_ENABLED(CONFIG_INET) &&
|
||||
--- a/net/bridge/br_private.h
|
||||
+++ b/net/bridge/br_private.h
|
||||
@@ -482,6 +482,8 @@ struct net_bridge {
|
||||
u16 group_fwd_mask;
|
||||
u16 group_fwd_mask_required;
|
||||
|
||||
+ bool disable_eap_hack;
|
||||
+
|
||||
/* STP */
|
||||
bridge_id designated_root;
|
||||
bridge_id bridge_id;
|
@ -1,214 +0,0 @@
|
||||
From eda40b8c8c82e0f2789d6bc8bf63846dce2e8f32 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
Date: Sat, 23 Mar 2019 09:29:49 +0000
|
||||
Subject: [PATCH] netfilter: connmark: introduce set-dscpmark
|
||||
|
||||
set-dscpmark is a method of storing the DSCP of an ip packet into
|
||||
conntrack mark. In combination with a suitable tc filter action
|
||||
(act_ctinfo) DSCP values are able to be stored in the mark on egress and
|
||||
restored on ingress across links that otherwise alter or bleach DSCP.
|
||||
|
||||
This is useful for qdiscs such as CAKE which are able to shape according
|
||||
to policies based on DSCP.
|
||||
|
||||
Ingress classification is traditionally a challenging task since
|
||||
iptables rules haven't yet run and tc filter/eBPF programs are pre-NAT
|
||||
lookups, hence are unable to see internal IPv4 addresses as used on the
|
||||
typical home masquerading gateway.
|
||||
|
||||
x_tables CONNMARK set-dscpmark target solves the problem of storing the
|
||||
DSCP to the conntrack mark in a way suitable for the new act_ctinfo tc
|
||||
action to restore.
|
||||
|
||||
The set-dscpmark option accepts 2 parameters, a 32bit 'dscpmask' and a
|
||||
32bit 'statemask'. The dscp mask must be 6 contiguous bits and
|
||||
represents the area where the DSCP will be stored in the connmark. The
|
||||
state mask is a minimum 1 bit length mask that must not overlap with the
|
||||
dscpmask. It represents a flag which is set when the DSCP has been
|
||||
stored in the conntrack mark. This is useful to implement a 'one shot'
|
||||
iptables based classification where the 'complicated' iptables rules are
|
||||
only run once to classify the connection on initial (egress) packet and
|
||||
subsequent packets are all marked/restored with the same DSCP. A state
|
||||
mask of zero disables the setting of a status bit/s.
|
||||
|
||||
example syntax with a suitably modified iptables user space application:
|
||||
|
||||
iptables -A QOS_MARK_eth0 -t mangle -j CONNMARK --set-dscpmark 0xfc000000/0x01000000
|
||||
|
||||
Would store the DSCP in the top 6 bits of the 32bit mark field, and use
|
||||
the LSB of the top byte as the 'DSCP has been stored' marker.
|
||||
|
||||
|----0xFC----conntrack mark----000000---|
|
||||
| Bits 31-26 | bit 25 | bit24 |~~~ Bit 0|
|
||||
| DSCP | unused | flag |unused |
|
||||
|-----------------------0x01---000000---|
|
||||
^ ^
|
||||
| |
|
||||
---| Conditional flag
|
||||
| set this when dscp
|
||||
|-ip diffserv-| stored in mark
|
||||
| 6 bits |
|
||||
|-------------|
|
||||
|
||||
an identically configured tc action to restore looks like:
|
||||
|
||||
tc filter show dev eth0 ingress
|
||||
filter parent ffff: protocol all pref 10 u32 chain 0
|
||||
filter parent ffff: protocol all pref 10 u32 chain 0 fh 800: ht divisor 1
|
||||
filter parent ffff: protocol all pref 10 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1: not_in_hw
|
||||
match 00000000/00000000 at 0
|
||||
action order 1: ctinfo zone 0 pipe
|
||||
index 2 ref 1 bind 1 dscp 0xfc000000/0x1000000
|
||||
|
||||
action order 2: mirred (Egress Redirect to device ifb4eth0) stolen
|
||||
index 1 ref 1 bind 1
|
||||
|
||||
|----0xFC----conntrack mark----000000---|
|
||||
| Bits 31-26 | bit 25 | bit24 |~~~ Bit 0|
|
||||
| DSCP | unused | flag |unused |
|
||||
|-----------------------0x01---000000---|
|
||||
| |
|
||||
| |
|
||||
---| Conditional flag
|
||||
v only restore if set
|
||||
|-ip diffserv-|
|
||||
| 6 bits |
|
||||
|-------------|
|
||||
|
||||
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
|
||||
---
|
||||
include/uapi/linux/netfilter/xt_connmark.h | 10 ++++
|
||||
net/netfilter/xt_connmark.c | 55 ++++++++++++++++++----
|
||||
2 files changed, 57 insertions(+), 8 deletions(-)
|
||||
|
||||
--- a/include/uapi/linux/netfilter/xt_connmark.h
|
||||
+++ b/include/uapi/linux/netfilter/xt_connmark.h
|
||||
@@ -20,6 +20,11 @@ enum {
|
||||
};
|
||||
|
||||
enum {
|
||||
+ XT_CONNMARK_VALUE = (1 << 0),
|
||||
+ XT_CONNMARK_DSCP = (1 << 1)
|
||||
+};
|
||||
+
|
||||
+enum {
|
||||
D_SHIFT_LEFT = 0,
|
||||
D_SHIFT_RIGHT,
|
||||
};
|
||||
@@ -34,6 +39,11 @@ struct xt_connmark_tginfo2 {
|
||||
__u8 shift_dir, shift_bits, mode;
|
||||
};
|
||||
|
||||
+struct xt_connmark_tginfo3 {
|
||||
+ __u32 ctmark, ctmask, nfmask;
|
||||
+ __u8 shift_dir, shift_bits, mode, func;
|
||||
+};
|
||||
+
|
||||
struct xt_connmark_mtinfo1 {
|
||||
__u32 mark, mask;
|
||||
__u8 invert;
|
||||
--- a/net/netfilter/xt_connmark.c
|
||||
+++ b/net/netfilter/xt_connmark.c
|
||||
@@ -24,13 +24,13 @@ MODULE_ALIAS("ipt_connmark");
|
||||
MODULE_ALIAS("ip6t_connmark");
|
||||
|
||||
static unsigned int
|
||||
-connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info)
|
||||
+connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo3 *info)
|
||||
{
|
||||
enum ip_conntrack_info ctinfo;
|
||||
u_int32_t new_targetmark;
|
||||
struct nf_conn *ct;
|
||||
u_int32_t newmark;
|
||||
- u_int32_t oldmark;
|
||||
+ u_int8_t dscp;
|
||||
|
||||
ct = nf_ct_get(skb, &ctinfo);
|
||||
if (ct == NULL)
|
||||
@@ -38,13 +38,24 @@ connmark_tg_shift(struct sk_buff *skb, c
|
||||
|
||||
switch (info->mode) {
|
||||
case XT_CONNMARK_SET:
|
||||
- oldmark = READ_ONCE(ct->mark);
|
||||
- newmark = (oldmark & ~info->ctmask) ^ info->ctmark;
|
||||
- if (info->shift_dir == D_SHIFT_RIGHT)
|
||||
- newmark >>= info->shift_bits;
|
||||
- else
|
||||
- newmark <<= info->shift_bits;
|
||||
+ newmark = READ_ONCE(ct->mark);
|
||||
+ if (info->func & XT_CONNMARK_VALUE) {
|
||||
+ newmark = (newmark & ~info->ctmask) ^ info->ctmark;
|
||||
+ if (info->shift_dir == D_SHIFT_RIGHT)
|
||||
+ newmark >>= info->shift_bits;
|
||||
+ else
|
||||
+ newmark <<= info->shift_bits;
|
||||
+ } else if (info->func & XT_CONNMARK_DSCP) {
|
||||
+ if (skb->protocol == htons(ETH_P_IP))
|
||||
+ dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
|
||||
+ else if (skb->protocol == htons(ETH_P_IPV6))
|
||||
+ dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
|
||||
+ else /* protocol doesn't have diffserv */
|
||||
+ break;
|
||||
|
||||
+ newmark = (newmark & ~info->ctmark) |
|
||||
+ (info->ctmask | (dscp << info->shift_bits));
|
||||
+ }
|
||||
if (READ_ONCE(ct->mark) != newmark) {
|
||||
WRITE_ONCE(ct->mark, newmark);
|
||||
nf_conntrack_event_cache(IPCT_MARK, ct);
|
||||
@@ -83,20 +94,36 @@ static unsigned int
|
||||
connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
|
||||
{
|
||||
const struct xt_connmark_tginfo1 *info = par->targinfo;
|
||||
- const struct xt_connmark_tginfo2 info2 = {
|
||||
+ const struct xt_connmark_tginfo3 info3 = {
|
||||
.ctmark = info->ctmark,
|
||||
.ctmask = info->ctmask,
|
||||
.nfmask = info->nfmask,
|
||||
.mode = info->mode,
|
||||
+ .func = XT_CONNMARK_VALUE
|
||||
};
|
||||
|
||||
- return connmark_tg_shift(skb, &info2);
|
||||
+ return connmark_tg_shift(skb, &info3);
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
connmark_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
|
||||
{
|
||||
const struct xt_connmark_tginfo2 *info = par->targinfo;
|
||||
+ const struct xt_connmark_tginfo3 info3 = {
|
||||
+ .ctmark = info->ctmark,
|
||||
+ .ctmask = info->ctmask,
|
||||
+ .nfmask = info->nfmask,
|
||||
+ .mode = info->mode,
|
||||
+ .func = XT_CONNMARK_VALUE
|
||||
+ };
|
||||
+
|
||||
+ return connmark_tg_shift(skb, &info3);
|
||||
+}
|
||||
+
|
||||
+static unsigned int
|
||||
+connmark_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
|
||||
+{
|
||||
+ const struct xt_connmark_tginfo3 *info = par->targinfo;
|
||||
|
||||
return connmark_tg_shift(skb, info);
|
||||
}
|
||||
@@ -167,6 +194,16 @@ static struct xt_target connmark_tg_reg[
|
||||
.targetsize = sizeof(struct xt_connmark_tginfo2),
|
||||
.destroy = connmark_tg_destroy,
|
||||
.me = THIS_MODULE,
|
||||
+ },
|
||||
+ {
|
||||
+ .name = "CONNMARK",
|
||||
+ .revision = 3,
|
||||
+ .family = NFPROTO_UNSPEC,
|
||||
+ .checkentry = connmark_tg_check,
|
||||
+ .target = connmark_tg_v3,
|
||||
+ .targetsize = sizeof(struct xt_connmark_tginfo3),
|
||||
+ .destroy = connmark_tg_destroy,
|
||||
+ .me = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
@ -1,776 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Tue, 20 Feb 2018 15:56:02 +0100
|
||||
Subject: [PATCH] netfilter: add xt_FLOWOFFLOAD target
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
create mode 100644 net/netfilter/xt_FLOWOFFLOAD.c
|
||||
|
||||
--- a/net/netfilter/Kconfig
|
||||
+++ b/net/netfilter/Kconfig
|
||||
@@ -712,8 +712,6 @@ config NFT_REJECT_NETDEV
|
||||
|
||||
endif # NF_TABLES_NETDEV
|
||||
|
||||
-endif # NF_TABLES
|
||||
-
|
||||
config NF_FLOW_TABLE_INET
|
||||
tristate "Netfilter flow table mixed IPv4/IPv6 module"
|
||||
depends on NF_FLOW_TABLE
|
||||
@@ -722,11 +720,12 @@ config NF_FLOW_TABLE_INET
|
||||
|
||||
To compile it as a module, choose M here.
|
||||
|
||||
+endif # NF_TABLES
|
||||
+
|
||||
config NF_FLOW_TABLE
|
||||
tristate "Netfilter flow table module"
|
||||
depends on NETFILTER_INGRESS
|
||||
depends on NF_CONNTRACK
|
||||
- depends on NF_TABLES
|
||||
help
|
||||
This option adds the flow table core infrastructure.
|
||||
|
||||
@@ -1023,6 +1022,15 @@ config NETFILTER_XT_TARGET_NOTRACK
|
||||
depends on NETFILTER_ADVANCED
|
||||
select NETFILTER_XT_TARGET_CT
|
||||
|
||||
+config NETFILTER_XT_TARGET_FLOWOFFLOAD
|
||||
+ tristate '"FLOWOFFLOAD" target support'
|
||||
+ depends on NF_FLOW_TABLE
|
||||
+ depends on NETFILTER_INGRESS
|
||||
+ help
|
||||
+ This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
|
||||
+ module to speed up processing of packets by bypassing the usual
|
||||
+ netfilter chains
|
||||
+
|
||||
config NETFILTER_XT_TARGET_RATEEST
|
||||
tristate '"RATEEST" target support'
|
||||
depends on NETFILTER_ADVANCED
|
||||
--- a/net/netfilter/Makefile
|
||||
+++ b/net/netfilter/Makefile
|
||||
@@ -148,6 +148,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIF
|
||||
obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
|
||||
obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
|
||||
obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
|
||||
+obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
|
||||
obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
|
||||
obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
|
||||
obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
|
||||
--- /dev/null
|
||||
+++ b/net/netfilter/xt_FLOWOFFLOAD.c
|
||||
@@ -0,0 +1,694 @@
|
||||
+/*
|
||||
+ * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
|
||||
+ *
|
||||
+ * This program is free software; you can redistribute it and/or modify
|
||||
+ * it under the terms of the GNU General Public License version 2 as
|
||||
+ * published by the Free Software Foundation.
|
||||
+ */
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/netfilter.h>
|
||||
+#include <linux/netfilter/xt_FLOWOFFLOAD.h>
|
||||
+#include <linux/if_vlan.h>
|
||||
+#include <net/ip.h>
|
||||
+#include <net/netfilter/nf_conntrack.h>
|
||||
+#include <net/netfilter/nf_conntrack_extend.h>
|
||||
+#include <net/netfilter/nf_conntrack_helper.h>
|
||||
+#include <net/netfilter/nf_flow_table.h>
|
||||
+
|
||||
+struct xt_flowoffload_hook {
|
||||
+ struct hlist_node list;
|
||||
+ struct nf_hook_ops ops;
|
||||
+ struct net *net;
|
||||
+ bool registered;
|
||||
+ bool used;
|
||||
+};
|
||||
+
|
||||
+struct xt_flowoffload_table {
|
||||
+ struct nf_flowtable ft;
|
||||
+ struct hlist_head hooks;
|
||||
+ struct delayed_work work;
|
||||
+};
|
||||
+
|
||||
+struct nf_forward_info {
|
||||
+ const struct net_device *indev;
|
||||
+ const struct net_device *outdev;
|
||||
+ const struct net_device *hw_outdev;
|
||||
+ struct id {
|
||||
+ __u16 id;
|
||||
+ __be16 proto;
|
||||
+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
|
||||
+ u8 num_encaps;
|
||||
+ u8 ingress_vlans;
|
||||
+ u8 h_source[ETH_ALEN];
|
||||
+ u8 h_dest[ETH_ALEN];
|
||||
+ enum flow_offload_xmit_type xmit_type;
|
||||
+};
|
||||
+
|
||||
+static DEFINE_SPINLOCK(hooks_lock);
|
||||
+
|
||||
+struct xt_flowoffload_table flowtable[2];
|
||||
+
|
||||
+static unsigned int
|
||||
+xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
|
||||
+ const struct nf_hook_state *state)
|
||||
+{
|
||||
+ struct vlan_ethhdr *veth;
|
||||
+ __be16 proto;
|
||||
+
|
||||
+ switch (skb->protocol) {
|
||||
+ case htons(ETH_P_8021Q):
|
||||
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
|
||||
+ proto = veth->h_vlan_encapsulated_proto;
|
||||
+ break;
|
||||
+ case htons(ETH_P_PPP_SES):
|
||||
+ proto = nf_flow_pppoe_proto(skb);
|
||||
+ break;
|
||||
+ default:
|
||||
+ proto = skb->protocol;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ switch (proto) {
|
||||
+ case htons(ETH_P_IP):
|
||||
+ return nf_flow_offload_ip_hook(priv, skb, state);
|
||||
+ case htons(ETH_P_IPV6):
|
||||
+ return nf_flow_offload_ipv6_hook(priv, skb, state);
|
||||
+ }
|
||||
+
|
||||
+ return NF_ACCEPT;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+xt_flowoffload_create_hook(struct xt_flowoffload_table *table,
|
||||
+ struct net_device *dev)
|
||||
+{
|
||||
+ struct xt_flowoffload_hook *hook;
|
||||
+ struct nf_hook_ops *ops;
|
||||
+
|
||||
+ hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
|
||||
+ if (!hook)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ ops = &hook->ops;
|
||||
+ ops->pf = NFPROTO_NETDEV;
|
||||
+ ops->hooknum = NF_NETDEV_INGRESS;
|
||||
+ ops->priority = 10;
|
||||
+ ops->priv = &table->ft;
|
||||
+ ops->hook = xt_flowoffload_net_hook;
|
||||
+ ops->dev = dev;
|
||||
+
|
||||
+ hlist_add_head(&hook->list, &table->hooks);
|
||||
+ mod_delayed_work(system_power_efficient_wq, &table->work, 0);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static struct xt_flowoffload_hook *
|
||||
+flow_offload_lookup_hook(struct xt_flowoffload_table *table,
|
||||
+ struct net_device *dev)
|
||||
+{
|
||||
+ struct xt_flowoffload_hook *hook;
|
||||
+
|
||||
+ hlist_for_each_entry(hook, &table->hooks, list) {
|
||||
+ if (hook->ops.dev == dev)
|
||||
+ return hook;
|
||||
+ }
|
||||
+
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+xt_flowoffload_check_device(struct xt_flowoffload_table *table,
|
||||
+ struct net_device *dev)
|
||||
+{
|
||||
+ struct xt_flowoffload_hook *hook;
|
||||
+
|
||||
+ if (!dev)
|
||||
+ return;
|
||||
+
|
||||
+ spin_lock_bh(&hooks_lock);
|
||||
+ hook = flow_offload_lookup_hook(table, dev);
|
||||
+ if (hook)
|
||||
+ hook->used = true;
|
||||
+ else
|
||||
+ xt_flowoffload_create_hook(table, dev);
|
||||
+ spin_unlock_bh(&hooks_lock);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+xt_flowoffload_register_hooks(struct xt_flowoffload_table *table)
|
||||
+{
|
||||
+ struct xt_flowoffload_hook *hook;
|
||||
+
|
||||
+restart:
|
||||
+ hlist_for_each_entry(hook, &table->hooks, list) {
|
||||
+ if (hook->registered)
|
||||
+ continue;
|
||||
+
|
||||
+ hook->registered = true;
|
||||
+ hook->net = dev_net(hook->ops.dev);
|
||||
+ spin_unlock_bh(&hooks_lock);
|
||||
+ nf_register_net_hook(hook->net, &hook->ops);
|
||||
+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
|
||||
+ table->ft.type->setup(&table->ft, hook->ops.dev,
|
||||
+ FLOW_BLOCK_BIND);
|
||||
+ spin_lock_bh(&hooks_lock);
|
||||
+ goto restart;
|
||||
+ }
|
||||
+
|
||||
+}
|
||||
+
|
||||
+static bool
|
||||
+xt_flowoffload_cleanup_hooks(struct xt_flowoffload_table *table)
|
||||
+{
|
||||
+ struct xt_flowoffload_hook *hook;
|
||||
+ bool active = false;
|
||||
+
|
||||
+restart:
|
||||
+ spin_lock_bh(&hooks_lock);
|
||||
+ hlist_for_each_entry(hook, &table->hooks, list) {
|
||||
+ if (hook->used || !hook->registered) {
|
||||
+ active = true;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ hlist_del(&hook->list);
|
||||
+ spin_unlock_bh(&hooks_lock);
|
||||
+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
|
||||
+ table->ft.type->setup(&table->ft, hook->ops.dev,
|
||||
+ FLOW_BLOCK_UNBIND);
|
||||
+ nf_unregister_net_hook(hook->net, &hook->ops);
|
||||
+ kfree(hook);
|
||||
+ goto restart;
|
||||
+ }
|
||||
+ spin_unlock_bh(&hooks_lock);
|
||||
+
|
||||
+ return active;
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
|
||||
+{
|
||||
+ struct xt_flowoffload_table *table = data;
|
||||
+ struct flow_offload_tuple *tuple0 = &flow->tuplehash[0].tuple;
|
||||
+ struct flow_offload_tuple *tuple1 = &flow->tuplehash[1].tuple;
|
||||
+ struct xt_flowoffload_hook *hook;
|
||||
+
|
||||
+ spin_lock_bh(&hooks_lock);
|
||||
+ hlist_for_each_entry(hook, &table->hooks, list) {
|
||||
+ if (hook->ops.dev->ifindex != tuple0->iifidx &&
|
||||
+ hook->ops.dev->ifindex != tuple1->iifidx)
|
||||
+ continue;
|
||||
+
|
||||
+ hook->used = true;
|
||||
+ }
|
||||
+ spin_unlock_bh(&hooks_lock);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+xt_flowoffload_hook_work(struct work_struct *work)
|
||||
+{
|
||||
+ struct xt_flowoffload_table *table;
|
||||
+ struct xt_flowoffload_hook *hook;
|
||||
+ int err;
|
||||
+
|
||||
+ table = container_of(work, struct xt_flowoffload_table, work.work);
|
||||
+
|
||||
+ spin_lock_bh(&hooks_lock);
|
||||
+ xt_flowoffload_register_hooks(table);
|
||||
+ hlist_for_each_entry(hook, &table->hooks, list)
|
||||
+ hook->used = false;
|
||||
+ spin_unlock_bh(&hooks_lock);
|
||||
+
|
||||
+
|
||||
+
|
||||
+ if (err && err != -EAGAIN)
|
||||
+ goto out;
|
||||
+
|
||||
+ if (!xt_flowoffload_cleanup_hooks(table))
|
||||
+ return;
|
||||
+
|
||||
+out:
|
||||
+ queue_delayed_work(system_power_efficient_wq, &table->work, HZ);
|
||||
+}
|
||||
+
|
||||
+static bool
|
||||
+xt_flowoffload_skip(struct sk_buff *skb, int family)
|
||||
+{
|
||||
+ if (skb_sec_path(skb))
|
||||
+ return true;
|
||||
+
|
||||
+ if (family == NFPROTO_IPV4) {
|
||||
+ const struct ip_options *opt = &(IPCB(skb)->opt);
|
||||
+
|
||||
+ if (unlikely(opt->optlen))
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst)
|
||||
+{
|
||||
+ if (dst_xfrm(dst))
|
||||
+ return FLOW_OFFLOAD_XMIT_XFRM;
|
||||
+
|
||||
+ return FLOW_OFFLOAD_XMIT_NEIGH;
|
||||
+}
|
||||
+
|
||||
+static void nf_default_forward_path(struct nf_flow_route *route,
|
||||
+ struct dst_entry *dst_cache,
|
||||
+ enum ip_conntrack_dir dir,
|
||||
+ struct net_device **dev)
|
||||
+{
|
||||
+ dev[!dir] = dst_cache->dev;
|
||||
+ route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
|
||||
+ route->tuple[dir].dst = dst_cache;
|
||||
+ route->tuple[dir].xmit_type = nf_xmit_type(dst_cache);
|
||||
+}
|
||||
+
|
||||
+static bool nf_is_valid_ether_device(const struct net_device *dev)
|
||||
+{
|
||||
+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
|
||||
+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
|
||||
+ return false;
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static void nf_dev_path_info(const struct net_device_path_stack *stack,
|
||||
+ struct nf_forward_info *info,
|
||||
+ unsigned char *ha)
|
||||
+{
|
||||
+ const struct net_device_path *path;
|
||||
+ int i;
|
||||
+
|
||||
+ memcpy(info->h_dest, ha, ETH_ALEN);
|
||||
+
|
||||
+ for (i = 0; i < stack->num_paths; i++) {
|
||||
+ path = &stack->path[i];
|
||||
+ switch (path->type) {
|
||||
+ case DEV_PATH_ETHERNET:
|
||||
+ case DEV_PATH_DSA:
|
||||
+ case DEV_PATH_VLAN:
|
||||
+ case DEV_PATH_PPPOE:
|
||||
+ info->indev = path->dev;
|
||||
+ if (is_zero_ether_addr(info->h_source))
|
||||
+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
+
|
||||
+ if (path->type == DEV_PATH_ETHERNET)
|
||||
+ break;
|
||||
+ if (path->type == DEV_PATH_DSA) {
|
||||
+ i = stack->num_paths;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
|
||||
+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
|
||||
+ info->indev = NULL;
|
||||
+ break;
|
||||
+ }
|
||||
+ if (!info->outdev)
|
||||
+ info->outdev = path->dev;
|
||||
+ info->encap[info->num_encaps].id = path->encap.id;
|
||||
+ info->encap[info->num_encaps].proto = path->encap.proto;
|
||||
+ info->num_encaps++;
|
||||
+ if (path->type == DEV_PATH_PPPOE)
|
||||
+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
|
||||
+ break;
|
||||
+ case DEV_PATH_BRIDGE:
|
||||
+ if (is_zero_ether_addr(info->h_source))
|
||||
+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
+
|
||||
+ switch (path->bridge.vlan_mode) {
|
||||
+ case DEV_PATH_BR_VLAN_UNTAG_HW:
|
||||
+ info->ingress_vlans |= BIT(info->num_encaps - 1);
|
||||
+ break;
|
||||
+ case DEV_PATH_BR_VLAN_TAG:
|
||||
+ info->encap[info->num_encaps].id = path->bridge.vlan_id;
|
||||
+ info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
|
||||
+ info->num_encaps++;
|
||||
+ break;
|
||||
+ case DEV_PATH_BR_VLAN_UNTAG:
|
||||
+ info->num_encaps--;
|
||||
+ break;
|
||||
+ case DEV_PATH_BR_VLAN_KEEP:
|
||||
+ break;
|
||||
+ }
|
||||
+ break;
|
||||
+ default:
|
||||
+ info->indev = NULL;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ if (!info->outdev)
|
||||
+ info->outdev = info->indev;
|
||||
+
|
||||
+ info->hw_outdev = info->indev;
|
||||
+
|
||||
+ if (nf_is_valid_ether_device(info->indev))
|
||||
+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
|
||||
+}
|
||||
+
|
||||
+static int nf_dev_fill_forward_path(const struct nf_flow_route *route,
|
||||
+ const struct dst_entry *dst_cache,
|
||||
+ const struct nf_conn *ct,
|
||||
+ enum ip_conntrack_dir dir, u8 *ha,
|
||||
+ struct net_device_path_stack *stack)
|
||||
+{
|
||||
+ const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
|
||||
+ struct net_device *dev = dst_cache->dev;
|
||||
+ struct neighbour *n;
|
||||
+ u8 nud_state;
|
||||
+
|
||||
+ if (!nf_is_valid_ether_device(dev))
|
||||
+ goto out;
|
||||
+
|
||||
+ n = dst_neigh_lookup(dst_cache, daddr);
|
||||
+ if (!n)
|
||||
+ return -1;
|
||||
+
|
||||
+ read_lock_bh(&n->lock);
|
||||
+ nud_state = n->nud_state;
|
||||
+ ether_addr_copy(ha, n->ha);
|
||||
+ read_unlock_bh(&n->lock);
|
||||
+ neigh_release(n);
|
||||
+
|
||||
+ if (!(nud_state & NUD_VALID))
|
||||
+ return -1;
|
||||
+
|
||||
+out:
|
||||
+ return dev_fill_forward_path(dev, ha, stack);
|
||||
+}
|
||||
+
|
||||
+static void nf_dev_forward_path(struct nf_flow_route *route,
|
||||
+ const struct nf_conn *ct,
|
||||
+ enum ip_conntrack_dir dir,
|
||||
+ struct net_device **devs)
|
||||
+{
|
||||
+ const struct dst_entry *dst = route->tuple[dir].dst;
|
||||
+ struct net_device_path_stack stack;
|
||||
+ struct nf_forward_info info = {};
|
||||
+ unsigned char ha[ETH_ALEN];
|
||||
+ int i;
|
||||
+
|
||||
+ if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
|
||||
+ nf_dev_path_info(&stack, &info, ha);
|
||||
+
|
||||
+ devs[!dir] = (struct net_device *)info.indev;
|
||||
+ if (!info.indev)
|
||||
+ return;
|
||||
+
|
||||
+ route->tuple[!dir].in.ifindex = info.indev->ifindex;
|
||||
+ for (i = 0; i < info.num_encaps; i++) {
|
||||
+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
|
||||
+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
|
||||
+ }
|
||||
+ route->tuple[!dir].in.num_encaps = info.num_encaps;
|
||||
+ route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
|
||||
+
|
||||
+ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
|
||||
+ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
|
||||
+ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
|
||||
+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
|
||||
+ route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
|
||||
+ route->tuple[dir].xmit_type = info.xmit_type;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+xt_flowoffload_route(struct sk_buff *skb, const struct nf_conn *ct,
|
||||
+ const struct xt_action_param *par,
|
||||
+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
|
||||
+ struct net_device **devs)
|
||||
+{
|
||||
+ struct dst_entry *this_dst = skb_dst(skb);
|
||||
+ struct dst_entry *other_dst = NULL;
|
||||
+ struct flowi fl;
|
||||
+
|
||||
+ memset(&fl, 0, sizeof(fl));
|
||||
+ switch (xt_family(par)) {
|
||||
+ case NFPROTO_IPV4:
|
||||
+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
|
||||
+ fl.u.ip4.flowi4_oif = xt_in(par)->ifindex;
|
||||
+ break;
|
||||
+ case NFPROTO_IPV6:
|
||||
+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
|
||||
+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
|
||||
+ fl.u.ip6.flowi6_oif = xt_in(par)->ifindex;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ nf_route(xt_net(par), &other_dst, &fl, false, xt_family(par));
|
||||
+ if (!other_dst)
|
||||
+ return -ENOENT;
|
||||
+
|
||||
+ nf_default_forward_path(route, this_dst, dir, devs);
|
||||
+ nf_default_forward_path(route, other_dst, !dir, devs);
|
||||
+
|
||||
+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
|
||||
+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
|
||||
+ nf_dev_forward_path(route, ct, dir, devs);
|
||||
+ nf_dev_forward_path(route, ct, !dir, devs);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static unsigned int
|
||||
+flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
|
||||
+{
|
||||
+ struct xt_flowoffload_table *table;
|
||||
+ const struct xt_flowoffload_target_info *info = par->targinfo;
|
||||
+ struct tcphdr _tcph, *tcph = NULL;
|
||||
+ enum ip_conntrack_info ctinfo;
|
||||
+ enum ip_conntrack_dir dir;
|
||||
+ struct nf_flow_route route = {};
|
||||
+ struct flow_offload *flow = NULL;
|
||||
+ struct net_device *devs[2] = {};
|
||||
+ struct nf_conn *ct;
|
||||
+ struct net *net;
|
||||
+
|
||||
+ if (xt_flowoffload_skip(skb, xt_family(par)))
|
||||
+ return XT_CONTINUE;
|
||||
+
|
||||
+ ct = nf_ct_get(skb, &ctinfo);
|
||||
+ if (ct == NULL)
|
||||
+ return XT_CONTINUE;
|
||||
+
|
||||
+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
|
||||
+ case IPPROTO_TCP:
|
||||
+ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
|
||||
+ return XT_CONTINUE;
|
||||
+
|
||||
+ tcph = skb_header_pointer(skb, par->thoff,
|
||||
+ sizeof(_tcph), &_tcph);
|
||||
+ if (unlikely(!tcph || tcph->fin || tcph->rst))
|
||||
+ return XT_CONTINUE;
|
||||
+ break;
|
||||
+ case IPPROTO_UDP:
|
||||
+ break;
|
||||
+ default:
|
||||
+ return XT_CONTINUE;
|
||||
+ }
|
||||
+
|
||||
+ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
|
||||
+ ct->status & (IPS_SEQ_ADJUST | IPS_NAT_CLASH))
|
||||
+ return XT_CONTINUE;
|
||||
+
|
||||
+ if (!nf_ct_is_confirmed(ct))
|
||||
+ return XT_CONTINUE;
|
||||
+
|
||||
+ devs[dir] = xt_out(par);
|
||||
+ devs[!dir] = xt_in(par);
|
||||
+
|
||||
+ if (!devs[dir] || !devs[!dir])
|
||||
+ return XT_CONTINUE;
|
||||
+
|
||||
+ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
|
||||
+ return XT_CONTINUE;
|
||||
+
|
||||
+ dir = CTINFO2DIR(ctinfo);
|
||||
+
|
||||
+ if (xt_flowoffload_route(skb, ct, par, &route, dir, devs) < 0)
|
||||
+ goto err_flow_route;
|
||||
+
|
||||
+ flow = flow_offload_alloc(ct);
|
||||
+ if (!flow)
|
||||
+ goto err_flow_alloc;
|
||||
+
|
||||
+ if (flow_offload_route_init(flow, &route) < 0)
|
||||
+ goto err_flow_add;
|
||||
+
|
||||
+ if (tcph) {
|
||||
+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
|
||||
+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
|
||||
+ }
|
||||
+
|
||||
+ table = &flowtable[!!(info->flags & XT_FLOWOFFLOAD_HW)];
|
||||
+
|
||||
+ net = read_pnet(&table->ft.net);
|
||||
+ if (!net)
|
||||
+ write_pnet(&table->ft.net, xt_net(par));
|
||||
+
|
||||
+ if (flow_offload_add(&table->ft, flow) < 0)
|
||||
+ goto err_flow_add;
|
||||
+
|
||||
+ xt_flowoffload_check_device(table, devs[0]);
|
||||
+ xt_flowoffload_check_device(table, devs[1]);
|
||||
+
|
||||
+ dst_release(route.tuple[!dir].dst);
|
||||
+
|
||||
+ return XT_CONTINUE;
|
||||
+
|
||||
+err_flow_add:
|
||||
+ flow_offload_free(flow);
|
||||
+err_flow_alloc:
|
||||
+ dst_release(route.tuple[!dir].dst);
|
||||
+err_flow_route:
|
||||
+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
|
||||
+
|
||||
+ return XT_CONTINUE;
|
||||
+}
|
||||
+
|
||||
+static int flowoffload_chk(const struct xt_tgchk_param *par)
|
||||
+{
|
||||
+ struct xt_flowoffload_target_info *info = par->targinfo;
|
||||
+
|
||||
+ if (info->flags & ~XT_FLOWOFFLOAD_MASK)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static struct xt_target offload_tg_reg __read_mostly = {
|
||||
+ .family = NFPROTO_UNSPEC,
|
||||
+ .name = "FLOWOFFLOAD",
|
||||
+ .revision = 0,
|
||||
+ .targetsize = sizeof(struct xt_flowoffload_target_info),
|
||||
+ .usersize = sizeof(struct xt_flowoffload_target_info),
|
||||
+ .checkentry = flowoffload_chk,
|
||||
+ .target = flowoffload_tg,
|
||||
+ .me = THIS_MODULE,
|
||||
+};
|
||||
+
|
||||
+static int flow_offload_netdev_event(struct notifier_block *this,
|
||||
+ unsigned long event, void *ptr)
|
||||
+{
|
||||
+ struct xt_flowoffload_hook *hook0, *hook1;
|
||||
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
|
||||
+
|
||||
+ if (event != NETDEV_UNREGISTER)
|
||||
+ return NOTIFY_DONE;
|
||||
+
|
||||
+ spin_lock_bh(&hooks_lock);
|
||||
+ hook0 = flow_offload_lookup_hook(&flowtable[0], dev);
|
||||
+ if (hook0)
|
||||
+ hlist_del(&hook0->list);
|
||||
+
|
||||
+ hook1 = flow_offload_lookup_hook(&flowtable[1], dev);
|
||||
+ if (hook1)
|
||||
+ hlist_del(&hook1->list);
|
||||
+ spin_unlock_bh(&hooks_lock);
|
||||
+
|
||||
+ if (hook0) {
|
||||
+ nf_unregister_net_hook(hook0->net, &hook0->ops);
|
||||
+ kfree(hook0);
|
||||
+ }
|
||||
+
|
||||
+ if (hook1) {
|
||||
+ nf_unregister_net_hook(hook1->net, &hook1->ops);
|
||||
+ kfree(hook1);
|
||||
+ }
|
||||
+
|
||||
+ nf_flow_table_cleanup(dev);
|
||||
+
|
||||
+ return NOTIFY_DONE;
|
||||
+}
|
||||
+
|
||||
+static struct notifier_block flow_offload_netdev_notifier = {
|
||||
+ .notifier_call = flow_offload_netdev_event,
|
||||
+};
|
||||
+
|
||||
+static int nf_flow_rule_route_inet(struct net *net,
|
||||
+ const struct flow_offload *flow,
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct nf_flow_rule *flow_rule)
|
||||
+{
|
||||
+ const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
|
||||
+ int err;
|
||||
+
|
||||
+ switch (flow_tuple->l3proto) {
|
||||
+ case NFPROTO_IPV4:
|
||||
+ err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
|
||||
+ break;
|
||||
+ case NFPROTO_IPV6:
|
||||
+ err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
|
||||
+ break;
|
||||
+ default:
|
||||
+ err = -1;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static struct nf_flowtable_type flowtable_inet = {
|
||||
+ .family = NFPROTO_INET,
|
||||
+ .init = nf_flow_table_init,
|
||||
+ .setup = nf_flow_table_offload_setup,
|
||||
+ .action = nf_flow_rule_route_inet,
|
||||
+ .free = nf_flow_table_free,
|
||||
+ .hook = xt_flowoffload_net_hook,
|
||||
+ .owner = THIS_MODULE,
|
||||
+};
|
||||
+
|
||||
+static int init_flowtable(struct xt_flowoffload_table *tbl)
|
||||
+{
|
||||
+ INIT_DELAYED_WORK(&tbl->work, xt_flowoffload_hook_work);
|
||||
+ tbl->ft.type = &flowtable_inet;
|
||||
+
|
||||
+ return nf_flow_table_init(&tbl->ft);
|
||||
+}
|
||||
+
|
||||
+static int __init xt_flowoffload_tg_init(void)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ register_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||
+
|
||||
+ ret = init_flowtable(&flowtable[0]);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ ret = init_flowtable(&flowtable[1]);
|
||||
+ if (ret)
|
||||
+ goto cleanup;
|
||||
+
|
||||
+ flowtable[1].ft.flags = NF_FLOWTABLE_HW_OFFLOAD;
|
||||
+
|
||||
+ ret = xt_register_target(&offload_tg_reg);
|
||||
+ if (ret)
|
||||
+ goto cleanup2;
|
||||
+
|
||||
+ return 0;
|
||||
+
|
||||
+cleanup2:
|
||||
+ nf_flow_table_free(&flowtable[1].ft);
|
||||
+cleanup:
|
||||
+ nf_flow_table_free(&flowtable[0].ft);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static void __exit xt_flowoffload_tg_exit(void)
|
||||
+{
|
||||
+ xt_unregister_target(&offload_tg_reg);
|
||||
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
|
||||
+ nf_flow_table_free(&flowtable[0].ft);
|
||||
+ nf_flow_table_free(&flowtable[1].ft);
|
||||
+}
|
||||
+
|
||||
+MODULE_LICENSE("GPL");
|
||||
+module_init(xt_flowoffload_tg_init);
|
||||
+module_exit(xt_flowoffload_tg_exit);
|
||||
--- /dev/null
|
||||
+++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
|
||||
@@ -0,0 +1,17 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
+#ifndef _XT_FLOWOFFLOAD_H
|
||||
+#define _XT_FLOWOFFLOAD_H
|
||||
+
|
||||
+#include <linux/types.h>
|
||||
+
|
||||
+enum {
|
||||
+ XT_FLOWOFFLOAD_HW = 1 << 0,
|
||||
+
|
||||
+ XT_FLOWOFFLOAD_MASK = XT_FLOWOFFLOAD_HW
|
||||
+};
|
||||
+
|
||||
+struct xt_flowoffload_target_info {
|
||||
+ __u32 flags;
|
||||
+};
|
||||
+
|
||||
+#endif /* _XT_FLOWOFFLOAD_H */
|
@ -1,24 +0,0 @@
|
||||
From 6d3bc769657b0ee7c7506dad9911111c4226a7ea Mon Sep 17 00:00:00 2001
|
||||
From: Imre Kaloz <kaloz@openwrt.org>
|
||||
Date: Fri, 7 Jul 2017 17:21:05 +0200
|
||||
Subject: mac80211: increase wireless mesh header size
|
||||
|
||||
lede-commit 3d4466cfd8f75f717efdb1f96fdde3c70d865fc1
|
||||
Signed-off-by: Imre Kaloz <kaloz@openwrt.org>
|
||||
---
|
||||
include/linux/netdevice.h | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/include/linux/netdevice.h
|
||||
+++ b/include/linux/netdevice.h
|
||||
@@ -149,8 +149,8 @@ static inline bool dev_xmit_complete(int
|
||||
|
||||
#if defined(CONFIG_HYPERV_NET)
|
||||
# define LL_MAX_HEADER 128
|
||||
-#elif defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25)
|
||||
-# if defined(CONFIG_MAC80211_MESH)
|
||||
+#elif defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25) || 1
|
||||
+# if defined(CONFIG_MAC80211_MESH) || 1
|
||||
# define LL_MAX_HEADER 128
|
||||
# else
|
||||
# define LL_MAX_HEADER 96
|
@ -1,27 +0,0 @@
|
||||
From a6ccb238939b25851474a279b20367fd24a0e816 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Fri, 7 Jul 2017 17:21:53 +0200
|
||||
Subject: hack: net: fq_codel: tune defaults for small devices
|
||||
|
||||
Assume that x86_64 devices always have plenty of memory and do not need this
|
||||
optimization compared to devices with only 32 MB or 64 MB RAM.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
net/sched/sch_fq_codel.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/net/sched/sch_fq_codel.c
|
||||
+++ b/net/sched/sch_fq_codel.c
|
||||
@@ -474,7 +474,11 @@ static int fq_codel_init(struct Qdisc *s
|
||||
|
||||
sch->limit = 10*1024;
|
||||
q->flows_cnt = 1024;
|
||||
+#ifdef CONFIG_X86_64
|
||||
q->memory_limit = 32 << 20; /* 32 MBytes */
|
||||
+#else
|
||||
+ q->memory_limit = 4 << 20; /* 4 MBytes */
|
||||
+#endif
|
||||
q->drop_batch_size = 64;
|
||||
q->quantum = psched_mtu(qdisc_dev(sch));
|
||||
INIT_LIST_HEAD(&q->new_flows);
|
@ -1,100 +0,0 @@
|
||||
From 1d418f7e88035ed7a94073f6354246c66e9193e9 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Fri, 7 Jul 2017 17:22:58 +0200
|
||||
Subject: fq_codel: switch default qdisc from pfifo_fast to fq_codel and remove pfifo_fast
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
include/net/sch_generic.h | 3 ++-
|
||||
net/sched/Kconfig | 3 ++-
|
||||
net/sched/sch_api.c | 2 +-
|
||||
net/sched/sch_fq_codel.c | 3 ++-
|
||||
net/sched/sch_generic.c | 4 ++--
|
||||
5 files changed, 9 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/include/net/sch_generic.h
|
||||
+++ b/include/net/sch_generic.h
|
||||
@@ -585,12 +585,13 @@ extern struct Qdisc_ops noop_qdisc_ops;
|
||||
extern struct Qdisc_ops pfifo_fast_ops;
|
||||
extern struct Qdisc_ops mq_qdisc_ops;
|
||||
extern struct Qdisc_ops noqueue_qdisc_ops;
|
||||
+extern struct Qdisc_ops fq_codel_qdisc_ops;
|
||||
extern const struct Qdisc_ops *default_qdisc_ops;
|
||||
static inline const struct Qdisc_ops *
|
||||
get_default_qdisc_ops(const struct net_device *dev, int ntx)
|
||||
{
|
||||
return ntx < dev->real_num_tx_queues ?
|
||||
- default_qdisc_ops : &pfifo_fast_ops;
|
||||
+ default_qdisc_ops : &fq_codel_qdisc_ops;
|
||||
}
|
||||
|
||||
struct Qdisc_class_common {
|
||||
--- a/net/sched/Kconfig
|
||||
+++ b/net/sched/Kconfig
|
||||
@@ -4,8 +4,9 @@
|
||||
#
|
||||
|
||||
menuconfig NET_SCHED
|
||||
- bool "QoS and/or fair queueing"
|
||||
+ def_bool y
|
||||
select NET_SCH_FIFO
|
||||
+ select NET_SCH_FQ_CODEL
|
||||
help
|
||||
When the kernel has several packets to send out over a network
|
||||
device, it has to decide which ones to send first, which ones to
|
||||
--- a/net/sched/sch_api.c
|
||||
+++ b/net/sched/sch_api.c
|
||||
@@ -2277,7 +2277,7 @@ static int __init pktsched_init(void)
|
||||
return err;
|
||||
}
|
||||
|
||||
- register_qdisc(&pfifo_fast_ops);
|
||||
+ register_qdisc(&fq_codel_qdisc_ops);
|
||||
register_qdisc(&pfifo_qdisc_ops);
|
||||
register_qdisc(&bfifo_qdisc_ops);
|
||||
register_qdisc(&pfifo_head_drop_qdisc_ops);
|
||||
--- a/net/sched/sch_fq_codel.c
|
||||
+++ b/net/sched/sch_fq_codel.c
|
||||
@@ -719,7 +719,7 @@ static const struct Qdisc_class_ops fq_c
|
||||
.walk = fq_codel_walk,
|
||||
};
|
||||
|
||||
-static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
|
||||
+struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
|
||||
.cl_ops = &fq_codel_class_ops,
|
||||
.id = "fq_codel",
|
||||
.priv_size = sizeof(struct fq_codel_sched_data),
|
||||
@@ -734,6 +734,7 @@ static struct Qdisc_ops fq_codel_qdisc_o
|
||||
.dump_stats = fq_codel_dump_stats,
|
||||
.owner = THIS_MODULE,
|
||||
};
|
||||
+EXPORT_SYMBOL(fq_codel_qdisc_ops);
|
||||
|
||||
static int __init fq_codel_module_init(void)
|
||||
{
|
||||
--- a/net/sched/sch_generic.c
|
||||
+++ b/net/sched/sch_generic.c
|
||||
@@ -32,7 +32,7 @@
|
||||
#include <net/xfrm.h>
|
||||
|
||||
/* Qdisc to use by default */
|
||||
-const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
|
||||
+const struct Qdisc_ops *default_qdisc_ops = &fq_codel_qdisc_ops;
|
||||
EXPORT_SYMBOL(default_qdisc_ops);
|
||||
|
||||
static void qdisc_maybe_clear_missed(struct Qdisc *q,
|
||||
@@ -1142,12 +1142,12 @@ static void attach_one_default_qdisc(str
|
||||
void *_unused)
|
||||
{
|
||||
struct Qdisc *qdisc;
|
||||
- const struct Qdisc_ops *ops = default_qdisc_ops;
|
||||
+ const struct Qdisc_ops *ops = &fq_codel_qdisc_ops;
|
||||
|
||||
if (dev->priv_flags & IFF_NO_QUEUE)
|
||||
ops = &noqueue_qdisc_ops;
|
||||
else if(dev->type == ARPHRD_CAN)
|
||||
- ops = &pfifo_fast_ops;
|
||||
+ ops = &fq_codel_qdisc_ops;
|
||||
|
||||
qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
|
||||
if (!qdisc)
|
@ -1,129 +0,0 @@
|
||||
From 36e516290611e613aa92996cb4339561452695b4 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Fri, 7 Jul 2017 17:24:23 +0200
|
||||
Subject: net: swconfig: adds openwrt switch layer
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
drivers/net/phy/Kconfig | 74 ++++++++++++++++++++++++++++++++++++++++++
|
||||
drivers/net/phy/Makefile | 13 +++++++
|
||||
include/linux/platform_data/b53.h | 3 ++
|
||||
3 files changed, 90 insertions(+)
|
||||
|
||||
--- a/drivers/net/phy/Kconfig
|
||||
+++ b/drivers/net/phy/Kconfig
|
||||
@@ -61,6 +61,80 @@ config SFP
|
||||
depends on HWMON || HWMON=n
|
||||
select MDIO_I2C
|
||||
|
||||
+comment "Switch configuration API + drivers"
|
||||
+
|
||||
+config SWCONFIG
|
||||
+ tristate "Switch configuration API"
|
||||
+ help
|
||||
+ Switch configuration API using netlink. This allows
|
||||
+ you to configure the VLAN features of certain switches.
|
||||
+
|
||||
+config SWCONFIG_LEDS
|
||||
+ bool "Switch LED trigger support"
|
||||
+ depends on (SWCONFIG && LEDS_TRIGGERS)
|
||||
+
|
||||
+config ADM6996_PHY
|
||||
+ tristate "Driver for ADM6996 switches"
|
||||
+ select SWCONFIG
|
||||
+ help
|
||||
+ Currently supports the ADM6996FC and ADM6996M switches.
|
||||
+ Support for FC is very limited.
|
||||
+
|
||||
+config AR8216_PHY
|
||||
+ tristate "Driver for Atheros AR8216 switches"
|
||||
+ select SWCONFIG
|
||||
+
|
||||
+config AR8216_PHY_LEDS
|
||||
+ bool "Atheros AR8216 switch LED support"
|
||||
+ depends on (AR8216_PHY && LEDS_CLASS)
|
||||
+
|
||||
+source "drivers/net/phy/b53/Kconfig"
|
||||
+
|
||||
+config IP17XX_PHY
|
||||
+ tristate "Driver for IC+ IP17xx switches"
|
||||
+ select SWCONFIG
|
||||
+
|
||||
+config PSB6970_PHY
|
||||
+ tristate "Lantiq XWAY Tantos (PSB6970) Ethernet switch"
|
||||
+ select SWCONFIG
|
||||
+ select ETHERNET_PACKET_MANGLE
|
||||
+
|
||||
+config RTL8306_PHY
|
||||
+ tristate "Driver for Realtek RTL8306S switches"
|
||||
+ select SWCONFIG
|
||||
+
|
||||
+config RTL8366_SMI
|
||||
+ tristate "Driver for the RTL8366 SMI interface"
|
||||
+ depends on GPIOLIB
|
||||
+ help
|
||||
+ This module implements the SMI interface protocol which is used
|
||||
+ by some RTL8366 ethernet switch devices via the generic GPIO API.
|
||||
+
|
||||
+if RTL8366_SMI
|
||||
+
|
||||
+config RTL8366_SMI_DEBUG_FS
|
||||
+ bool "RTL8366 SMI interface debugfs support"
|
||||
+ depends on DEBUG_FS
|
||||
+ default n
|
||||
+
|
||||
+config RTL8366S_PHY
|
||||
+ tristate "Driver for the Realtek RTL8366S switch"
|
||||
+ select SWCONFIG
|
||||
+
|
||||
+config RTL8366RB_PHY
|
||||
+ tristate "Driver for the Realtek RTL8366RB switch"
|
||||
+ select SWCONFIG
|
||||
+
|
||||
+config RTL8367_PHY
|
||||
+ tristate "Driver for the Realtek RTL8367R/M switches"
|
||||
+ select SWCONFIG
|
||||
+
|
||||
+config RTL8367B_PHY
|
||||
+ tristate "Driver for the Realtek RTL8367R-VB switch"
|
||||
+ select SWCONFIG
|
||||
+
|
||||
+endif # RTL8366_SMI
|
||||
+
|
||||
comment "MII PHY device drivers"
|
||||
|
||||
config AMD_PHY
|
||||
--- a/drivers/net/phy/Makefile
|
||||
+++ b/drivers/net/phy/Makefile
|
||||
@@ -24,6 +24,19 @@ libphy-$(CONFIG_LED_TRIGGER_PHY) += phy_
|
||||
obj-$(CONFIG_PHYLINK) += phylink.o
|
||||
obj-$(CONFIG_PHYLIB) += libphy.o
|
||||
|
||||
+obj-$(CONFIG_SWCONFIG) += swconfig.o
|
||||
+obj-$(CONFIG_ADM6996_PHY) += adm6996.o
|
||||
+obj-$(CONFIG_AR8216_PHY) += ar8216.o ar8327.o
|
||||
+obj-$(CONFIG_SWCONFIG_B53) += b53/
|
||||
+obj-$(CONFIG_IP17XX_PHY) += ip17xx.o
|
||||
+obj-$(CONFIG_PSB6970_PHY) += psb6970.o
|
||||
+obj-$(CONFIG_RTL8306_PHY) += rtl8306.o
|
||||
+obj-$(CONFIG_RTL8366_SMI) += rtl8366_smi.o
|
||||
+obj-$(CONFIG_RTL8366S_PHY) += rtl8366s.o
|
||||
+obj-$(CONFIG_RTL8366RB_PHY) += rtl8366rb.o
|
||||
+obj-$(CONFIG_RTL8367_PHY) += rtl8367.o
|
||||
+obj-$(CONFIG_RTL8367B_PHY) += rtl8367b.o
|
||||
+
|
||||
obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += mii_timestamper.o
|
||||
|
||||
obj-$(CONFIG_SFP) += sfp.o
|
||||
--- a/include/linux/platform_data/b53.h
|
||||
+++ b/include/linux/platform_data/b53.h
|
||||
@@ -29,6 +29,9 @@ struct b53_platform_data {
|
||||
u32 chip_id;
|
||||
u16 enabled_ports;
|
||||
|
||||
+ /* allow to specify an ethX alias */
|
||||
+ const char *alias;
|
||||
+
|
||||
/* only used by MMAP'd driver */
|
||||
unsigned big_endian:1;
|
||||
void __iomem *regs;
|
@ -1,74 +0,0 @@
|
||||
From 82985725e071f2a5735052f18e109a32aeac3a0b Mon Sep 17 00:00:00 2001
|
||||
From: David Bauer <mail@david-bauer.net>
|
||||
Date: Sun, 26 Jul 2020 02:38:31 +0200
|
||||
Subject: [PATCH] net: usb: r8152: add LED configuration from OF
|
||||
|
||||
This adds the ability to configure the LED configuration register using
|
||||
OF. This way, the correct value for board specific LED configuration can
|
||||
be determined.
|
||||
|
||||
Signed-off-by: David Bauer <mail@david-bauer.net>
|
||||
---
|
||||
drivers/net/usb/r8152.c | 23 +++++++++++++++++++++++
|
||||
1 file changed, 23 insertions(+)
|
||||
|
||||
--- a/drivers/net/usb/r8152.c
|
||||
+++ b/drivers/net/usb/r8152.c
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <linux/mii.h>
|
||||
#include <linux/ethtool.h>
|
||||
#include <linux/usb.h>
|
||||
+#include <linux/of.h>
|
||||
#include <linux/crc32.h>
|
||||
#include <linux/if_vlan.h>
|
||||
#include <linux/uaccess.h>
|
||||
@@ -6866,6 +6867,22 @@ static void rtl_tally_reset(struct r8152
|
||||
ocp_write_word(tp, MCU_TYPE_PLA, PLA_RSTTALLY, ocp_data);
|
||||
}
|
||||
|
||||
+static int r8152_led_configuration(struct r8152 *tp)
|
||||
+{
|
||||
+ u32 led_data;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = of_property_read_u32(tp->udev->dev.of_node, "realtek,led-data",
|
||||
+ &led_data);
|
||||
+
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_LEDSEL, led_data);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static void r8152b_init(struct r8152 *tp)
|
||||
{
|
||||
u32 ocp_data;
|
||||
@@ -6907,6 +6924,8 @@ static void r8152b_init(struct r8152 *tp
|
||||
ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL);
|
||||
ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN);
|
||||
ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
|
||||
+
|
||||
+ r8152_led_configuration(tp);
|
||||
}
|
||||
|
||||
static void r8153_init(struct r8152 *tp)
|
||||
@@ -7047,6 +7066,8 @@ static void r8153_init(struct r8152 *tp)
|
||||
tp->coalesce = COALESCE_SLOW;
|
||||
break;
|
||||
}
|
||||
+
|
||||
+ r8152_led_configuration(tp);
|
||||
}
|
||||
|
||||
static void r8153b_init(struct r8152 *tp)
|
||||
@@ -7129,6 +7150,8 @@ static void r8153b_init(struct r8152 *tp
|
||||
rtl_tally_reset(tp);
|
||||
|
||||
tp->coalesce = 15000; /* 15 us */
|
||||
+
|
||||
+ r8152_led_configuration(tp);
|
||||
}
|
||||
|
||||
static void r8153c_init(struct r8152 *tp)
|
@ -1,54 +0,0 @@
|
||||
From 3ee05f4aa64fc86af3be5bc176ba5808de9260a7 Mon Sep 17 00:00:00 2001
|
||||
From: David Bauer <mail@david-bauer.net>
|
||||
Date: Sun, 26 Jul 2020 15:30:33 +0200
|
||||
Subject: [PATCH] dt-bindings: net: add RTL8152 binding documentation
|
||||
|
||||
Add binding documentation for the Realtek RTL8152 / RTL8153 USB ethernet
|
||||
adapters.
|
||||
|
||||
Signed-off-by: David Bauer <mail@david-bauer.net>
|
||||
---
|
||||
.../bindings/net/realtek,rtl8152.yaml | 36 +++++++++++++++++++
|
||||
1 file changed, 36 insertions(+)
|
||||
create mode 100644 Documentation/devicetree/bindings/net/realtek,rtl8152.yaml
|
||||
|
||||
--- /dev/null
|
||||
+++ b/Documentation/devicetree/bindings/net/realtek,rtl8152.yaml
|
||||
@@ -0,0 +1,36 @@
|
||||
+# SPDX-License-Identifier: GPL-2.0
|
||||
+%YAML 1.2
|
||||
+---
|
||||
+$id: http://devicetree.org/schemas/net/realtek,rtl8152.yaml#
|
||||
+$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
+
|
||||
+title: Realtek RTL8152/RTL8153 series USB ethernet
|
||||
+
|
||||
+maintainers:
|
||||
+ - David Bauer <mail@david-bauer.net>
|
||||
+
|
||||
+properties:
|
||||
+ compatible:
|
||||
+ oneOf:
|
||||
+ - items:
|
||||
+ - enum:
|
||||
+ - realtek,rtl8152
|
||||
+ - realtek,rtl8153
|
||||
+
|
||||
+ reg:
|
||||
+ description: The device number on the USB bus
|
||||
+
|
||||
+ realtek,led-data:
|
||||
+ description: Value to be written to the LED configuration register.
|
||||
+
|
||||
+required:
|
||||
+ - compatible
|
||||
+ - reg
|
||||
+
|
||||
+examples:
|
||||
+ - |
|
||||
+ usb-eth@2 {
|
||||
+ compatible = "realtek,rtl8153";
|
||||
+ reg = <2>;
|
||||
+ realtek,led-data = <0x87>;
|
||||
+ };
|
||||
\ No newline at end of file
|
@ -1,98 +0,0 @@
|
||||
From 3cb240533ab787899dc7f17aa7d6c5b4810e2e58 Mon Sep 17 00:00:00 2001
|
||||
From: Hauke Mehrtens <hauke@hauke-m.de>
|
||||
Date: Fri, 7 Jul 2017 17:26:01 +0200
|
||||
Subject: bcm53xx: bgmac: use srab switch driver
|
||||
|
||||
Use the SRAB switch driver on these SoCs.
|
||||
|
||||
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
|
||||
---
|
||||
drivers/net/ethernet/broadcom/bgmac-bcma.c | 1 +
|
||||
drivers/net/ethernet/broadcom/bgmac.c | 24 ++++++++++++++++++++++++
|
||||
drivers/net/ethernet/broadcom/bgmac.h | 4 ++++
|
||||
3 files changed, 29 insertions(+)
|
||||
|
||||
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
|
||||
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
|
||||
@@ -280,6 +280,7 @@ static int bgmac_probe(struct bcma_devic
|
||||
bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
|
||||
bgmac->feature_flags |= BGMAC_FEAT_NO_RESET;
|
||||
bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500;
|
||||
+ bgmac->feature_flags |= BGMAC_FEAT_SRAB;
|
||||
break;
|
||||
default:
|
||||
bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
|
||||
--- a/drivers/net/ethernet/broadcom/bgmac.c
|
||||
+++ b/drivers/net/ethernet/broadcom/bgmac.c
|
||||
@@ -12,6 +12,7 @@
|
||||
#include <linux/bcma/bcma.h>
|
||||
#include <linux/etherdevice.h>
|
||||
#include <linux/interrupt.h>
|
||||
+#include <linux/platform_data/b53.h>
|
||||
#include <linux/bcm47xx_nvram.h>
|
||||
#include <linux/phy.h>
|
||||
#include <linux/phy_fixed.h>
|
||||
@@ -1408,6 +1409,17 @@ static const struct ethtool_ops bgmac_et
|
||||
.set_link_ksettings = phy_ethtool_set_link_ksettings,
|
||||
};
|
||||
|
||||
+static struct b53_platform_data bgmac_b53_pdata = {
|
||||
+};
|
||||
+
|
||||
+static struct platform_device bgmac_b53_dev = {
|
||||
+ .name = "b53-srab-switch",
|
||||
+ .id = -1,
|
||||
+ .dev = {
|
||||
+ .platform_data = &bgmac_b53_pdata,
|
||||
+ },
|
||||
+};
|
||||
+
|
||||
/**************************************************
|
||||
* MII
|
||||
**************************************************/
|
||||
@@ -1542,6 +1554,14 @@ int bgmac_enet_probe(struct bgmac *bgmac
|
||||
/* Omit FCS from max MTU size */
|
||||
net_dev->max_mtu = BGMAC_RX_MAX_FRAME_SIZE - ETH_FCS_LEN;
|
||||
|
||||
+ if ((bgmac->feature_flags & BGMAC_FEAT_SRAB) && !bgmac_b53_pdata.regs) {
|
||||
+ bgmac_b53_pdata.regs = ioremap_nocache(0x18007000, 0x1000);
|
||||
+
|
||||
+ err = platform_device_register(&bgmac_b53_dev);
|
||||
+ if (!err)
|
||||
+ bgmac->b53_device = &bgmac_b53_dev;
|
||||
+ }
|
||||
+
|
||||
err = register_netdev(bgmac->net_dev);
|
||||
if (err) {
|
||||
dev_err(bgmac->dev, "Cannot register net device\n");
|
||||
@@ -1564,6 +1584,10 @@ EXPORT_SYMBOL_GPL(bgmac_enet_probe);
|
||||
|
||||
void bgmac_enet_remove(struct bgmac *bgmac)
|
||||
{
|
||||
+ if (bgmac->b53_device)
|
||||
+ platform_device_unregister(&bgmac_b53_dev);
|
||||
+ bgmac->b53_device = NULL;
|
||||
+
|
||||
unregister_netdev(bgmac->net_dev);
|
||||
phy_disconnect(bgmac->net_dev->phydev);
|
||||
netif_napi_del(&bgmac->napi);
|
||||
--- a/drivers/net/ethernet/broadcom/bgmac.h
|
||||
+++ b/drivers/net/ethernet/broadcom/bgmac.h
|
||||
@@ -388,6 +388,7 @@
|
||||
#define BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII BIT(18)
|
||||
#define BGMAC_FEAT_CC7_IF_TYPE_RGMII BIT(19)
|
||||
#define BGMAC_FEAT_IDM_MASK BIT(20)
|
||||
+#define BGMAC_FEAT_SRAB BIT(21)
|
||||
|
||||
struct bgmac_slot_info {
|
||||
union {
|
||||
@@ -493,6 +494,9 @@ struct bgmac {
|
||||
void (*cmn_maskset32)(struct bgmac *bgmac, u16 offset, u32 mask,
|
||||
u32 set);
|
||||
int (*phy_connect)(struct bgmac *bgmac);
|
||||
+
|
||||
+ /* platform device for associated switch */
|
||||
+ struct platform_device *b53_device;
|
||||
};
|
||||
|
||||
struct bgmac *bgmac_alloc(struct device *dev);
|
@ -1,33 +0,0 @@
|
||||
--- a/drivers/net/usb/qmi_wwan.c
|
||||
+++ b/drivers/net/usb/qmi_wwan.c
|
||||
@@ -1088,6 +1088,7 @@ static const struct usb_device_id produc
|
||||
{QMI_MATCH_FF_FF_FF(0x2c7c, 0x0620)}, /* Quectel EM160R-GL */
|
||||
{QMI_MATCH_FF_FF_FF(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */
|
||||
{QMI_MATCH_FF_FF_FF(0x2c7c, 0x0801)}, /* Quectel RM520N */
|
||||
+ {QMI_MATCH_FF_FF_FF(0x05c6, 0xf601)}, /* MeigLink SLM750 */
|
||||
|
||||
/* 3. Combined interface devices matching on interface number */
|
||||
{QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */
|
||||
--- a/drivers/usb/serial/option.c
|
||||
+++ b/drivers/usb/serial/option.c
|
||||
@@ -244,6 +244,8 @@ static void option_instat_callback(struc
|
||||
#define UBLOX_PRODUCT_R410M 0x90b2
|
||||
/* These Yuga products use Qualcomm's vendor ID */
|
||||
#define YUGA_PRODUCT_CLM920_NC5 0x9625
|
||||
+/* These MeigLink products use Qualcomm's vendor ID */
|
||||
+#define MEIGLINK_PRODUCT_SLM750 0xf601
|
||||
|
||||
#define QUECTEL_VENDOR_ID 0x2c7c
|
||||
/* These Quectel products use Quectel's vendor ID */
|
||||
@@ -1155,6 +1157,11 @@ static const struct usb_device_id option
|
||||
.driver_info = ZLP },
|
||||
{ USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96),
|
||||
.driver_info = RSVD(4) },
|
||||
+ /* Meiglink products using Qualcomm vendor ID */
|
||||
+ // Works OK. In case of some issues check macros that are used by Quectel Products
|
||||
+ { USB_DEVICE_AND_INTERFACE_INFO(QUALCOMM_VENDOR_ID, MEIGLINK_PRODUCT_SLM750, 0xff, 0xff, 0xff),
|
||||
+ .driver_info = NUMEP2 },
|
||||
+ { USB_DEVICE_AND_INTERFACE_INFO(QUALCOMM_VENDOR_ID, MEIGLINK_PRODUCT_SLM750, 0xff, 0, 0) },
|
||||
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff),
|
||||
.driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 },
|
||||
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) },
|
@ -1,162 +0,0 @@
|
||||
From cc809a441d8f2924f785eb863dfa6aef47a25b0b Mon Sep 17 00:00:00 2001
|
||||
From: John Crispin <blogic@openwrt.org>
|
||||
Date: Tue, 12 Aug 2014 20:49:27 +0200
|
||||
Subject: [PATCH 30/36] GPIO: add named gpio exports
|
||||
|
||||
Signed-off-by: John Crispin <blogic@openwrt.org>
|
||||
--- a/drivers/gpio/gpiolib-of.c
|
||||
+++ b/drivers/gpio/gpiolib-of.c
|
||||
@@ -19,6 +19,8 @@
|
||||
#include <linux/pinctrl/pinctrl.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/gpio/machine.h>
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/platform_device.h>
|
||||
|
||||
#include "gpiolib.h"
|
||||
#include "gpiolib-of.h"
|
||||
@@ -1066,3 +1068,72 @@ void of_gpio_dev_init(struct gpio_chip *
|
||||
else
|
||||
gc->of_node = gdev->dev.of_node;
|
||||
}
|
||||
+
|
||||
+#ifdef CONFIG_GPIO_SYSFS
|
||||
+
|
||||
+static struct of_device_id gpio_export_ids[] = {
|
||||
+ { .compatible = "gpio-export" },
|
||||
+ { /* sentinel */ }
|
||||
+};
|
||||
+
|
||||
+static int of_gpio_export_probe(struct platform_device *pdev)
|
||||
+{
|
||||
+ struct device_node *np = pdev->dev.of_node;
|
||||
+ struct device_node *cnp;
|
||||
+ u32 val;
|
||||
+ int nb = 0;
|
||||
+
|
||||
+ for_each_child_of_node(np, cnp) {
|
||||
+ const char *name = NULL;
|
||||
+ int gpio;
|
||||
+ bool dmc;
|
||||
+ int max_gpio = 1;
|
||||
+ int i;
|
||||
+
|
||||
+ of_property_read_string(cnp, "gpio-export,name", &name);
|
||||
+
|
||||
+ if (!name)
|
||||
+ max_gpio = of_gpio_count(cnp);
|
||||
+
|
||||
+ for (i = 0; i < max_gpio; i++) {
|
||||
+ unsigned flags = 0;
|
||||
+ enum of_gpio_flags of_flags;
|
||||
+
|
||||
+ gpio = of_get_gpio_flags(cnp, i, &of_flags);
|
||||
+ if (!gpio_is_valid(gpio))
|
||||
+ return gpio;
|
||||
+
|
||||
+ if (of_flags == OF_GPIO_ACTIVE_LOW)
|
||||
+ flags |= GPIOF_ACTIVE_LOW;
|
||||
+
|
||||
+ if (!of_property_read_u32(cnp, "gpio-export,output", &val))
|
||||
+ flags |= val ? GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW;
|
||||
+ else
|
||||
+ flags |= GPIOF_IN;
|
||||
+
|
||||
+ if (devm_gpio_request_one(&pdev->dev, gpio, flags, name ? name : of_node_full_name(np)))
|
||||
+ continue;
|
||||
+
|
||||
+ dmc = of_property_read_bool(cnp, "gpio-export,direction_may_change");
|
||||
+ gpio_export_with_name(gpio, dmc, name);
|
||||
+ nb++;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ dev_info(&pdev->dev, "%d gpio(s) exported\n", nb);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static struct platform_driver gpio_export_driver = {
|
||||
+ .driver = {
|
||||
+ .name = "gpio-export",
|
||||
+ .owner = THIS_MODULE,
|
||||
+ .of_match_table = of_match_ptr(gpio_export_ids),
|
||||
+ },
|
||||
+ .probe = of_gpio_export_probe,
|
||||
+};
|
||||
+
|
||||
+module_platform_driver(gpio_export_driver);
|
||||
+
|
||||
+#endif
|
||||
--- a/include/asm-generic/gpio.h
|
||||
+++ b/include/asm-generic/gpio.h
|
||||
@@ -125,6 +125,12 @@ static inline int gpio_export(unsigned g
|
||||
return gpiod_export(gpio_to_desc(gpio), direction_may_change);
|
||||
}
|
||||
|
||||
+int __gpiod_export(struct gpio_desc *desc, bool direction_may_change, const char *name);
|
||||
+static inline int gpio_export_with_name(unsigned gpio, bool direction_may_change, const char *name)
|
||||
+{
|
||||
+ return __gpiod_export(gpio_to_desc(gpio), direction_may_change, name);
|
||||
+}
|
||||
+
|
||||
static inline int gpio_export_link(struct device *dev, const char *name,
|
||||
unsigned gpio)
|
||||
{
|
||||
--- a/include/linux/gpio/consumer.h
|
||||
+++ b/include/linux/gpio/consumer.h
|
||||
@@ -728,6 +728,7 @@ static inline struct gpio_desc *acpi_get
|
||||
|
||||
#if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_GPIO_SYSFS)
|
||||
|
||||
+int _gpiod_export(struct gpio_desc *desc, bool direction_may_change, const char *name);
|
||||
int gpiod_export(struct gpio_desc *desc, bool direction_may_change);
|
||||
int gpiod_export_link(struct device *dev, const char *name,
|
||||
struct gpio_desc *desc);
|
||||
@@ -735,6 +736,13 @@ void gpiod_unexport(struct gpio_desc *de
|
||||
|
||||
#else /* CONFIG_GPIOLIB && CONFIG_GPIO_SYSFS */
|
||||
|
||||
+static inline int _gpiod_export(struct gpio_desc *desc,
|
||||
+ bool direction_may_change,
|
||||
+ const char *name)
|
||||
+{
|
||||
+ return -ENOSYS;
|
||||
+}
|
||||
+
|
||||
static inline int gpiod_export(struct gpio_desc *desc,
|
||||
bool direction_may_change)
|
||||
{
|
||||
--- a/drivers/gpio/gpiolib-sysfs.c
|
||||
+++ b/drivers/gpio/gpiolib-sysfs.c
|
||||
@@ -544,7 +544,7 @@ static struct class gpio_class = {
|
||||
*
|
||||
* Returns zero on success, else an error.
|
||||
*/
|
||||
-int gpiod_export(struct gpio_desc *desc, bool direction_may_change)
|
||||
+int __gpiod_export(struct gpio_desc *desc, bool direction_may_change, const char *name)
|
||||
{
|
||||
struct gpio_chip *chip;
|
||||
struct gpio_device *gdev;
|
||||
@@ -606,6 +606,8 @@ int gpiod_export(struct gpio_desc *desc,
|
||||
offset = gpio_chip_hwgpio(desc);
|
||||
if (chip->names && chip->names[offset])
|
||||
ioname = chip->names[offset];
|
||||
+ if (name)
|
||||
+ ioname = name;
|
||||
|
||||
dev = device_create_with_groups(&gpio_class, &gdev->dev,
|
||||
MKDEV(0, 0), data, gpio_groups,
|
||||
@@ -627,6 +629,12 @@ err_unlock:
|
||||
gpiod_dbg(desc, "%s: status %d\n", __func__, status);
|
||||
return status;
|
||||
}
|
||||
+EXPORT_SYMBOL_GPL(__gpiod_export);
|
||||
+
|
||||
+int gpiod_export(struct gpio_desc *desc, bool direction_may_change)
|
||||
+{
|
||||
+ return __gpiod_export(desc, direction_may_change, NULL);
|
||||
+}
|
||||
EXPORT_SYMBOL_GPL(gpiod_export);
|
||||
|
||||
static int match_export(struct device *dev, const void *desc)
|
@ -1,408 +0,0 @@
|
||||
From 9e3f1d0805b2d919904dd9a4ff0d956314cc3cba Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Sat, 8 Jul 2017 08:20:09 +0200
|
||||
Subject: debloat: procfs
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
fs/locks.c | 2 ++
|
||||
fs/proc/Kconfig | 5 +++++
|
||||
fs/proc/consoles.c | 3 +++
|
||||
fs/proc/proc_tty.c | 11 ++++++++++-
|
||||
include/net/snmp.h | 18 +++++++++++++++++-
|
||||
ipc/msg.c | 3 +++
|
||||
ipc/sem.c | 2 ++
|
||||
ipc/shm.c | 2 ++
|
||||
ipc/util.c | 3 +++
|
||||
kernel/exec_domain.c | 2 ++
|
||||
kernel/irq/proc.c | 9 +++++++++
|
||||
kernel/time/timer_list.c | 2 ++
|
||||
mm/vmalloc.c | 2 ++
|
||||
mm/vmstat.c | 8 +++++---
|
||||
net/8021q/vlanproc.c | 6 ++++++
|
||||
net/core/net-procfs.c | 18 ++++++++++++------
|
||||
net/core/sock.c | 2 ++
|
||||
net/ipv4/fib_trie.c | 18 ++++++++++++------
|
||||
net/ipv4/proc.c | 3 +++
|
||||
net/ipv4/route.c | 3 +++
|
||||
20 files changed, 105 insertions(+), 17 deletions(-)
|
||||
|
||||
--- a/fs/locks.c
|
||||
+++ b/fs/locks.c
|
||||
@@ -2885,6 +2885,8 @@ static const struct seq_operations locks
|
||||
|
||||
static int __init proc_locks_init(void)
|
||||
{
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ return 0;
|
||||
proc_create_seq_private("locks", 0, NULL, &locks_seq_operations,
|
||||
sizeof(struct locks_iterator), NULL);
|
||||
return 0;
|
||||
--- a/fs/proc/Kconfig
|
||||
+++ b/fs/proc/Kconfig
|
||||
@@ -100,6 +100,11 @@ config PROC_CHILDREN
|
||||
Say Y if you are running any user-space software which takes benefit from
|
||||
this interface. For example, rkt is such a piece of software.
|
||||
|
||||
+config PROC_STRIPPED
|
||||
+ default n
|
||||
+ depends on EXPERT
|
||||
+ bool "Strip non-essential /proc functionality to reduce code size"
|
||||
+
|
||||
config PROC_PID_ARCH_STATUS
|
||||
def_bool n
|
||||
depends on PROC_FS
|
||||
--- a/fs/proc/consoles.c
|
||||
+++ b/fs/proc/consoles.c
|
||||
@@ -92,6 +92,9 @@ static const struct seq_operations conso
|
||||
|
||||
static int __init proc_consoles_init(void)
|
||||
{
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ return 0;
|
||||
+
|
||||
proc_create_seq("consoles", 0, NULL, &consoles_op);
|
||||
return 0;
|
||||
}
|
||||
--- a/fs/proc/proc_tty.c
|
||||
+++ b/fs/proc/proc_tty.c
|
||||
@@ -131,7 +131,10 @@ static const struct seq_operations tty_d
|
||||
void proc_tty_register_driver(struct tty_driver *driver)
|
||||
{
|
||||
struct proc_dir_entry *ent;
|
||||
-
|
||||
+
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ return;
|
||||
+
|
||||
if (!driver->driver_name || driver->proc_entry ||
|
||||
!driver->ops->proc_show)
|
||||
return;
|
||||
@@ -148,6 +151,9 @@ void proc_tty_unregister_driver(struct t
|
||||
{
|
||||
struct proc_dir_entry *ent;
|
||||
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ return;
|
||||
+
|
||||
ent = driver->proc_entry;
|
||||
if (!ent)
|
||||
return;
|
||||
@@ -162,6 +168,9 @@ void proc_tty_unregister_driver(struct t
|
||||
*/
|
||||
void __init proc_tty_init(void)
|
||||
{
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ return;
|
||||
+
|
||||
if (!proc_mkdir("tty", NULL))
|
||||
return;
|
||||
proc_mkdir("tty/ldisc", NULL); /* Preserved: it's userspace visible */
|
||||
--- a/include/net/snmp.h
|
||||
+++ b/include/net/snmp.h
|
||||
@@ -124,6 +124,21 @@ struct linux_tls_mib {
|
||||
#define DECLARE_SNMP_STAT(type, name) \
|
||||
extern __typeof__(type) __percpu *name
|
||||
|
||||
+#ifdef CONFIG_PROC_STRIPPED
|
||||
+#define __SNMP_STATS_DUMMY(mib) \
|
||||
+ do { (void) mib->mibs[0]; } while(0)
|
||||
+
|
||||
+#define __SNMP_INC_STATS(mib, field) __SNMP_STATS_DUMMY(mib)
|
||||
+#define SNMP_INC_STATS_ATOMIC_LONG(mib, field) __SNMP_STATS_DUMMY(mib)
|
||||
+#define SNMP_INC_STATS(mib, field) __SNMP_STATS_DUMMY(mib)
|
||||
+#define SNMP_DEC_STATS(mib, field) __SNMP_STATS_DUMMY(mib)
|
||||
+#define __SNMP_ADD_STATS(mib, field, addend) __SNMP_STATS_DUMMY(mib)
|
||||
+#define SNMP_ADD_STATS(mib, field, addend) __SNMP_STATS_DUMMY(mib)
|
||||
+#define SNMP_UPD_PO_STATS(mib, basefield, addend) __SNMP_STATS_DUMMY(mib)
|
||||
+#define __SNMP_UPD_PO_STATS(mib, basefield, addend) __SNMP_STATS_DUMMY(mib)
|
||||
+
|
||||
+#else
|
||||
+
|
||||
#define __SNMP_INC_STATS(mib, field) \
|
||||
__this_cpu_inc(mib->mibs[field])
|
||||
|
||||
@@ -154,8 +169,9 @@ struct linux_tls_mib {
|
||||
__this_cpu_add(ptr[basefield##OCTETS], addend); \
|
||||
} while (0)
|
||||
|
||||
+#endif
|
||||
|
||||
-#if BITS_PER_LONG==32
|
||||
+#if (BITS_PER_LONG==32) && !defined(CONFIG_PROC_STRIPPED)
|
||||
|
||||
#define __SNMP_ADD_STATS64(mib, field, addend) \
|
||||
do { \
|
||||
--- a/ipc/msg.c
|
||||
+++ b/ipc/msg.c
|
||||
@@ -1350,6 +1350,9 @@ void __init msg_init(void)
|
||||
{
|
||||
msg_init_ns(&init_ipc_ns);
|
||||
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ return;
|
||||
+
|
||||
ipc_init_proc_interface("sysvipc/msg",
|
||||
" key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
|
||||
IPC_MSG_IDS, sysvipc_msg_proc_show);
|
||||
--- a/ipc/sem.c
|
||||
+++ b/ipc/sem.c
|
||||
@@ -268,6 +268,8 @@ void sem_exit_ns(struct ipc_namespace *n
|
||||
void __init sem_init(void)
|
||||
{
|
||||
sem_init_ns(&init_ipc_ns);
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ return;
|
||||
ipc_init_proc_interface("sysvipc/sem",
|
||||
" key semid perms nsems uid gid cuid cgid otime ctime\n",
|
||||
IPC_SEM_IDS, sysvipc_sem_proc_show);
|
||||
--- a/ipc/shm.c
|
||||
+++ b/ipc/shm.c
|
||||
@@ -154,6 +154,8 @@ pure_initcall(ipc_ns_init);
|
||||
|
||||
void __init shm_init(void)
|
||||
{
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ return;
|
||||
ipc_init_proc_interface("sysvipc/shm",
|
||||
#if BITS_PER_LONG <= 32
|
||||
" key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n",
|
||||
--- a/ipc/util.c
|
||||
+++ b/ipc/util.c
|
||||
@@ -141,6 +141,9 @@ void __init ipc_init_proc_interface(cons
|
||||
struct proc_dir_entry *pde;
|
||||
struct ipc_proc_iface *iface;
|
||||
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ return;
|
||||
+
|
||||
iface = kmalloc(sizeof(*iface), GFP_KERNEL);
|
||||
if (!iface)
|
||||
return;
|
||||
--- a/kernel/exec_domain.c
|
||||
+++ b/kernel/exec_domain.c
|
||||
@@ -29,6 +29,8 @@ static int execdomains_proc_show(struct
|
||||
|
||||
static int __init proc_execdomains_init(void)
|
||||
{
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ return 0;
|
||||
proc_create_single("execdomains", 0, NULL, execdomains_proc_show);
|
||||
return 0;
|
||||
}
|
||||
--- a/kernel/irq/proc.c
|
||||
+++ b/kernel/irq/proc.c
|
||||
@@ -341,6 +341,9 @@ void register_irq_proc(unsigned int irq,
|
||||
void __maybe_unused *irqp = (void *)(unsigned long) irq;
|
||||
char name [MAX_NAMELEN];
|
||||
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED) && !IS_ENABLED(CONFIG_SMP))
|
||||
+ return;
|
||||
+
|
||||
if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip))
|
||||
return;
|
||||
|
||||
@@ -394,6 +397,9 @@ void unregister_irq_proc(unsigned int ir
|
||||
{
|
||||
char name [MAX_NAMELEN];
|
||||
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED) && !IS_ENABLED(CONFIG_SMP))
|
||||
+ return;
|
||||
+
|
||||
if (!root_irq_dir || !desc->dir)
|
||||
return;
|
||||
#ifdef CONFIG_SMP
|
||||
@@ -432,6 +438,9 @@ void init_irq_proc(void)
|
||||
unsigned int irq;
|
||||
struct irq_desc *desc;
|
||||
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED) && !IS_ENABLED(CONFIG_SMP))
|
||||
+ return;
|
||||
+
|
||||
/* create /proc/irq */
|
||||
root_irq_dir = proc_mkdir("irq", NULL);
|
||||
if (!root_irq_dir)
|
||||
--- a/kernel/time/timer_list.c
|
||||
+++ b/kernel/time/timer_list.c
|
||||
@@ -350,6 +350,8 @@ static int __init init_timer_list_procfs
|
||||
{
|
||||
struct proc_dir_entry *pe;
|
||||
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ return 0;
|
||||
pe = proc_create_seq_private("timer_list", 0400, NULL, &timer_list_sops,
|
||||
sizeof(struct timer_list_iter), NULL);
|
||||
if (!pe)
|
||||
--- a/mm/vmalloc.c
|
||||
+++ b/mm/vmalloc.c
|
||||
@@ -4160,6 +4160,8 @@ static const struct seq_operations vmall
|
||||
|
||||
static int __init proc_vmalloc_init(void)
|
||||
{
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ return 0;
|
||||
if (IS_ENABLED(CONFIG_NUMA))
|
||||
proc_create_seq_private("vmallocinfo", 0400, NULL,
|
||||
&vmalloc_op,
|
||||
--- a/mm/vmstat.c
|
||||
+++ b/mm/vmstat.c
|
||||
@@ -2127,10 +2127,12 @@ void __init init_mm_internals(void)
|
||||
#endif
|
||||
migrate_on_reclaim_init();
|
||||
#ifdef CONFIG_PROC_FS
|
||||
- proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
|
||||
- proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
|
||||
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) {
|
||||
+ proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
|
||||
+ proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
|
||||
+ proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
|
||||
+ }
|
||||
proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
|
||||
- proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
|
||||
#endif
|
||||
}
|
||||
|
||||
--- a/net/8021q/vlanproc.c
|
||||
+++ b/net/8021q/vlanproc.c
|
||||
@@ -93,6 +93,9 @@ void vlan_proc_cleanup(struct net *net)
|
||||
{
|
||||
struct vlan_net *vn = net_generic(net, vlan_net_id);
|
||||
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ return;
|
||||
+
|
||||
if (vn->proc_vlan_conf)
|
||||
remove_proc_entry(name_conf, vn->proc_vlan_dir);
|
||||
|
||||
@@ -112,6 +115,9 @@ int __net_init vlan_proc_init(struct net
|
||||
{
|
||||
struct vlan_net *vn = net_generic(net, vlan_net_id);
|
||||
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ return 0;
|
||||
+
|
||||
vn->proc_vlan_dir = proc_net_mkdir(net, name_root, net->proc_net);
|
||||
if (!vn->proc_vlan_dir)
|
||||
goto err;
|
||||
--- a/net/core/net-procfs.c
|
||||
+++ b/net/core/net-procfs.c
|
||||
@@ -319,10 +319,12 @@ static int __net_init dev_proc_net_init(
|
||||
if (!proc_create_net("dev", 0444, net->proc_net, &dev_seq_ops,
|
||||
sizeof(struct seq_net_private)))
|
||||
goto out;
|
||||
- if (!proc_create_seq("softnet_stat", 0444, net->proc_net,
|
||||
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) &&
|
||||
+ !proc_create_seq("softnet_stat", 0444, net->proc_net,
|
||||
&softnet_seq_ops))
|
||||
goto out_dev;
|
||||
- if (!proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops,
|
||||
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) &&
|
||||
+ !proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops,
|
||||
sizeof(struct seq_net_private)))
|
||||
goto out_softnet;
|
||||
|
||||
@@ -332,9 +334,11 @@ static int __net_init dev_proc_net_init(
|
||||
out:
|
||||
return rc;
|
||||
out_ptype:
|
||||
- remove_proc_entry("ptype", net->proc_net);
|
||||
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ remove_proc_entry("ptype", net->proc_net);
|
||||
out_softnet:
|
||||
- remove_proc_entry("softnet_stat", net->proc_net);
|
||||
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ remove_proc_entry("softnet_stat", net->proc_net);
|
||||
out_dev:
|
||||
remove_proc_entry("dev", net->proc_net);
|
||||
goto out;
|
||||
@@ -344,8 +348,10 @@ static void __net_exit dev_proc_net_exit
|
||||
{
|
||||
wext_proc_exit(net);
|
||||
|
||||
- remove_proc_entry("ptype", net->proc_net);
|
||||
- remove_proc_entry("softnet_stat", net->proc_net);
|
||||
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) {
|
||||
+ remove_proc_entry("ptype", net->proc_net);
|
||||
+ remove_proc_entry("softnet_stat", net->proc_net);
|
||||
+ }
|
||||
remove_proc_entry("dev", net->proc_net);
|
||||
}
|
||||
|
||||
--- a/net/core/sock.c
|
||||
+++ b/net/core/sock.c
|
||||
@@ -4005,6 +4005,8 @@ static __net_initdata struct pernet_oper
|
||||
|
||||
static int __init proto_init(void)
|
||||
{
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ return 0;
|
||||
return register_pernet_subsys(&proto_net_ops);
|
||||
}
|
||||
|
||||
--- a/net/ipv4/fib_trie.c
|
||||
+++ b/net/ipv4/fib_trie.c
|
||||
@@ -3031,11 +3031,13 @@ static const struct seq_operations fib_r
|
||||
|
||||
int __net_init fib_proc_init(struct net *net)
|
||||
{
|
||||
- if (!proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops,
|
||||
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) &&
|
||||
+ !proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops,
|
||||
sizeof(struct fib_trie_iter)))
|
||||
goto out1;
|
||||
|
||||
- if (!proc_create_net_single("fib_triestat", 0444, net->proc_net,
|
||||
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) &&
|
||||
+ !proc_create_net_single("fib_triestat", 0444, net->proc_net,
|
||||
fib_triestat_seq_show, NULL))
|
||||
goto out2;
|
||||
|
||||
@@ -3046,17 +3048,21 @@ int __net_init fib_proc_init(struct net
|
||||
return 0;
|
||||
|
||||
out3:
|
||||
- remove_proc_entry("fib_triestat", net->proc_net);
|
||||
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ remove_proc_entry("fib_triestat", net->proc_net);
|
||||
out2:
|
||||
- remove_proc_entry("fib_trie", net->proc_net);
|
||||
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ remove_proc_entry("fib_trie", net->proc_net);
|
||||
out1:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
void __net_exit fib_proc_exit(struct net *net)
|
||||
{
|
||||
- remove_proc_entry("fib_trie", net->proc_net);
|
||||
- remove_proc_entry("fib_triestat", net->proc_net);
|
||||
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) {
|
||||
+ remove_proc_entry("fib_trie", net->proc_net);
|
||||
+ remove_proc_entry("fib_triestat", net->proc_net);
|
||||
+ }
|
||||
remove_proc_entry("route", net->proc_net);
|
||||
}
|
||||
|
||||
--- a/net/ipv4/proc.c
|
||||
+++ b/net/ipv4/proc.c
|
||||
@@ -553,5 +553,8 @@ static __net_initdata struct pernet_oper
|
||||
|
||||
int __init ip_misc_proc_init(void)
|
||||
{
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ return 0;
|
||||
+
|
||||
return register_pernet_subsys(&ip_proc_ops);
|
||||
}
|
||||
--- a/net/ipv4/route.c
|
||||
+++ b/net/ipv4/route.c
|
||||
@@ -380,6 +380,9 @@ static struct pernet_operations ip_rt_pr
|
||||
|
||||
static int __init ip_rt_proc_init(void)
|
||||
{
|
||||
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED))
|
||||
+ return 0;
|
||||
+
|
||||
return register_pernet_subsys(&ip_rt_proc_ops);
|
||||
}
|
||||
|
@ -1,93 +0,0 @@
|
||||
From e3692cb2fcd5ba1244512a0f43b8118f65f1c375 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Sat, 8 Jul 2017 08:20:43 +0200
|
||||
Subject: debloat: dmabuf
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
drivers/base/Kconfig | 2 +-
|
||||
drivers/dma-buf/Makefile | 10 +++++++---
|
||||
drivers/dma-buf/dma-buf.c | 4 +++-
|
||||
kernel/sched/core.c | 1 +
|
||||
4 files changed, 12 insertions(+), 5 deletions(-)
|
||||
|
||||
--- a/drivers/base/Kconfig
|
||||
+++ b/drivers/base/Kconfig
|
||||
@@ -198,7 +198,7 @@ config SOC_BUS
|
||||
source "drivers/base/regmap/Kconfig"
|
||||
|
||||
config DMA_SHARED_BUFFER
|
||||
- bool
|
||||
+ tristate
|
||||
default n
|
||||
select IRQ_WORK
|
||||
help
|
||||
--- a/drivers/dma-buf/heaps/Makefile
|
||||
+++ b/drivers/dma-buf/heaps/Makefile
|
||||
@@ -1,3 +1,3 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
-obj-$(CONFIG_DMABUF_HEAPS_SYSTEM) += system_heap.o
|
||||
-obj-$(CONFIG_DMABUF_HEAPS_CMA) += cma_heap.o
|
||||
+dma-buf-objs-$(CONFIG_DMABUF_HEAPS_SYSTEM) += system_heap.o
|
||||
+dma-buf-objs-$(CONFIG_DMABUF_HEAPS_CMA) += cma_heap.o
|
||||
--- a/drivers/dma-buf/Makefile
|
||||
+++ b/drivers/dma-buf/Makefile
|
||||
@@ -1,12 +1,14 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
-obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
|
||||
+obj-$(CONFIG_DMA_SHARED_BUFFER) := dma-shared-buffer.o
|
||||
+
|
||||
+dma-buf-objs-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
|
||||
dma-fence-unwrap.o dma-resv.o
|
||||
-obj-$(CONFIG_DMABUF_HEAPS) += dma-heap.o
|
||||
-obj-$(CONFIG_DMABUF_HEAPS) += heaps/
|
||||
-obj-$(CONFIG_SYNC_FILE) += sync_file.o
|
||||
-obj-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o
|
||||
-obj-$(CONFIG_UDMABUF) += udmabuf.o
|
||||
-obj-$(CONFIG_DMABUF_SYSFS_STATS) += dma-buf-sysfs-stats.o
|
||||
+dma-buf-objs-$(CONFIG_DMABUF_HEAPS) += dma-heap.o
|
||||
+obj-$(CONFIG_DMABUF_HEAPS) += heaps/
|
||||
+dma-buf-objs-$(CONFIG_SYNC_FILE) += sync_file.o
|
||||
+dma-buf-objs-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o
|
||||
+dma-buf-objs-$(CONFIG_UDMABUF) += udmabuf.o
|
||||
+dma-buf-objs-$(CONFIG_DMABUF_SYSFS_STATS) += dma-buf-sysfs-stats.o
|
||||
|
||||
dmabuf_selftests-y := \
|
||||
selftest.o \
|
||||
@@ -15,4 +17,6 @@ dmabuf_selftests-y := \
|
||||
st-dma-fence-unwrap.o \
|
||||
st-dma-resv.o
|
||||
|
||||
-obj-$(CONFIG_DMABUF_SELFTESTS) += dmabuf_selftests.o
|
||||
+dma-buf-objs-$(CONFIG_DMABUF_SELFTESTS) += dmabuf_selftests.o
|
||||
+
|
||||
+dma-shared-buffer-objs := $(dma-buf-objs-y)
|
||||
--- a/drivers/dma-buf/dma-buf.c
|
||||
+++ b/drivers/dma-buf/dma-buf.c
|
||||
@@ -1589,4 +1589,5 @@ static void __exit dma_buf_deinit(void)
|
||||
kern_unmount(dma_buf_mnt);
|
||||
dma_buf_uninit_sysfs_statistics();
|
||||
}
|
||||
-__exitcall(dma_buf_deinit);
|
||||
+module_exit(dma_buf_deinit);
|
||||
+MODULE_LICENSE("GPL");
|
||||
--- a/kernel/sched/core.c
|
||||
+++ b/kernel/sched/core.c
|
||||
@@ -4319,6 +4319,7 @@ int wake_up_state(struct task_struct *p,
|
||||
{
|
||||
return try_to_wake_up(p, state, 0);
|
||||
}
|
||||
+EXPORT_SYMBOL_GPL(wake_up_state);
|
||||
|
||||
/*
|
||||
* Perform scheduler related setup for a newly forked process p.
|
||||
--- a/fs/d_path.c
|
||||
+++ b/fs/d_path.c
|
||||
@@ -314,6 +314,7 @@ char *dynamic_dname(struct dentry *dentr
|
||||
buffer += buflen - sz;
|
||||
return memcpy(buffer, temp, sz);
|
||||
}
|
||||
+EXPORT_SYMBOL_GPL(dynamic_dname);
|
||||
|
||||
char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
|
||||
{
|
@ -1,32 +0,0 @@
|
||||
From 0d37e6edc09c99e683dd91ca0e83bbc0df8477b3 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Sun, 16 Jul 2017 16:56:10 +0200
|
||||
Subject: lib: add uevent_next_seqnum()
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
include/linux/kobject.h | 5 +++++
|
||||
lib/kobject_uevent.c | 37 +++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 42 insertions(+)
|
||||
|
||||
--- a/lib/kobject_uevent.c
|
||||
+++ b/lib/kobject_uevent.c
|
||||
@@ -179,6 +179,18 @@ out:
|
||||
return r;
|
||||
}
|
||||
|
||||
+u64 uevent_next_seqnum(void)
|
||||
+{
|
||||
+ u64 seq;
|
||||
+
|
||||
+ mutex_lock(&uevent_sock_mutex);
|
||||
+ seq = ++uevent_seqnum;
|
||||
+ mutex_unlock(&uevent_sock_mutex);
|
||||
+
|
||||
+ return seq;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(uevent_next_seqnum);
|
||||
+
|
||||
/**
|
||||
* kobject_synth_uevent - send synthetic uevent with arguments
|
||||
*
|
@ -1,76 +0,0 @@
|
||||
From 0d37e6edc09c99e683dd91ca0e83bbc0df8477b3 Mon Sep 17 00:00:00 2001
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Sun, 16 Jul 2017 16:56:10 +0200
|
||||
Subject: lib: add uevent_next_seqnum()
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
include/linux/kobject.h | 5 +++++
|
||||
lib/kobject_uevent.c | 37 +++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 42 insertions(+)
|
||||
|
||||
--- a/include/linux/kobject.h
|
||||
+++ b/include/linux/kobject.h
|
||||
@@ -32,6 +32,8 @@
|
||||
#define UEVENT_NUM_ENVP 64 /* number of env pointers */
|
||||
#define UEVENT_BUFFER_SIZE 2048 /* buffer for the variables */
|
||||
|
||||
+struct sk_buff;
|
||||
+
|
||||
#ifdef CONFIG_UEVENT_HELPER
|
||||
/* path to the userspace helper executed on an event */
|
||||
extern char uevent_helper[];
|
||||
@@ -224,4 +226,7 @@ int kobject_synth_uevent(struct kobject
|
||||
__printf(2, 3)
|
||||
int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...);
|
||||
|
||||
+int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group,
|
||||
+ gfp_t allocation);
|
||||
+
|
||||
#endif /* _KOBJECT_H_ */
|
||||
--- a/lib/kobject_uevent.c
|
||||
+++ b/lib/kobject_uevent.c
|
||||
@@ -691,6 +691,43 @@ int add_uevent_var(struct kobj_uevent_en
|
||||
EXPORT_SYMBOL_GPL(add_uevent_var);
|
||||
|
||||
#if defined(CONFIG_NET)
|
||||
+int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group,
|
||||
+ gfp_t allocation)
|
||||
+{
|
||||
+ struct uevent_sock *ue_sk;
|
||||
+ int err = 0;
|
||||
+
|
||||
+ /* send netlink message */
|
||||
+ mutex_lock(&uevent_sock_mutex);
|
||||
+ list_for_each_entry(ue_sk, &uevent_sock_list, list) {
|
||||
+ struct sock *uevent_sock = ue_sk->sk;
|
||||
+ struct sk_buff *skb2;
|
||||
+
|
||||
+ skb2 = skb_clone(skb, allocation);
|
||||
+ if (!skb2)
|
||||
+ break;
|
||||
+
|
||||
+ err = netlink_broadcast(uevent_sock, skb2, pid, group,
|
||||
+ allocation);
|
||||
+ if (err)
|
||||
+ break;
|
||||
+ }
|
||||
+ mutex_unlock(&uevent_sock_mutex);
|
||||
+
|
||||
+ kfree_skb(skb);
|
||||
+ return err;
|
||||
+}
|
||||
+#else
|
||||
+int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group,
|
||||
+ gfp_t allocation)
|
||||
+{
|
||||
+ kfree_skb(skb);
|
||||
+ return 0;
|
||||
+}
|
||||
+#endif
|
||||
+EXPORT_SYMBOL_GPL(broadcast_uevent);
|
||||
+
|
||||
+#if defined(CONFIG_NET)
|
||||
static int uevent_net_broadcast(struct sock *usk, struct sk_buff *skb,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
@ -1,12 +0,0 @@
|
||||
--- a/drivers/of/fdt.c
|
||||
+++ b/drivers/of/fdt.c
|
||||
@@ -1179,6 +1179,9 @@ int __init early_init_dt_scan_chosen(cha
|
||||
p = of_get_flat_dt_prop(node, "bootargs", &l);
|
||||
if (p != NULL && l > 0)
|
||||
strlcpy(cmdline, p, min(l, COMMAND_LINE_SIZE));
|
||||
+ p = of_get_flat_dt_prop(node, "bootargs-append", &l);
|
||||
+ if (p != NULL && l > 0)
|
||||
+ strlcat(cmdline, p, min_t(int, strlen(cmdline) + (int)l, COMMAND_LINE_SIZE));
|
||||
|
||||
/*
|
||||
* CONFIG_CMDLINE is meant to be a default in case nothing else
|
@ -1,352 +0,0 @@
|
||||
From 42824d4b753f84ccf885eca602c5037338b546c8 Mon Sep 17 00:00:00 2001
|
||||
From: Zhi Chen <zhichen@codeaurora.org>
|
||||
Date: Tue, 13 Jan 2015 14:28:18 -0800
|
||||
Subject: [PATCH 3/3] net: conntrack events, support multiple registrant
|
||||
|
||||
Merging this patch from kernel 3.4:
|
||||
This was supported by old (.28) kernel versions but removed
|
||||
because of its overhead.
|
||||
But we need this feature for NA connection manager. Both ipv4
|
||||
and ipv6 modules need to register themselves to ct events.
|
||||
|
||||
Change-Id: Iebfb254590fb594f5baf232f849d1b7ae45ef757
|
||||
Signed-off-by: Zhi Chen <zhichen@codeaurora.org>
|
||||
---
|
||||
include/net/netfilter/nf_conntrack_ecache.h | 15 ++-
|
||||
include/net/netns/conntrack.h | 3 +
|
||||
net/netfilter/Kconfig | 8 ++
|
||||
net/netfilter/nf_conntrack_core.c | 4 +
|
||||
net/netfilter/nf_conntrack_ecache.c | 103 +++++++++++++++++++-
|
||||
net/netfilter/nf_conntrack_netlink.c | 17 ++++
|
||||
6 files changed, 146 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/include/net/netfilter/nf_conntrack_ecache.h
|
||||
+++ b/include/net/netfilter/nf_conntrack_ecache.h
|
||||
@@ -65,9 +65,14 @@ struct nf_ct_event_notifier {
|
||||
int (*exp_event)(unsigned int events, const struct nf_exp_event *item);
|
||||
};
|
||||
|
||||
-void nf_conntrack_register_notifier(struct net *net,
|
||||
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
+extern int nf_conntrack_register_notifier(struct net *net, struct notifier_block *nb);
|
||||
+extern int nf_conntrack_unregister_notifier(struct net *net, struct notifier_block *nb);
|
||||
+#else
|
||||
+int nf_conntrack_register_notifier(struct net *net,
|
||||
const struct nf_ct_event_notifier *nb);
|
||||
void nf_conntrack_unregister_notifier(struct net *net);
|
||||
+#endif
|
||||
|
||||
void nf_ct_deliver_cached_events(struct nf_conn *ct);
|
||||
int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
|
||||
@@ -98,11 +103,13 @@ static inline void
|
||||
nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
|
||||
{
|
||||
#ifdef CONFIG_NF_CONNTRACK_EVENTS
|
||||
- struct net *net = nf_ct_net(ct);
|
||||
struct nf_conntrack_ecache *e;
|
||||
+#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
+ struct net *net = nf_ct_net(ct);
|
||||
|
||||
if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb))
|
||||
return;
|
||||
+#endif
|
||||
|
||||
e = nf_ct_ecache_find(ct);
|
||||
if (e == NULL)
|
||||
@@ -117,20 +124,34 @@ nf_conntrack_event_report(enum ip_conntr
|
||||
u32 portid, int report)
|
||||
{
|
||||
#ifdef CONFIG_NF_CONNTRACK_EVENTS
|
||||
- if (nf_ct_ecache_exist(ct))
|
||||
- return nf_conntrack_eventmask_report(1 << event, ct, portid, report);
|
||||
+#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
+ const struct net *net = nf_ct_net(ct);
|
||||
+
|
||||
+ if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb))
|
||||
+ return 0;
|
||||
#endif
|
||||
+
|
||||
+ return nf_conntrack_eventmask_report(1 << event, ct, portid, report);
|
||||
+#else
|
||||
return 0;
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline int
|
||||
nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
|
||||
{
|
||||
#ifdef CONFIG_NF_CONNTRACK_EVENTS
|
||||
- if (nf_ct_ecache_exist(ct))
|
||||
- return nf_conntrack_eventmask_report(1 << event, ct, 0, 0);
|
||||
+#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
+ const struct net *net = nf_ct_net(ct);
|
||||
+
|
||||
+ if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb))
|
||||
+ return 0;
|
||||
#endif
|
||||
+
|
||||
+ return nf_conntrack_eventmask_report(1 << event, ct, 0, 0);
|
||||
+#else
|
||||
return 0;
|
||||
+#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NF_CONNTRACK_EVENTS
|
||||
--- a/include/net/netns/conntrack.h
|
||||
+++ b/include/net/netns/conntrack.h
|
||||
@@ -106,6 +106,9 @@ struct netns_ct {
|
||||
u8 sysctl_checksum;
|
||||
|
||||
struct ip_conntrack_stat __percpu *stat;
|
||||
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
+ struct atomic_notifier_head nf_conntrack_chain;
|
||||
+#endif
|
||||
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
|
||||
struct nf_ip_net nf_ct_proto;
|
||||
#if defined(CONFIG_NF_CONNTRACK_LABELS)
|
||||
--- a/net/netfilter/Kconfig
|
||||
+++ b/net/netfilter/Kconfig
|
||||
@@ -161,6 +161,14 @@ config NF_CONNTRACK_EVENTS
|
||||
|
||||
If unsure, say `N'.
|
||||
|
||||
+config NF_CONNTRACK_CHAIN_EVENTS
|
||||
+ bool "Register multiple callbacks to ct events"
|
||||
+ depends on NF_CONNTRACK_EVENTS
|
||||
+ help
|
||||
+ Support multiple registrations.
|
||||
+
|
||||
+ If unsure, say `N'.
|
||||
+
|
||||
config NF_CONNTRACK_TIMEOUT
|
||||
bool 'Connection tracking timeout'
|
||||
depends on NETFILTER_ADVANCED
|
||||
--- a/net/netfilter/nf_conntrack_core.c
|
||||
+++ b/net/netfilter/nf_conntrack_core.c
|
||||
@@ -2803,6 +2803,10 @@ int nf_conntrack_init_net(struct net *ne
|
||||
nf_conntrack_ecache_pernet_init(net);
|
||||
nf_conntrack_proto_pernet_init(net);
|
||||
|
||||
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
+ ATOMIC_INIT_NOTIFIER_HEAD(&net->ct.nf_conntrack_chain);
|
||||
+#endif
|
||||
+
|
||||
return 0;
|
||||
|
||||
err_expect:
|
||||
--- a/net/netfilter/nf_conntrack_ecache.c
|
||||
+++ b/net/netfilter/nf_conntrack_ecache.c
|
||||
@@ -17,6 +17,9 @@
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/kernel.h>
|
||||
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
+#include <linux/notifier.h>
|
||||
+#endif
|
||||
#include <linux/netdevice.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/export.h>
|
||||
@@ -162,6 +165,35 @@ static int __nf_conntrack_eventmask_repo
|
||||
return ret;
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
+int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
|
||||
+ u32 portid, int report)
|
||||
+{
|
||||
+ struct nf_conntrack_ecache *e;
|
||||
+ struct net *net = nf_ct_net(ct);
|
||||
+
|
||||
+ e = nf_ct_ecache_find(ct);
|
||||
+ if (e == NULL)
|
||||
+ return 0;
|
||||
+
|
||||
+ if (nf_ct_is_confirmed(ct)) {
|
||||
+ struct nf_ct_event item = {
|
||||
+ .ct = ct,
|
||||
+ .portid = e->portid ? e->portid : portid,
|
||||
+ .report = report
|
||||
+ };
|
||||
+ /* This is a resent of a destroy event? If so, skip missed */
|
||||
+ unsigned long missed = e->portid ? 0 : e->missed;
|
||||
+
|
||||
+ if (!((eventmask | missed) & e->ctmask))
|
||||
+ return 0;
|
||||
+
|
||||
+ atomic_notifier_call_chain(&net->ct.nf_conntrack_chain, eventmask | missed, &item);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+#else
|
||||
int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
|
||||
u32 portid, int report)
|
||||
{
|
||||
@@ -197,10 +229,52 @@ int nf_conntrack_eventmask_report(unsign
|
||||
|
||||
return ret;
|
||||
}
|
||||
+#endif
|
||||
EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);
|
||||
|
||||
/* deliver cached events and clear cache entry - must be called with locally
|
||||
* disabled softirqs */
|
||||
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
+void nf_ct_deliver_cached_events(struct nf_conn *ct)
|
||||
+{
|
||||
+ unsigned long events, missed;
|
||||
+ struct nf_conntrack_ecache *e;
|
||||
+ struct nf_ct_event item;
|
||||
+ struct net *net = nf_ct_net(ct);
|
||||
+
|
||||
+ e = nf_ct_ecache_find(ct);
|
||||
+ if (e == NULL)
|
||||
+ return;
|
||||
+
|
||||
+ events = xchg(&e->cache, 0);
|
||||
+
|
||||
+ if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct) || !events)
|
||||
+ return;
|
||||
+
|
||||
+ /* We make a copy of the missed event cache without taking
|
||||
+ * the lock, thus we may send missed events twice. However,
|
||||
+ * this does not harm and it happens very rarely. */
|
||||
+ missed = e->missed;
|
||||
+
|
||||
+ if (!((events | missed) & e->ctmask))
|
||||
+ return;
|
||||
+
|
||||
+ item.ct = ct;
|
||||
+ item.portid = 0;
|
||||
+ item.report = 0;
|
||||
+
|
||||
+ atomic_notifier_call_chain(&net->ct.nf_conntrack_chain,
|
||||
+ events | missed,
|
||||
+ &item);
|
||||
+
|
||||
+ if (likely(!missed))
|
||||
+ return;
|
||||
+
|
||||
+ spin_lock_bh(&ct->lock);
|
||||
+ e->missed &= ~missed;
|
||||
+ spin_unlock_bh(&ct->lock);
|
||||
+}
|
||||
+#else
|
||||
void nf_ct_deliver_cached_events(struct nf_conn *ct)
|
||||
{
|
||||
struct nf_conntrack_ecache *e;
|
||||
@@ -226,6 +300,7 @@ void nf_ct_deliver_cached_events(struct
|
||||
*/
|
||||
__nf_conntrack_eventmask_report(e, events, e->missed, &item);
|
||||
}
|
||||
+#endif
|
||||
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
|
||||
|
||||
void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
|
||||
@@ -258,20 +333,43 @@ out_unlock:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
-void nf_conntrack_register_notifier(struct net *net,
|
||||
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
+int nf_conntrack_register_notifier(struct net *net,
|
||||
+ struct notifier_block *nb)
|
||||
+{
|
||||
+ return atomic_notifier_chain_register(&net->ct.nf_conntrack_chain, nb);
|
||||
+}
|
||||
+#else
|
||||
+int nf_conntrack_register_notifier(struct net *net,
|
||||
const struct nf_ct_event_notifier *new)
|
||||
{
|
||||
+ int ret;
|
||||
struct nf_ct_event_notifier *notify;
|
||||
|
||||
mutex_lock(&nf_ct_ecache_mutex);
|
||||
notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
|
||||
lockdep_is_held(&nf_ct_ecache_mutex));
|
||||
WARN_ON_ONCE(notify);
|
||||
+ if (notify != NULL) {
|
||||
+ ret = -EBUSY;
|
||||
+ goto out_unlock;
|
||||
+ }
|
||||
+
|
||||
rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
|
||||
- mutex_unlock(&nf_ct_ecache_mutex);
|
||||
+ ret = 0;
|
||||
+out_unlock:
|
||||
+ mutex_unlock(&nf_ct_ecache_mutex);
|
||||
+ return ret;
|
||||
}
|
||||
+#endif
|
||||
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
|
||||
|
||||
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
+int nf_conntrack_unregister_notifier(struct net *net, struct notifier_block *nb)
|
||||
+{
|
||||
+ return atomic_notifier_chain_unregister(&net->ct.nf_conntrack_chain, nb);
|
||||
+}
|
||||
+#else
|
||||
void nf_conntrack_unregister_notifier(struct net *net)
|
||||
{
|
||||
mutex_lock(&nf_ct_ecache_mutex);
|
||||
@@ -279,6 +377,7 @@ void nf_conntrack_unregister_notifier(st
|
||||
mutex_unlock(&nf_ct_ecache_mutex);
|
||||
/* synchronize_rcu() is called after netns pre_exit */
|
||||
}
|
||||
+#endif
|
||||
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
|
||||
|
||||
void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
|
||||
--- a/net/netfilter/nf_conntrack_netlink.c
|
||||
+++ b/net/netfilter/nf_conntrack_netlink.c
|
||||
@@ -712,12 +712,19 @@ static size_t ctnetlink_nlmsg_size(const
|
||||
}
|
||||
|
||||
static int
|
||||
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
+ctnetlink_conntrack_event(struct notifier_block *this, unsigned long events, void *ptr)
|
||||
+#else
|
||||
ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
|
||||
+#endif
|
||||
{
|
||||
const struct nf_conntrack_zone *zone;
|
||||
struct net *net;
|
||||
struct nlmsghdr *nlh;
|
||||
struct nlattr *nest_parms;
|
||||
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
+ struct nf_ct_event *item = (struct nf_ct_event *)ptr;
|
||||
+#endif
|
||||
struct nf_conn *ct = item->ct;
|
||||
struct sk_buff *skb;
|
||||
unsigned int type;
|
||||
@@ -3749,11 +3756,17 @@ static int ctnetlink_stat_exp_cpu(struct
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NF_CONNTRACK_EVENTS
|
||||
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
+static struct notifier_block ctnl_notifier = {
|
||||
+ .notifier_call = ctnetlink_conntrack_event
|
||||
+};
|
||||
+#else
|
||||
static struct nf_ct_event_notifier ctnl_notifier = {
|
||||
.ct_event = ctnetlink_conntrack_event,
|
||||
.exp_event = ctnetlink_expect_event,
|
||||
};
|
||||
#endif
|
||||
+#endif
|
||||
|
||||
static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
|
||||
[IPCTNL_MSG_CT_NEW] = {
|
||||
@@ -3852,8 +3865,12 @@ static int __net_init ctnetlink_net_init
|
||||
static void ctnetlink_net_pre_exit(struct net *net)
|
||||
{
|
||||
#ifdef CONFIG_NF_CONNTRACK_EVENTS
|
||||
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
+ nf_conntrack_unregister_notifier(net,&ctnl_notifier);
|
||||
+#else
|
||||
nf_conntrack_unregister_notifier(net);
|
||||
#endif
|
||||
+#endif
|
||||
}
|
||||
|
||||
static struct pernet_operations ctnetlink_net_ops = {
|
@ -1,204 +0,0 @@
|
||||
--- a/include/linux/if_bridge.h
|
||||
+++ b/include/linux/if_bridge.h
|
||||
@@ -69,6 +69,9 @@ void brioctl_set(int (*hook)(struct net
|
||||
int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
|
||||
struct ifreq *ifr, void __user *uarg);
|
||||
|
||||
+extern void br_dev_update_stats(struct net_device *dev,
|
||||
+ struct rtnl_link_stats64 *nlstats);
|
||||
+
|
||||
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)
|
||||
int br_multicast_list_adjacent(struct net_device *dev,
|
||||
struct list_head *br_ip_list);
|
||||
--- a/include/linux/skbuff.h
|
||||
+++ b/include/linux/skbuff.h
|
||||
@@ -979,6 +979,10 @@ struct sk_buff {
|
||||
__u8 csum_not_inet:1;
|
||||
__u8 scm_io_uring:1;
|
||||
|
||||
+#ifdef CONFIG_SHORTCUT_FE
|
||||
+ __u8 fast_forwarded:1;
|
||||
+#endif
|
||||
+
|
||||
#ifdef CONFIG_NET_SCHED
|
||||
__u16 tc_index; /* traffic control index */
|
||||
#endif
|
||||
--- a/include/linux/timer.h
|
||||
+++ b/include/linux/timer.h
|
||||
@@ -18,6 +18,10 @@ struct timer_list {
|
||||
void (*function)(struct timer_list *);
|
||||
u32 flags;
|
||||
|
||||
+#ifdef CONFIG_SHORTCUT_FE
|
||||
+ unsigned long cust_data;
|
||||
+#endif
|
||||
+
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
struct lockdep_map lockdep_map;
|
||||
#endif
|
||||
--- a/include/net/netfilter/nf_conntrack_ecache.h
|
||||
+++ b/include/net/netfilter/nf_conntrack_ecache.h
|
||||
@@ -68,6 +68,8 @@ struct nf_ct_event_notifier {
|
||||
#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
extern int nf_conntrack_register_notifier(struct net *net, struct notifier_block *nb);
|
||||
extern int nf_conntrack_unregister_notifier(struct net *net, struct notifier_block *nb);
|
||||
+extern int nf_conntrack_register_chain_notifier(struct net *net, struct notifier_block *nb);
|
||||
+extern int nf_conntrack_unregister_chain_notifier(struct net *net, struct notifier_block *nb);
|
||||
#else
|
||||
int nf_conntrack_register_notifier(struct net *net,
|
||||
const struct nf_ct_event_notifier *nb);
|
||||
--- a/net/Kconfig
|
||||
+++ b/net/Kconfig
|
||||
@@ -460,6 +460,9 @@ config FAILOVER
|
||||
migration of VMs with direct attached VFs by failing over to the
|
||||
paravirtual datapath when the VF is unplugged.
|
||||
|
||||
+config SHORTCUT_FE
|
||||
+ bool "Enables kernel network stack path for Shortcut Forwarding Engine"
|
||||
+
|
||||
config ETHTOOL_NETLINK
|
||||
bool "Netlink interface for ethtool"
|
||||
default y
|
||||
--- a/net/bridge/br_if.c
|
||||
+++ b/net/bridge/br_if.c
|
||||
@@ -774,6 +774,28 @@ void br_port_flags_change(struct net_bri
|
||||
br_recalculate_neigh_suppress_enabled(br);
|
||||
}
|
||||
|
||||
+void br_dev_update_stats(struct net_device *dev,
|
||||
+ struct rtnl_link_stats64 *nlstats)
|
||||
+{
|
||||
+
|
||||
+ struct pcpu_sw_netstats *stats;
|
||||
+
|
||||
+ /* Is this a bridge? */
|
||||
+ if (!(dev->priv_flags & IFF_EBRIDGE))
|
||||
+ return;
|
||||
+
|
||||
+
|
||||
+ stats = this_cpu_ptr(dev->tstats);
|
||||
+
|
||||
+ u64_stats_update_begin(&stats->syncp);
|
||||
+ u64_stats_add(&stats->rx_packets, nlstats->rx_packets);
|
||||
+ u64_stats_add(&stats->rx_bytes, nlstats->rx_bytes);
|
||||
+ u64_stats_add(&stats->tx_packets, nlstats->tx_packets);
|
||||
+ u64_stats_add(&stats->tx_bytes, nlstats->tx_bytes);
|
||||
+ u64_stats_update_end(&stats->syncp);
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(br_dev_update_stats);
|
||||
+
|
||||
bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
|
||||
{
|
||||
struct net_bridge_port *p;
|
||||
--- a/net/core/dev.c
|
||||
+++ b/net/core/dev.c
|
||||
@@ -3581,9 +3581,17 @@ static int xmit_one(struct sk_buff *skb,
|
||||
{
|
||||
unsigned int len;
|
||||
int rc;
|
||||
-
|
||||
+#ifdef CONFIG_SHORTCUT_FE
|
||||
+ /* If this skb has been fast forwarded then we don't want it to
|
||||
+ * go to any taps (by definition we're trying to bypass them).
|
||||
+ */
|
||||
+ if (!skb->fast_forwarded) {
|
||||
+#endif
|
||||
if (dev_nit_active(dev))
|
||||
dev_queue_xmit_nit(skb, dev);
|
||||
+#ifdef CONFIG_SHORTCUT_FE
|
||||
+ }
|
||||
+#endif
|
||||
|
||||
len = skb->len;
|
||||
trace_net_dev_start_xmit(skb, dev);
|
||||
@@ -5237,6 +5245,11 @@ void netdev_rx_handler_unregister(struct
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
|
||||
|
||||
+#ifdef CONFIG_SHORTCUT_FE
|
||||
+int (*athrs_fast_nat_recv)(struct sk_buff *skb) __rcu __read_mostly;
|
||||
+EXPORT_SYMBOL_GPL(athrs_fast_nat_recv);
|
||||
+#endif
|
||||
+
|
||||
/*
|
||||
* Limit the use of PFMEMALLOC reserves to those protocols that implement
|
||||
* the special handling of PFMEMALLOC skbs.
|
||||
@@ -5285,6 +5298,10 @@ static int __netif_receive_skb_core(stru
|
||||
int ret = NET_RX_DROP;
|
||||
__be16 type;
|
||||
|
||||
+#ifdef CONFIG_SHORTCUT_FE
|
||||
+ int (*fast_recv)(struct sk_buff *skb);
|
||||
+#endif
|
||||
+
|
||||
net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb);
|
||||
|
||||
trace_netif_receive_skb(skb);
|
||||
@@ -5322,6 +5339,15 @@ another_round:
|
||||
goto out;
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_SHORTCUT_FE
|
||||
+ fast_recv = rcu_dereference(athrs_fast_nat_recv);
|
||||
+ if (fast_recv) {
|
||||
+ if (fast_recv(skb)) {
|
||||
+ ret = NET_RX_SUCCESS;
|
||||
+ goto out;
|
||||
+ }
|
||||
+ }
|
||||
+#endif
|
||||
if (skb_skip_tc_classify(skb))
|
||||
goto skip_classify;
|
||||
|
||||
--- a/net/netfilter/nf_conntrack_ecache.c
|
||||
+++ b/net/netfilter/nf_conntrack_ecache.c
|
||||
@@ -143,12 +143,23 @@ static int __nf_conntrack_eventmask_repo
|
||||
rcu_read_lock();
|
||||
|
||||
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
|
||||
- if (!notify) {
|
||||
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
+ if (!notify && !rcu_dereference_raw(net->ct.nf_conntrack_chain.head))
|
||||
+#else
|
||||
+ if (!notify)
|
||||
+#endif
|
||||
+ {
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
-
|
||||
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
|
||||
+ ret = atomic_notifier_call_chain(&net->ct.nf_conntrack_chain,
|
||||
+ events | missed, &item);
|
||||
+ if (notify)
|
||||
+ ret = notify->ct_event(events | missed, item);
|
||||
+#else
|
||||
ret = notify->ct_event(events | missed, item);
|
||||
+#endif
|
||||
rcu_read_unlock();
|
||||
|
||||
if (likely(ret >= 0 && missed == 0))
|
||||
@@ -339,6 +350,11 @@ int nf_conntrack_register_notifier(struc
|
||||
{
|
||||
return atomic_notifier_chain_register(&net->ct.nf_conntrack_chain, nb);
|
||||
}
|
||||
+int nf_conntrack_register_chain_notifier(struct net *net, struct notifier_block *nb)
|
||||
+{
|
||||
+ return atomic_notifier_chain_register(&net->ct.nf_conntrack_chain, nb);
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(nf_conntrack_register_chain_notifier);
|
||||
#else
|
||||
int nf_conntrack_register_notifier(struct net *net,
|
||||
const struct nf_ct_event_notifier *new)
|
||||
@@ -369,6 +385,11 @@ int nf_conntrack_unregister_notifier(str
|
||||
{
|
||||
return atomic_notifier_chain_unregister(&net->ct.nf_conntrack_chain, nb);
|
||||
}
|
||||
+int nf_conntrack_unregister_chain_notifier(struct net *net, struct notifier_block *nb)
|
||||
+{
|
||||
+ return atomic_notifier_chain_unregister(&net->ct.nf_conntrack_chain, nb);
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(nf_conntrack_unregister_chain_notifier);
|
||||
#else
|
||||
void nf_conntrack_unregister_notifier(struct net *net)
|
||||
{
|
@ -1,235 +0,0 @@
|
||||
--- a/net/netfilter/nf_nat_masquerade.c
|
||||
+++ b/net/netfilter/nf_nat_masquerade.c
|
||||
@@ -8,6 +8,9 @@
|
||||
#include <linux/netfilter_ipv6.h>
|
||||
|
||||
#include <net/netfilter/nf_nat_masquerade.h>
|
||||
+#include <net/netfilter/nf_conntrack_zones.h>
|
||||
+#include <net/netfilter/nf_conntrack_helper.h>
|
||||
+#include <net/netfilter/nf_conntrack_core.h>
|
||||
|
||||
struct masq_dev_work {
|
||||
struct work_struct work;
|
||||
@@ -24,6 +27,129 @@ static DEFINE_MUTEX(masq_mutex);
|
||||
static unsigned int masq_refcnt __read_mostly;
|
||||
static atomic_t masq_worker_count __read_mostly;
|
||||
|
||||
+static void bcm_nat_expect(struct nf_conn *ct,
|
||||
+ struct nf_conntrack_expect *exp)
|
||||
+{
|
||||
+ struct nf_nat_range2 range;
|
||||
+
|
||||
+ /* This must be a fresh one. */
|
||||
+ BUG_ON(ct->status & IPS_NAT_DONE_MASK);
|
||||
+
|
||||
+ /* Change src to where new ct comes from */
|
||||
+ range.flags = NF_NAT_RANGE_MAP_IPS;
|
||||
+ range.min_addr = range.max_addr =
|
||||
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3;
|
||||
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
|
||||
+
|
||||
+ /* For DST manip, map port here to where it's expected. */
|
||||
+ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
|
||||
+ range.min_proto = range.max_proto = exp->saved_proto;
|
||||
+ range.min_addr = range.max_addr = exp->saved_addr;
|
||||
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
|
||||
+}
|
||||
+
|
||||
+/****************************************************************************/
|
||||
+static int bcm_nat_help(struct sk_buff *skb, unsigned int protoff,
|
||||
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
|
||||
+{
|
||||
+ int dir = CTINFO2DIR(ctinfo);
|
||||
+ struct nf_conn_help *help = nfct_help(ct);
|
||||
+ struct nf_conntrack_expect *exp;
|
||||
+
|
||||
+ if (dir != IP_CT_DIR_ORIGINAL ||
|
||||
+ help->expecting[NF_CT_EXPECT_CLASS_DEFAULT])
|
||||
+ return NF_ACCEPT;
|
||||
+
|
||||
+ pr_debug("bcm_nat: packet[%d bytes] ", skb->len);
|
||||
+ nf_ct_dump_tuple(&ct->tuplehash[dir].tuple);
|
||||
+ pr_debug("reply: ");
|
||||
+ nf_ct_dump_tuple(&ct->tuplehash[!dir].tuple);
|
||||
+
|
||||
+ /* Create expect */
|
||||
+ if ((exp = nf_ct_expect_alloc(ct)) == NULL)
|
||||
+ return NF_ACCEPT;
|
||||
+
|
||||
+ nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, AF_INET, NULL,
|
||||
+ &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_UDP,
|
||||
+ NULL, &ct->tuplehash[!dir].tuple.dst.u.udp.port);
|
||||
+ exp->flags = NF_CT_EXPECT_PERMANENT;
|
||||
+ exp->saved_addr = ct->tuplehash[dir].tuple.src.u3;
|
||||
+ exp->saved_proto.udp.port = ct->tuplehash[dir].tuple.src.u.udp.port;
|
||||
+ exp->dir = !dir;
|
||||
+ exp->expectfn = bcm_nat_expect;
|
||||
+
|
||||
+ /* Setup expect */
|
||||
+ nf_ct_expect_related(exp, 0);
|
||||
+ nf_ct_expect_put(exp);
|
||||
+ pr_debug("bcm_nat: expect setup\n");
|
||||
+
|
||||
+ return NF_ACCEPT;
|
||||
+}
|
||||
+
|
||||
+/****************************************************************************/
|
||||
+static struct nf_conntrack_expect_policy bcm_nat_exp_policy __read_mostly = {
|
||||
+ .max_expected = 1000,
|
||||
+ .timeout = 240,
|
||||
+};
|
||||
+
|
||||
+/****************************************************************************/
|
||||
+static struct nf_conntrack_helper nf_conntrack_helper_bcm_nat __read_mostly = {
|
||||
+ .name = "BCM-NAT",
|
||||
+ .me = THIS_MODULE,
|
||||
+ .tuple.src.l3num = AF_INET,
|
||||
+ .tuple.dst.protonum = IPPROTO_UDP,
|
||||
+ .expect_policy = &bcm_nat_exp_policy,
|
||||
+ .expect_class_max = 1,
|
||||
+ .help = bcm_nat_help,
|
||||
+};
|
||||
+
|
||||
+/****************************************************************************/
|
||||
+static inline int find_exp(__be32 ip, __be16 port, struct nf_conn *ct)
|
||||
+{
|
||||
+ struct nf_conntrack_tuple tuple;
|
||||
+ struct nf_conntrack_expect *i = NULL;
|
||||
+
|
||||
+
|
||||
+ memset(&tuple, 0, sizeof(tuple));
|
||||
+ tuple.src.l3num = AF_INET;
|
||||
+ tuple.dst.protonum = IPPROTO_UDP;
|
||||
+ tuple.dst.u3.ip = ip;
|
||||
+ tuple.dst.u.udp.port = port;
|
||||
+
|
||||
+ rcu_read_lock();
|
||||
+ i = __nf_ct_expect_find(nf_ct_net(ct), nf_ct_zone(ct), &tuple);
|
||||
+ rcu_read_unlock();
|
||||
+
|
||||
+ return i != NULL;
|
||||
+}
|
||||
+
|
||||
+/****************************************************************************/
|
||||
+static inline struct nf_conntrack_expect *find_fullcone_exp(struct nf_conn *ct)
|
||||
+{
|
||||
+ struct nf_conntrack_tuple * tp =
|
||||
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
|
||||
+ struct nf_conntrack_expect * exp = NULL;
|
||||
+ struct nf_conntrack_expect * i;
|
||||
+ unsigned int h;
|
||||
+
|
||||
+ rcu_read_lock();
|
||||
+ for (h = 0; h < nf_ct_expect_hsize; h++) {
|
||||
+ hlist_for_each_entry_rcu(i, &nf_ct_expect_hash[h], hnode) {
|
||||
+ if (nf_inet_addr_cmp(&i->saved_addr, &tp->src.u3) &&
|
||||
+ i->saved_proto.all == tp->src.u.all &&
|
||||
+ i->tuple.dst.protonum == tp->dst.protonum &&
|
||||
+ i->tuple.src.u3.ip == 0 &&
|
||||
+ i->tuple.src.u.udp.port == 0) {
|
||||
+ exp = i;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ rcu_read_unlock();
|
||||
+
|
||||
+ return exp;
|
||||
+}
|
||||
+
|
||||
unsigned int
|
||||
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
|
||||
const struct nf_nat_range2 *range,
|
||||
@@ -61,6 +187,72 @@ nf_nat_masquerade_ipv4(struct sk_buff *s
|
||||
if (nat)
|
||||
nat->masq_index = out->ifindex;
|
||||
|
||||
+/* RFC 4787 - 4.2.2. Port Parity
|
||||
+ i.e., an even port will be mapped to an even port, and an odd port will be mapped to an odd port.
|
||||
+*/
|
||||
+#define CHECK_PORT_PARITY(a, b) ((a%2)==(b%2))
|
||||
+ if (range->min_addr.ip != 0 /* nat_mode == full cone */
|
||||
+ && (nfct_help(ct) == NULL || nfct_help(ct)->helper == NULL)
|
||||
+ && nf_ct_protonum(ct) == IPPROTO_UDP) {
|
||||
+ unsigned int ret;
|
||||
+ u_int16_t minport;
|
||||
+ u_int16_t maxport;
|
||||
+ struct nf_conntrack_expect *exp;
|
||||
+
|
||||
+ pr_debug("bcm_nat: need full cone NAT\n");
|
||||
+
|
||||
+ /* Choose port */
|
||||
+ spin_lock_bh(&nf_conntrack_expect_lock);
|
||||
+ /* Look for existing expectation */
|
||||
+ exp = find_fullcone_exp(ct);
|
||||
+ if (exp) {
|
||||
+ minport = maxport = exp->tuple.dst.u.udp.port;
|
||||
+ pr_debug("bcm_nat: existing mapped port = %hu\n",
|
||||
+ ntohs(minport));
|
||||
+ } else { /* no previous expect */
|
||||
+ u_int16_t newport, tmpport, orgport;
|
||||
+
|
||||
+ minport = range->min_proto.all == 0?
|
||||
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.
|
||||
+ u.udp.port : range->min_proto.all;
|
||||
+ maxport = range->max_proto.all == 0?
|
||||
+ htons(65535) : range->max_proto.all;
|
||||
+ orgport = ntohs(minport);
|
||||
+ for (newport = ntohs(minport),tmpport = ntohs(maxport);
|
||||
+ newport <= tmpport; newport++) {
|
||||
+ if (CHECK_PORT_PARITY(orgport, newport) && !find_exp(newsrc, htons(newport), ct)) {
|
||||
+ pr_debug("bcm_nat: new mapped port = "
|
||||
+ "%hu\n", newport);
|
||||
+ minport = maxport = htons(newport);
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ spin_unlock_bh(&nf_conntrack_expect_lock);
|
||||
+
|
||||
+
|
||||
+ memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
|
||||
+ memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
|
||||
+
|
||||
+ newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS |
|
||||
+ NF_NAT_RANGE_PROTO_SPECIFIED;
|
||||
+ newrange.max_addr.ip = newrange.min_addr.ip = newsrc;
|
||||
+ newrange.min_proto.udp.port = newrange.max_proto.udp.port = minport;
|
||||
+
|
||||
+ /* Set ct helper */
|
||||
+ ret = nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
|
||||
+ if (ret == NF_ACCEPT) {
|
||||
+ struct nf_conn_help *help = nfct_help(ct);
|
||||
+ if (help == NULL)
|
||||
+ help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
|
||||
+ if (help != NULL) {
|
||||
+ help->helper = &nf_conntrack_helper_bcm_nat;
|
||||
+ pr_debug("bcm_nat: helper set\n");
|
||||
+ }
|
||||
+ }
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
/* Transfer from original range. */
|
||||
memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
|
||||
memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
|
||||
@@ -352,6 +544,7 @@ EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet
|
||||
|
||||
void nf_nat_masquerade_inet_unregister_notifiers(void)
|
||||
{
|
||||
+ nf_conntrack_helper_unregister(&nf_conntrack_helper_bcm_nat);
|
||||
mutex_lock(&masq_mutex);
|
||||
/* check if the notifiers still have clients */
|
||||
if (--masq_refcnt > 0)
|
||||
--- a/net/netfilter/xt_MASQUERADE.c
|
||||
+++ b/net/netfilter/xt_MASQUERADE.c
|
||||
@@ -42,6 +42,9 @@ masquerade_tg(struct sk_buff *skb, const
|
||||
range.min_proto = mr->range[0].min;
|
||||
range.max_proto = mr->range[0].max;
|
||||
|
||||
+ range.min_addr.ip = mr->range[0].min_ip;
|
||||
+ range.max_addr.ip = mr->range[0].max_ip;
|
||||
+
|
||||
return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range,
|
||||
xt_out(par));
|
||||
}
|
@ -1,12 +0,0 @@
|
||||
--- a/net/wireless/wext-core.c
|
||||
+++ b/net/wireless/wext-core.c
|
||||
@@ -959,6 +959,9 @@ static int wireless_process_ioctl(struct
|
||||
else if (private)
|
||||
return private(dev, iwr, cmd, info, handler);
|
||||
}
|
||||
+ /* Old driver API : call driver ioctl handler */
|
||||
+ if (dev->netdev_ops->ndo_do_ioctl)
|
||||
+ return dev->netdev_ops->ndo_do_ioctl(dev, (struct ifreq *) iwr, cmd);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
@ -1,29 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Thu, 22 Oct 2020 22:00:03 +0200
|
||||
Subject: [PATCH] compiler.h: only include asm/rwonce.h for kernel code
|
||||
|
||||
This header file is not in uapi, which makes any user space code that includes
|
||||
linux/compiler.h to fail with the error 'asm/rwonce.h: No such file or directory'
|
||||
|
||||
Fixes: e506ea451254 ("compiler.h: Split {READ,WRITE}_ONCE definitions out into rwonce.h")
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
|
||||
--- a/include/linux/compiler.h
|
||||
+++ b/include/linux/compiler.h
|
||||
@@ -213,6 +213,8 @@ void ftrace_likely_update(struct ftrace_
|
||||
#define function_nocfi(x) (x)
|
||||
#endif
|
||||
|
||||
+#include <asm/rwonce.h>
|
||||
+
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
/*
|
||||
@@ -251,6 +253,4 @@ static inline void *offset_to_ptr(const
|
||||
*/
|
||||
#define prevent_tail_call_optimization() mb()
|
||||
|
||||
-#include <asm/rwonce.h>
|
||||
-
|
||||
#endif /* __LINUX_COMPILER_H */
|
@ -1,11 +0,0 @@
|
||||
--- a/include/uapi/linux/swab.h
|
||||
+++ b/include/uapi/linux/swab.h
|
||||
@@ -3,7 +3,7 @@
|
||||
#define _UAPI_LINUX_SWAB_H
|
||||
|
||||
#include <linux/types.h>
|
||||
-#include <linux/compiler.h>
|
||||
+#include <linux/stddef.h>
|
||||
#include <asm/bitsperlong.h>
|
||||
#include <asm/swab.h>
|
||||
|
@ -1,57 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Date: Wed, 18 Apr 2018 10:50:05 +0200
|
||||
Subject: [PATCH] MIPS: only process negative stack offsets on stack traces
|
||||
|
||||
Fixes endless back traces in cases where the compiler emits a stack
|
||||
pointer increase in a branch delay slot (probably for some form of
|
||||
function return).
|
||||
|
||||
[ 3.475442] BUG: MAX_STACK_TRACE_ENTRIES too low!
|
||||
[ 3.480070] turning off the locking correctness validator.
|
||||
[ 3.485521] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.14.34 #0
|
||||
[ 3.491475] Stack : 00000000 00000000 00000000 00000000 80e0fce2 00000034 00000000 00000000
|
||||
[ 3.499764] 87c3838c 80696377 8061047c 00000000 00000001 00000001 87c2d850 6534689f
|
||||
[ 3.508059] 00000000 00000000 80e10000 00000000 00000000 000000cf 0000000f 00000000
|
||||
[ 3.516353] 00000000 806a0000 00076891 00000000 00000000 00000000 ffffffff 00000000
|
||||
[ 3.524648] 806c0000 00000004 80e10000 806a0000 00000003 80690000 00000000 80700000
|
||||
[ 3.532942] ...
|
||||
[ 3.535362] Call Trace:
|
||||
[ 3.537818] [<80010a48>] show_stack+0x58/0x100
|
||||
[ 3.542207] [<804c2f78>] dump_stack+0xe8/0x170
|
||||
[ 3.546613] [<80079f90>] save_trace+0xf0/0x110
|
||||
[ 3.551010] [<8007b1ec>] mark_lock+0x33c/0x78c
|
||||
[ 3.555413] [<8007bf48>] __lock_acquire+0x2ac/0x1a08
|
||||
[ 3.560337] [<8007de60>] lock_acquire+0x64/0x8c
|
||||
[ 3.564846] [<804e1570>] _raw_spin_lock_irqsave+0x54/0x78
|
||||
[ 3.570186] [<801b618c>] kernfs_notify+0x94/0xac
|
||||
[ 3.574770] [<801b7b10>] sysfs_notify+0x74/0xa0
|
||||
[ 3.579257] [<801b618c>] kernfs_notify+0x94/0xac
|
||||
[ 3.583839] [<801b7b10>] sysfs_notify+0x74/0xa0
|
||||
[ 3.588329] [<801b618c>] kernfs_notify+0x94/0xac
|
||||
[ 3.592911] [<801b7b10>] sysfs_notify+0x74/0xa0
|
||||
[ 3.597401] [<801b618c>] kernfs_notify+0x94/0xac
|
||||
[ 3.601983] [<801b7b10>] sysfs_notify+0x74/0xa0
|
||||
[ 3.606473] [<801b618c>] kernfs_notify+0x94/0xac
|
||||
[ 3.611055] [<801b7b10>] sysfs_notify+0x74/0xa0
|
||||
[ 3.615545] [<801b618c>] kernfs_notify+0x94/0xac
|
||||
[ 3.620125] [<801b7b10>] sysfs_notify+0x74/0xa0
|
||||
[ 3.624619] [<801b618c>] kernfs_notify+0x94/0xac
|
||||
[ 3.629197] [<801b7b10>] sysfs_notify+0x74/0xa0
|
||||
[ 3.633691] [<801b618c>] kernfs_notify+0x94/0xac
|
||||
[ 3.638269] [<801b7b10>] sysfs_notify+0x74/0xa0
|
||||
[ 3.642763] [<801b618c>] kernfs_notify+0x94/0xac
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
|
||||
--- a/arch/mips/kernel/process.c
|
||||
+++ b/arch/mips/kernel/process.c
|
||||
@@ -394,6 +394,8 @@ static inline int is_sp_move_ins(union m
|
||||
|
||||
if (ip->i_format.opcode == addiu_op ||
|
||||
ip->i_format.opcode == daddiu_op) {
|
||||
+ if (ip->i_format.simmediate > 0)
|
||||
+ return 0;
|
||||
*frame_size = -ip->i_format.simmediate;
|
||||
return 1;
|
||||
}
|
@ -1,82 +0,0 @@
|
||||
From: Tobias Wolf <dev-NTEO@vplace.de>
|
||||
Subject: mm: Fix alloc_node_mem_map with ARCH_PFN_OFFSET calculation
|
||||
|
||||
An rt288x (ralink) based router (Belkin F5D8235 v1) does not boot with any
|
||||
kernel beyond version 4.3 resulting in:
|
||||
|
||||
BUG: Bad page state in process swapper pfn:086ac
|
||||
|
||||
bisect resulted in:
|
||||
|
||||
a1c34a3bf00af2cede839879502e12dc68491ad5 is the first bad commit
|
||||
commit a1c34a3bf00af2cede839879502e12dc68491ad5
|
||||
Author: Laura Abbott <laura@labbott.name>
|
||||
Date: Thu Nov 5 18:48:46 2015 -0800
|
||||
|
||||
mm: Don't offset memmap for flatmem
|
||||
|
||||
Srinivas Kandagatla reported bad page messages when trying to remove the
|
||||
bottom 2MB on an ARM based IFC6410 board
|
||||
|
||||
BUG: Bad page state in process swapper pfn:fffa8
|
||||
page:ef7fb500 count:0 mapcount:0 mapping: (null) index:0x0
|
||||
flags: 0x96640253(locked|error|dirty|active|arch_1|reclaim|mlocked)
|
||||
page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set
|
||||
bad because of flags:
|
||||
flags: 0x200041(locked|active|mlocked)
|
||||
Modules linked in:
|
||||
CPU: 0 PID: 0 Comm: swapper Not tainted 3.19.0-rc3-00007-g412f9ba-dirty
|
||||
#816
|
||||
Hardware name: Qualcomm (Flattened Device Tree)
|
||||
unwind_backtrace
|
||||
show_stack
|
||||
dump_stack
|
||||
bad_page
|
||||
free_pages_prepare
|
||||
free_hot_cold_page
|
||||
__free_pages
|
||||
free_highmem_page
|
||||
mem_init
|
||||
start_kernel
|
||||
Disabling lock debugging due to kernel taint
|
||||
[...]
|
||||
:040000 040000 2de013c372345fd471cd58f0553c9b38b0ef1cc4
|
||||
0a8156f848733dfa21e16c196dfb6c0a76290709 M mm
|
||||
|
||||
This fix for ARM does not account ARCH_PFN_OFFSET for mem_map as later used by
|
||||
page_to_pfn anymore.
|
||||
|
||||
The following output was generated with two hacked in printk statements:
|
||||
|
||||
printk("before %p vs. %p or %p\n", mem_map, mem_map - offset, mem_map -
|
||||
(pgdat->node_start_pfn - ARCH_PFN_OFFSET));
|
||||
if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
|
||||
mem_map -= offset + (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
|
||||
printk("after %p\n", mem_map);
|
||||
|
||||
Output:
|
||||
|
||||
[ 0.000000] before 8861b280 vs. 8861b280 or 8851b280
|
||||
[ 0.000000] after 8851b280
|
||||
|
||||
As seen in the first line mem_map with subtraction of offset does not equal the
|
||||
mem_map after subtraction of ARCH_PFN_OFFSET.
|
||||
|
||||
After adding the offset of ARCH_PFN_OFFSET as well to mem_map as the
|
||||
previously calculated offset is zero for the named platform it is able to boot
|
||||
4.4 and 4.9-rc7 again.
|
||||
|
||||
Signed-off-by: Tobias Wolf <dev-NTEO@vplace.de>
|
||||
---
|
||||
|
||||
--- a/mm/page_alloc.c
|
||||
+++ b/mm/page_alloc.c
|
||||
@@ -7841,7 +7841,7 @@ static void __init alloc_node_mem_map(st
|
||||
if (pgdat == NODE_DATA(0)) {
|
||||
mem_map = NODE_DATA(0)->node_mem_map;
|
||||
if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
|
||||
- mem_map -= offset;
|
||||
+ mem_map -= offset + (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
|
||||
}
|
||||
#endif
|
||||
}
|
@ -1,81 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Subject: jffs2: use .rename2 and add RENAME_WHITEOUT support
|
||||
|
||||
It is required for renames on overlayfs
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
|
||||
--- a/fs/jffs2/dir.c
|
||||
+++ b/fs/jffs2/dir.c
|
||||
@@ -614,8 +614,8 @@ static int jffs2_rmdir (struct inode *di
|
||||
return ret;
|
||||
}
|
||||
|
||||
-static int jffs2_mknod (struct user_namespace *mnt_userns, struct inode *dir_i,
|
||||
- struct dentry *dentry, umode_t mode, dev_t rdev)
|
||||
+static int __jffs2_mknod (struct user_namespace *mnt_userns, struct inode *dir_i,
|
||||
+ struct dentry *dentry, umode_t mode, dev_t rdev, bool whiteout)
|
||||
{
|
||||
struct jffs2_inode_info *f, *dir_f;
|
||||
struct jffs2_sb_info *c;
|
||||
@@ -754,7 +754,11 @@ static int jffs2_mknod (struct user_name
|
||||
mutex_unlock(&dir_f->sem);
|
||||
jffs2_complete_reservation(c);
|
||||
|
||||
- d_instantiate_new(dentry, inode);
|
||||
+ if (!whiteout)
|
||||
+ d_instantiate_new(dentry, inode);
|
||||
+ else
|
||||
+ unlock_new_inode(inode);
|
||||
+
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
@@ -762,6 +766,19 @@ static int jffs2_mknod (struct user_name
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static int jffs2_mknod (struct user_namespace *mnt_userns, struct inode *dir_i,
|
||||
+ struct dentry *dentry, umode_t mode, dev_t rdev)
|
||||
+{
|
||||
+ return __jffs2_mknod(mnt_userns, dir_i, dentry, mode, rdev, false);
|
||||
+}
|
||||
+
|
||||
+static int jffs2_whiteout (struct user_namespace *mnt_userns, struct inode *old_dir,
|
||||
+ struct dentry *old_dentry)
|
||||
+{
|
||||
+ return __jffs2_mknod(mnt_userns, old_dir, old_dentry, S_IFCHR | WHITEOUT_MODE,
|
||||
+ WHITEOUT_DEV, true);
|
||||
+}
|
||||
+
|
||||
static int jffs2_rename (struct user_namespace *mnt_userns,
|
||||
struct inode *old_dir_i, struct dentry *old_dentry,
|
||||
struct inode *new_dir_i, struct dentry *new_dentry,
|
||||
@@ -773,7 +790,7 @@ static int jffs2_rename (struct user_nam
|
||||
uint8_t type;
|
||||
uint32_t now;
|
||||
|
||||
- if (flags & ~RENAME_NOREPLACE)
|
||||
+ if (flags & ~(RENAME_NOREPLACE|RENAME_WHITEOUT))
|
||||
return -EINVAL;
|
||||
|
||||
/* The VFS will check for us and prevent trying to rename a
|
||||
@@ -839,9 +856,14 @@ static int jffs2_rename (struct user_nam
|
||||
if (d_is_dir(old_dentry) && !victim_f)
|
||||
inc_nlink(new_dir_i);
|
||||
|
||||
- /* Unlink the original */
|
||||
- ret = jffs2_do_unlink(c, JFFS2_INODE_INFO(old_dir_i),
|
||||
- old_dentry->d_name.name, old_dentry->d_name.len, NULL, now);
|
||||
+ if (flags & RENAME_WHITEOUT)
|
||||
+ /* Replace with whiteout */
|
||||
+ ret = jffs2_whiteout(mnt_userns, old_dir_i, old_dentry);
|
||||
+ else
|
||||
+ /* Unlink the original */
|
||||
+ ret = jffs2_do_unlink(c, JFFS2_INODE_INFO(old_dir_i),
|
||||
+ old_dentry->d_name.name,
|
||||
+ old_dentry->d_name.len, NULL, now);
|
||||
|
||||
/* We don't touch inode->i_nlink */
|
||||
|
@ -1,73 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Subject: jffs2: add RENAME_EXCHANGE support
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
|
||||
--- a/fs/jffs2/dir.c
|
||||
+++ b/fs/jffs2/dir.c
|
||||
@@ -787,18 +787,31 @@ static int jffs2_rename (struct user_nam
|
||||
int ret;
|
||||
struct jffs2_sb_info *c = JFFS2_SB_INFO(old_dir_i->i_sb);
|
||||
struct jffs2_inode_info *victim_f = NULL;
|
||||
+ struct inode *fst_inode = d_inode(old_dentry);
|
||||
+ struct inode *snd_inode = d_inode(new_dentry);
|
||||
uint8_t type;
|
||||
uint32_t now;
|
||||
|
||||
- if (flags & ~(RENAME_NOREPLACE|RENAME_WHITEOUT))
|
||||
+ if (flags & ~(RENAME_NOREPLACE|RENAME_WHITEOUT|RENAME_EXCHANGE))
|
||||
return -EINVAL;
|
||||
|
||||
+ if ((flags & RENAME_EXCHANGE) && (old_dir_i != new_dir_i)) {
|
||||
+ if (S_ISDIR(fst_inode->i_mode) && !S_ISDIR(snd_inode->i_mode)) {
|
||||
+ inc_nlink(new_dir_i);
|
||||
+ drop_nlink(old_dir_i);
|
||||
+ }
|
||||
+ else if (!S_ISDIR(fst_inode->i_mode) && S_ISDIR(snd_inode->i_mode)) {
|
||||
+ drop_nlink(new_dir_i);
|
||||
+ inc_nlink(old_dir_i);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/* The VFS will check for us and prevent trying to rename a
|
||||
* file over a directory and vice versa, but if it's a directory,
|
||||
* the VFS can't check whether the victim is empty. The filesystem
|
||||
* needs to do that for itself.
|
||||
*/
|
||||
- if (d_really_is_positive(new_dentry)) {
|
||||
+ if (d_really_is_positive(new_dentry) && !(flags & RENAME_EXCHANGE)) {
|
||||
victim_f = JFFS2_INODE_INFO(d_inode(new_dentry));
|
||||
if (d_is_dir(new_dentry)) {
|
||||
struct jffs2_full_dirent *fd;
|
||||
@@ -833,7 +846,7 @@ static int jffs2_rename (struct user_nam
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- if (victim_f) {
|
||||
+ if (victim_f && !(flags & RENAME_EXCHANGE)) {
|
||||
/* There was a victim. Kill it off nicely */
|
||||
if (d_is_dir(new_dentry))
|
||||
clear_nlink(d_inode(new_dentry));
|
||||
@@ -859,6 +872,12 @@ static int jffs2_rename (struct user_nam
|
||||
if (flags & RENAME_WHITEOUT)
|
||||
/* Replace with whiteout */
|
||||
ret = jffs2_whiteout(mnt_userns, old_dir_i, old_dentry);
|
||||
+ else if (flags & RENAME_EXCHANGE)
|
||||
+ /* Replace the original */
|
||||
+ ret = jffs2_do_link(c, JFFS2_INODE_INFO(old_dir_i),
|
||||
+ d_inode(new_dentry)->i_ino, type,
|
||||
+ old_dentry->d_name.name, old_dentry->d_name.len,
|
||||
+ now);
|
||||
else
|
||||
/* Unlink the original */
|
||||
ret = jffs2_do_unlink(c, JFFS2_INODE_INFO(old_dir_i),
|
||||
@@ -890,7 +909,7 @@ static int jffs2_rename (struct user_nam
|
||||
return ret;
|
||||
}
|
||||
|
||||
- if (d_is_dir(old_dentry))
|
||||
+ if (d_is_dir(old_dentry) && !(flags & RENAME_EXCHANGE))
|
||||
drop_nlink(old_dir_i);
|
||||
|
||||
new_dir_i->i_mtime = new_dir_i->i_ctime = old_dir_i->i_mtime = old_dir_i->i_ctime = ITIME(now);
|
@ -1,20 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Subject: jffs2: add splice ops
|
||||
|
||||
Add splice_read using generic_file_splice_read.
|
||||
Add splice_write using iter_file_splice_write
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
|
||||
--- a/fs/jffs2/file.c
|
||||
+++ b/fs/jffs2/file.c
|
||||
@@ -53,6 +53,8 @@ const struct file_operations jffs2_file_
|
||||
.open = generic_file_open,
|
||||
.read_iter = generic_file_read_iter,
|
||||
.write_iter = generic_file_write_iter,
|
||||
+ .splice_read = generic_file_splice_read,
|
||||
+ .splice_write = iter_file_splice_write,
|
||||
.unlocked_ioctl=jffs2_ioctl,
|
||||
.mmap = generic_file_readonly_mmap,
|
||||
.fsync = jffs2_fsync,
|
@ -1,45 +0,0 @@
|
||||
From: Stephen Hemminger <stephen@networkplumber.org>
|
||||
Subject: bridge: allow reception on disabled port
|
||||
|
||||
When an ethernet device is enslaved to a bridge, and the bridge STP
|
||||
detects loss of carrier (or operational state down), then normally
|
||||
packet reception is blocked.
|
||||
|
||||
This breaks control applications like WPA which may be expecting to
|
||||
receive packets to negotiate to bring link up. The bridge needs to
|
||||
block forwarding packets from these disabled ports, but there is no
|
||||
hard requirement to not allow local packet delivery.
|
||||
|
||||
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
|
||||
--- a/net/bridge/br_input.c
|
||||
+++ b/net/bridge/br_input.c
|
||||
@@ -222,6 +222,9 @@ static void __br_handle_local_finish(str
|
||||
/* note: already called with rcu_read_lock */
|
||||
static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
+ struct net_bridge_port *p = br_port_get_rcu(skb->dev);
|
||||
+
|
||||
+ if (p->state != BR_STATE_DISABLED)
|
||||
__br_handle_local_finish(skb);
|
||||
|
||||
/* return 1 to signal the okfn() was called so it's ok to use the skb */
|
||||
@@ -390,6 +393,17 @@ forward:
|
||||
goto defer_stp_filtering;
|
||||
|
||||
switch (p->state) {
|
||||
+ case BR_STATE_DISABLED:
|
||||
+ if (ether_addr_equal(p->br->dev->dev_addr, dest))
|
||||
+ skb->pkt_type = PACKET_HOST;
|
||||
+
|
||||
+ if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
|
||||
+ dev_net(skb->dev), NULL, skb, skb->dev, NULL,
|
||||
+ br_handle_local_finish) == 1) {
|
||||
+ return RX_HANDLER_PASS;
|
||||
+ }
|
||||
+ break;
|
||||
+
|
||||
case BR_STATE_FORWARDING:
|
||||
case BR_STATE_LEARNING:
|
||||
defer_stp_filtering:
|
@ -1,94 +0,0 @@
|
||||
From: Daniel González Cabanelas <dgcbueu@gmail.com>
|
||||
Subject: [PATCH 1/2] rtc: rs5c372: support alarms up to 1 week
|
||||
|
||||
The Ricoh R2221x, R2223x, RS5C372, RV5C387A chips can handle 1 week
|
||||
alarms.
|
||||
|
||||
Read the "wday" alarm register and convert it to a date to support up to 1
|
||||
week in our driver.
|
||||
|
||||
Signed-off-by: Daniel González Cabanelas <dgcbueu@gmail.com>
|
||||
---
|
||||
drivers/rtc/rtc-rs5c372.c | 48 ++++++++++++++++++++++++++++++++++-----
|
||||
1 file changed, 42 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/drivers/rtc/rtc-rs5c372.c
|
||||
+++ b/drivers/rtc/rtc-rs5c372.c
|
||||
@@ -399,7 +399,9 @@ static int rs5c_read_alarm(struct device
|
||||
{
|
||||
struct i2c_client *client = to_i2c_client(dev);
|
||||
struct rs5c372 *rs5c = i2c_get_clientdata(client);
|
||||
- int status;
|
||||
+ int status, wday_offs;
|
||||
+ struct rtc_time rtc;
|
||||
+ unsigned long alarm_secs;
|
||||
|
||||
status = rs5c_get_regs(rs5c);
|
||||
if (status < 0)
|
||||
@@ -409,6 +411,30 @@ static int rs5c_read_alarm(struct device
|
||||
t->time.tm_sec = 0;
|
||||
t->time.tm_min = bcd2bin(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f);
|
||||
t->time.tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C_REG_ALARM_A_HOURS]);
|
||||
+ t->time.tm_wday = ffs(rs5c->regs[RS5C_REG_ALARM_A_WDAY] & 0x7f) - 1;
|
||||
+
|
||||
+ /* determine the day, month and year based on alarm wday, taking as a
|
||||
+ * reference the current time from the rtc
|
||||
+ */
|
||||
+ status = rs5c372_rtc_read_time(dev, &rtc);
|
||||
+ if (status < 0)
|
||||
+ return status;
|
||||
+
|
||||
+ wday_offs = t->time.tm_wday - rtc.tm_wday;
|
||||
+ alarm_secs = mktime64(rtc.tm_year + 1900,
|
||||
+ rtc.tm_mon + 1,
|
||||
+ rtc.tm_mday + wday_offs,
|
||||
+ t->time.tm_hour,
|
||||
+ t->time.tm_min,
|
||||
+ t->time.tm_sec);
|
||||
+
|
||||
+ if (wday_offs < 0 || (wday_offs == 0 &&
|
||||
+ (t->time.tm_hour < rtc.tm_hour ||
|
||||
+ (t->time.tm_hour == rtc.tm_hour &&
|
||||
+ t->time.tm_min <= rtc.tm_min))))
|
||||
+ alarm_secs += 7 * 86400;
|
||||
+
|
||||
+ rtc_time64_to_tm(alarm_secs, &t->time);
|
||||
|
||||
/* ... and status */
|
||||
t->enabled = !!(rs5c->regs[RS5C_REG_CTRL1] & RS5C_CTRL1_AALE);
|
||||
@@ -423,12 +449,20 @@ static int rs5c_set_alarm(struct device
|
||||
struct rs5c372 *rs5c = i2c_get_clientdata(client);
|
||||
int status, addr, i;
|
||||
unsigned char buf[3];
|
||||
+ struct rtc_time rtc_tm;
|
||||
+ unsigned long rtc_secs, alarm_secs;
|
||||
|
||||
- /* only handle up to 24 hours in the future, like RTC_ALM_SET */
|
||||
- if (t->time.tm_mday != -1
|
||||
- || t->time.tm_mon != -1
|
||||
- || t->time.tm_year != -1)
|
||||
+ /* chip only can handle alarms up to one week in the future*/
|
||||
+ status = rs5c372_rtc_read_time(dev, &rtc_tm);
|
||||
+ if (status)
|
||||
+ return status;
|
||||
+ rtc_secs = rtc_tm_to_time64(&rtc_tm);
|
||||
+ alarm_secs = rtc_tm_to_time64(&t->time);
|
||||
+ if (alarm_secs >= rtc_secs + 7 * 86400) {
|
||||
+ dev_err(dev, "%s: alarm maximum is one week in the future (%d)\n",
|
||||
+ __func__, status);
|
||||
return -EINVAL;
|
||||
+ }
|
||||
|
||||
/* REVISIT: round up tm_sec */
|
||||
|
||||
@@ -449,7 +483,9 @@ static int rs5c_set_alarm(struct device
|
||||
/* set alarm */
|
||||
buf[0] = bin2bcd(t->time.tm_min);
|
||||
buf[1] = rs5c_hr2reg(rs5c, t->time.tm_hour);
|
||||
- buf[2] = 0x7f; /* any/all days */
|
||||
+ /* each bit is the day of the week, 0x7f means all days */
|
||||
+ buf[2] = (t->time.tm_wday >= 0 && t->time.tm_wday < 7) ?
|
||||
+ BIT(t->time.tm_wday) : 0x7f;
|
||||
|
||||
for (i = 0; i < sizeof(buf); i++) {
|
||||
addr = RS5C_ADDR(RS5C_REG_ALARM_A_MIN + i);
|
@ -1,70 +0,0 @@
|
||||
From: Daniel González Cabanelas <dgcbueu@gmail.com>
|
||||
Subject: [PATCH 2/2] rtc: rs5c372: let the alarm to be used as wakeup source
|
||||
|
||||
Currently there is no use for the interrupts on the rs5c372 RTC and the
|
||||
wakealarm isn't enabled. There are some devices like NASes which use this
|
||||
RTC to wake up from the power off state when the INTR pin is activated by
|
||||
the alarm clock.
|
||||
|
||||
Enable the alarm and let it be used as a wakeup source.
|
||||
|
||||
Tested on a Buffalo LS421DE NAS.
|
||||
|
||||
Signed-off-by: Daniel González Cabanelas <dgcbueu@gmail.com>
|
||||
---
|
||||
drivers/rtc/rtc-rs5c372.c | 16 ++++++++++++++++
|
||||
1 file changed, 16 insertions(+)
|
||||
|
||||
--- a/drivers/rtc/rtc-rs5c372.c
|
||||
+++ b/drivers/rtc/rtc-rs5c372.c
|
||||
@@ -833,6 +833,7 @@ static int rs5c372_probe(struct i2c_clie
|
||||
int err = 0;
|
||||
int smbus_mode = 0;
|
||||
struct rs5c372 *rs5c372;
|
||||
+ bool rs5c372_can_wakeup_device = false;
|
||||
|
||||
dev_dbg(&client->dev, "%s\n", __func__);
|
||||
|
||||
@@ -868,6 +869,12 @@ static int rs5c372_probe(struct i2c_clie
|
||||
else
|
||||
rs5c372->type = id->driver_data;
|
||||
|
||||
+#ifdef CONFIG_OF
|
||||
+ if(of_property_read_bool(client->dev.of_node,
|
||||
+ "wakeup-source"))
|
||||
+ rs5c372_can_wakeup_device = true;
|
||||
+#endif
|
||||
+
|
||||
/* we read registers 0x0f then 0x00-0x0f; skip the first one */
|
||||
rs5c372->regs = &rs5c372->buf[1];
|
||||
rs5c372->smbus = smbus_mode;
|
||||
@@ -901,6 +908,8 @@ static int rs5c372_probe(struct i2c_clie
|
||||
goto exit;
|
||||
}
|
||||
|
||||
+ rs5c372->has_irq = 1;
|
||||
+
|
||||
/* if the oscillator lost power and no other software (like
|
||||
* the bootloader) set it up, do it here.
|
||||
*
|
||||
@@ -927,6 +936,10 @@ static int rs5c372_probe(struct i2c_clie
|
||||
);
|
||||
|
||||
/* REVISIT use client->irq to register alarm irq ... */
|
||||
+ if (rs5c372_can_wakeup_device) {
|
||||
+ device_init_wakeup(&client->dev, true);
|
||||
+ }
|
||||
+
|
||||
rs5c372->rtc = devm_rtc_device_register(&client->dev,
|
||||
rs5c372_driver.driver.name,
|
||||
&rs5c372_rtc_ops, THIS_MODULE);
|
||||
@@ -940,6 +953,9 @@ static int rs5c372_probe(struct i2c_clie
|
||||
if (err)
|
||||
goto exit;
|
||||
|
||||
+ /* the rs5c372 alarm only supports a minute accuracy */
|
||||
+
|
||||
+
|
||||
return 0;
|
||||
|
||||
exit:
|
@ -1,25 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Subject: Upgrade to Linux 2.6.19
|
||||
|
||||
- Includes large parts of the patch from #1021 by dpalffy
|
||||
- Includes RB532 NAND driver changes by n0-1
|
||||
|
||||
[john@phrozen.org: felix will add this to his upstream queue]
|
||||
|
||||
lede-commit: bff468813f78f81e36ebb2a3f4354de7365e640f
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
Makefile | 6 +++---
|
||||
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/Makefile
|
||||
+++ b/Makefile
|
||||
@@ -759,7 +759,7 @@ KBUILD_CFLAGS += $(call cc-disable-warni
|
||||
ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
|
||||
KBUILD_CFLAGS += -O2
|
||||
else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
|
||||
-KBUILD_CFLAGS += -Os
|
||||
+KBUILD_CFLAGS += -Os -fno-reorder-blocks -fno-tree-ch
|
||||
endif
|
||||
|
||||
# Tell gcc to never replace conditional load with a non-conditional one
|
@ -1,119 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Subject: kernel: add a config option for keeping the kallsyms table uncompressed, saving ~9kb kernel size after lzma on ar71xx
|
||||
|
||||
[john@phrozen.org: added to my upstream queue 30.12.2016]
|
||||
lede-commit: e0e3509b5ce2ccf93d4d67ea907613f5f7ec2eed
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
init/Kconfig | 11 +++++++++++
|
||||
kernel/kallsyms.c | 8 ++++++++
|
||||
scripts/kallsyms.c | 12 ++++++++++++
|
||||
scripts/link-vmlinux.sh | 4 ++++
|
||||
4 files changed, 35 insertions(+)
|
||||
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -1464,6 +1464,17 @@ config SYSCTL_ARCH_UNALIGN_ALLOW
|
||||
the unaligned access emulation.
|
||||
see arch/parisc/kernel/unaligned.c for reference
|
||||
|
||||
+config KALLSYMS_UNCOMPRESSED
|
||||
+ bool "Keep kallsyms uncompressed"
|
||||
+ depends on KALLSYMS
|
||||
+ help
|
||||
+ Normally kallsyms contains compressed symbols (using a token table),
|
||||
+ reducing the uncompressed kernel image size. Keeping the symbol table
|
||||
+ uncompressed significantly improves the size of this part in compressed
|
||||
+ kernel images.
|
||||
+
|
||||
+ Say N unless you need compressed kernel images to be small.
|
||||
+
|
||||
config HAVE_PCSPKR_PLATFORM
|
||||
bool
|
||||
|
||||
--- a/kernel/kallsyms.c
|
||||
+++ b/kernel/kallsyms.c
|
||||
@@ -61,6 +61,11 @@ static unsigned int kallsyms_expand_symb
|
||||
* For every byte on the compressed symbol data, copy the table
|
||||
* entry for that byte.
|
||||
*/
|
||||
+#ifdef CONFIG_KALLSYMS_UNCOMPRESSED
|
||||
+ memcpy(result, data + 1, len - 1);
|
||||
+ result += len - 1;
|
||||
+ len = 0;
|
||||
+#endif
|
||||
while (len) {
|
||||
tptr = &kallsyms_token_table[kallsyms_token_index[*data]];
|
||||
data++;
|
||||
@@ -93,6 +98,9 @@ tail:
|
||||
*/
|
||||
static char kallsyms_get_symbol_type(unsigned int off)
|
||||
{
|
||||
+#ifdef CONFIG_KALLSYMS_UNCOMPRESSED
|
||||
+ return kallsyms_names[off + 1];
|
||||
+#endif
|
||||
/*
|
||||
* Get just the first code, look it up in the token table,
|
||||
* and return the first char from this token.
|
||||
--- a/scripts/kallsyms.c
|
||||
+++ b/scripts/kallsyms.c
|
||||
@@ -58,6 +58,7 @@ static struct addr_range percpu_range =
|
||||
static struct sym_entry **table;
|
||||
static unsigned int table_size, table_cnt;
|
||||
static int all_symbols;
|
||||
+static int uncompressed;
|
||||
static int absolute_percpu;
|
||||
static int base_relative;
|
||||
|
||||
@@ -487,6 +488,9 @@ static void write_src(void)
|
||||
|
||||
free(markers);
|
||||
|
||||
+ if (uncompressed)
|
||||
+ return;
|
||||
+
|
||||
output_label("kallsyms_token_table");
|
||||
off = 0;
|
||||
for (i = 0; i < 256; i++) {
|
||||
@@ -538,6 +542,9 @@ static unsigned char *find_token(unsigne
|
||||
{
|
||||
int i;
|
||||
|
||||
+ if (uncompressed)
|
||||
+ return NULL;
|
||||
+
|
||||
for (i = 0; i < len - 1; i++) {
|
||||
if (str[i] == token[0] && str[i+1] == token[1])
|
||||
return &str[i];
|
||||
@@ -610,6 +617,9 @@ static void optimize_result(void)
|
||||
{
|
||||
int i, best;
|
||||
|
||||
+ if (uncompressed)
|
||||
+ return;
|
||||
+
|
||||
/* using the '\0' symbol last allows compress_symbols to use standard
|
||||
* fast string functions */
|
||||
for (i = 255; i >= 0; i--) {
|
||||
@@ -774,6 +784,8 @@ int main(int argc, char **argv)
|
||||
absolute_percpu = 1;
|
||||
else if (strcmp(argv[i], "--base-relative") == 0)
|
||||
base_relative = 1;
|
||||
+ else if (strcmp(argv[i], "--uncompressed") == 0)
|
||||
+ uncompressed = 1;
|
||||
else
|
||||
usage();
|
||||
}
|
||||
--- a/scripts/link-vmlinux.sh
|
||||
+++ b/scripts/link-vmlinux.sh
|
||||
@@ -156,6 +156,10 @@ kallsyms()
|
||||
kallsymopt="${kallsymopt} --base-relative"
|
||||
fi
|
||||
|
||||
+ if [ -n "${CONFIG_KALLSYMS_UNCOMPRESSED}" ]; then
|
||||
+ kallsymopt="${kallsymopt} --uncompressed"
|
||||
+ fi
|
||||
+
|
||||
info KSYMS ${2}
|
||||
${NM} -n ${1} | scripts/kallsyms ${kallsymopt} > ${2}
|
||||
}
|
@ -1,41 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Subject: kernel: when KALLSYMS is disabled, print module address + size for matching backtrace entries
|
||||
|
||||
[john@phrozen.org: felix will add this to his upstream queue]
|
||||
|
||||
lede-commit 53827cdc824556cda910b23ce5030c363b8f1461
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
lib/vsprintf.c | 15 +++++++++++----
|
||||
1 file changed, 11 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/lib/vsprintf.c
|
||||
+++ b/lib/vsprintf.c
|
||||
@@ -980,8 +980,10 @@ char *symbol_string(char *buf, char *end
|
||||
struct printf_spec spec, const char *fmt)
|
||||
{
|
||||
unsigned long value;
|
||||
-#ifdef CONFIG_KALLSYMS
|
||||
char sym[KSYM_SYMBOL_LEN];
|
||||
+#ifndef CONFIG_KALLSYMS
|
||||
+ struct module *mod;
|
||||
+ int len;
|
||||
#endif
|
||||
|
||||
if (fmt[1] == 'R')
|
||||
@@ -1002,8 +1004,14 @@ char *symbol_string(char *buf, char *end
|
||||
|
||||
return string_nocheck(buf, end, sym, spec);
|
||||
#else
|
||||
- return special_hex_number(buf, end, value, sizeof(void *));
|
||||
+ len = snprintf(sym, sizeof(sym), "0x%lx", value);
|
||||
+ mod = __module_address(value);
|
||||
+ if (mod)
|
||||
+ snprintf(sym + len, sizeof(sym) - len, " [%s@%p+0x%x]",
|
||||
+ mod->name, mod->core_layout.base,
|
||||
+ mod->core_layout.size);
|
||||
#endif
|
||||
+ return string(buf, end, sym, spec);
|
||||
}
|
||||
|
||||
static const struct printf_spec default_str_spec = {
|
@ -1,30 +0,0 @@
|
||||
From: Gabor Juhos <juhosg@openwrt.org>
|
||||
Subject: usr: sanitize deps_initramfs list
|
||||
|
||||
If any filename in the initramfs dependency
|
||||
list contains a colon, that causes a kernel
|
||||
build error like this:
|
||||
|
||||
/devel/openwrt/build_dir/linux-ar71xx_generic/linux-3.6.6/usr/Makefile:58: *** multiple target patterns. Stop.
|
||||
make[5]: *** [usr] Error 2
|
||||
|
||||
Fix it by removing such filenames from the
|
||||
deps_initramfs list.
|
||||
|
||||
Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
usr/Makefile | 8 +++++---
|
||||
1 file changed, 5 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/usr/Makefile
|
||||
+++ b/usr/Makefile
|
||||
@@ -56,6 +56,8 @@ hostprogs := gen_init_cpio
|
||||
# The dependency list is generated by gen_initramfs.sh -l
|
||||
-include $(obj)/.initramfs_data.cpio.d
|
||||
|
||||
+deps_initramfs := $(foreach v,$(deps_initramfs),$(if $(findstring :,$(v)),,$(v)))
|
||||
+
|
||||
# do not try to update files included in initramfs
|
||||
$(deps_initramfs): ;
|
||||
|
@ -1,20 +0,0 @@
|
||||
From: Imre Kaloz <kaloz@openwrt.org>
|
||||
Subject: [PATCH] hack: net: wireless: make the wl12xx glue code available with
|
||||
compat-wireless, too
|
||||
|
||||
Signed-off-by: Imre Kaloz <kaloz@openwrt.org>
|
||||
---
|
||||
drivers/net/wireless/ti/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/net/wireless/ti/Kconfig
|
||||
+++ b/drivers/net/wireless/ti/Kconfig
|
||||
@@ -20,7 +20,7 @@ source "drivers/net/wireless/ti/wlcore/K
|
||||
|
||||
config WILINK_PLATFORM_DATA
|
||||
bool "TI WiLink platform data"
|
||||
- depends on WLCORE_SDIO || WL1251_SDIO
|
||||
+ depends on WLCORE_SDIO || WL1251_SDIO || ARCH_OMAP2PLUS
|
||||
default y
|
||||
help
|
||||
Small platform data bit needed to pass data to the sdio modules.
|
@ -1,35 +0,0 @@
|
||||
From c2deb5ef01a0ef09088832744cbace9e239a6ee0 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Thibaut=20VAR=C3=88NE?= <hacks@slashdirt.org>
|
||||
Date: Sat, 28 Mar 2020 12:11:50 +0100
|
||||
Subject: [PATCH] generic: platform/mikrotik build bits (5.4)
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This patch adds platform/mikrotik kernel build bits
|
||||
|
||||
Signed-off-by: Thibaut VARÈNE <hacks@slashdirt.org>
|
||||
---
|
||||
drivers/platform/Kconfig | 2 ++
|
||||
drivers/platform/Makefile | 1 +
|
||||
2 files changed, 3 insertions(+)
|
||||
|
||||
--- a/drivers/platform/Kconfig
|
||||
+++ b/drivers/platform/Kconfig
|
||||
@@ -9,6 +9,8 @@ source "drivers/platform/chrome/Kconfig"
|
||||
|
||||
source "drivers/platform/mellanox/Kconfig"
|
||||
|
||||
+source "drivers/platform/mikrotik/Kconfig"
|
||||
+
|
||||
source "drivers/platform/olpc/Kconfig"
|
||||
|
||||
source "drivers/platform/surface/Kconfig"
|
||||
--- a/drivers/platform/Makefile
|
||||
+++ b/drivers/platform/Makefile
|
||||
@@ -9,4 +9,5 @@ obj-$(CONFIG_MIPS) += mips/
|
||||
obj-$(CONFIG_OLPC_EC) += olpc/
|
||||
obj-$(CONFIG_GOLDFISH) += goldfish/
|
||||
obj-$(CONFIG_CHROME_PLATFORMS) += chrome/
|
||||
+obj-$(CONFIG_MIKROTIK) += mikrotik/
|
||||
obj-$(CONFIG_SURFACE_PLATFORMS) += surface/
|
@ -1,40 +0,0 @@
|
||||
From: Mark Miller <mark@mirell.org>
|
||||
Subject: mips: expose CONFIG_BOOT_RAW
|
||||
|
||||
This exposes the CONFIG_BOOT_RAW symbol in Kconfig. This is needed on
|
||||
certain Broadcom chipsets running CFE in order to load the kernel.
|
||||
|
||||
Signed-off-by: Mark Miller <mark@mirell.org>
|
||||
Acked-by: Rob Landley <rob@landley.net>
|
||||
---
|
||||
--- a/arch/mips/Kconfig
|
||||
+++ b/arch/mips/Kconfig
|
||||
@@ -1032,9 +1032,6 @@ config FW_ARC
|
||||
config ARCH_MAY_HAVE_PC_FDC
|
||||
bool
|
||||
|
||||
-config BOOT_RAW
|
||||
- bool
|
||||
-
|
||||
config CEVT_BCM1480
|
||||
bool
|
||||
|
||||
@@ -3089,6 +3086,18 @@ choice
|
||||
bool "Extend builtin kernel arguments with bootloader arguments"
|
||||
endchoice
|
||||
|
||||
+config BOOT_RAW
|
||||
+ bool "Enable the kernel to be executed from the load address"
|
||||
+ default n
|
||||
+ help
|
||||
+ Allow the kernel to be executed from the load address for
|
||||
+ bootloaders which cannot read the ELF format. This places
|
||||
+ a jump to start_kernel at the load address.
|
||||
+
|
||||
+ If unsure, say N.
|
||||
+
|
||||
+
|
||||
+
|
||||
endmenu
|
||||
|
||||
config LOCKDEP_SUPPORT
|
@ -1,22 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Subject: mips: use -mno-branch-likely for kernel and userspace
|
||||
|
||||
saves ~11k kernel size after lzma and ~12k squashfs size in the
|
||||
|
||||
lede-commit: 41a039f46450ffae9483d6216422098669da2900
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
arch/mips/Makefile | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/mips/Makefile
|
||||
+++ b/arch/mips/Makefile
|
||||
@@ -94,7 +94,7 @@ all-$(CONFIG_SYS_SUPPORTS_ZBOOT)+= vmlin
|
||||
# machines may also. Since BFD is incredibly buggy with respect to
|
||||
# crossformat linking we rely on the elf2ecoff tool for format conversion.
|
||||
#
|
||||
-cflags-y += -G 0 -mno-abicalls -fno-pic -pipe
|
||||
+cflags-y += -G 0 -mno-abicalls -fno-pic -pipe -mno-branch-likely
|
||||
cflags-y += -msoft-float
|
||||
LDFLAGS_vmlinux += -G 0 -static -n -nostdlib
|
||||
KBUILD_AFLAGS_MODULE += -mlong-calls
|
@ -1,370 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Subject: mips: replace -mlong-calls with -mno-long-calls to make function calls faster in kernel modules to achieve this, try to
|
||||
|
||||
lede-commit: 3b3d64743ba2a874df9d70cd19e242205b0a788c
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
arch/mips/Makefile | 5 +
|
||||
arch/mips/include/asm/module.h | 5 +
|
||||
arch/mips/kernel/module.c | 279 ++++++++++++++++++++++++++++++++++++++++-
|
||||
3 files changed, 284 insertions(+), 5 deletions(-)
|
||||
|
||||
--- a/arch/mips/Makefile
|
||||
+++ b/arch/mips/Makefile
|
||||
@@ -97,8 +97,18 @@ all-$(CONFIG_SYS_SUPPORTS_ZBOOT)+= vmlin
|
||||
cflags-y += -G 0 -mno-abicalls -fno-pic -pipe -mno-branch-likely
|
||||
cflags-y += -msoft-float
|
||||
LDFLAGS_vmlinux += -G 0 -static -n -nostdlib
|
||||
+ifdef CONFIG_64BIT
|
||||
KBUILD_AFLAGS_MODULE += -mlong-calls
|
||||
KBUILD_CFLAGS_MODULE += -mlong-calls
|
||||
+else
|
||||
+ ifdef CONFIG_DYNAMIC_FTRACE
|
||||
+ KBUILD_AFLAGS_MODULE += -mlong-calls
|
||||
+ KBUILD_CFLAGS_MODULE += -mlong-calls
|
||||
+ else
|
||||
+ KBUILD_AFLAGS_MODULE += -mno-long-calls
|
||||
+ KBUILD_CFLAGS_MODULE += -mno-long-calls
|
||||
+ endif
|
||||
+endif
|
||||
|
||||
ifeq ($(CONFIG_RELOCATABLE),y)
|
||||
LDFLAGS_vmlinux += --emit-relocs
|
||||
--- a/arch/mips/include/asm/module.h
|
||||
+++ b/arch/mips/include/asm/module.h
|
||||
@@ -12,6 +12,11 @@ struct mod_arch_specific {
|
||||
const struct exception_table_entry *dbe_start;
|
||||
const struct exception_table_entry *dbe_end;
|
||||
struct mips_hi16 *r_mips_hi16_list;
|
||||
+
|
||||
+ void *phys_plt_tbl;
|
||||
+ void *virt_plt_tbl;
|
||||
+ unsigned int phys_plt_offset;
|
||||
+ unsigned int virt_plt_offset;
|
||||
};
|
||||
|
||||
typedef uint8_t Elf64_Byte; /* Type for a 8-bit quantity. */
|
||||
--- a/arch/mips/kernel/module.c
|
||||
+++ b/arch/mips/kernel/module.c
|
||||
@@ -32,23 +32,261 @@ struct mips_hi16 {
|
||||
static LIST_HEAD(dbe_list);
|
||||
static DEFINE_SPINLOCK(dbe_lock);
|
||||
|
||||
-#ifdef MODULE_START
|
||||
+/*
|
||||
+ * Get the potential max trampolines size required of the init and
|
||||
+ * non-init sections. Only used if we cannot find enough contiguous
|
||||
+ * physically mapped memory to put the module into.
|
||||
+ */
|
||||
+static unsigned int
|
||||
+get_plt_size(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
|
||||
+ const char *secstrings, unsigned int symindex, bool is_init)
|
||||
+{
|
||||
+ unsigned long ret = 0;
|
||||
+ unsigned int i, j;
|
||||
+ Elf_Sym *syms;
|
||||
+
|
||||
+ /* Everything marked ALLOC (this includes the exported symbols) */
|
||||
+ for (i = 1; i < hdr->e_shnum; ++i) {
|
||||
+ unsigned int info = sechdrs[i].sh_info;
|
||||
+
|
||||
+ if (sechdrs[i].sh_type != SHT_REL
|
||||
+ && sechdrs[i].sh_type != SHT_RELA)
|
||||
+ continue;
|
||||
+
|
||||
+ /* Not a valid relocation section? */
|
||||
+ if (info >= hdr->e_shnum)
|
||||
+ continue;
|
||||
+
|
||||
+ /* Don't bother with non-allocated sections */
|
||||
+ if (!(sechdrs[info].sh_flags & SHF_ALLOC))
|
||||
+ continue;
|
||||
+
|
||||
+ /* If it's called *.init*, and we're not init, we're
|
||||
+ not interested */
|
||||
+ if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
|
||||
+ != is_init)
|
||||
+ continue;
|
||||
+
|
||||
+ syms = (Elf_Sym *) sechdrs[symindex].sh_addr;
|
||||
+ if (sechdrs[i].sh_type == SHT_REL) {
|
||||
+ Elf_Mips_Rel *rel = (void *) sechdrs[i].sh_addr;
|
||||
+ unsigned int size = sechdrs[i].sh_size / sizeof(*rel);
|
||||
+
|
||||
+ for (j = 0; j < size; ++j) {
|
||||
+ Elf_Sym *sym;
|
||||
+
|
||||
+ if (ELF_MIPS_R_TYPE(rel[j]) != R_MIPS_26)
|
||||
+ continue;
|
||||
+
|
||||
+ sym = syms + ELF_MIPS_R_SYM(rel[j]);
|
||||
+ if (!is_init && sym->st_shndx != SHN_UNDEF)
|
||||
+ continue;
|
||||
+
|
||||
+ ret += 4 * sizeof(int);
|
||||
+ }
|
||||
+ } else {
|
||||
+ Elf_Mips_Rela *rela = (void *) sechdrs[i].sh_addr;
|
||||
+ unsigned int size = sechdrs[i].sh_size / sizeof(*rela);
|
||||
+
|
||||
+ for (j = 0; j < size; ++j) {
|
||||
+ Elf_Sym *sym;
|
||||
+
|
||||
+ if (ELF_MIPS_R_TYPE(rela[j]) != R_MIPS_26)
|
||||
+ continue;
|
||||
+
|
||||
+ sym = syms + ELF_MIPS_R_SYM(rela[j]);
|
||||
+ if (!is_init && sym->st_shndx != SHN_UNDEF)
|
||||
+ continue;
|
||||
+
|
||||
+ ret += 4 * sizeof(int);
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+#ifndef MODULE_START
|
||||
+static void *alloc_phys(unsigned long size)
|
||||
+{
|
||||
+ unsigned order;
|
||||
+ struct page *page;
|
||||
+ struct page *p;
|
||||
+
|
||||
+ size = PAGE_ALIGN(size);
|
||||
+ order = get_order(size);
|
||||
+
|
||||
+ page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN |
|
||||
+ __GFP_THISNODE, order);
|
||||
+ if (!page)
|
||||
+ return NULL;
|
||||
+
|
||||
+ split_page(page, order);
|
||||
+
|
||||
+ /* mark all pages except for the last one */
|
||||
+ for (p = page; p + 1 < page + (size >> PAGE_SHIFT); ++p)
|
||||
+ set_bit(PG_owner_priv_1, &p->flags);
|
||||
+
|
||||
+ for (p = page + (size >> PAGE_SHIFT); p < page + (1 << order); ++p)
|
||||
+ __free_page(p);
|
||||
+
|
||||
+ return page_address(page);
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+static void free_phys(void *ptr)
|
||||
+{
|
||||
+ struct page *page;
|
||||
+ bool free;
|
||||
+
|
||||
+ page = virt_to_page(ptr);
|
||||
+ do {
|
||||
+ free = test_and_clear_bit(PG_owner_priv_1, &page->flags);
|
||||
+ __free_page(page);
|
||||
+ page++;
|
||||
+ } while (free);
|
||||
+}
|
||||
+
|
||||
void *module_alloc(unsigned long size)
|
||||
{
|
||||
+#ifdef MODULE_START
|
||||
return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
|
||||
GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
|
||||
__builtin_return_address(0));
|
||||
+#else
|
||||
+ void *ptr;
|
||||
+
|
||||
+ if (size == 0)
|
||||
+ return NULL;
|
||||
+
|
||||
+ ptr = alloc_phys(size);
|
||||
+
|
||||
+ /* If we failed to allocate physically contiguous memory,
|
||||
+ * fall back to regular vmalloc. The module loader code will
|
||||
+ * create jump tables to handle long jumps */
|
||||
+ if (!ptr)
|
||||
+ return vmalloc(size);
|
||||
+
|
||||
+ return ptr;
|
||||
+#endif
|
||||
}
|
||||
+
|
||||
+static inline bool is_phys_addr(void *ptr)
|
||||
+{
|
||||
+#ifdef CONFIG_64BIT
|
||||
+ return (KSEGX((unsigned long)ptr) == CKSEG0);
|
||||
+#else
|
||||
+ return (KSEGX(ptr) == KSEG0);
|
||||
#endif
|
||||
+}
|
||||
+
|
||||
+/* Free memory returned from module_alloc */
|
||||
+void module_memfree(void *module_region)
|
||||
+{
|
||||
+ if (is_phys_addr(module_region))
|
||||
+ free_phys(module_region);
|
||||
+ else
|
||||
+ vfree(module_region);
|
||||
+}
|
||||
+
|
||||
+static void *__module_alloc(int size, bool phys)
|
||||
+{
|
||||
+ void *ptr;
|
||||
+
|
||||
+ if (phys)
|
||||
+ ptr = kmalloc(size, GFP_KERNEL);
|
||||
+ else
|
||||
+ ptr = vmalloc(size);
|
||||
+ return ptr;
|
||||
+}
|
||||
+
|
||||
+static void __module_free(void *ptr)
|
||||
+{
|
||||
+ if (is_phys_addr(ptr))
|
||||
+ kfree(ptr);
|
||||
+ else
|
||||
+ vfree(ptr);
|
||||
+}
|
||||
+
|
||||
+int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
|
||||
+ char *secstrings, struct module *mod)
|
||||
+{
|
||||
+ unsigned int symindex = 0;
|
||||
+ unsigned int core_size, init_size;
|
||||
+ int i;
|
||||
+
|
||||
+ mod->arch.phys_plt_offset = 0;
|
||||
+ mod->arch.virt_plt_offset = 0;
|
||||
+ mod->arch.phys_plt_tbl = NULL;
|
||||
+ mod->arch.virt_plt_tbl = NULL;
|
||||
+
|
||||
+ if (IS_ENABLED(CONFIG_64BIT))
|
||||
+ return 0;
|
||||
+
|
||||
+ for (i = 1; i < hdr->e_shnum; i++)
|
||||
+ if (sechdrs[i].sh_type == SHT_SYMTAB)
|
||||
+ symindex = i;
|
||||
+
|
||||
+ core_size = get_plt_size(hdr, sechdrs, secstrings, symindex, false);
|
||||
+ init_size = get_plt_size(hdr, sechdrs, secstrings, symindex, true);
|
||||
+
|
||||
+ if ((core_size + init_size) == 0)
|
||||
+ return 0;
|
||||
+
|
||||
+ mod->arch.phys_plt_tbl = __module_alloc(core_size + init_size, 1);
|
||||
+ if (!mod->arch.phys_plt_tbl)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ mod->arch.virt_plt_tbl = __module_alloc(core_size + init_size, 0);
|
||||
+ if (!mod->arch.virt_plt_tbl) {
|
||||
+ __module_free(mod->arch.phys_plt_tbl);
|
||||
+ mod->arch.phys_plt_tbl = NULL;
|
||||
+ return -ENOMEM;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
|
||||
static void apply_r_mips_32(u32 *location, u32 base, Elf_Addr v)
|
||||
{
|
||||
*location = base + v;
|
||||
}
|
||||
|
||||
+static Elf_Addr add_plt_entry_to(unsigned *plt_offset,
|
||||
+ void *start, Elf_Addr v)
|
||||
+{
|
||||
+ unsigned *tramp = start + *plt_offset;
|
||||
+ *plt_offset += 4 * sizeof(int);
|
||||
+
|
||||
+ /* adjust carry for addiu */
|
||||
+ if (v & 0x00008000)
|
||||
+ v += 0x10000;
|
||||
+
|
||||
+ tramp[0] = 0x3c190000 | (v >> 16); /* lui t9, hi16 */
|
||||
+ tramp[1] = 0x27390000 | (v & 0xffff); /* addiu t9, t9, lo16 */
|
||||
+ tramp[2] = 0x03200008; /* jr t9 */
|
||||
+ tramp[3] = 0x00000000; /* nop */
|
||||
+
|
||||
+ return (Elf_Addr) tramp;
|
||||
+}
|
||||
+
|
||||
+static Elf_Addr add_plt_entry(struct module *me, void *location, Elf_Addr v)
|
||||
+{
|
||||
+ if (is_phys_addr(location))
|
||||
+ return add_plt_entry_to(&me->arch.phys_plt_offset,
|
||||
+ me->arch.phys_plt_tbl, v);
|
||||
+ else
|
||||
+ return add_plt_entry_to(&me->arch.virt_plt_offset,
|
||||
+ me->arch.virt_plt_tbl, v);
|
||||
+
|
||||
+}
|
||||
+
|
||||
+
|
||||
static int apply_r_mips_26(struct module *me, u32 *location, u32 base,
|
||||
Elf_Addr v)
|
||||
{
|
||||
+ u32 ofs = base & 0x03ffffff;
|
||||
+
|
||||
if (v % 4) {
|
||||
pr_err("module %s: dangerous R_MIPS_26 relocation\n",
|
||||
me->name);
|
||||
@@ -56,13 +294,17 @@ static int apply_r_mips_26(struct module
|
||||
}
|
||||
|
||||
if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
|
||||
- pr_err("module %s: relocation overflow\n",
|
||||
- me->name);
|
||||
- return -ENOEXEC;
|
||||
+ v = add_plt_entry(me, location, v + (ofs << 2));
|
||||
+ if (!v) {
|
||||
+ pr_err("module %s: relocation overflow\n",
|
||||
+ me->name);
|
||||
+ return -ENOEXEC;
|
||||
+ }
|
||||
+ ofs = 0;
|
||||
}
|
||||
|
||||
*location = (*location & ~0x03ffffff) |
|
||||
- ((base + (v >> 2)) & 0x03ffffff);
|
||||
+ ((ofs + (v >> 2)) & 0x03ffffff);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -442,9 +684,36 @@ int module_finalize(const Elf_Ehdr *hdr,
|
||||
list_add(&me->arch.dbe_list, &dbe_list);
|
||||
spin_unlock_irq(&dbe_lock);
|
||||
}
|
||||
+
|
||||
+ /* Get rid of the fixup trampoline if we're running the module
|
||||
+ * from physically mapped address space */
|
||||
+ if (me->arch.phys_plt_offset == 0) {
|
||||
+ __module_free(me->arch.phys_plt_tbl);
|
||||
+ me->arch.phys_plt_tbl = NULL;
|
||||
+ }
|
||||
+ if (me->arch.virt_plt_offset == 0) {
|
||||
+ __module_free(me->arch.virt_plt_tbl);
|
||||
+ me->arch.virt_plt_tbl = NULL;
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
+void module_arch_freeing_init(struct module *mod)
|
||||
+{
|
||||
+ if (mod->state == MODULE_STATE_LIVE)
|
||||
+ return;
|
||||
+
|
||||
+ if (mod->arch.phys_plt_tbl) {
|
||||
+ __module_free(mod->arch.phys_plt_tbl);
|
||||
+ mod->arch.phys_plt_tbl = NULL;
|
||||
+ }
|
||||
+ if (mod->arch.virt_plt_tbl) {
|
||||
+ __module_free(mod->arch.virt_plt_tbl);
|
||||
+ mod->arch.virt_plt_tbl = NULL;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
void module_arch_cleanup(struct module *mod)
|
||||
{
|
||||
spin_lock_irq(&dbe_lock);
|
@ -1,19 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Subject: kernel: adjust mips highmem offset to avoid the need for -mlong-calls on systems with >256M RAM
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
arch/mips/include/asm/mach-generic/spaces.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/mips/include/asm/mach-generic/spaces.h
|
||||
+++ b/arch/mips/include/asm/mach-generic/spaces.h
|
||||
@@ -46,7 +46,7 @@
|
||||
* Memory above this physical address will be considered highmem.
|
||||
*/
|
||||
#ifndef HIGHMEM_START
|
||||
-#define HIGHMEM_START _AC(0x20000000, UL)
|
||||
+#define HIGHMEM_START _AC(0x10000000, UL)
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_32BIT */
|
@ -1,22 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Subject: kernel: add -mtune=34kc to MIPS CFLAGS when building for mips32r2
|
||||
|
||||
This provides a good tradeoff across at least 24Kc-74Kc, while also
|
||||
producing smaller code.
|
||||
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
arch/mips/Makefile | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/mips/Makefile
|
||||
+++ b/arch/mips/Makefile
|
||||
@@ -172,7 +172,7 @@ cflags-$(CONFIG_CPU_R4300) += -march=r43
|
||||
cflags-$(CONFIG_CPU_R4X00) += -march=r4600 -Wa,--trap
|
||||
cflags-$(CONFIG_CPU_TX49XX) += -march=r4600 -Wa,--trap
|
||||
cflags-$(CONFIG_CPU_MIPS32_R1) += -march=mips32 -Wa,--trap
|
||||
-cflags-$(CONFIG_CPU_MIPS32_R2) += -march=mips32r2 -Wa,--trap
|
||||
+cflags-$(CONFIG_CPU_MIPS32_R2) += -march=mips32r2 -mtune=34kc -Wa,--trap
|
||||
cflags-$(CONFIG_CPU_MIPS32_R5) += -march=mips32r5 -Wa,--trap -modd-spreg
|
||||
cflags-$(CONFIG_CPU_MIPS32_R6) += -march=mips32r6 -Wa,--trap -modd-spreg
|
||||
cflags-$(CONFIG_CPU_MIPS64_R1) += -march=mips64 -Wa,--trap
|
@ -1,22 +0,0 @@
|
||||
From: Felix Fietkau <nbd@nbd.name>
|
||||
Subject: fix errors in unresolved weak symbols on arm
|
||||
|
||||
lede-commit: 570699d4838a907c3ef9f2819bf19eb72997b32f
|
||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
|
||||
---
|
||||
arch/arm/kernel/module.c | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
--- a/arch/arm/kernel/module.c
|
||||
+++ b/arch/arm/kernel/module.c
|
||||
@@ -146,6 +146,10 @@ apply_relocate(Elf32_Shdr *sechdrs, cons
|
||||
return -ENOEXEC;
|
||||
}
|
||||
|
||||
+ if ((IS_ERR_VALUE(sym->st_value) || !sym->st_value) &&
|
||||
+ ELF_ST_BIND(sym->st_info) == STB_WEAK)
|
||||
+ continue;
|
||||
+
|
||||
loc = dstsec->sh_addr + rel->r_offset;
|
||||
|
||||
switch (ELF32_R_TYPE(rel->r_info)) {
|
@ -1,282 +0,0 @@
|
||||
From: Yousong Zhou <yszhou4tech@gmail.com>
|
||||
Subject: MIPS: kexec: Accept command line parameters from userspace.
|
||||
|
||||
Signed-off-by: Yousong Zhou <yszhou4tech@gmail.com>
|
||||
---
|
||||
arch/mips/kernel/machine_kexec.c | 153 +++++++++++++++++++++++++++++++-----
|
||||
arch/mips/kernel/machine_kexec.h | 20 +++++
|
||||
arch/mips/kernel/relocate_kernel.S | 21 +++--
|
||||
3 files changed, 167 insertions(+), 27 deletions(-)
|
||||
create mode 100644 arch/mips/kernel/machine_kexec.h
|
||||
|
||||
--- a/arch/mips/kernel/machine_kexec.c
|
||||
+++ b/arch/mips/kernel/machine_kexec.c
|
||||
@@ -9,14 +9,11 @@
|
||||
#include <linux/delay.h>
|
||||
#include <linux/libfdt.h>
|
||||
|
||||
+#include <asm/bootinfo.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/page.h>
|
||||
-
|
||||
-extern const unsigned char relocate_new_kernel[];
|
||||
-extern const size_t relocate_new_kernel_size;
|
||||
-
|
||||
-extern unsigned long kexec_start_address;
|
||||
-extern unsigned long kexec_indirection_page;
|
||||
+#include <linux/uaccess.h>
|
||||
+#include "machine_kexec.h"
|
||||
|
||||
static unsigned long reboot_code_buffer;
|
||||
|
||||
@@ -30,6 +27,101 @@ void (*_crash_smp_send_stop)(void) = NUL
|
||||
void (*_machine_kexec_shutdown)(void) = NULL;
|
||||
void (*_machine_crash_shutdown)(struct pt_regs *regs) = NULL;
|
||||
|
||||
+static void machine_kexec_print_args(void)
|
||||
+{
|
||||
+ unsigned long argc = (int)kexec_args[0];
|
||||
+ int i;
|
||||
+
|
||||
+ pr_info("kexec_args[0] (argc): %lu\n", argc);
|
||||
+ pr_info("kexec_args[1] (argv): %p\n", (void *)kexec_args[1]);
|
||||
+ pr_info("kexec_args[2] (env ): %p\n", (void *)kexec_args[2]);
|
||||
+ pr_info("kexec_args[3] (desc): %p\n", (void *)kexec_args[3]);
|
||||
+
|
||||
+ for (i = 0; i < argc; i++) {
|
||||
+ pr_info("kexec_argv[%d] = %p, %s\n",
|
||||
+ i, kexec_argv[i], kexec_argv[i]);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void machine_kexec_init_argv(struct kimage *image)
|
||||
+{
|
||||
+ void __user *buf = NULL;
|
||||
+ size_t bufsz;
|
||||
+ size_t size;
|
||||
+ int i;
|
||||
+
|
||||
+ bufsz = 0;
|
||||
+ for (i = 0; i < image->nr_segments; i++) {
|
||||
+ struct kexec_segment *seg;
|
||||
+
|
||||
+ seg = &image->segment[i];
|
||||
+ if (seg->bufsz < 6)
|
||||
+ continue;
|
||||
+
|
||||
+ if (strncmp((char *) seg->buf, "kexec ", 6))
|
||||
+ continue;
|
||||
+
|
||||
+ buf = seg->buf;
|
||||
+ bufsz = seg->bufsz;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ if (!buf)
|
||||
+ return;
|
||||
+
|
||||
+ size = KEXEC_COMMAND_LINE_SIZE;
|
||||
+ size = min(size, bufsz);
|
||||
+ if (size < bufsz)
|
||||
+ pr_warn("kexec command line truncated to %zd bytes\n", size);
|
||||
+
|
||||
+ /* Copy to kernel space */
|
||||
+ if (copy_from_user(kexec_argv_buf, buf, size))
|
||||
+ pr_warn("kexec command line copy to kernel space failed\n");
|
||||
+
|
||||
+ kexec_argv_buf[size - 1] = 0;
|
||||
+}
|
||||
+
|
||||
+static void machine_kexec_parse_argv(struct kimage *image)
|
||||
+{
|
||||
+ char *reboot_code_buffer;
|
||||
+ int reloc_delta;
|
||||
+ char *ptr;
|
||||
+ int argc;
|
||||
+ int i;
|
||||
+
|
||||
+ ptr = kexec_argv_buf;
|
||||
+ argc = 0;
|
||||
+
|
||||
+ /*
|
||||
+ * convert command line string to array of parameters
|
||||
+ * (as bootloader does).
|
||||
+ */
|
||||
+ while (ptr && *ptr && (KEXEC_MAX_ARGC > argc)) {
|
||||
+ if (*ptr == ' ') {
|
||||
+ *ptr++ = '\0';
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ kexec_argv[argc++] = ptr;
|
||||
+ ptr = strchr(ptr, ' ');
|
||||
+ }
|
||||
+
|
||||
+ if (!argc)
|
||||
+ return;
|
||||
+
|
||||
+ kexec_args[0] = argc;
|
||||
+ kexec_args[1] = (unsigned long)kexec_argv;
|
||||
+ kexec_args[2] = 0;
|
||||
+ kexec_args[3] = 0;
|
||||
+
|
||||
+ reboot_code_buffer = page_address(image->control_code_page);
|
||||
+ reloc_delta = reboot_code_buffer - (char *)kexec_relocate_new_kernel;
|
||||
+
|
||||
+ kexec_args[1] += reloc_delta;
|
||||
+ for (i = 0; i < argc; i++)
|
||||
+ kexec_argv[i] += reloc_delta;
|
||||
+}
|
||||
+
|
||||
static void kexec_image_info(const struct kimage *kimage)
|
||||
{
|
||||
unsigned long i;
|
||||
@@ -99,6 +191,18 @@ machine_kexec_prepare(struct kimage *kim
|
||||
#endif
|
||||
|
||||
kexec_image_info(kimage);
|
||||
+ /*
|
||||
+ * Whenever arguments passed from kexec-tools, Init the arguments as
|
||||
+ * the original ones to try avoiding booting failure.
|
||||
+ */
|
||||
+
|
||||
+ kexec_args[0] = fw_arg0;
|
||||
+ kexec_args[1] = fw_arg1;
|
||||
+ kexec_args[2] = fw_arg2;
|
||||
+ kexec_args[3] = fw_arg3;
|
||||
+
|
||||
+ machine_kexec_init_argv(kimage);
|
||||
+ machine_kexec_parse_argv(kimage);
|
||||
|
||||
if (_machine_kexec_prepare)
|
||||
return _machine_kexec_prepare(kimage);
|
||||
@@ -161,7 +265,7 @@ machine_crash_shutdown(struct pt_regs *r
|
||||
void kexec_nonboot_cpu_jump(void)
|
||||
{
|
||||
local_flush_icache_range((unsigned long)relocated_kexec_smp_wait,
|
||||
- reboot_code_buffer + relocate_new_kernel_size);
|
||||
+ reboot_code_buffer + KEXEC_RELOCATE_NEW_KERNEL_SIZE);
|
||||
|
||||
relocated_kexec_smp_wait(NULL);
|
||||
}
|
||||
@@ -199,7 +303,7 @@ void kexec_reboot(void)
|
||||
* machine_kexec() CPU.
|
||||
*/
|
||||
local_flush_icache_range(reboot_code_buffer,
|
||||
- reboot_code_buffer + relocate_new_kernel_size);
|
||||
+ reboot_code_buffer + KEXEC_RELOCATE_NEW_KERNEL_SIZE);
|
||||
|
||||
do_kexec = (void *)reboot_code_buffer;
|
||||
do_kexec();
|
||||
@@ -212,10 +316,12 @@ machine_kexec(struct kimage *image)
|
||||
unsigned long *ptr;
|
||||
|
||||
reboot_code_buffer =
|
||||
- (unsigned long)page_address(image->control_code_page);
|
||||
+ (unsigned long)page_address(image->control_code_page);
|
||||
+ pr_info("reboot_code_buffer = %p\n", (void *)reboot_code_buffer);
|
||||
|
||||
kexec_start_address =
|
||||
(unsigned long) phys_to_virt(image->start);
|
||||
+ pr_info("kexec_start_address = %p\n", (void *)kexec_start_address);
|
||||
|
||||
if (image->type == KEXEC_TYPE_DEFAULT) {
|
||||
kexec_indirection_page =
|
||||
@@ -223,9 +329,19 @@ machine_kexec(struct kimage *image)
|
||||
} else {
|
||||
kexec_indirection_page = (unsigned long)&image->head;
|
||||
}
|
||||
+ pr_info("kexec_indirection_page = %p\n", (void *)kexec_indirection_page);
|
||||
|
||||
- memcpy((void*)reboot_code_buffer, relocate_new_kernel,
|
||||
- relocate_new_kernel_size);
|
||||
+ pr_info("Where is memcpy: %p\n", memcpy);
|
||||
+ pr_info("kexec_relocate_new_kernel = %p, kexec_relocate_new_kernel_end = %p\n",
|
||||
+ (void *)kexec_relocate_new_kernel, &kexec_relocate_new_kernel_end);
|
||||
+ pr_info("Copy %lu bytes from %p to %p\n", KEXEC_RELOCATE_NEW_KERNEL_SIZE,
|
||||
+ (void *)kexec_relocate_new_kernel, (void *)reboot_code_buffer);
|
||||
+ memcpy((void*)reboot_code_buffer, kexec_relocate_new_kernel,
|
||||
+ KEXEC_RELOCATE_NEW_KERNEL_SIZE);
|
||||
+
|
||||
+ pr_info("Before _print_args().\n");
|
||||
+ machine_kexec_print_args();
|
||||
+ pr_info("Before eval loop.\n");
|
||||
|
||||
/*
|
||||
* The generic kexec code builds a page list with physical
|
||||
@@ -256,7 +372,7 @@ machine_kexec(struct kimage *image)
|
||||
#ifdef CONFIG_SMP
|
||||
/* All secondary cpus now may jump to kexec_wait cycle */
|
||||
relocated_kexec_smp_wait = reboot_code_buffer +
|
||||
- (void *)(kexec_smp_wait - relocate_new_kernel);
|
||||
+ (void *)(kexec_smp_wait - kexec_relocate_new_kernel);
|
||||
smp_wmb();
|
||||
atomic_set(&kexec_ready_to_reboot, 1);
|
||||
#endif
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/kernel/machine_kexec.h
|
||||
@@ -0,0 +1,20 @@
|
||||
+#ifndef _MACHINE_KEXEC_H
|
||||
+#define _MACHINE_KEXEC_H
|
||||
+
|
||||
+#ifndef __ASSEMBLY__
|
||||
+extern const unsigned char kexec_relocate_new_kernel[];
|
||||
+extern unsigned long kexec_relocate_new_kernel_end;
|
||||
+extern unsigned long kexec_start_address;
|
||||
+extern unsigned long kexec_indirection_page;
|
||||
+
|
||||
+extern char kexec_argv_buf[];
|
||||
+extern char *kexec_argv[];
|
||||
+
|
||||
+#define KEXEC_RELOCATE_NEW_KERNEL_SIZE ((unsigned long)&kexec_relocate_new_kernel_end - (unsigned long)kexec_relocate_new_kernel)
|
||||
+#endif /* !__ASSEMBLY__ */
|
||||
+
|
||||
+#define KEXEC_COMMAND_LINE_SIZE 256
|
||||
+#define KEXEC_ARGV_SIZE (KEXEC_COMMAND_LINE_SIZE / 16)
|
||||
+#define KEXEC_MAX_ARGC (KEXEC_ARGV_SIZE / sizeof(long))
|
||||
+
|
||||
+#endif
|
||||
--- a/arch/mips/kernel/relocate_kernel.S
|
||||
+++ b/arch/mips/kernel/relocate_kernel.S
|
||||
@@ -10,10 +10,11 @@
|
||||
#include <asm/mipsregs.h>
|
||||
#include <asm/stackframe.h>
|
||||
#include <asm/addrspace.h>
|
||||
+#include "machine_kexec.h"
|
||||
|
||||
#include <kernel-entry-init.h>
|
||||
|
||||
-LEAF(relocate_new_kernel)
|
||||
+LEAF(kexec_relocate_new_kernel)
|
||||
PTR_L a0, arg0
|
||||
PTR_L a1, arg1
|
||||
PTR_L a2, arg2
|
||||
@@ -98,7 +99,7 @@ done:
|
||||
#endif
|
||||
/* jump to kexec_start_address */
|
||||
j s1
|
||||
- END(relocate_new_kernel)
|
||||
+ END(kexec_relocate_new_kernel)
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
@@ -177,8 +178,15 @@ EXPORT(kexec_indirection_page)
|
||||
PTR_WD 0
|
||||
.size kexec_indirection_page, PTRSIZE
|
||||
|
||||
-relocate_new_kernel_end:
|
||||
+kexec_argv_buf:
|
||||
+ EXPORT(kexec_argv_buf)
|
||||
+ .skip KEXEC_COMMAND_LINE_SIZE
|
||||
+ .size kexec_argv_buf, KEXEC_COMMAND_LINE_SIZE
|
||||
+
|
||||
+kexec_argv:
|
||||
+ EXPORT(kexec_argv)
|
||||
+ .skip KEXEC_ARGV_SIZE
|
||||
+ .size kexec_argv, KEXEC_ARGV_SIZE
|
||||
|
||||
-EXPORT(relocate_new_kernel_size)
|
||||
- PTR_WD relocate_new_kernel_end - relocate_new_kernel
|
||||
- .size relocate_new_kernel_size, PTRSIZE
|
||||
+kexec_relocate_new_kernel_end:
|
||||
+ EXPORT(kexec_relocate_new_kernel_end)
|
@ -1,84 +0,0 @@
|
||||
From bb0c3b0175240bf152fd7c644821a0cf9f77c37c Mon Sep 17 00:00:00 2001
|
||||
From: Evgeniy Didin <Evgeniy.Didin@synopsys.com>
|
||||
Date: Fri, 15 Mar 2019 18:53:38 +0300
|
||||
Subject: [PATCH] arc add OWRTDTB section
|
||||
|
||||
This change allows OpenWRT to patch resulting kernel binary with
|
||||
external .dtb.
|
||||
|
||||
That allows us to re-use exactly the same vmlinux on different boards
|
||||
given its ARC core configurations match (at least cache line sizes etc).
|
||||
|
||||
"patch-dtb" searches for ASCII "OWRTDTB:" string and copies external
|
||||
.dtb right after it, keeping the string in place.
|
||||
|
||||
Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
|
||||
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
|
||||
Signed-off-by: Evgeniy Didin <Evgeniy.Didin@synopsys.com>
|
||||
---
|
||||
arch/arc/kernel/head.S | 10 ++++++++++
|
||||
arch/arc/kernel/setup.c | 4 +++-
|
||||
arch/arc/kernel/vmlinux.lds.S | 13 +++++++++++++
|
||||
3 files changed, 26 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/arc/kernel/head.S
|
||||
+++ b/arch/arc/kernel/head.S
|
||||
@@ -88,6 +88,16 @@
|
||||
DSP_EARLY_INIT
|
||||
.endm
|
||||
|
||||
+ ; Here "patch-dtb" will embed external .dtb
|
||||
+ ; Note "patch-dtb" searches for ASCII "OWRTDTB:" string
|
||||
+ ; and pastes .dtb right after it, hence the string precedes
|
||||
+ ; __image_dtb symbol.
|
||||
+ .section .owrt, "aw",@progbits
|
||||
+ .ascii "OWRTDTB:"
|
||||
+ENTRY(__image_dtb)
|
||||
+ .fill 0x4000
|
||||
+END(__image_dtb)
|
||||
+
|
||||
.section .init.text, "ax",@progbits
|
||||
|
||||
;----------------------------------------------------------------
|
||||
--- a/arch/arc/kernel/setup.c
|
||||
+++ b/arch/arc/kernel/setup.c
|
||||
@@ -495,6 +495,8 @@ static inline bool uboot_arg_invalid(uns
|
||||
/* We always pass 0 as magic from U-boot */
|
||||
#define UBOOT_MAGIC_VALUE 0
|
||||
|
||||
+extern struct boot_param_header __image_dtb;
|
||||
+
|
||||
void __init handle_uboot_args(void)
|
||||
{
|
||||
bool use_embedded_dtb = true;
|
||||
@@ -533,7 +535,7 @@ void __init handle_uboot_args(void)
|
||||
ignore_uboot_args:
|
||||
|
||||
if (use_embedded_dtb) {
|
||||
- machine_desc = setup_machine_fdt(__dtb_start);
|
||||
+ machine_desc = setup_machine_fdt(&__image_dtb);
|
||||
if (!machine_desc)
|
||||
panic("Embedded DT invalid\n");
|
||||
}
|
||||
--- a/arch/arc/kernel/vmlinux.lds.S
|
||||
+++ b/arch/arc/kernel/vmlinux.lds.S
|
||||
@@ -27,6 +27,19 @@ SECTIONS
|
||||
|
||||
. = CONFIG_LINUX_LINK_BASE;
|
||||
|
||||
+ /*
|
||||
+ * In OpenWRT we want to patch built binary embedding .dtb of choice.
|
||||
+ * This is implemented with "patch-dtb" utility which searches for
|
||||
+ * "OWRTDTB:" string in first 16k of image and if it is found
|
||||
+ * copies .dtb right after mentioned string.
|
||||
+ *
|
||||
+ * Note: "OWRTDTB:" won't be overwritten with .dtb, .dtb will follow it.
|
||||
+ */
|
||||
+ .owrt : {
|
||||
+ *(.owrt)
|
||||
+ . = ALIGN(PAGE_SIZE);
|
||||
+ }
|
||||
+
|
||||
_int_vec_base_lds = .;
|
||||
.vector : {
|
||||
*(.vector)
|
@ -1,24 +0,0 @@
|
||||
From: Alexey Brodkin <abrodkin@synopsys.com>
|
||||
Subject: arc: enable unaligned access in kernel mode
|
||||
|
||||
This enables misaligned access handling even in kernel mode.
|
||||
Some wireless drivers (ath9k-htc and mt7601u) use misaligned accesses
|
||||
here and there and to cope with that without fixing stuff in the drivers
|
||||
we're just gracefully handling it on ARC.
|
||||
|
||||
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
|
||||
---
|
||||
arch/arc/kernel/unaligned.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/arc/kernel/unaligned.c
|
||||
+++ b/arch/arc/kernel/unaligned.c
|
||||
@@ -202,7 +202,7 @@ int misaligned_fixup(unsigned long addre
|
||||
char buf[TASK_COMM_LEN];
|
||||
|
||||
/* handle user mode only and only if enabled by sysadmin */
|
||||
- if (!user_mode(regs) || !unaligned_enabled)
|
||||
+ if (!unaligned_enabled)
|
||||
return 1;
|
||||
|
||||
if (no_unaligned_warning) {
|
@ -1,25 +0,0 @@
|
||||
From 66770a004afe10df11d3902e16eaa0c2c39436bb Mon Sep 17 00:00:00 2001
|
||||
From: Pawel Dembicki <paweldembicki@gmail.com>
|
||||
Date: Fri, 24 May 2019 17:56:19 +0200
|
||||
Subject: [PATCH] powerpc: Enable kernel XZ compression option on PPC_85xx
|
||||
|
||||
Enable kernel XZ compression option on PPC_85xx. Tested with
|
||||
simpleImage on TP-Link TL-WDR4900 (Freescale P1014 processor).
|
||||
|
||||
Suggested-by: Christian Lamparter <chunkeey@gmail.com>
|
||||
Signed-off-by: Pawel Dembicki <paweldembicki@gmail.com>
|
||||
---
|
||||
arch/powerpc/Kconfig | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/powerpc/Kconfig
|
||||
+++ b/arch/powerpc/Kconfig
|
||||
@@ -228,7 +228,7 @@ config PPC
|
||||
select HAVE_KERNEL_GZIP
|
||||
select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE
|
||||
select HAVE_KERNEL_LZO if DEFAULT_UIMAGE
|
||||
- select HAVE_KERNEL_XZ if PPC_BOOK3S || 44x
|
||||
+ select HAVE_KERNEL_XZ if PPC_BOOK3S || 44x || PPC_85xx
|
||||
select HAVE_KPROBES
|
||||
select HAVE_KPROBES_ON_FTRACE
|
||||
select HAVE_KRETPROBES
|
@ -1,314 +0,0 @@
|
||||
--- a/drivers/mtd/Kconfig
|
||||
+++ b/drivers/mtd/Kconfig
|
||||
@@ -12,6 +12,25 @@ menuconfig MTD
|
||||
|
||||
if MTD
|
||||
|
||||
+menu "OpenWrt specific MTD options"
|
||||
+
|
||||
+config MTD_ROOTFS_ROOT_DEV
|
||||
+ bool "Automatically set 'rootfs' partition to be root filesystem"
|
||||
+ default y
|
||||
+
|
||||
+config MTD_SPLIT_FIRMWARE
|
||||
+ bool "Automatically split firmware partition for kernel+rootfs"
|
||||
+ default y
|
||||
+
|
||||
+config MTD_SPLIT_FIRMWARE_NAME
|
||||
+ string "Firmware partition name"
|
||||
+ depends on MTD_SPLIT_FIRMWARE
|
||||
+ default "firmware"
|
||||
+
|
||||
+source "drivers/mtd/mtdsplit/Kconfig"
|
||||
+
|
||||
+endmenu
|
||||
+
|
||||
config MTD_TESTS
|
||||
tristate "MTD tests support (DANGEROUS)"
|
||||
depends on m
|
||||
--- a/drivers/mtd/mtdpart.c
|
||||
+++ b/drivers/mtd/mtdpart.c
|
||||
@@ -15,11 +15,13 @@
|
||||
#include <linux/kmod.h>
|
||||
#include <linux/mtd/mtd.h>
|
||||
#include <linux/mtd/partitions.h>
|
||||
+#include <linux/magic.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/of_platform.h>
|
||||
|
||||
#include "mtdcore.h"
|
||||
+#include "mtdsplit/mtdsplit.h"
|
||||
|
||||
/*
|
||||
* MTD methods which simply translate the effective address and pass through
|
||||
@@ -236,6 +238,146 @@ static int mtd_add_partition_attrs(struc
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static DEFINE_SPINLOCK(part_parser_lock);
|
||||
+static LIST_HEAD(part_parsers);
|
||||
+
|
||||
+static struct mtd_part_parser *mtd_part_parser_get(const char *name)
|
||||
+{
|
||||
+ struct mtd_part_parser *p, *ret = NULL;
|
||||
+
|
||||
+ spin_lock(&part_parser_lock);
|
||||
+
|
||||
+ list_for_each_entry(p, &part_parsers, list)
|
||||
+ if (!strcmp(p->name, name) && try_module_get(p->owner)) {
|
||||
+ ret = p;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ spin_unlock(&part_parser_lock);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static inline void mtd_part_parser_put(const struct mtd_part_parser *p)
|
||||
+{
|
||||
+ module_put(p->owner);
|
||||
+}
|
||||
+
|
||||
+static struct mtd_part_parser *
|
||||
+get_partition_parser_by_type(enum mtd_parser_type type,
|
||||
+ struct mtd_part_parser *start)
|
||||
+{
|
||||
+ struct mtd_part_parser *p, *ret = NULL;
|
||||
+
|
||||
+ spin_lock(&part_parser_lock);
|
||||
+
|
||||
+ p = list_prepare_entry(start, &part_parsers, list);
|
||||
+ if (start)
|
||||
+ mtd_part_parser_put(start);
|
||||
+
|
||||
+ list_for_each_entry_continue(p, &part_parsers, list) {
|
||||
+ if (p->type == type && try_module_get(p->owner)) {
|
||||
+ ret = p;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ spin_unlock(&part_parser_lock);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static int parse_mtd_partitions_by_type(struct mtd_info *master,
|
||||
+ enum mtd_parser_type type,
|
||||
+ const struct mtd_partition **pparts,
|
||||
+ struct mtd_part_parser_data *data)
|
||||
+{
|
||||
+ struct mtd_part_parser *prev = NULL;
|
||||
+ int ret = 0;
|
||||
+
|
||||
+ while (1) {
|
||||
+ struct mtd_part_parser *parser;
|
||||
+
|
||||
+ parser = get_partition_parser_by_type(type, prev);
|
||||
+ if (!parser)
|
||||
+ break;
|
||||
+
|
||||
+ ret = (*parser->parse_fn)(master, pparts, data);
|
||||
+
|
||||
+ if (ret > 0) {
|
||||
+ mtd_part_parser_put(parser);
|
||||
+ printk(KERN_NOTICE
|
||||
+ "%d %s partitions found on MTD device %s\n",
|
||||
+ ret, parser->name, master->name);
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ prev = parser;
|
||||
+ }
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+run_parsers_by_type(struct mtd_info *child, enum mtd_parser_type type)
|
||||
+{
|
||||
+ struct mtd_partition *parts;
|
||||
+ int nr_parts;
|
||||
+ int i;
|
||||
+
|
||||
+ nr_parts = parse_mtd_partitions_by_type(child, type, (const struct mtd_partition **)&parts,
|
||||
+ NULL);
|
||||
+ if (nr_parts <= 0)
|
||||
+ return nr_parts;
|
||||
+
|
||||
+ if (WARN_ON(!parts))
|
||||
+ return 0;
|
||||
+
|
||||
+ for (i = 0; i < nr_parts; i++) {
|
||||
+ /* adjust partition offsets */
|
||||
+ parts[i].offset += child->part.offset;
|
||||
+
|
||||
+ mtd_add_partition(child->parent,
|
||||
+ parts[i].name,
|
||||
+ parts[i].offset,
|
||||
+ parts[i].size);
|
||||
+ }
|
||||
+
|
||||
+ kfree(parts);
|
||||
+
|
||||
+ return nr_parts;
|
||||
+}
|
||||
+
|
||||
+#ifdef CONFIG_MTD_SPLIT_FIRMWARE_NAME
|
||||
+#define SPLIT_FIRMWARE_NAME CONFIG_MTD_SPLIT_FIRMWARE_NAME
|
||||
+#else
|
||||
+#define SPLIT_FIRMWARE_NAME "unused"
|
||||
+#endif
|
||||
+
|
||||
+static void split_firmware(struct mtd_info *master, struct mtd_info *part)
|
||||
+{
|
||||
+ run_parsers_by_type(part, MTD_PARSER_TYPE_FIRMWARE);
|
||||
+}
|
||||
+
|
||||
+static void mtd_partition_split(struct mtd_info *master, struct mtd_info *part)
|
||||
+{
|
||||
+ static int rootfs_found = 0;
|
||||
+
|
||||
+ if (rootfs_found)
|
||||
+ return;
|
||||
+
|
||||
+ if (!strcmp(part->name, "rootfs")) {
|
||||
+ run_parsers_by_type(part, MTD_PARSER_TYPE_ROOTFS);
|
||||
+
|
||||
+ rootfs_found = 1;
|
||||
+ }
|
||||
+
|
||||
+ if (IS_ENABLED(CONFIG_MTD_SPLIT_FIRMWARE) &&
|
||||
+ !strcmp(part->name, SPLIT_FIRMWARE_NAME) &&
|
||||
+ !of_find_property(mtd_get_of_node(part), "compatible", NULL))
|
||||
+ split_firmware(master, part);
|
||||
+}
|
||||
+
|
||||
int mtd_add_partition(struct mtd_info *parent, const char *name,
|
||||
long long offset, long long length)
|
||||
{
|
||||
@@ -274,6 +416,7 @@ int mtd_add_partition(struct mtd_info *p
|
||||
if (ret)
|
||||
goto err_remove_part;
|
||||
|
||||
+ mtd_partition_split(parent, child);
|
||||
mtd_add_partition_attrs(child);
|
||||
|
||||
return 0;
|
||||
@@ -422,6 +565,7 @@ int add_mtd_partitions(struct mtd_info *
|
||||
goto err_del_partitions;
|
||||
}
|
||||
|
||||
+ mtd_partition_split(master, child);
|
||||
mtd_add_partition_attrs(child);
|
||||
|
||||
/* Look for subpartitions */
|
||||
@@ -438,31 +582,6 @@ err_del_partitions:
|
||||
return ret;
|
||||
}
|
||||
|
||||
-static DEFINE_SPINLOCK(part_parser_lock);
|
||||
-static LIST_HEAD(part_parsers);
|
||||
-
|
||||
-static struct mtd_part_parser *mtd_part_parser_get(const char *name)
|
||||
-{
|
||||
- struct mtd_part_parser *p, *ret = NULL;
|
||||
-
|
||||
- spin_lock(&part_parser_lock);
|
||||
-
|
||||
- list_for_each_entry(p, &part_parsers, list)
|
||||
- if (!strcmp(p->name, name) && try_module_get(p->owner)) {
|
||||
- ret = p;
|
||||
- break;
|
||||
- }
|
||||
-
|
||||
- spin_unlock(&part_parser_lock);
|
||||
-
|
||||
- return ret;
|
||||
-}
|
||||
-
|
||||
-static inline void mtd_part_parser_put(const struct mtd_part_parser *p)
|
||||
-{
|
||||
- module_put(p->owner);
|
||||
-}
|
||||
-
|
||||
/*
|
||||
* Many partition parsers just expected the core to kfree() all their data in
|
||||
* one chunk. Do that by default.
|
||||
--- a/include/linux/mtd/partitions.h
|
||||
+++ b/include/linux/mtd/partitions.h
|
||||
@@ -75,6 +75,12 @@ struct mtd_part_parser_data {
|
||||
* Functions dealing with the various ways of partitioning the space
|
||||
*/
|
||||
|
||||
+enum mtd_parser_type {
|
||||
+ MTD_PARSER_TYPE_DEVICE = 0,
|
||||
+ MTD_PARSER_TYPE_ROOTFS,
|
||||
+ MTD_PARSER_TYPE_FIRMWARE,
|
||||
+};
|
||||
+
|
||||
struct mtd_part_parser {
|
||||
struct list_head list;
|
||||
struct module *owner;
|
||||
@@ -83,6 +89,7 @@ struct mtd_part_parser {
|
||||
int (*parse_fn)(struct mtd_info *, const struct mtd_partition **,
|
||||
struct mtd_part_parser_data *);
|
||||
void (*cleanup)(const struct mtd_partition *pparts, int nr_parts);
|
||||
+ enum mtd_parser_type type;
|
||||
};
|
||||
|
||||
/* Container for passing around a set of parsed partitions */
|
||||
--- a/drivers/mtd/Makefile
|
||||
+++ b/drivers/mtd/Makefile
|
||||
@@ -9,6 +9,8 @@ mtd-y := mtdcore.o mtdsuper.o mtdconc
|
||||
|
||||
obj-y += parsers/
|
||||
|
||||
+obj-$(CONFIG_MTD_SPLIT) += mtdsplit/
|
||||
+
|
||||
# 'Users' - code which presents functionality to userspace.
|
||||
obj-$(CONFIG_MTD_BLKDEVS) += mtd_blkdevs.o
|
||||
obj-$(CONFIG_MTD_BLOCK) += mtdblock.o
|
||||
--- a/include/linux/mtd/mtd.h
|
||||
+++ b/include/linux/mtd/mtd.h
|
||||
@@ -608,6 +608,24 @@ static inline void mtd_align_erase_req(s
|
||||
req->len += mtd->erasesize - mod;
|
||||
}
|
||||
|
||||
+static inline uint64_t mtd_roundup_to_eb(uint64_t sz, struct mtd_info *mtd)
|
||||
+{
|
||||
+ if (mtd_mod_by_eb(sz, mtd) == 0)
|
||||
+ return sz;
|
||||
+
|
||||
+ /* Round up to next erase block */
|
||||
+ return (mtd_div_by_eb(sz, mtd) + 1) * mtd->erasesize;
|
||||
+}
|
||||
+
|
||||
+static inline uint64_t mtd_rounddown_to_eb(uint64_t sz, struct mtd_info *mtd)
|
||||
+{
|
||||
+ if (mtd_mod_by_eb(sz, mtd) == 0)
|
||||
+ return sz;
|
||||
+
|
||||
+ /* Round down to the start of the current erase block */
|
||||
+ return (mtd_div_by_eb(sz, mtd)) * mtd->erasesize;
|
||||
+}
|
||||
+
|
||||
static inline uint32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd)
|
||||
{
|
||||
if (mtd->writesize_shift)
|
||||
@@ -680,6 +698,13 @@ extern void __put_mtd_device(struct mtd_
|
||||
extern struct mtd_info *get_mtd_device_nm(const char *name);
|
||||
extern void put_mtd_device(struct mtd_info *mtd);
|
||||
|
||||
+static inline uint64_t mtdpart_get_offset(const struct mtd_info *mtd)
|
||||
+{
|
||||
+ if (!mtd_is_partition(mtd))
|
||||
+ return 0;
|
||||
+
|
||||
+ return mtd->part.offset;
|
||||
+}
|
||||
|
||||
struct mtd_notifier {
|
||||
void (*add)(struct mtd_info *mtd);
|
@ -1,389 +0,0 @@
|
||||
From patchwork Tue Jun 8 04:07:19 2021
|
||||
Content-Type: text/plain; charset="utf-8"
|
||||
MIME-Version: 1.0
|
||||
Content-Transfer-Encoding: 7bit
|
||||
X-Patchwork-Submitter: John Thomson <git@johnthomson.fastmail.com.au>
|
||||
X-Patchwork-Id: 1489105
|
||||
X-Patchwork-Delegate: tudor.ambarus@gmail.com
|
||||
Return-Path:
|
||||
<linux-mtd-bounces+incoming=patchwork.ozlabs.org@lists.infradead.org>
|
||||
X-Original-To: incoming@patchwork.ozlabs.org
|
||||
Delivered-To: patchwork-incoming@bilbo.ozlabs.org
|
||||
Authentication-Results: ozlabs.org;
|
||||
spf=none (no SPF record) smtp.mailfrom=lists.infradead.org
|
||||
(client-ip=2607:7c80:54:e::133; helo=bombadil.infradead.org;
|
||||
envelope-from=linux-mtd-bounces+incoming=patchwork.ozlabs.org@lists.infradead.org;
|
||||
receiver=<UNKNOWN>)
|
||||
Authentication-Results: ozlabs.org;
|
||||
dkim=pass (2048-bit key;
|
||||
secure) header.d=lists.infradead.org header.i=@lists.infradead.org
|
||||
header.a=rsa-sha256 header.s=bombadil.20210309 header.b=EMabhVoR;
|
||||
dkim=fail reason="signature verification failed" (2048-bit key;
|
||||
unprotected) header.d=fastmail.com.au header.i=@fastmail.com.au
|
||||
header.a=rsa-sha256 header.s=fm3 header.b=dLzuZ6dB;
|
||||
dkim=fail reason="signature verification failed" (2048-bit key;
|
||||
unprotected) header.d=messagingengine.com header.i=@messagingengine.com
|
||||
header.a=rsa-sha256 header.s=fm3 header.b=nSRGsW+C;
|
||||
dkim-atps=neutral
|
||||
Received: from bombadil.infradead.org (bombadil.infradead.org
|
||||
[IPv6:2607:7c80:54:e::133])
|
||||
(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
|
||||
key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest
|
||||
SHA256)
|
||||
(No client certificate requested)
|
||||
by ozlabs.org (Postfix) with ESMTPS id 4FzcFN1j1nz9sW8
|
||||
for <incoming@patchwork.ozlabs.org>; Tue, 8 Jun 2021 14:09:28 +1000 (AEST)
|
||||
DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed;
|
||||
d=lists.infradead.org; s=bombadil.20210309; h=Sender:
|
||||
Content-Transfer-Encoding:Content-Type:List-Subscribe:List-Help:List-Post:
|
||||
List-Archive:List-Unsubscribe:List-Id:MIME-Version:Message-Id:Date:Subject:Cc
|
||||
:To:From:Reply-To:Content-ID:Content-Description:Resent-Date:Resent-From:
|
||||
Resent-Sender:Resent-To:Resent-Cc:Resent-Message-ID:In-Reply-To:References:
|
||||
List-Owner; bh=6mUWQd71FwsINycGYY1qOhKz+ecWJVNtwDkTebG3XkA=; b=EMabhVoRE3ad89
|
||||
o3L2AgyKrs+blSofUC3hoSsQe7gi3m4si8S9HW8Z+8SsS5TufUsvGwDl80qSYGlQOytQF+1yRUWvE
|
||||
6FJ/+bqv+TwjqZFibgJ6+9OVsQN9dZ/no1R0bBXIpmrf8ORUmv58QK4ZQquaFKbyXKpFeWOC2MSv4
|
||||
H2MAhyhTU8a3gtooH6G8+KvsJEfVgh6C+aDbwxyh2UY3chHKuw1kvL6AktbfUE2xl4zxi3x3kc70B
|
||||
Wi3LiJBFokxVdgnROXxTU5tI0XboWYkQV64gLuQNV4XKClcuhVpzloDK8Iok6NTd7b32a7TdEFlCS
|
||||
lGKsEKmxtUlW2FpfoduA==;
|
||||
Received: from localhost ([::1] helo=bombadil.infradead.org)
|
||||
by bombadil.infradead.org with esmtp (Exim 4.94.2 #2 (Red Hat Linux))
|
||||
id 1lqT1r-006OAW-DX; Tue, 08 Jun 2021 04:07:51 +0000
|
||||
Received: from new1-smtp.messagingengine.com ([66.111.4.221])
|
||||
by bombadil.infradead.org with esmtps (Exim 4.94.2 #2 (Red Hat Linux))
|
||||
id 1lqT1l-006O9b-Fq
|
||||
for linux-mtd@lists.infradead.org; Tue, 08 Jun 2021 04:07:50 +0000
|
||||
Received: from compute2.internal (compute2.nyi.internal [10.202.2.42])
|
||||
by mailnew.nyi.internal (Postfix) with ESMTP id 4456B580622;
|
||||
Tue, 8 Jun 2021 00:07:42 -0400 (EDT)
|
||||
Received: from mailfrontend2 ([10.202.2.163])
|
||||
by compute2.internal (MEProxy); Tue, 08 Jun 2021 00:07:42 -0400
|
||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=fastmail.com.au;
|
||||
h=from:to:cc:subject:date:message-id:mime-version
|
||||
:content-transfer-encoding; s=fm3; bh=ZXRH+YluM1mHCS1EWUiCY/Sg8O
|
||||
LccfHe1oW5iAay6y8=; b=dLzuZ6dBYf7ZA8tWLOBFZYLi7ERsGe/4vnMXG+ovvb
|
||||
dNBO0+SaFGwoqYSFrfq/TeyHfKyvxrA7+LCdopIuT4abpLHxtRwtRiafQcDYCPat
|
||||
qJIqOZO+wCZC5S9Jc1OP7+t1FviGpgevqIMotci37P+RWc5u3AweMzFljZk90E8C
|
||||
uorV6rXagD+OssJQzllRnAIK88+rOAC9ZyXv2gWxy4d1HSCwSWgzx2vnV9CNp918
|
||||
YC/3tiHas9krbrPIaAsdBROr7Bvoe/ShRRzruKRuvZVgg5NN90vX+/5ZjI8u04GM
|
||||
p2bWCbC62CP6wlcgDaz+c/Sgr5ITd2GPENJsHfqmLRBA==
|
||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=
|
||||
messagingengine.com; h=cc:content-transfer-encoding:date:from
|
||||
:message-id:mime-version:subject:to:x-me-proxy:x-me-proxy
|
||||
:x-me-sender:x-me-sender:x-sasl-enc; s=fm3; bh=ZXRH+YluM1mHCS1EW
|
||||
UiCY/Sg8OLccfHe1oW5iAay6y8=; b=nSRGsW+CQ2Zx1RVpIUu8W/VD/k5P+32BW
|
||||
5k2ltd+UhI3dfldBPzHrYiOP/IJqGkNW+V+rHASacW/vFygnaZoxNjRYKnOsu+26
|
||||
wb2yK3jpl6lsNTg3N1Z4XJrYY2lf9H29DMFbhC67l0PTc050rcZk4XsKTLAlv14Q
|
||||
VA4WREYSaX/4IN4O+ES4TMq0a/3gKZh6nvbbJXbsXfK0WlSHTGZtZmW3fyrqvbXa
|
||||
t+R7L8vvqWvwls0pV+Sn8LeQqb7+A69w0UOnuznjkcA3sCc2YehcHbxcUEnMH+9N
|
||||
bxOjmIDeg9/4X/829tUWUJiLhE5SFmQZ1P6oFtmbWoLrDz0ZJIVBw==
|
||||
X-ME-Sender: <xms:C-2-YD2uka4HsA6gcdsV2Ia7vebY4Yjp9E8q7KBMb54jnAzGL7-67Q>
|
||||
<xme:C-2-YCEaxASy5VlcrvNO_jLFpMDGkFCRsuVNuZGEQsiRZygk8jPHWq7unPjeT6uYS
|
||||
2pUP6PrTQ2rggjEIg>
|
||||
X-ME-Received:
|
||||
<xmr:C-2-YD4exeK49N_YZWWf2BWDhVyCbCY3wwvjTyDOFxeugx7Jg08pzMUToo9oJjrBpcVTaA3kbfk>
|
||||
X-ME-Proxy-Cause:
|
||||
gggruggvucftvghtrhhoucdtuddrgeduledrfedtkedgjeduucetufdoteggodetrfdotf
|
||||
fvucfrrhhofhhilhgvmecuhfgrshhtofgrihhlpdfqfgfvpdfurfetoffkrfgpnffqhgen
|
||||
uceurghilhhouhhtmecufedttdenucesvcftvggtihhpihgvnhhtshculddquddttddmne
|
||||
cujfgurhephffvufffkffoggfgsedtkeertdertddtnecuhfhrohhmpeflohhhnhcuvfhh
|
||||
ohhmshhonhcuoehgihhtsehjohhhnhhthhhomhhsohhnrdhfrghsthhmrghilhdrtghomh
|
||||
drrghuqeenucggtffrrghtthgvrhhnpefffeeihfdukedtuedufeetieeuudfhhefhkefh
|
||||
tefgtdeuffekffelleetveduieenucevlhhushhtvghrufhiiigvpedtnecurfgrrhgrmh
|
||||
epmhgrihhlfhhrohhmpehgihhtsehjohhhnhhthhhomhhsohhnrdhfrghsthhmrghilhdr
|
||||
tghomhdrrghu
|
||||
X-ME-Proxy: <xmx:C-2-YI0AJZGjcB3wIbI9BoC9X8VNl4i9A7cQnBkvwZ25czWJlkKCLw>
|
||||
<xmx:C-2-YGGufw99T-O81-FeiSyEruv6_Pr0IHFhspQdxjv5k1VFTZ0lzQ>
|
||||
<xmx:C-2-YJ8BW7DhSDSCEAPSJWrwh_hHP79qreTZtWh_kOUwSh1c0MMlAg>
|
||||
<xmx:Du2-YJBeX2Fg9oFZVXGwEJ1ZrZnXHiAqNON8tbpzquYgcm2o_LM48g>
|
||||
Received: by mail.messagingengine.com (Postfix) with ESMTPA; Tue,
|
||||
8 Jun 2021 00:07:35 -0400 (EDT)
|
||||
From: John Thomson <git@johnthomson.fastmail.com.au>
|
||||
To: Miquel Raynal <miquel.raynal@bootlin.com>,
|
||||
Richard Weinberger <richard@nod.at>, Vignesh Raghavendra <vigneshr@ti.com>,
|
||||
Tudor Ambarus <tudor.ambarus@microchip.com>,
|
||||
Michael Walle <michael@walle.cc>, Pratyush Yadav <p.yadav@ti.com>,
|
||||
linux-mtd@lists.infradead.org
|
||||
Cc: linux-kernel@vger.kernel.org,
|
||||
John Thomson <git@johnthomson.fastmail.com.au>,
|
||||
kernel test robot <lkp@intel.com>, Dan Carpenter <dan.carpenter@oracle.com>
|
||||
Subject: [PATCH] mtd: spi-nor: write support for minor aligned partitions
|
||||
Date: Tue, 8 Jun 2021 14:07:19 +1000
|
||||
Message-Id: <20210608040719.14431-1-git@johnthomson.fastmail.com.au>
|
||||
X-Mailer: git-send-email 2.31.1
|
||||
MIME-Version: 1.0
|
||||
X-CRM114-Version: 20100106-BlameMichelson ( TRE 0.8.0 (BSD) ) MR-646709E3
|
||||
X-CRM114-CacheID: sfid-20210607_210745_712053_67A7D864
|
||||
X-CRM114-Status: GOOD ( 26.99 )
|
||||
X-Spam-Score: -0.8 (/)
|
||||
X-Spam-Report: Spam detection software,
|
||||
running on the system "bombadil.infradead.org",
|
||||
has NOT identified this incoming email as spam. The original
|
||||
message has been attached to this so you can view it or label
|
||||
similar future email. If you have any questions, see
|
||||
the administrator of that system for details.
|
||||
Content preview: Do not prevent writing to mtd partitions where a partition
|
||||
boundary sits on a minor erasesize boundary. This addresses a FIXME that
|
||||
has been present since the start of the linux git history: /* Doesn' [...]
|
||||
Content analysis details: (-0.8 points, 5.0 required)
|
||||
pts rule name description
|
||||
---- ----------------------
|
||||
--------------------------------------------------
|
||||
-0.7 RCVD_IN_DNSWL_LOW RBL: Sender listed at https://www.dnswl.org/,
|
||||
low trust [66.111.4.221 listed in list.dnswl.org]
|
||||
-0.0 SPF_PASS SPF: sender matches SPF record
|
||||
-0.0 SPF_HELO_PASS SPF: HELO matches SPF record
|
||||
0.0 RCVD_IN_MSPIKE_H3 RBL: Good reputation (+3)
|
||||
[66.111.4.221 listed in wl.mailspike.net]
|
||||
-0.1 DKIM_VALID Message has at least one valid DKIM or DK signature
|
||||
0.1 DKIM_SIGNED Message has a DKIM or DK signature,
|
||||
not necessarily
|
||||
valid
|
||||
-0.1 DKIM_VALID_EF Message has a valid DKIM or DK signature from
|
||||
envelope-from domain
|
||||
0.0 RCVD_IN_MSPIKE_WL Mailspike good senders
|
||||
X-BeenThere: linux-mtd@lists.infradead.org
|
||||
X-Mailman-Version: 2.1.34
|
||||
Precedence: list
|
||||
List-Id: Linux MTD discussion mailing list <linux-mtd.lists.infradead.org>
|
||||
List-Unsubscribe: <http://lists.infradead.org/mailman/options/linux-mtd>,
|
||||
<mailto:linux-mtd-request@lists.infradead.org?subject=unsubscribe>
|
||||
List-Archive: <http://lists.infradead.org/pipermail/linux-mtd/>
|
||||
List-Post: <mailto:linux-mtd@lists.infradead.org>
|
||||
List-Help: <mailto:linux-mtd-request@lists.infradead.org?subject=help>
|
||||
List-Subscribe: <http://lists.infradead.org/mailman/listinfo/linux-mtd>,
|
||||
<mailto:linux-mtd-request@lists.infradead.org?subject=subscribe>
|
||||
Sender: "linux-mtd" <linux-mtd-bounces@lists.infradead.org>
|
||||
Errors-To: linux-mtd-bounces+incoming=patchwork.ozlabs.org@lists.infradead.org
|
||||
|
||||
Do not prevent writing to mtd partitions where a partition boundary sits
|
||||
on a minor erasesize boundary.
|
||||
This addresses a FIXME that has been present since the start of the
|
||||
linux git history:
|
||||
/* Doesn't start on a boundary of major erase size */
|
||||
/* FIXME: Let it be writable if it is on a boundary of
|
||||
* _minor_ erase size though */
|
||||
|
||||
Allow a uniform erase region spi-nor device to be configured
|
||||
to use the non-uniform erase regions code path for an erase with:
|
||||
CONFIG_MTD_SPI_NOR_USE_VARIABLE_ERASE=y
|
||||
|
||||
On supporting hardware (SECT_4K: majority of current SPI-NOR devices)
|
||||
provide the facility for an erase to use the least number
|
||||
of SPI-NOR operations, as well as access to 4K erase without
|
||||
requiring CONFIG_MTD_SPI_NOR_USE_4K_SECTORS
|
||||
|
||||
Introduce erasesize_minor to the mtd struct,
|
||||
the smallest erasesize supported by the device
|
||||
|
||||
On existing devices, this is useful where write support is wanted
|
||||
for data on a 4K partition, such as some u-boot-env partitions,
|
||||
or RouterBoot soft_config, while still netting the performance
|
||||
benefits of using 64K sectors
|
||||
|
||||
Performance:
|
||||
time mtd erase firmware
|
||||
OpenWrt 5.10 ramips MT7621 w25q128jv 0xfc0000 partition length
|
||||
|
||||
Without this patch
|
||||
MTD_SPI_NOR_USE_4K_SECTORS=y |n
|
||||
real 2m 11.66s |0m 50.86s
|
||||
user 0m 0.00s |0m 0.00s
|
||||
sys 1m 56.20s |0m 50.80s
|
||||
|
||||
With this patch
|
||||
MTD_SPI_NOR_USE_VARIABLE_ERASE=n|y |4K_SECTORS=y
|
||||
real 0m 51.68s |0m 50.85s |2m 12.89s
|
||||
user 0m 0.00s |0m 0.00s |0m 0.01s
|
||||
sys 0m 46.94s |0m 50.38s |2m 12.46s
|
||||
|
||||
Signed-off-by: John Thomson <git@johnthomson.fastmail.com.au>
|
||||
---
|
||||
Have not tested on variable erase regions device.
|
||||
|
||||
checkpatch does not like the printk(KERN_WARNING
|
||||
these should be changed separately beforehand?
|
||||
|
||||
Changes RFC -> v1:
|
||||
Fix uninitialized variable smatch warning
|
||||
Reported-by: kernel test robot <lkp@intel.com>
|
||||
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
|
||||
---
|
||||
drivers/mtd/mtdpart.c | 52 ++++++++++++++++++++++++++++---------
|
||||
drivers/mtd/spi-nor/Kconfig | 10 +++++++
|
||||
drivers/mtd/spi-nor/core.c | 10 +++++--
|
||||
include/linux/mtd/mtd.h | 2 ++
|
||||
4 files changed, 60 insertions(+), 14 deletions(-)
|
||||
|
||||
--- a/drivers/mtd/mtdpart.c
|
||||
+++ b/drivers/mtd/mtdpart.c
|
||||
@@ -41,10 +41,11 @@ static struct mtd_info *allocate_partiti
|
||||
struct mtd_info *master = mtd_get_master(parent);
|
||||
int wr_alignment = (parent->flags & MTD_NO_ERASE) ?
|
||||
master->writesize : master->erasesize;
|
||||
+ int wr_alignment_minor = 0;
|
||||
u64 parent_size = mtd_is_partition(parent) ?
|
||||
parent->part.size : parent->size;
|
||||
struct mtd_info *child;
|
||||
- u32 remainder;
|
||||
+ u32 remainder, remainder_minor;
|
||||
char *name;
|
||||
u64 tmp;
|
||||
|
||||
@@ -146,6 +147,7 @@ static struct mtd_info *allocate_partiti
|
||||
int i, max = parent->numeraseregions;
|
||||
u64 end = child->part.offset + child->part.size;
|
||||
struct mtd_erase_region_info *regions = parent->eraseregions;
|
||||
+ uint32_t erasesize_minor = child->erasesize;
|
||||
|
||||
/* Find the first erase regions which is part of this
|
||||
* partition. */
|
||||
@@ -156,15 +158,24 @@ static struct mtd_info *allocate_partiti
|
||||
if (i > 0)
|
||||
i--;
|
||||
|
||||
- /* Pick biggest erasesize */
|
||||
for (; i < max && regions[i].offset < end; i++) {
|
||||
+ /* Pick biggest erasesize */
|
||||
if (child->erasesize < regions[i].erasesize)
|
||||
child->erasesize = regions[i].erasesize;
|
||||
+ /* Pick smallest non-zero erasesize */
|
||||
+ if ((erasesize_minor > regions[i].erasesize) && (regions[i].erasesize > 0))
|
||||
+ erasesize_minor = regions[i].erasesize;
|
||||
}
|
||||
+
|
||||
+ if (erasesize_minor < child->erasesize)
|
||||
+ child->erasesize_minor = erasesize_minor;
|
||||
+
|
||||
BUG_ON(child->erasesize == 0);
|
||||
} else {
|
||||
/* Single erase size */
|
||||
child->erasesize = master->erasesize;
|
||||
+ if (master->erasesize_minor)
|
||||
+ child->erasesize_minor = master->erasesize_minor;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -172,26 +183,43 @@ static struct mtd_info *allocate_partiti
|
||||
* exposes several regions with different erasesize. Adjust
|
||||
* wr_alignment accordingly.
|
||||
*/
|
||||
- if (!(child->flags & MTD_NO_ERASE))
|
||||
+ if (!(child->flags & MTD_NO_ERASE)) {
|
||||
wr_alignment = child->erasesize;
|
||||
+ if (IS_ENABLED(CONFIG_MTD_SPI_NOR_USE_VARIABLE_ERASE) && child->erasesize_minor)
|
||||
+ wr_alignment_minor = child->erasesize_minor;
|
||||
+ }
|
||||
|
||||
tmp = mtd_get_master_ofs(child, 0);
|
||||
remainder = do_div(tmp, wr_alignment);
|
||||
if ((child->flags & MTD_WRITEABLE) && remainder) {
|
||||
- /* Doesn't start on a boundary of major erase size */
|
||||
- /* FIXME: Let it be writable if it is on a boundary of
|
||||
- * _minor_ erase size though */
|
||||
- child->flags &= ~MTD_WRITEABLE;
|
||||
- printk(KERN_WARNING"mtd: partition \"%s\" doesn't start on an erase/write block boundary -- force read-only\n",
|
||||
- part->name);
|
||||
+ if (wr_alignment_minor) {
|
||||
+ tmp = mtd_get_master_ofs(child, 0);
|
||||
+ remainder_minor = do_div(tmp, wr_alignment_minor);
|
||||
+ if (remainder_minor == 0)
|
||||
+ child->erasesize = child->erasesize_minor;
|
||||
+ }
|
||||
+
|
||||
+ if ((!wr_alignment_minor) || (wr_alignment_minor && remainder_minor != 0)) {
|
||||
+ child->flags &= ~MTD_WRITEABLE;
|
||||
+ printk(KERN_WARNING"mtd: partition \"%s\" doesn't start on an erase/write block boundary -- force read-only\n",
|
||||
+ part->name);
|
||||
+ }
|
||||
}
|
||||
|
||||
tmp = mtd_get_master_ofs(child, 0) + child->part.size;
|
||||
remainder = do_div(tmp, wr_alignment);
|
||||
if ((child->flags & MTD_WRITEABLE) && remainder) {
|
||||
- child->flags &= ~MTD_WRITEABLE;
|
||||
- printk(KERN_WARNING"mtd: partition \"%s\" doesn't end on an erase/write block -- force read-only\n",
|
||||
- part->name);
|
||||
+ if (wr_alignment_minor) {
|
||||
+ tmp = mtd_get_master_ofs(child, 0) + child->part.size;
|
||||
+ remainder_minor = do_div(tmp, wr_alignment_minor);
|
||||
+ if (remainder_minor == 0)
|
||||
+ child->erasesize = child->erasesize_minor;
|
||||
+ }
|
||||
+ if ((!wr_alignment_minor) || (wr_alignment_minor && remainder_minor != 0)) {
|
||||
+ child->flags &= ~MTD_WRITEABLE;
|
||||
+ printk(KERN_WARNING"mtd: partition \"%s\" doesn't end on an erase/write block -- force read-only\n",
|
||||
+ part->name);
|
||||
+ }
|
||||
}
|
||||
|
||||
child->size = child->part.size;
|
||||
--- a/drivers/mtd/spi-nor/Kconfig
|
||||
+++ b/drivers/mtd/spi-nor/Kconfig
|
||||
@@ -10,6 +10,16 @@ menuconfig MTD_SPI_NOR
|
||||
|
||||
if MTD_SPI_NOR
|
||||
|
||||
+config MTD_SPI_NOR_USE_VARIABLE_ERASE
|
||||
+ bool "Disable uniform_erase to allow use of all hardware supported erasesizes"
|
||||
+ depends on !MTD_SPI_NOR_USE_4K_SECTORS
|
||||
+ default n
|
||||
+ help
|
||||
+ Allow mixed use of all hardware supported erasesizes,
|
||||
+ by forcing spi_nor to use the multiple eraseregions code path.
|
||||
+ For example: A 68K erase will use one 64K erase, and one 4K erase
|
||||
+ on supporting hardware.
|
||||
+
|
||||
config MTD_SPI_NOR_USE_4K_SECTORS
|
||||
bool "Use small 4096 B erase sectors"
|
||||
default y
|
||||
--- a/drivers/mtd/spi-nor/core.c
|
||||
+++ b/drivers/mtd/spi-nor/core.c
|
||||
@@ -1048,6 +1048,8 @@ static u8 spi_nor_convert_3to4_erase(u8
|
||||
|
||||
static bool spi_nor_has_uniform_erase(const struct spi_nor *nor)
|
||||
{
|
||||
+ if (IS_ENABLED(CONFIG_MTD_SPI_NOR_USE_VARIABLE_ERASE))
|
||||
+ return false;
|
||||
return !!nor->params->erase_map.uniform_erase_type;
|
||||
}
|
||||
|
||||
@@ -2144,6 +2146,7 @@ static int spi_nor_select_erase(struct s
|
||||
{
|
||||
struct spi_nor_erase_map *map = &nor->params->erase_map;
|
||||
const struct spi_nor_erase_type *erase = NULL;
|
||||
+ const struct spi_nor_erase_type *erase_minor = NULL;
|
||||
struct mtd_info *mtd = &nor->mtd;
|
||||
u32 wanted_size = nor->info->sector_size;
|
||||
int i;
|
||||
@@ -2176,8 +2179,9 @@ static int spi_nor_select_erase(struct s
|
||||
*/
|
||||
for (i = SNOR_ERASE_TYPE_MAX - 1; i >= 0; i--) {
|
||||
if (map->erase_type[i].size) {
|
||||
- erase = &map->erase_type[i];
|
||||
- break;
|
||||
+ if (!erase)
|
||||
+ erase = &map->erase_type[i];
|
||||
+ erase_minor = &map->erase_type[i];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2185,6 +2189,8 @@ static int spi_nor_select_erase(struct s
|
||||
return -EINVAL;
|
||||
|
||||
mtd->erasesize = erase->size;
|
||||
+ if (erase_minor && erase_minor->size < erase->size)
|
||||
+ mtd->erasesize_minor = erase_minor->size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
--- a/include/linux/mtd/mtd.h
|
||||
+++ b/include/linux/mtd/mtd.h
|
||||
@@ -238,6 +238,8 @@ struct mtd_info {
|
||||
* information below if they desire
|
||||
*/
|
||||
uint32_t erasesize;
|
||||
+ /* "Minor" (smallest) erase size supported by the whole device */
|
||||
+ uint32_t erasesize_minor;
|
||||
/* Minimal writable flash unit size. In case of NOR flash it is 1 (even
|
||||
* though individual bits can be cleared), in case of NAND flash it is
|
||||
* one NAND page (or half, or one-fourths of it), in case of ECC-ed NOR
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user