author      Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-11 08:27:49 +0000
committer   Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-11 08:27:49 +0000
commit      ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree        b2d64bc10158fdd5497876388cd68142ca374ed3 /mm/damon
parent      Initial commit. (diff)
Adding upstream version 6.6.15.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'mm/damon')
-rw-r--r--   mm/damon/Kconfig            107
-rw-r--r--   mm/damon/Makefile             9
-rw-r--r--   mm/damon/core-test.h        441
-rw-r--r--   mm/damon/core.c            1531
-rw-r--r--   mm/damon/dbgfs-test.h       163
-rw-r--r--   mm/damon/dbgfs.c           1133
-rw-r--r--   mm/damon/lru_sort.c         321
-rw-r--r--   mm/damon/modules-common.c    42
-rw-r--r--   mm/damon/modules-common.h    49
-rw-r--r--   mm/damon/ops-common.c       121
-rw-r--r--   mm/damon/ops-common.h        18
-rw-r--r--   mm/damon/paddr.c            351
-rw-r--r--   mm/damon/reclaim.c          284
-rw-r--r--   mm/damon/sysfs-common.c     107
-rw-r--r--   mm/damon/sysfs-common.h      56
-rw-r--r--   mm/damon/sysfs-schemes.c   1817
-rw-r--r--   mm/damon/sysfs.c           1829
-rw-r--r--   mm/damon/vaddr-test.h       324
-rw-r--r--   mm/damon/vaddr.c            729
19 files changed, 9432 insertions, 0 deletions
diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig
new file mode 100644
index 000000000..436c6b4cb
--- /dev/null
+++ b/mm/damon/Kconfig
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+menu "Data Access Monitoring"
+
+config DAMON
+ bool "DAMON: Data Access Monitoring Framework"
+ help
+ This builds a framework that allows kernel subsystems to monitor
+ access frequency of each memory region. The information can be useful
+ for performance-centric DRAM level memory management.
+
+ See https://damonitor.github.io/doc/html/latest-damon/index.html for
+ more information.
+
+config DAMON_KUNIT_TEST
+ bool "Test for damon" if !KUNIT_ALL_TESTS
+ depends on DAMON && KUNIT=y
+ default KUNIT_ALL_TESTS
+ help
+ This builds the DAMON Kunit test suite.
+
+ For more information on KUnit and unit tests in general, please refer
+ to the KUnit documentation.
+
+ If unsure, say N.
+
+config DAMON_VADDR
+ bool "Data access monitoring operations for virtual address spaces"
+ depends on DAMON && MMU
+ select PAGE_IDLE_FLAG
+ help
+ This builds the default data access monitoring operations for DAMON
+ that work for virtual address spaces.
+
+config DAMON_PADDR
+ bool "Data access monitoring operations for the physical address space"
+ depends on DAMON && MMU
+ select PAGE_IDLE_FLAG
+ help
+ This builds the default data access monitoring operations for DAMON
+ that work for the physical address space.
+
+config DAMON_VADDR_KUNIT_TEST
+ bool "Test for DAMON operations" if !KUNIT_ALL_TESTS
+ depends on DAMON_VADDR && KUNIT=y
+ default KUNIT_ALL_TESTS
+ help
+ This builds the DAMON virtual addresses operations Kunit test suite.
+
+ For more information on KUnit and unit tests in general, please refer
+ to the KUnit documentation.
+
+ If unsure, say N.
+
+config DAMON_SYSFS
+ bool "DAMON sysfs interface"
+ depends on DAMON && SYSFS
+ help
+ This builds the sysfs interface for DAMON. The user space can use
+ the interface for arbitrary data access monitoring.
+
+config DAMON_DBGFS
+ bool "DAMON debugfs interface (DEPRECATED!)"
+ depends on DAMON_VADDR && DAMON_PADDR && DEBUG_FS
+ help
+ This builds the debugfs interface for DAMON. The user space admins
+ can use the interface for arbitrary data access monitoring.
+
+ If unsure, say N.
+
+ This is deprecated, so users should move to the sysfs interface
+ (DAMON_SYSFS). If you depend on this and cannot move, please report
+ your usecase to damon@lists.linux.dev and linux-mm@kvack.org.
+
+config DAMON_DBGFS_KUNIT_TEST
+ bool "Test for damon debugfs interface" if !KUNIT_ALL_TESTS
+ depends on DAMON_DBGFS && KUNIT=y
+ default KUNIT_ALL_TESTS
+ help
+ This builds the DAMON debugfs interface Kunit test suite.
+
+ For more information on KUnit and unit tests in general, please refer
+ to the KUnit documentation.
+
+ If unsure, say N.
+
+config DAMON_RECLAIM
+ bool "Build DAMON-based reclaim (DAMON_RECLAIM)"
+ depends on DAMON_PADDR
+ help
+ This builds the DAMON-based reclamation subsystem. It finds pages
+ that have not been accessed for a long time (cold) using DAMON and
+ reclaims them.
+
+ This is suggested to be used as a proactive and lightweight
+ reclamation under light memory pressure, while the traditional page
+ scanning-based reclamation is used for heavy pressure.
+
+config DAMON_LRU_SORT
+ bool "Build DAMON-based LRU-lists sorting (DAMON_LRU_SORT)"
+ depends on DAMON_PADDR
+ help
+ This builds the DAMON-based LRU-lists sorting subsystem. It tries to
+ protect frequently accessed (hot) pages while letting rarely accessed
+ (cold) pages be reclaimed first under memory pressure.
+
+endmenu
diff --git a/mm/damon/Makefile b/mm/damon/Makefile
new file mode 100644
index 000000000..f7add3f4a
--- /dev/null
+++ b/mm/damon/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y := core.o
+obj-$(CONFIG_DAMON_VADDR) += ops-common.o vaddr.o
+obj-$(CONFIG_DAMON_PADDR) += ops-common.o paddr.o
+obj-$(CONFIG_DAMON_SYSFS) += sysfs-common.o sysfs-schemes.o sysfs.o
+obj-$(CONFIG_DAMON_DBGFS) += dbgfs.o
+obj-$(CONFIG_DAMON_RECLAIM) += modules-common.o reclaim.o
+obj-$(CONFIG_DAMON_LRU_SORT) += modules-common.o lru_sort.o
diff --git a/mm/damon/core-test.h b/mm/damon/core-test.h
new file mode 100644
index 000000000..6cc8b2455
--- /dev/null
+++ b/mm/damon/core-test.h
@@ -0,0 +1,441 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Data Access Monitor Unit Tests
+ *
+ * Copyright 2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#ifdef CONFIG_DAMON_KUNIT_TEST
+
+#ifndef _DAMON_CORE_TEST_H
+#define _DAMON_CORE_TEST_H
+
+#include <kunit/test.h>
+
+static void damon_test_regions(struct kunit *test)
+{
+ struct damon_region *r;
+ struct damon_target *t;
+
+ r = damon_new_region(1, 2);
+ KUNIT_EXPECT_EQ(test, 1ul, r->ar.start);
+ KUNIT_EXPECT_EQ(test, 2ul, r->ar.end);
+ KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses);
+
+ t = damon_new_target();
+ KUNIT_EXPECT_EQ(test, 0u, damon_nr_regions(t));
+
+ damon_add_region(r, t);
+ KUNIT_EXPECT_EQ(test, 1u, damon_nr_regions(t));
+
+ damon_del_region(r, t);
+ KUNIT_EXPECT_EQ(test, 0u, damon_nr_regions(t));
+
+ damon_free_target(t);
+}
+
+static unsigned int nr_damon_targets(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ unsigned int nr_targets = 0;
+
+ damon_for_each_target(t, ctx)
+ nr_targets++;
+
+ return nr_targets;
+}
+
+static void damon_test_target(struct kunit *test)
+{
+ struct damon_ctx *c = damon_new_ctx();
+ struct damon_target *t;
+
+ t = damon_new_target();
+ KUNIT_EXPECT_EQ(test, 0u, nr_damon_targets(c));
+
+ damon_add_target(c, t);
+ KUNIT_EXPECT_EQ(test, 1u, nr_damon_targets(c));
+
+ damon_destroy_target(t);
+ KUNIT_EXPECT_EQ(test, 0u, nr_damon_targets(c));
+
+ damon_destroy_ctx(c);
+}
+
+/*
+ * Test kdamond_reset_aggregated()
+ *
+ * DAMON checks access to each region and aggregates this information as the
+ * access frequency of each region. In detail, it increases '->nr_accesses' of
+ * each region in which an access has been confirmed.
+ * 'kdamond_reset_aggregated()' flushes the aggregated information
+ * ('->nr_accesses' of each region) to the result buffer. As a result of the
+ * flushing, '->nr_accesses' of every region is reset to zero.
+ */
+static void damon_test_aggregate(struct kunit *test)
+{
+ struct damon_ctx *ctx = damon_new_ctx();
+ unsigned long saddr[][3] = {{10, 20, 30}, {5, 42, 49}, {13, 33, 55} };
+ unsigned long eaddr[][3] = {{15, 27, 40}, {31, 45, 55}, {23, 44, 66} };
+ unsigned long accesses[][3] = {{42, 95, 84}, {10, 20, 30}, {0, 1, 2} };
+ struct damon_target *t;
+ struct damon_region *r;
+ int it, ir;
+
+ for (it = 0; it < 3; it++) {
+ t = damon_new_target();
+ damon_add_target(ctx, t);
+ }
+
+ it = 0;
+ damon_for_each_target(t, ctx) {
+ for (ir = 0; ir < 3; ir++) {
+ r = damon_new_region(saddr[it][ir], eaddr[it][ir]);
+ r->nr_accesses = accesses[it][ir];
+ damon_add_region(r, t);
+ }
+ it++;
+ }
+ kdamond_reset_aggregated(ctx);
+ it = 0;
+ damon_for_each_target(t, ctx) {
+ ir = 0;
+ /* '->nr_accesses' should be zeroed */
+ damon_for_each_region(r, t) {
+ KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses);
+ ir++;
+ }
+ /* regions should be preserved */
+ KUNIT_EXPECT_EQ(test, 3, ir);
+ it++;
+ }
+ /* targets also should be preserved */
+ KUNIT_EXPECT_EQ(test, 3, it);
+
+ damon_destroy_ctx(ctx);
+}
+
+static void damon_test_split_at(struct kunit *test)
+{
+ struct damon_ctx *c = damon_new_ctx();
+ struct damon_target *t;
+ struct damon_region *r;
+
+ t = damon_new_target();
+ r = damon_new_region(0, 100);
+ damon_add_region(r, t);
+ damon_split_region_at(t, r, 25);
+ KUNIT_EXPECT_EQ(test, r->ar.start, 0ul);
+ KUNIT_EXPECT_EQ(test, r->ar.end, 25ul);
+
+ r = damon_next_region(r);
+ KUNIT_EXPECT_EQ(test, r->ar.start, 25ul);
+ KUNIT_EXPECT_EQ(test, r->ar.end, 100ul);
+
+ damon_free_target(t);
+ damon_destroy_ctx(c);
+}
+
+static void damon_test_merge_two(struct kunit *test)
+{
+ struct damon_target *t;
+ struct damon_region *r, *r2, *r3;
+ int i;
+
+ t = damon_new_target();
+ r = damon_new_region(0, 100);
+ r->nr_accesses = 10;
+ damon_add_region(r, t);
+ r2 = damon_new_region(100, 300);
+ r2->nr_accesses = 20;
+ damon_add_region(r2, t);
+
+ damon_merge_two_regions(t, r, r2);
+ KUNIT_EXPECT_EQ(test, r->ar.start, 0ul);
+ KUNIT_EXPECT_EQ(test, r->ar.end, 300ul);
+ KUNIT_EXPECT_EQ(test, r->nr_accesses, 16u);
+
+ i = 0;
+ damon_for_each_region(r3, t) {
+ KUNIT_EXPECT_PTR_EQ(test, r, r3);
+ i++;
+ }
+ KUNIT_EXPECT_EQ(test, i, 1);
+
+ damon_free_target(t);
+}
+
+static struct damon_region *__nth_region_of(struct damon_target *t, int idx)
+{
+ struct damon_region *r;
+ unsigned int i = 0;
+
+ damon_for_each_region(r, t) {
+ if (i++ == idx)
+ return r;
+ }
+
+ return NULL;
+}
+
+static void damon_test_merge_regions_of(struct kunit *test)
+{
+ struct damon_target *t;
+ struct damon_region *r;
+ unsigned long sa[] = {0, 100, 114, 122, 130, 156, 170, 184};
+ unsigned long ea[] = {100, 112, 122, 130, 156, 170, 184, 230};
+ unsigned int nrs[] = {0, 0, 10, 10, 20, 30, 1, 2};
+
+ unsigned long saddrs[] = {0, 114, 130, 156, 170};
+ unsigned long eaddrs[] = {112, 130, 156, 170, 230};
+ int i;
+
+ t = damon_new_target();
+ for (i = 0; i < ARRAY_SIZE(sa); i++) {
+ r = damon_new_region(sa[i], ea[i]);
+ r->nr_accesses = nrs[i];
+ damon_add_region(r, t);
+ }
+
+ damon_merge_regions_of(t, 9, 9999);
+ /* 0-112, 114-130, 130-156, 156-170, 170-230 */
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 5u);
+ for (i = 0; i < 5; i++) {
+ r = __nth_region_of(t, i);
+ KUNIT_EXPECT_EQ(test, r->ar.start, saddrs[i]);
+ KUNIT_EXPECT_EQ(test, r->ar.end, eaddrs[i]);
+ }
+ damon_free_target(t);
+}
+
+static void damon_test_split_regions_of(struct kunit *test)
+{
+ struct damon_ctx *c = damon_new_ctx();
+ struct damon_target *t;
+ struct damon_region *r;
+
+ t = damon_new_target();
+ r = damon_new_region(0, 22);
+ damon_add_region(r, t);
+ damon_split_regions_of(t, 2);
+ KUNIT_EXPECT_LE(test, damon_nr_regions(t), 2u);
+ damon_free_target(t);
+
+ t = damon_new_target();
+ r = damon_new_region(0, 220);
+ damon_add_region(r, t);
+ damon_split_regions_of(t, 4);
+ KUNIT_EXPECT_LE(test, damon_nr_regions(t), 4u);
+ damon_free_target(t);
+ damon_destroy_ctx(c);
+}
+
+static void damon_test_ops_registration(struct kunit *test)
+{
+ struct damon_ctx *c = damon_new_ctx();
+ struct damon_operations ops, bak;
+
+ /* DAMON_OPS_{V,P}ADDR are registered on subsys_initcall */
+ KUNIT_EXPECT_EQ(test, damon_select_ops(c, DAMON_OPS_VADDR), 0);
+ KUNIT_EXPECT_EQ(test, damon_select_ops(c, DAMON_OPS_PADDR), 0);
+
+ /* Double-registration is prohibited */
+ ops.id = DAMON_OPS_VADDR;
+ KUNIT_EXPECT_EQ(test, damon_register_ops(&ops), -EINVAL);
+ ops.id = DAMON_OPS_PADDR;
+ KUNIT_EXPECT_EQ(test, damon_register_ops(&ops), -EINVAL);
+
+ /* Unknown ops id cannot be registered */
+ KUNIT_EXPECT_EQ(test, damon_select_ops(c, NR_DAMON_OPS), -EINVAL);
+
+ /* Registration should succeed after unregistration */
+ mutex_lock(&damon_ops_lock);
+ bak = damon_registered_ops[DAMON_OPS_VADDR];
+ damon_registered_ops[DAMON_OPS_VADDR] = (struct damon_operations){};
+ mutex_unlock(&damon_ops_lock);
+
+ ops.id = DAMON_OPS_VADDR;
+ KUNIT_EXPECT_EQ(test, damon_register_ops(&ops), 0);
+
+ mutex_lock(&damon_ops_lock);
+ damon_registered_ops[DAMON_OPS_VADDR] = bak;
+ mutex_unlock(&damon_ops_lock);
+
+ /* Check double-registration failure again */
+ KUNIT_EXPECT_EQ(test, damon_register_ops(&ops), -EINVAL);
+}
+
+static void damon_test_set_regions(struct kunit *test)
+{
+ struct damon_target *t = damon_new_target();
+ struct damon_region *r1 = damon_new_region(4, 16);
+ struct damon_region *r2 = damon_new_region(24, 32);
+ struct damon_addr_range range = {.start = 8, .end = 28};
+ unsigned long expects[] = {8, 16, 16, 24, 24, 28};
+ int expect_idx = 0;
+ struct damon_region *r;
+
+ damon_add_region(r1, t);
+ damon_add_region(r2, t);
+ damon_set_regions(t, &range, 1);
+
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 3);
+ damon_for_each_region(r, t) {
+ KUNIT_EXPECT_EQ(test, r->ar.start, expects[expect_idx++]);
+ KUNIT_EXPECT_EQ(test, r->ar.end, expects[expect_idx++]);
+ }
+ damon_destroy_target(t);
+}
+
+static void damon_test_update_monitoring_result(struct kunit *test)
+{
+ struct damon_attrs old_attrs = {
+ .sample_interval = 10, .aggr_interval = 1000,};
+ struct damon_attrs new_attrs;
+ struct damon_region *r = damon_new_region(3, 7);
+
+ r->nr_accesses = 15;
+ r->age = 20;
+
+ new_attrs = (struct damon_attrs){
+ .sample_interval = 100, .aggr_interval = 10000,};
+ damon_update_monitoring_result(r, &old_attrs, &new_attrs);
+ KUNIT_EXPECT_EQ(test, r->nr_accesses, 15);
+ KUNIT_EXPECT_EQ(test, r->age, 2);
+
+ new_attrs = (struct damon_attrs){
+ .sample_interval = 1, .aggr_interval = 1000};
+ damon_update_monitoring_result(r, &old_attrs, &new_attrs);
+ KUNIT_EXPECT_EQ(test, r->nr_accesses, 150);
+ KUNIT_EXPECT_EQ(test, r->age, 2);
+
+ new_attrs = (struct damon_attrs){
+ .sample_interval = 1, .aggr_interval = 100};
+ damon_update_monitoring_result(r, &old_attrs, &new_attrs);
+ KUNIT_EXPECT_EQ(test, r->nr_accesses, 150);
+ KUNIT_EXPECT_EQ(test, r->age, 20);
+}
+
+static void damon_test_set_attrs(struct kunit *test)
+{
+ struct damon_ctx *c = damon_new_ctx();
+ struct damon_attrs valid_attrs = {
+ .min_nr_regions = 10, .max_nr_regions = 1000,
+ .sample_interval = 5000, .aggr_interval = 100000,};
+ struct damon_attrs invalid_attrs;
+
+ KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &valid_attrs), 0);
+
+ invalid_attrs = valid_attrs;
+ invalid_attrs.min_nr_regions = 1;
+ KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &invalid_attrs), -EINVAL);
+
+ invalid_attrs = valid_attrs;
+ invalid_attrs.max_nr_regions = 9;
+ KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &invalid_attrs), -EINVAL);
+
+ invalid_attrs = valid_attrs;
+ invalid_attrs.aggr_interval = 4999;
+ KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &invalid_attrs), -EINVAL);
+}
+
+static void damos_test_new_filter(struct kunit *test)
+{
+ struct damos_filter *filter;
+
+ filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true);
+ KUNIT_EXPECT_EQ(test, filter->type, DAMOS_FILTER_TYPE_ANON);
+ KUNIT_EXPECT_EQ(test, filter->matching, true);
+ KUNIT_EXPECT_PTR_EQ(test, filter->list.prev, &filter->list);
+ KUNIT_EXPECT_PTR_EQ(test, filter->list.next, &filter->list);
+ damos_destroy_filter(filter);
+}
+
+static void damos_test_filter_out(struct kunit *test)
+{
+ struct damon_target *t;
+ struct damon_region *r, *r2;
+ struct damos_filter *f;
+
+ f = damos_new_filter(DAMOS_FILTER_TYPE_ADDR, true);
+ f->addr_range = (struct damon_addr_range){
+ .start = DAMON_MIN_REGION * 2, .end = DAMON_MIN_REGION * 6};
+
+ t = damon_new_target();
+ r = damon_new_region(DAMON_MIN_REGION * 3, DAMON_MIN_REGION * 5);
+ damon_add_region(r, t);
+
+ /* region in the range */
+ KUNIT_EXPECT_TRUE(test, __damos_filter_out(NULL, t, r, f));
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1);
+
+ /* region before the range */
+ r->ar.start = DAMON_MIN_REGION * 1;
+ r->ar.end = DAMON_MIN_REGION * 2;
+ KUNIT_EXPECT_FALSE(test, __damos_filter_out(NULL, t, r, f));
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1);
+
+ /* region after the range */
+ r->ar.start = DAMON_MIN_REGION * 6;
+ r->ar.end = DAMON_MIN_REGION * 8;
+ KUNIT_EXPECT_FALSE(test, __damos_filter_out(NULL, t, r, f));
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1);
+
+ /* region started before the range */
+ r->ar.start = DAMON_MIN_REGION * 1;
+ r->ar.end = DAMON_MIN_REGION * 4;
+ KUNIT_EXPECT_FALSE(test, __damos_filter_out(NULL, t, r, f));
+ /* filter should have split the region */
+ KUNIT_EXPECT_EQ(test, r->ar.start, DAMON_MIN_REGION * 1);
+ KUNIT_EXPECT_EQ(test, r->ar.end, DAMON_MIN_REGION * 2);
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 2);
+ r2 = damon_next_region(r);
+ KUNIT_EXPECT_EQ(test, r2->ar.start, DAMON_MIN_REGION * 2);
+ KUNIT_EXPECT_EQ(test, r2->ar.end, DAMON_MIN_REGION * 4);
+ damon_destroy_region(r2, t);
+
+ /* region started in the range */
+ r->ar.start = DAMON_MIN_REGION * 2;
+ r->ar.end = DAMON_MIN_REGION * 8;
+ KUNIT_EXPECT_TRUE(test, __damos_filter_out(NULL, t, r, f));
+ /* filter should have split the region */
+ KUNIT_EXPECT_EQ(test, r->ar.start, DAMON_MIN_REGION * 2);
+ KUNIT_EXPECT_EQ(test, r->ar.end, DAMON_MIN_REGION * 6);
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 2);
+ r2 = damon_next_region(r);
+ KUNIT_EXPECT_EQ(test, r2->ar.start, DAMON_MIN_REGION * 6);
+ KUNIT_EXPECT_EQ(test, r2->ar.end, DAMON_MIN_REGION * 8);
+ damon_destroy_region(r2, t);
+
+ damon_free_target(t);
+ damos_free_filter(f);
+}
+
+static struct kunit_case damon_test_cases[] = {
+ KUNIT_CASE(damon_test_target),
+ KUNIT_CASE(damon_test_regions),
+ KUNIT_CASE(damon_test_aggregate),
+ KUNIT_CASE(damon_test_split_at),
+ KUNIT_CASE(damon_test_merge_two),
+ KUNIT_CASE(damon_test_merge_regions_of),
+ KUNIT_CASE(damon_test_split_regions_of),
+ KUNIT_CASE(damon_test_ops_registration),
+ KUNIT_CASE(damon_test_set_regions),
+ KUNIT_CASE(damon_test_update_monitoring_result),
+ KUNIT_CASE(damon_test_set_attrs),
+ KUNIT_CASE(damos_test_new_filter),
+ KUNIT_CASE(damos_test_filter_out),
+ {},
+};
+
+static struct kunit_suite damon_test_suite = {
+ .name = "damon",
+ .test_cases = damon_test_cases,
+};
+kunit_test_suite(damon_test_suite);
+
+#endif /* _DAMON_CORE_TEST_H */
+
+#endif /* CONFIG_DAMON_KUNIT_TEST */
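The cases above all follow the same pattern: build a target and its regions with the core API (damon_new_target(), damon_new_region(), damon_add_region()), call the function under test, then check the resulting region list with KUnit expectations. A minimal sketch of an additional case in that style (illustrative only, not part of this patch; running it would also need a KUNIT_CASE() entry in damon_test_cases[]):

	/* Illustrative sketch only -- not part of the patch above. */
	static void damon_test_split_at_half(struct kunit *test)
	{
		struct damon_target *t = damon_new_target();
		struct damon_region *r = damon_new_region(0, 100);

		damon_add_region(r, t);
		/* split [0, 100) at offset 50 into [0, 50) and [50, 100) */
		damon_split_region_at(t, r, 50);
		KUNIT_EXPECT_EQ(test, 2u, damon_nr_regions(t));
		KUNIT_EXPECT_EQ(test, r->ar.end, 50ul);
		KUNIT_EXPECT_EQ(test, damon_next_region(r)->ar.start, 50ul);

		damon_free_target(t);
	}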
diff --git a/mm/damon/core.c b/mm/damon/core.c
new file mode 100644
index 000000000..aff611b6e
--- /dev/null
+++ b/mm/damon/core.c
@@ -0,0 +1,1531 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Data Access Monitor
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#define pr_fmt(fmt) "damon: " fmt
+
+#include <linux/damon.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/damon.h>
+
+#ifdef CONFIG_DAMON_KUNIT_TEST
+#undef DAMON_MIN_REGION
+#define DAMON_MIN_REGION 1
+#endif
+
+static DEFINE_MUTEX(damon_lock);
+static int nr_running_ctxs;
+static bool running_exclusive_ctxs;
+
+static DEFINE_MUTEX(damon_ops_lock);
+static struct damon_operations damon_registered_ops[NR_DAMON_OPS];
+
+static struct kmem_cache *damon_region_cache __ro_after_init;
+
+/* Should be called under damon_ops_lock with id smaller than NR_DAMON_OPS */
+static bool __damon_is_registered_ops(enum damon_ops_id id)
+{
+ struct damon_operations empty_ops = {};
+
+ if (!memcmp(&empty_ops, &damon_registered_ops[id], sizeof(empty_ops)))
+ return false;
+ return true;
+}
+
+/**
+ * damon_is_registered_ops() - Check if a given damon_operations is registered.
+ * @id: Id of the damon_operations to check if registered.
+ *
+ * Return: true if the ops is set, false otherwise.
+ */
+bool damon_is_registered_ops(enum damon_ops_id id)
+{
+ bool registered;
+
+ if (id >= NR_DAMON_OPS)
+ return false;
+ mutex_lock(&damon_ops_lock);
+ registered = __damon_is_registered_ops(id);
+ mutex_unlock(&damon_ops_lock);
+ return registered;
+}
+
+/**
+ * damon_register_ops() - Register a monitoring operations set to DAMON.
+ * @ops: monitoring operations set to register.
+ *
+ * This function registers a monitoring operations set having a valid &struct
+ * damon_operations->id so that others can find and use it later.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_register_ops(struct damon_operations *ops)
+{
+ int err = 0;
+
+ if (ops->id >= NR_DAMON_OPS)
+ return -EINVAL;
+ mutex_lock(&damon_ops_lock);
+ /* Fail for already registered ops */
+ if (__damon_is_registered_ops(ops->id)) {
+ err = -EINVAL;
+ goto out;
+ }
+ damon_registered_ops[ops->id] = *ops;
+out:
+ mutex_unlock(&damon_ops_lock);
+ return err;
+}
+
+/**
+ * damon_select_ops() - Select a monitoring operations to use with the context.
+ * @ctx: monitoring context to use the operations.
+ * @id: id of the registered monitoring operations to select.
+ *
+ * This function finds the registered monitoring operations set of @id and
+ * makes @ctx use it.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id)
+{
+ int err = 0;
+
+ if (id >= NR_DAMON_OPS)
+ return -EINVAL;
+
+ mutex_lock(&damon_ops_lock);
+ if (!__damon_is_registered_ops(id))
+ err = -EINVAL;
+ else
+ ctx->ops = damon_registered_ops[id];
+ mutex_unlock(&damon_ops_lock);
+ return err;
+}
+
+/*
+ * Construct a damon_region struct
+ *
+ * Returns the pointer to the new struct if success, or NULL otherwise
+ */
+struct damon_region *damon_new_region(unsigned long start, unsigned long end)
+{
+ struct damon_region *region;
+
+ region = kmem_cache_alloc(damon_region_cache, GFP_KERNEL);
+ if (!region)
+ return NULL;
+
+ region->ar.start = start;
+ region->ar.end = end;
+ region->nr_accesses = 0;
+ INIT_LIST_HEAD(&region->list);
+
+ region->age = 0;
+ region->last_nr_accesses = 0;
+
+ return region;
+}
+
+void damon_add_region(struct damon_region *r, struct damon_target *t)
+{
+ list_add_tail(&r->list, &t->regions_list);
+ t->nr_regions++;
+}
+
+static void damon_del_region(struct damon_region *r, struct damon_target *t)
+{
+ list_del(&r->list);
+ t->nr_regions--;
+}
+
+static void damon_free_region(struct damon_region *r)
+{
+ kmem_cache_free(damon_region_cache, r);
+}
+
+void damon_destroy_region(struct damon_region *r, struct damon_target *t)
+{
+ damon_del_region(r, t);
+ damon_free_region(r);
+}
+
+/*
+ * Check whether a region is intersecting an address range
+ *
+ * Returns true if it is.
+ */
+static bool damon_intersect(struct damon_region *r,
+ struct damon_addr_range *re)
+{
+ return !(r->ar.end <= re->start || re->end <= r->ar.start);
+}
+
+/*
+ * Fill holes in regions with new regions.
+ */
+static int damon_fill_regions_holes(struct damon_region *first,
+ struct damon_region *last, struct damon_target *t)
+{
+ struct damon_region *r = first;
+
+ damon_for_each_region_from(r, t) {
+ struct damon_region *next, *newr;
+
+ if (r == last)
+ break;
+ next = damon_next_region(r);
+ if (r->ar.end != next->ar.start) {
+ newr = damon_new_region(r->ar.end, next->ar.start);
+ if (!newr)
+ return -ENOMEM;
+ damon_insert_region(newr, r, next, t);
+ }
+ }
+ return 0;
+}
+
+/*
+ * damon_set_regions() - Set regions of a target for given address ranges.
+ * @t: the given target.
+ * @ranges: array of new monitoring target ranges.
+ * @nr_ranges: length of @ranges.
+ *
+ * This function adds new regions to, or modifies existing regions of, a
+ * monitoring target so that they fit in the given ranges.
+ *
+ * Return: 0 if success, or negative error code otherwise.
+ */
+int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
+ unsigned int nr_ranges)
+{
+ struct damon_region *r, *next;
+ unsigned int i;
+ int err;
+
+ /* Remove regions which are not in the new ranges */
+ damon_for_each_region_safe(r, next, t) {
+ for (i = 0; i < nr_ranges; i++) {
+ if (damon_intersect(r, &ranges[i]))
+ break;
+ }
+ if (i == nr_ranges)
+ damon_destroy_region(r, t);
+ }
+
+ r = damon_first_region(t);
+ /* Add new regions or resize existing regions to fit in the ranges */
+ for (i = 0; i < nr_ranges; i++) {
+ struct damon_region *first = NULL, *last, *newr;
+ struct damon_addr_range *range;
+
+ range = &ranges[i];
+ /* Get the first/last regions intersecting with the range */
+ damon_for_each_region_from(r, t) {
+ if (damon_intersect(r, range)) {
+ if (!first)
+ first = r;
+ last = r;
+ }
+ if (r->ar.start >= range->end)
+ break;
+ }
+ if (!first) {
+ /* no region intersects with this range */
+ newr = damon_new_region(
+ ALIGN_DOWN(range->start,
+ DAMON_MIN_REGION),
+ ALIGN(range->end, DAMON_MIN_REGION));
+ if (!newr)
+ return -ENOMEM;
+ damon_insert_region(newr, damon_prev_region(r), r, t);
+ } else {
+ /* resize intersecting regions to fit in this range */
+ first->ar.start = ALIGN_DOWN(range->start,
+ DAMON_MIN_REGION);
+ last->ar.end = ALIGN(range->end, DAMON_MIN_REGION);
+
+ /* fill possible holes in the range */
+ err = damon_fill_regions_holes(first, last, t);
+ if (err)
+ return err;
+ }
+ }
+ return 0;
+}
+
+struct damos_filter *damos_new_filter(enum damos_filter_type type,
+ bool matching)
+{
+ struct damos_filter *filter;
+
+ filter = kmalloc(sizeof(*filter), GFP_KERNEL);
+ if (!filter)
+ return NULL;
+ filter->type = type;
+ filter->matching = matching;
+ INIT_LIST_HEAD(&filter->list);
+ return filter;
+}
+
+void damos_add_filter(struct damos *s, struct damos_filter *f)
+{
+ list_add_tail(&f->list, &s->filters);
+}
+
+static void damos_del_filter(struct damos_filter *f)
+{
+ list_del(&f->list);
+}
+
+static void damos_free_filter(struct damos_filter *f)
+{
+ kfree(f);
+}
+
+void damos_destroy_filter(struct damos_filter *f)
+{
+ damos_del_filter(f);
+ damos_free_filter(f);
+}
+
+/* initialize private fields of damos_quota and return the pointer */
+static struct damos_quota *damos_quota_init_priv(struct damos_quota *quota)
+{
+ quota->total_charged_sz = 0;
+ quota->total_charged_ns = 0;
+ quota->esz = 0;
+ quota->charged_sz = 0;
+ quota->charged_from = 0;
+ quota->charge_target_from = NULL;
+ quota->charge_addr_from = 0;
+ return quota;
+}
+
+struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
+ enum damos_action action, struct damos_quota *quota,
+ struct damos_watermarks *wmarks)
+{
+ struct damos *scheme;
+
+ scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
+ if (!scheme)
+ return NULL;
+ scheme->pattern = *pattern;
+ scheme->action = action;
+ INIT_LIST_HEAD(&scheme->filters);
+ scheme->stat = (struct damos_stat){};
+ INIT_LIST_HEAD(&scheme->list);
+
+ scheme->quota = *(damos_quota_init_priv(quota));
+
+ scheme->wmarks = *wmarks;
+ scheme->wmarks.activated = true;
+
+ return scheme;
+}
+
+void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
+{
+ list_add_tail(&s->list, &ctx->schemes);
+}
+
+static void damon_del_scheme(struct damos *s)
+{
+ list_del(&s->list);
+}
+
+static void damon_free_scheme(struct damos *s)
+{
+ kfree(s);
+}
+
+void damon_destroy_scheme(struct damos *s)
+{
+ struct damos_filter *f, *next;
+
+ damos_for_each_filter_safe(f, next, s)
+ damos_destroy_filter(f);
+ damon_del_scheme(s);
+ damon_free_scheme(s);
+}
+
+/*
+ * Construct a damon_target struct
+ *
+ * Returns the pointer to the new struct if success, or NULL otherwise
+ */
+struct damon_target *damon_new_target(void)
+{
+ struct damon_target *t;
+
+ t = kmalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+ return NULL;
+
+ t->pid = NULL;
+ t->nr_regions = 0;
+ INIT_LIST_HEAD(&t->regions_list);
+ INIT_LIST_HEAD(&t->list);
+
+ return t;
+}
+
+void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
+{
+ list_add_tail(&t->list, &ctx->adaptive_targets);
+}
+
+bool damon_targets_empty(struct damon_ctx *ctx)
+{
+ return list_empty(&ctx->adaptive_targets);
+}
+
+static void damon_del_target(struct damon_target *t)
+{
+ list_del(&t->list);
+}
+
+void damon_free_target(struct damon_target *t)
+{
+ struct damon_region *r, *next;
+
+ damon_for_each_region_safe(r, next, t)
+ damon_free_region(r);
+ kfree(t);
+}
+
+void damon_destroy_target(struct damon_target *t)
+{
+ damon_del_target(t);
+ damon_free_target(t);
+}
+
+unsigned int damon_nr_regions(struct damon_target *t)
+{
+ return t->nr_regions;
+}
+
+struct damon_ctx *damon_new_ctx(void)
+{
+ struct damon_ctx *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return NULL;
+
+ init_completion(&ctx->kdamond_started);
+
+ ctx->attrs.sample_interval = 5 * 1000;
+ ctx->attrs.aggr_interval = 100 * 1000;
+ ctx->attrs.ops_update_interval = 60 * 1000 * 1000;
+
+ ctx->passed_sample_intervals = 0;
+ /* These will be set from kdamond_init_intervals_sis() */
+ ctx->next_aggregation_sis = 0;
+ ctx->next_ops_update_sis = 0;
+
+ mutex_init(&ctx->kdamond_lock);
+
+ ctx->attrs.min_nr_regions = 10;
+ ctx->attrs.max_nr_regions = 1000;
+
+ INIT_LIST_HEAD(&ctx->adaptive_targets);
+ INIT_LIST_HEAD(&ctx->schemes);
+
+ return ctx;
+}
+
+static void damon_destroy_targets(struct damon_ctx *ctx)
+{
+ struct damon_target *t, *next_t;
+
+ if (ctx->ops.cleanup) {
+ ctx->ops.cleanup(ctx);
+ return;
+ }
+
+ damon_for_each_target_safe(t, next_t, ctx)
+ damon_destroy_target(t);
+}
+
+void damon_destroy_ctx(struct damon_ctx *ctx)
+{
+ struct damos *s, *next_s;
+
+ damon_destroy_targets(ctx);
+
+ damon_for_each_scheme_safe(s, next_s, ctx)
+ damon_destroy_scheme(s);
+
+ kfree(ctx);
+}
+
+static unsigned int damon_age_for_new_attrs(unsigned int age,
+ struct damon_attrs *old_attrs, struct damon_attrs *new_attrs)
+{
+ return age * old_attrs->aggr_interval / new_attrs->aggr_interval;
+}
+
+/* convert access ratio in bp (per 10,000) to nr_accesses */
+static unsigned int damon_accesses_bp_to_nr_accesses(
+ unsigned int accesses_bp, struct damon_attrs *attrs)
+{
+ return accesses_bp * damon_max_nr_accesses(attrs) / 10000;
+}
+
+/* convert nr_accesses to access ratio in bp (per 10,000) */
+static unsigned int damon_nr_accesses_to_accesses_bp(
+ unsigned int nr_accesses, struct damon_attrs *attrs)
+{
+ return nr_accesses * 10000 / damon_max_nr_accesses(attrs);
+}
+
+static unsigned int damon_nr_accesses_for_new_attrs(unsigned int nr_accesses,
+ struct damon_attrs *old_attrs, struct damon_attrs *new_attrs)
+{
+ return damon_accesses_bp_to_nr_accesses(
+ damon_nr_accesses_to_accesses_bp(
+ nr_accesses, old_attrs),
+ new_attrs);
+}
+
+static void damon_update_monitoring_result(struct damon_region *r,
+ struct damon_attrs *old_attrs, struct damon_attrs *new_attrs)
+{
+ r->nr_accesses = damon_nr_accesses_for_new_attrs(r->nr_accesses,
+ old_attrs, new_attrs);
+ r->age = damon_age_for_new_attrs(r->age, old_attrs, new_attrs);
+}
+
+/*
+ * region->nr_accesses is the number of sampling intervals in the last
+ * aggregation interval in which an access to the region was found, and
+ * region->age is the number of aggregation intervals for which its access
+ * pattern has been maintained. For this reason, the real meaning of the two
+ * fields depends on the current sampling and aggregation intervals. This
+ * function updates ->nr_accesses and ->age of the given damon_ctx's regions
+ * for the new damon_attrs.
+ */
+static void damon_update_monitoring_results(struct damon_ctx *ctx,
+ struct damon_attrs *new_attrs)
+{
+ struct damon_attrs *old_attrs = &ctx->attrs;
+ struct damon_target *t;
+ struct damon_region *r;
+
+ /* if any interval is zero, simply skip the conversion */
+ if (!old_attrs->sample_interval || !old_attrs->aggr_interval ||
+ !new_attrs->sample_interval ||
+ !new_attrs->aggr_interval)
+ return;
+
+ damon_for_each_target(t, ctx)
+ damon_for_each_region(r, t)
+ damon_update_monitoring_result(
+ r, old_attrs, new_attrs);
+}
+
+/**
+ * damon_set_attrs() - Set attributes for the monitoring.
+ * @ctx: monitoring context
+ * @attrs: monitoring attributes
+ *
+ * This function should not be called while the kdamond is running.
+ * Every time interval is in micro-seconds.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs)
+{
+ unsigned long sample_interval = attrs->sample_interval ?
+ attrs->sample_interval : 1;
+
+ if (attrs->min_nr_regions < 3)
+ return -EINVAL;
+ if (attrs->min_nr_regions > attrs->max_nr_regions)
+ return -EINVAL;
+ if (attrs->sample_interval > attrs->aggr_interval)
+ return -EINVAL;
+
+ ctx->next_aggregation_sis = ctx->passed_sample_intervals +
+ attrs->aggr_interval / sample_interval;
+ ctx->next_ops_update_sis = ctx->passed_sample_intervals +
+ attrs->ops_update_interval / sample_interval;
+
+ damon_update_monitoring_results(ctx, attrs);
+ ctx->attrs = *attrs;
+ return 0;
+}
+
+/**
+ * damon_set_schemes() - Set data access monitoring based operation schemes.
+ * @ctx: monitoring context
+ * @schemes: array of the schemes
+ * @nr_schemes: number of entries in @schemes
+ *
+ * This function should not be called while the kdamond of the context is
+ * running.
+ */
+void damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
+ ssize_t nr_schemes)
+{
+ struct damos *s, *next;
+ ssize_t i;
+
+ damon_for_each_scheme_safe(s, next, ctx)
+ damon_destroy_scheme(s);
+ for (i = 0; i < nr_schemes; i++)
+ damon_add_scheme(ctx, schemes[i]);
+}
+
+/**
+ * damon_nr_running_ctxs() - Return number of currently running contexts.
+ */
+int damon_nr_running_ctxs(void)
+{
+ int nr_ctxs;
+
+ mutex_lock(&damon_lock);
+ nr_ctxs = nr_running_ctxs;
+ mutex_unlock(&damon_lock);
+
+ return nr_ctxs;
+}
+
+/* Returns the size upper limit for each monitoring region */
+static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ struct damon_region *r;
+ unsigned long sz = 0;
+
+ damon_for_each_target(t, ctx) {
+ damon_for_each_region(r, t)
+ sz += damon_sz_region(r);
+ }
+
+ if (ctx->attrs.min_nr_regions)
+ sz /= ctx->attrs.min_nr_regions;
+ if (sz < DAMON_MIN_REGION)
+ sz = DAMON_MIN_REGION;
+
+ return sz;
+}
+
+static int kdamond_fn(void *data);
+
+/*
+ * __damon_start() - Starts monitoring with given context.
+ * @ctx: monitoring context
+ *
+ * This function should be called while damon_lock is held.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int __damon_start(struct damon_ctx *ctx)
+{
+ int err = -EBUSY;
+
+ mutex_lock(&ctx->kdamond_lock);
+ if (!ctx->kdamond) {
+ err = 0;
+ reinit_completion(&ctx->kdamond_started);
+ ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
+ nr_running_ctxs);
+ if (IS_ERR(ctx->kdamond)) {
+ err = PTR_ERR(ctx->kdamond);
+ ctx->kdamond = NULL;
+ } else {
+ wait_for_completion(&ctx->kdamond_started);
+ }
+ }
+ mutex_unlock(&ctx->kdamond_lock);
+
+ return err;
+}
+
+/**
+ * damon_start() - Starts monitoring for a given group of contexts.
+ * @ctxs: an array of the pointers for contexts to start monitoring
+ * @nr_ctxs: size of @ctxs
+ * @exclusive: exclusiveness of this contexts group
+ *
+ * This function starts a group of monitoring threads for a group of monitoring
+ * contexts. One thread per context is created and run in parallel. The
+ * caller should handle synchronization between the threads by itself. If
+ * @exclusive is true and a group of threads created by another
+ * 'damon_start()' call is currently running, this function does nothing but
+ * returns -EBUSY.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive)
+{
+ int i;
+ int err = 0;
+
+ mutex_lock(&damon_lock);
+ if ((exclusive && nr_running_ctxs) ||
+ (!exclusive && running_exclusive_ctxs)) {
+ mutex_unlock(&damon_lock);
+ return -EBUSY;
+ }
+
+ for (i = 0; i < nr_ctxs; i++) {
+ err = __damon_start(ctxs[i]);
+ if (err)
+ break;
+ nr_running_ctxs++;
+ }
+ if (exclusive && nr_running_ctxs)
+ running_exclusive_ctxs = true;
+ mutex_unlock(&damon_lock);
+
+ return err;
+}
+
+/*
+ * __damon_stop() - Stops monitoring of a given context.
+ * @ctx: monitoring context
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int __damon_stop(struct damon_ctx *ctx)
+{
+ struct task_struct *tsk;
+
+ mutex_lock(&ctx->kdamond_lock);
+ tsk = ctx->kdamond;
+ if (tsk) {
+ get_task_struct(tsk);
+ mutex_unlock(&ctx->kdamond_lock);
+ kthread_stop(tsk);
+ put_task_struct(tsk);
+ return 0;
+ }
+ mutex_unlock(&ctx->kdamond_lock);
+
+ return -EPERM;
+}
+
+/**
+ * damon_stop() - Stops monitoring for a given group of contexts.
+ * @ctxs: an array of the pointers for contexts to stop monitoring
+ * @nr_ctxs: size of @ctxs
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
+{
+ int i, err = 0;
+
+ for (i = 0; i < nr_ctxs; i++) {
+ /* nr_running_ctxs is decremented in kdamond_fn */
+ err = __damon_stop(ctxs[i]);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+/*
+ * Reset the aggregated monitoring results ('nr_accesses' of each region).
+ */
+static void kdamond_reset_aggregated(struct damon_ctx *c)
+{
+ struct damon_target *t;
+ unsigned int ti = 0; /* target's index */
+
+ damon_for_each_target(t, c) {
+ struct damon_region *r;
+
+ damon_for_each_region(r, t) {
+ trace_damon_aggregated(t, ti, r, damon_nr_regions(t));
+ r->last_nr_accesses = r->nr_accesses;
+ r->nr_accesses = 0;
+ }
+ ti++;
+ }
+}
+
+static void damon_split_region_at(struct damon_target *t,
+ struct damon_region *r, unsigned long sz_r);
+
+static bool __damos_valid_target(struct damon_region *r, struct damos *s)
+{
+ unsigned long sz;
+
+ sz = damon_sz_region(r);
+ return s->pattern.min_sz_region <= sz &&
+ sz <= s->pattern.max_sz_region &&
+ s->pattern.min_nr_accesses <= r->nr_accesses &&
+ r->nr_accesses <= s->pattern.max_nr_accesses &&
+ s->pattern.min_age_region <= r->age &&
+ r->age <= s->pattern.max_age_region;
+}
+
+static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
+ struct damon_region *r, struct damos *s)
+{
+ bool ret = __damos_valid_target(r, s);
+
+ if (!ret || !s->quota.esz || !c->ops.get_scheme_score)
+ return ret;
+
+ return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score;
+}
+
+/*
+ * damos_skip_charged_region() - Check if the given region or starting part of
+ * it is already charged for the DAMOS quota.
+ * @t: The target of the region.
+ * @rp: The pointer to the region.
+ * @s: The scheme to be applied.
+ *
+ * If the quota of a scheme has been exceeded in a quota charge window, the
+ * scheme's action would be applied to only a part of the regions fulfilling
+ * the target access pattern. To avoid applying the scheme action only to
+ * already-applied regions, DAMON skips applying the scheme action to the
+ * regions that were charged in the previous charge window.
+ *
+ * This function checks if a given region should be skipped for that reason.
+ * If only the starting part of the region has previously been charged, this
+ * function splits the region into two so that the second one covers the area
+ * that was not charged in the previous charge window, saves the second region
+ * in *rp, and returns false, so that the caller can apply the DAMON action to
+ * the second one.
+ *
+ * Return: true if the region should be entirely skipped, false otherwise.
+ */
+static bool damos_skip_charged_region(struct damon_target *t,
+ struct damon_region **rp, struct damos *s)
+{
+ struct damon_region *r = *rp;
+ struct damos_quota *quota = &s->quota;
+ unsigned long sz_to_skip;
+
+ /* Skip previously charged regions */
+ if (quota->charge_target_from) {
+ if (t != quota->charge_target_from)
+ return true;
+ if (r == damon_last_region(t)) {
+ quota->charge_target_from = NULL;
+ quota->charge_addr_from = 0;
+ return true;
+ }
+ if (quota->charge_addr_from &&
+ r->ar.end <= quota->charge_addr_from)
+ return true;
+
+ if (quota->charge_addr_from && r->ar.start <
+ quota->charge_addr_from) {
+ sz_to_skip = ALIGN_DOWN(quota->charge_addr_from -
+ r->ar.start, DAMON_MIN_REGION);
+ if (!sz_to_skip) {
+ if (damon_sz_region(r) <= DAMON_MIN_REGION)
+ return true;
+ sz_to_skip = DAMON_MIN_REGION;
+ }
+ damon_split_region_at(t, r, sz_to_skip);
+ r = damon_next_region(r);
+ *rp = r;
+ }
+ quota->charge_target_from = NULL;
+ quota->charge_addr_from = 0;
+ }
+ return false;
+}
+
+static void damos_update_stat(struct damos *s,
+ unsigned long sz_tried, unsigned long sz_applied)
+{
+ s->stat.nr_tried++;
+ s->stat.sz_tried += sz_tried;
+ if (sz_applied)
+ s->stat.nr_applied++;
+ s->stat.sz_applied += sz_applied;
+}
+
+static bool __damos_filter_out(struct damon_ctx *ctx, struct damon_target *t,
+ struct damon_region *r, struct damos_filter *filter)
+{
+ bool matched = false;
+ struct damon_target *ti;
+ int target_idx = 0;
+ unsigned long start, end;
+
+ switch (filter->type) {
+ case DAMOS_FILTER_TYPE_TARGET:
+ damon_for_each_target(ti, ctx) {
+ if (ti == t)
+ break;
+ target_idx++;
+ }
+ matched = target_idx == filter->target_idx;
+ break;
+ case DAMOS_FILTER_TYPE_ADDR:
+ start = ALIGN_DOWN(filter->addr_range.start, DAMON_MIN_REGION);
+ end = ALIGN_DOWN(filter->addr_range.end, DAMON_MIN_REGION);
+
+ /* inside the range */
+ if (start <= r->ar.start && r->ar.end <= end) {
+ matched = true;
+ break;
+ }
+ /* outside of the range */
+ if (r->ar.end <= start || end <= r->ar.start) {
+ matched = false;
+ break;
+ }
+ /* start before the range and overlap */
+ if (r->ar.start < start) {
+ damon_split_region_at(t, r, start - r->ar.start);
+ matched = false;
+ break;
+ }
+ /* start inside the range */
+ damon_split_region_at(t, r, end - r->ar.start);
+ matched = true;
+ break;
+ default:
+ return false;
+ }
+
+ return matched == filter->matching;
+}
+
+static bool damos_filter_out(struct damon_ctx *ctx, struct damon_target *t,
+ struct damon_region *r, struct damos *s)
+{
+ struct damos_filter *filter;
+
+ damos_for_each_filter(filter, s) {
+ if (__damos_filter_out(ctx, t, r, filter))
+ return true;
+ }
+ return false;
+}
+
+static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
+ struct damon_region *r, struct damos *s)
+{
+ struct damos_quota *quota = &s->quota;
+ unsigned long sz = damon_sz_region(r);
+ struct timespec64 begin, end;
+ unsigned long sz_applied = 0;
+ int err = 0;
+
+ if (c->ops.apply_scheme) {
+ if (quota->esz && quota->charged_sz + sz > quota->esz) {
+ sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
+ DAMON_MIN_REGION);
+ if (!sz)
+ goto update_stat;
+ damon_split_region_at(t, r, sz);
+ }
+ if (damos_filter_out(c, t, r, s))
+ return;
+ ktime_get_coarse_ts64(&begin);
+ if (c->callback.before_damos_apply)
+ err = c->callback.before_damos_apply(c, t, r, s);
+ if (!err)
+ sz_applied = c->ops.apply_scheme(c, t, r, s);
+ ktime_get_coarse_ts64(&end);
+ quota->total_charged_ns += timespec64_to_ns(&end) -
+ timespec64_to_ns(&begin);
+ quota->charged_sz += sz;
+ if (quota->esz && quota->charged_sz >= quota->esz) {
+ quota->charge_target_from = t;
+ quota->charge_addr_from = r->ar.end + 1;
+ }
+ }
+ if (s->action != DAMOS_STAT)
+ r->age = 0;
+
+update_stat:
+ damos_update_stat(s, sz, sz_applied);
+}
+
+static void damon_do_apply_schemes(struct damon_ctx *c,
+ struct damon_target *t,
+ struct damon_region *r)
+{
+ struct damos *s;
+
+ damon_for_each_scheme(s, c) {
+ struct damos_quota *quota = &s->quota;
+
+ if (!s->wmarks.activated)
+ continue;
+
+ /* Check the quota */
+ if (quota->esz && quota->charged_sz >= quota->esz)
+ continue;
+
+ if (damos_skip_charged_region(t, &r, s))
+ continue;
+
+ if (!damos_valid_target(c, t, r, s))
+ continue;
+
+ damos_apply_scheme(c, t, r, s);
+ }
+}
+
+/* Shouldn't be called if quota->ms and quota->sz are zero */
+static void damos_set_effective_quota(struct damos_quota *quota)
+{
+ unsigned long throughput;
+ unsigned long esz;
+
+ if (!quota->ms) {
+ quota->esz = quota->sz;
+ return;
+ }
+
+ if (quota->total_charged_ns)
+ throughput = quota->total_charged_sz * 1000000 /
+ quota->total_charged_ns;
+ else
+ throughput = PAGE_SIZE * 1024;
+ esz = throughput * quota->ms;
+
+ if (quota->sz && quota->sz < esz)
+ esz = quota->sz;
+ quota->esz = esz;
+}
+
+static void damos_adjust_quota(struct damon_ctx *c, struct damos *s)
+{
+ struct damos_quota *quota = &s->quota;
+ struct damon_target *t;
+ struct damon_region *r;
+ unsigned long cumulated_sz;
+ unsigned int score, max_score = 0;
+
+ if (!quota->ms && !quota->sz)
+ return;
+
+ /* New charge window starts */
+ if (time_after_eq(jiffies, quota->charged_from +
+ msecs_to_jiffies(quota->reset_interval))) {
+ if (quota->esz && quota->charged_sz >= quota->esz)
+ s->stat.qt_exceeds++;
+ quota->total_charged_sz += quota->charged_sz;
+ quota->charged_from = jiffies;
+ quota->charged_sz = 0;
+ damos_set_effective_quota(quota);
+ }
+
+ if (!c->ops.get_scheme_score)
+ return;
+
+ /* Fill up the score histogram */
+ memset(quota->histogram, 0, sizeof(quota->histogram));
+ damon_for_each_target(t, c) {
+ damon_for_each_region(r, t) {
+ if (!__damos_valid_target(r, s))
+ continue;
+ score = c->ops.get_scheme_score(c, t, r, s);
+ quota->histogram[score] += damon_sz_region(r);
+ if (score > max_score)
+ max_score = score;
+ }
+ }
+
+ /* Set the min score limit */
+ for (cumulated_sz = 0, score = max_score; ; score--) {
+ cumulated_sz += quota->histogram[score];
+ if (cumulated_sz >= quota->esz || !score)
+ break;
+ }
+ quota->min_score = score;
+}
+
+static void kdamond_apply_schemes(struct damon_ctx *c)
+{
+ struct damon_target *t;
+ struct damon_region *r, *next_r;
+ struct damos *s;
+
+ damon_for_each_scheme(s, c) {
+ if (!s->wmarks.activated)
+ continue;
+
+ damos_adjust_quota(c, s);
+ }
+
+ damon_for_each_target(t, c) {
+ damon_for_each_region_safe(r, next_r, t)
+ damon_do_apply_schemes(c, t, r);
+ }
+}
+
+/*
+ * Merge two adjacent regions into one region
+ */
+static void damon_merge_two_regions(struct damon_target *t,
+ struct damon_region *l, struct damon_region *r)
+{
+ unsigned long sz_l = damon_sz_region(l), sz_r = damon_sz_region(r);
+
+ l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
+ (sz_l + sz_r);
+ l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
+ l->ar.end = r->ar.end;
+ damon_destroy_region(r, t);
+}
+
+/*
+ * Merge adjacent regions having similar access frequencies
+ *
+ * t target affected by this merge operation
+ * thres '->nr_accesses' diff threshold for the merge
+ * sz_limit size upper limit of each region
+ */
+static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
+ unsigned long sz_limit)
+{
+ struct damon_region *r, *prev = NULL, *next;
+
+ damon_for_each_region_safe(r, next, t) {
+ if (abs(r->nr_accesses - r->last_nr_accesses) > thres)
+ r->age = 0;
+ else
+ r->age++;
+
+ if (prev && prev->ar.end == r->ar.start &&
+ abs(prev->nr_accesses - r->nr_accesses) <= thres &&
+ damon_sz_region(prev) + damon_sz_region(r) <= sz_limit)
+ damon_merge_two_regions(t, prev, r);
+ else
+ prev = r;
+ }
+}
+
+/*
+ * Merge adjacent regions having similar access frequencies
+ *
+ * threshold '->nr_accesses' diff threshold for the merge
+ * sz_limit size upper limit of each region
+ *
+ * This function merges monitoring target regions which are adjacent and their
+ * access frequencies are similar. This is for minimizing the monitoring
+ * overhead under the dynamically changeable access pattern. If a merge was
+ * unnecessarily made, later 'kdamond_split_regions()' will revert it.
+ */
+static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
+ unsigned long sz_limit)
+{
+ struct damon_target *t;
+
+ damon_for_each_target(t, c)
+ damon_merge_regions_of(t, threshold, sz_limit);
+}
+
+/*
+ * Split a region in two
+ *
+ * r the region to be split
+ * sz_r size of the first sub-region that will be made
+ */
+static void damon_split_region_at(struct damon_target *t,
+ struct damon_region *r, unsigned long sz_r)
+{
+ struct damon_region *new;
+
+ new = damon_new_region(r->ar.start + sz_r, r->ar.end);
+ if (!new)
+ return;
+
+ r->ar.end = new->ar.start;
+
+ new->age = r->age;
+ new->last_nr_accesses = r->last_nr_accesses;
+
+ damon_insert_region(new, r, damon_next_region(r), t);
+}
+
+/* Split every region in the given target into 'nr_subs' regions */
+static void damon_split_regions_of(struct damon_target *t, int nr_subs)
+{
+ struct damon_region *r, *next;
+ unsigned long sz_region, sz_sub = 0;
+ int i;
+
+ damon_for_each_region_safe(r, next, t) {
+ sz_region = damon_sz_region(r);
+
+ for (i = 0; i < nr_subs - 1 &&
+ sz_region > 2 * DAMON_MIN_REGION; i++) {
+ /*
+ * Randomly select the size of the left sub-region to be
+ * at least 10% and at most 90% of the original region
+ */
+ sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
+ sz_region / 10, DAMON_MIN_REGION);
+ /* Do not allow blank region */
+ if (sz_sub == 0 || sz_sub >= sz_region)
+ continue;
+
+ damon_split_region_at(t, r, sz_sub);
+ sz_region = sz_sub;
+ }
+ }
+}
+
+/*
+ * Split every target region into randomly-sized small regions
+ *
+ * This function splits every target region into random-sized small regions if
+ * the current total number of regions is equal to or smaller than half of the
+ * user-specified maximum number of regions. This is for maximizing the
+ * monitoring accuracy under the dynamically changeable access patterns. If a
+ * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
+ * it.
+ */
+static void kdamond_split_regions(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ unsigned int nr_regions = 0;
+ static unsigned int last_nr_regions;
+ int nr_subregions = 2;
+
+ damon_for_each_target(t, ctx)
+ nr_regions += damon_nr_regions(t);
+
+ if (nr_regions > ctx->attrs.max_nr_regions / 2)
+ return;
+
+ /* Maybe the middle of the region has different access frequency */
+ if (last_nr_regions == nr_regions &&
+ nr_regions < ctx->attrs.max_nr_regions / 3)
+ nr_subregions = 3;
+
+ damon_for_each_target(t, ctx)
+ damon_split_regions_of(t, nr_subregions);
+
+ last_nr_regions = nr_regions;
+}
+
+/*
+ * Check whether current monitoring should be stopped
+ *
+ * The monitoring is stopped when either the user requested to stop, or all
+ * monitoring targets are invalid.
+ *
+ * Returns true if need to stop current monitoring.
+ */
+static bool kdamond_need_stop(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+
+ if (kthread_should_stop())
+ return true;
+
+ if (!ctx->ops.target_valid)
+ return false;
+
+ damon_for_each_target(t, ctx) {
+ if (ctx->ops.target_valid(t))
+ return false;
+ }
+
+ return true;
+}
+
+static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric)
+{
+ struct sysinfo i;
+
+ switch (metric) {
+ case DAMOS_WMARK_FREE_MEM_RATE:
+ si_meminfo(&i);
+ return i.freeram * 1000 / i.totalram;
+ default:
+ break;
+ }
+ return -EINVAL;
+}
+
+/*
+ * Returns zero if the scheme is active. Else, returns time to wait for next
+ * watermark check in micro-seconds.
+ */
+static unsigned long damos_wmark_wait_us(struct damos *scheme)
+{
+ unsigned long metric;
+
+ if (scheme->wmarks.metric == DAMOS_WMARK_NONE)
+ return 0;
+
+ metric = damos_wmark_metric_value(scheme->wmarks.metric);
+ /* higher than high watermark or lower than low watermark */
+ if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
+ if (scheme->wmarks.activated)
+ pr_debug("deactivate a scheme (%d) for %s wmark\n",
+ scheme->action,
+ metric > scheme->wmarks.high ?
+ "high" : "low");
+ scheme->wmarks.activated = false;
+ return scheme->wmarks.interval;
+ }
+
+ /* inactive and higher than middle watermark */
+ if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
+ !scheme->wmarks.activated)
+ return scheme->wmarks.interval;
+
+ if (!scheme->wmarks.activated)
+ pr_debug("activate a scheme (%d)\n", scheme->action);
+ scheme->wmarks.activated = true;
+ return 0;
+}
+
+static void kdamond_usleep(unsigned long usecs)
+{
+ /* See Documentation/timers/timers-howto.rst for the thresholds */
+ if (usecs > 20 * USEC_PER_MSEC)
+ schedule_timeout_idle(usecs_to_jiffies(usecs));
+ else
+ usleep_idle_range(usecs, usecs + 1);
+}
+
+/* Returns negative error code if it's not activated but should return */
+static int kdamond_wait_activation(struct damon_ctx *ctx)
+{
+ struct damos *s;
+ unsigned long wait_time;
+ unsigned long min_wait_time = 0;
+ bool init_wait_time = false;
+
+ while (!kdamond_need_stop(ctx)) {
+ damon_for_each_scheme(s, ctx) {
+ wait_time = damos_wmark_wait_us(s);
+ if (!init_wait_time || wait_time < min_wait_time) {
+ init_wait_time = true;
+ min_wait_time = wait_time;
+ }
+ }
+ if (!min_wait_time)
+ return 0;
+
+ kdamond_usleep(min_wait_time);
+
+ if (ctx->callback.after_wmarks_check &&
+ ctx->callback.after_wmarks_check(ctx))
+ break;
+ }
+ return -EBUSY;
+}
+
+static void kdamond_init_intervals_sis(struct damon_ctx *ctx)
+{
+ unsigned long sample_interval = ctx->attrs.sample_interval ?
+ ctx->attrs.sample_interval : 1;
+
+ ctx->passed_sample_intervals = 0;
+ ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval;
+ ctx->next_ops_update_sis = ctx->attrs.ops_update_interval /
+ sample_interval;
+}
+
+/*
+ * The monitoring daemon that runs as a kernel thread
+ */
+static int kdamond_fn(void *data)
+{
+ struct damon_ctx *ctx = data;
+ struct damon_target *t;
+ struct damon_region *r, *next;
+ unsigned int max_nr_accesses = 0;
+ unsigned long sz_limit = 0;
+
+ pr_debug("kdamond (%d) starts\n", current->pid);
+
+ complete(&ctx->kdamond_started);
+ kdamond_init_intervals_sis(ctx);
+
+ if (ctx->ops.init)
+ ctx->ops.init(ctx);
+ if (ctx->callback.before_start && ctx->callback.before_start(ctx))
+ goto done;
+
+ sz_limit = damon_region_sz_limit(ctx);
+
+ while (!kdamond_need_stop(ctx)) {
+ /*
+ * ctx->attrs and ctx->next_{aggregation,ops_update}_sis could
+ * be changed from after_wmarks_check() or after_aggregation()
+ * callbacks. Read the values here, and use those for this
+ * iteration. That is, damon_set_attrs() updated new values
+ * are respected from next iteration.
+ */
+ unsigned long next_aggregation_sis = ctx->next_aggregation_sis;
+ unsigned long next_ops_update_sis = ctx->next_ops_update_sis;
+ unsigned long sample_interval = ctx->attrs.sample_interval;
+
+ if (kdamond_wait_activation(ctx))
+ break;
+
+ if (ctx->ops.prepare_access_checks)
+ ctx->ops.prepare_access_checks(ctx);
+ if (ctx->callback.after_sampling &&
+ ctx->callback.after_sampling(ctx))
+ break;
+
+ kdamond_usleep(sample_interval);
+ ctx->passed_sample_intervals++;
+
+ if (ctx->ops.check_accesses)
+ max_nr_accesses = ctx->ops.check_accesses(ctx);
+
+ sample_interval = ctx->attrs.sample_interval ?
+ ctx->attrs.sample_interval : 1;
+ if (ctx->passed_sample_intervals == next_aggregation_sis) {
+ ctx->next_aggregation_sis = next_aggregation_sis +
+ ctx->attrs.aggr_interval / sample_interval;
+ kdamond_merge_regions(ctx,
+ max_nr_accesses / 10,
+ sz_limit);
+ if (ctx->callback.after_aggregation &&
+ ctx->callback.after_aggregation(ctx))
+ break;
+ if (!list_empty(&ctx->schemes))
+ kdamond_apply_schemes(ctx);
+ kdamond_reset_aggregated(ctx);
+ kdamond_split_regions(ctx);
+ if (ctx->ops.reset_aggregated)
+ ctx->ops.reset_aggregated(ctx);
+ }
+
+ if (ctx->passed_sample_intervals == next_ops_update_sis) {
+ ctx->next_ops_update_sis = next_ops_update_sis +
+ ctx->attrs.ops_update_interval /
+ sample_interval;
+ if (ctx->ops.update)
+ ctx->ops.update(ctx);
+ sz_limit = damon_region_sz_limit(ctx);
+ }
+ }
+done:
+ damon_for_each_target(t, ctx) {
+ damon_for_each_region_safe(r, next, t)
+ damon_destroy_region(r, t);
+ }
+
+ if (ctx->callback.before_terminate)
+ ctx->callback.before_terminate(ctx);
+ if (ctx->ops.cleanup)
+ ctx->ops.cleanup(ctx);
+
+ pr_debug("kdamond (%d) finishes\n", current->pid);
+ mutex_lock(&ctx->kdamond_lock);
+ ctx->kdamond = NULL;
+ mutex_unlock(&ctx->kdamond_lock);
+
+ mutex_lock(&damon_lock);
+ nr_running_ctxs--;
+ if (!nr_running_ctxs && running_exclusive_ctxs)
+ running_exclusive_ctxs = false;
+ mutex_unlock(&damon_lock);
+
+ return 0;
+}
+
+/*
+ * struct damon_system_ram_region - System RAM resource address region of
+ * [@start, @end).
+ * @start: Start address of the region (inclusive).
+ * @end: End address of the region (exclusive).
+ */
+struct damon_system_ram_region {
+ unsigned long start;
+ unsigned long end;
+};
+
+static int walk_system_ram(struct resource *res, void *arg)
+{
+ struct damon_system_ram_region *a = arg;
+
+ if (a->end - a->start < resource_size(res)) {
+ a->start = res->start;
+ a->end = res->end;
+ }
+ return 0;
+}
+
+/*
+ * Find biggest 'System RAM' resource and store its start and end address in
+ * @start and @end, respectively. If no System RAM is found, returns false.
+ */
+static bool damon_find_biggest_system_ram(unsigned long *start,
+ unsigned long *end)
+
+{
+ struct damon_system_ram_region arg = {};
+
+ walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram);
+ if (arg.end <= arg.start)
+ return false;
+
+ *start = arg.start;
+ *end = arg.end;
+ return true;
+}
+
+/**
+ * damon_set_region_biggest_system_ram_default() - Set the region of the given
+ * monitoring target as requested, or biggest 'System RAM'.
+ * @t: The monitoring target to set the region.
+ * @start: The pointer to the start address of the region.
+ * @end: The pointer to the end address of the region.
+ *
+ * This function sets the region of @t as requested by @start and @end. If the
+ * values of @start and @end are zero, however, this function finds the biggest
+ * 'System RAM' resource and sets the region to cover the resource. In the
+ * latter case, this function saves the start and end addresses of the resource
+ * in @start and @end, respectively.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_set_region_biggest_system_ram_default(struct damon_target *t,
+ unsigned long *start, unsigned long *end)
+{
+ struct damon_addr_range addr_range;
+
+ if (*start > *end)
+ return -EINVAL;
+
+ if (!*start && !*end &&
+ !damon_find_biggest_system_ram(start, end))
+ return -EINVAL;
+
+ addr_range.start = *start;
+ addr_range.end = *end;
+ return damon_set_regions(t, &addr_range, 1);
+}
+
+static int __init damon_init(void)
+{
+ damon_region_cache = KMEM_CACHE(damon_region, 0);
+ if (unlikely(!damon_region_cache)) {
+ pr_err("creating damon_region_cache fails\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+subsys_initcall(damon_init);
+
+#include "core-test.h"
diff --git a/mm/damon/dbgfs-test.h b/mm/damon/dbgfs-test.h
new file mode 100644
index 000000000..0bb0d532b
--- /dev/null
+++ b/mm/damon/dbgfs-test.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DAMON Debugfs Interface Unit Tests
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#ifdef CONFIG_DAMON_DBGFS_KUNIT_TEST
+
+#ifndef _DAMON_DBGFS_TEST_H
+#define _DAMON_DBGFS_TEST_H
+
+#include <kunit/test.h>
+
+static void damon_dbgfs_test_str_to_ints(struct kunit *test)
+{
+ char *question;
+ int *answers;
+ int expected[] = {12, 35, 46};
+ ssize_t nr_integers = 0, i;
+
+ question = "123";
+ answers = str_to_ints(question, strlen(question), &nr_integers);
+ KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers);
+ KUNIT_EXPECT_EQ(test, 123, answers[0]);
+ kfree(answers);
+
+ question = "123abc";
+ answers = str_to_ints(question, strlen(question), &nr_integers);
+ KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers);
+ KUNIT_EXPECT_EQ(test, 123, answers[0]);
+ kfree(answers);
+
+ question = "a123";
+ answers = str_to_ints(question, strlen(question), &nr_integers);
+ KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
+ kfree(answers);
+
+ question = "12 35";
+ answers = str_to_ints(question, strlen(question), &nr_integers);
+ KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers);
+ for (i = 0; i < nr_integers; i++)
+ KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
+ kfree(answers);
+
+ question = "12 35 46";
+ answers = str_to_ints(question, strlen(question), &nr_integers);
+ KUNIT_EXPECT_EQ(test, (ssize_t)3, nr_integers);
+ for (i = 0; i < nr_integers; i++)
+ KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
+ kfree(answers);
+
+ question = "12 35 abc 46";
+ answers = str_to_ints(question, strlen(question), &nr_integers);
+ KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers);
+ for (i = 0; i < 2; i++)
+ KUNIT_EXPECT_EQ(test, expected[i], answers[i]);
+ kfree(answers);
+
+ question = "";
+ answers = str_to_ints(question, strlen(question), &nr_integers);
+ KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
+ kfree(answers);
+
+ question = "\n";
+ answers = str_to_ints(question, strlen(question), &nr_integers);
+ KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers);
+ kfree(answers);
+}
+
+static void damon_dbgfs_test_set_targets(struct kunit *test)
+{
+ struct damon_ctx *ctx = dbgfs_new_ctx();
+ char buf[64];
+
+ /* Make DAMON consider the targets to have no pid */
+ damon_select_ops(ctx, DAMON_OPS_PADDR);
+
+ dbgfs_set_targets(ctx, 0, NULL);
+ sprint_target_ids(ctx, buf, 64);
+ KUNIT_EXPECT_STREQ(test, (char *)buf, "\n");
+
+ dbgfs_set_targets(ctx, 1, NULL);
+ sprint_target_ids(ctx, buf, 64);
+ KUNIT_EXPECT_STREQ(test, (char *)buf, "42\n");
+
+ dbgfs_set_targets(ctx, 0, NULL);
+ sprint_target_ids(ctx, buf, 64);
+ KUNIT_EXPECT_STREQ(test, (char *)buf, "\n");
+
+ dbgfs_destroy_ctx(ctx);
+}
+
+static void damon_dbgfs_test_set_init_regions(struct kunit *test)
+{
+ struct damon_ctx *ctx = damon_new_ctx();
+ /* Each line represents one region in ``<target idx> <start> <end>`` */
+ char * const valid_inputs[] = {"1 10 20\n 1 20 30\n1 35 45",
+ "1 10 20\n",
+ "1 10 20\n0 39 59\n0 70 134\n 1 20 25\n",
+ ""};
+ /* Reading the file again will show sorted, clean output */
+ char * const valid_expects[] = {"1 10 20\n1 20 30\n1 35 45\n",
+ "1 10 20\n",
+ "0 39 59\n0 70 134\n1 10 20\n1 20 25\n",
+ ""};
+ char * const invalid_inputs[] = {"3 10 20\n", /* target not exists */
+ "1 10 20\n 1 14 26\n", /* regions overlap */
+ "0 10 20\n1 30 40\n 0 5 8"}; /* not sorted by address */
+ char *input, *expect;
+ int i, rc;
+ char buf[256];
+
+ damon_select_ops(ctx, DAMON_OPS_PADDR);
+
+ dbgfs_set_targets(ctx, 3, NULL);
+
+ /* Put valid inputs and check the results */
+ for (i = 0; i < ARRAY_SIZE(valid_inputs); i++) {
+ input = valid_inputs[i];
+ expect = valid_expects[i];
+
+ rc = set_init_regions(ctx, input, strnlen(input, 256));
+ KUNIT_EXPECT_EQ(test, rc, 0);
+
+ memset(buf, 0, 256);
+ sprint_init_regions(ctx, buf, 256);
+
+ KUNIT_EXPECT_STREQ(test, (char *)buf, expect);
+ }
+ /* Put invalid inputs and check the return error code */
+ for (i = 0; i < ARRAY_SIZE(invalid_inputs); i++) {
+ input = invalid_inputs[i];
+ pr_info("input: %s\n", input);
+ rc = set_init_regions(ctx, input, strnlen(input, 256));
+ KUNIT_EXPECT_EQ(test, rc, -EINVAL);
+
+ memset(buf, 0, 256);
+ sprint_init_regions(ctx, buf, 256);
+
+ KUNIT_EXPECT_STREQ(test, (char *)buf, "");
+ }
+
+ dbgfs_set_targets(ctx, 0, NULL);
+ damon_destroy_ctx(ctx);
+}
+
+static struct kunit_case damon_test_cases[] = {
+ KUNIT_CASE(damon_dbgfs_test_str_to_ints),
+ KUNIT_CASE(damon_dbgfs_test_set_targets),
+ KUNIT_CASE(damon_dbgfs_test_set_init_regions),
+ {},
+};
+
+static struct kunit_suite damon_test_suite = {
+ .name = "damon-dbgfs",
+ .test_cases = damon_test_cases,
+};
+kunit_test_suite(damon_test_suite);
+
+#endif /* _DAMON_DBGFS_TEST_H */
+
+#endif /* CONFIG_DAMON_DBGFS_KUNIT_TEST */
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
new file mode 100644
index 000000000..124f0f8c9
--- /dev/null
+++ b/mm/damon/dbgfs.c
@@ -0,0 +1,1133 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON Debugfs Interface
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#define pr_fmt(fmt) "damon-dbgfs: " fmt
+
+#include <linux/damon.h>
+#include <linux/debugfs.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/page_idle.h>
+#include <linux/slab.h>
+
+static struct damon_ctx **dbgfs_ctxs;
+static int dbgfs_nr_ctxs;
+static struct dentry **dbgfs_dirs;
+static DEFINE_MUTEX(damon_dbgfs_lock);
+
+static void damon_dbgfs_warn_deprecation(void)
+{
+ pr_warn_once("DAMON debugfs interface is deprecated, "
+ "so users should move to DAMON_SYSFS. If you cannot, "
+ "please report your usecase to damon@lists.linux.dev and "
+ "linux-mm@kvack.org.\n");
+}
+
+/*
+ * Returns the kernel copy of the user-given string on success, or an
+ * ERR_PTR() encoded error code otherwise.
+ */
+static char *user_input_str(const char __user *buf, size_t count, loff_t *ppos)
+{
+ char *kbuf;
+ ssize_t ret;
+
+ /* We do not accept continuous write */
+ if (*ppos)
+ return ERR_PTR(-EINVAL);
+
+ kbuf = kmalloc(count + 1, GFP_KERNEL | __GFP_NOWARN);
+ if (!kbuf)
+ return ERR_PTR(-ENOMEM);
+
+ ret = simple_write_to_buffer(kbuf, count + 1, ppos, buf, count);
+ if (ret != count) {
+ kfree(kbuf);
+ return ERR_PTR(-EIO);
+ }
+ kbuf[ret] = '\0';
+
+ return kbuf;
+}
+
+static ssize_t dbgfs_attrs_read(struct file *file,
+ char __user *buf, size_t count, loff_t *ppos)
+{
+ struct damon_ctx *ctx = file->private_data;
+ char kbuf[128];
+ int ret;
+
+ mutex_lock(&ctx->kdamond_lock);
+ ret = scnprintf(kbuf, ARRAY_SIZE(kbuf), "%lu %lu %lu %lu %lu\n",
+ ctx->attrs.sample_interval, ctx->attrs.aggr_interval,
+ ctx->attrs.ops_update_interval,
+ ctx->attrs.min_nr_regions, ctx->attrs.max_nr_regions);
+ mutex_unlock(&ctx->kdamond_lock);
+
+ return simple_read_from_buffer(buf, count, ppos, kbuf, ret);
+}
+
+static ssize_t dbgfs_attrs_write(struct file *file,
+ const char __user *buf, size_t count, loff_t *ppos)
+{
+ struct damon_ctx *ctx = file->private_data;
+ struct damon_attrs attrs;
+ char *kbuf;
+ ssize_t ret;
+
+ kbuf = user_input_str(buf, count, ppos);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
+
+ if (sscanf(kbuf, "%lu %lu %lu %lu %lu",
+ &attrs.sample_interval, &attrs.aggr_interval,
+ &attrs.ops_update_interval,
+ &attrs.min_nr_regions,
+ &attrs.max_nr_regions) != 5) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ mutex_lock(&ctx->kdamond_lock);
+ if (ctx->kdamond) {
+ ret = -EBUSY;
+ goto unlock_out;
+ }
+
+ ret = damon_set_attrs(ctx, &attrs);
+ if (!ret)
+ ret = count;
+unlock_out:
+ mutex_unlock(&ctx->kdamond_lock);
+out:
+ kfree(kbuf);
+ return ret;
+}
+
+/*
+ * Return the corresponding dbgfs scheme action value (int) for the given
+ * damos_action if it is valid and supported by dbgfs, or a negative error
+ * code otherwise.
+ */
+static int damos_action_to_dbgfs_scheme_action(enum damos_action action)
+{
+ switch (action) {
+ case DAMOS_WILLNEED:
+ return 0;
+ case DAMOS_COLD:
+ return 1;
+ case DAMOS_PAGEOUT:
+ return 2;
+ case DAMOS_HUGEPAGE:
+ return 3;
+ case DAMOS_NOHUGEPAGE:
+ return 4;
+ case DAMOS_STAT:
+ return 5;
+ default:
+ return -EINVAL;
+ }
+}
+
+static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len)
+{
+ struct damos *s;
+ int written = 0;
+ int rc;
+
+ damon_for_each_scheme(s, c) {
+ rc = scnprintf(&buf[written], len - written,
+ "%lu %lu %u %u %u %u %d %lu %lu %lu %u %u %u %d %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
+ s->pattern.min_sz_region,
+ s->pattern.max_sz_region,
+ s->pattern.min_nr_accesses,
+ s->pattern.max_nr_accesses,
+ s->pattern.min_age_region,
+ s->pattern.max_age_region,
+ damos_action_to_dbgfs_scheme_action(s->action),
+ s->quota.ms, s->quota.sz,
+ s->quota.reset_interval,
+ s->quota.weight_sz,
+ s->quota.weight_nr_accesses,
+ s->quota.weight_age,
+ s->wmarks.metric, s->wmarks.interval,
+ s->wmarks.high, s->wmarks.mid, s->wmarks.low,
+ s->stat.nr_tried, s->stat.sz_tried,
+ s->stat.nr_applied, s->stat.sz_applied,
+ s->stat.qt_exceeds);
+ if (!rc)
+ return -ENOMEM;
+
+ written += rc;
+ }
+ return written;
+}
+
+static ssize_t dbgfs_schemes_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct damon_ctx *ctx = file->private_data;
+ char *kbuf;
+ ssize_t len;
+
+ kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN);
+ if (!kbuf)
+ return -ENOMEM;
+
+ mutex_lock(&ctx->kdamond_lock);
+ len = sprint_schemes(ctx, kbuf, count);
+ mutex_unlock(&ctx->kdamond_lock);
+ if (len < 0)
+ goto out;
+ len = simple_read_from_buffer(buf, count, ppos, kbuf, len);
+
+out:
+ kfree(kbuf);
+ return len;
+}
+
+static void free_schemes_arr(struct damos **schemes, ssize_t nr_schemes)
+{
+ ssize_t i;
+
+ for (i = 0; i < nr_schemes; i++)
+ kfree(schemes[i]);
+ kfree(schemes);
+}
+
+/*
+ * Return the corresponding damos_action for the given dbgfs scheme action
+ * input if it is valid, or a negative error code otherwise.
+ */
+static enum damos_action dbgfs_scheme_action_to_damos_action(int dbgfs_action)
+{
+ switch (dbgfs_action) {
+ case 0:
+ return DAMOS_WILLNEED;
+ case 1:
+ return DAMOS_COLD;
+ case 2:
+ return DAMOS_PAGEOUT;
+ case 3:
+ return DAMOS_HUGEPAGE;
+ case 4:
+ return DAMOS_NOHUGEPAGE;
+ case 5:
+ return DAMOS_STAT;
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Convert a string into an array of struct damos pointers.
+ *
+ * Returns the array of converted struct damos pointers on success, or NULL
+ * otherwise.
+ */
+static struct damos **str_to_schemes(const char *str, ssize_t len,
+ ssize_t *nr_schemes)
+{
+ struct damos *scheme, **schemes;
+ const int max_nr_schemes = 256;
+ int pos = 0, parsed, ret;
+ unsigned int action_input;
+ enum damos_action action;
+
+ schemes = kmalloc_array(max_nr_schemes, sizeof(scheme),
+ GFP_KERNEL);
+ if (!schemes)
+ return NULL;
+
+ *nr_schemes = 0;
+ while (pos < len && *nr_schemes < max_nr_schemes) {
+ struct damos_access_pattern pattern = {};
+ struct damos_quota quota = {};
+ struct damos_watermarks wmarks;
+
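+ /*
+ * Each scheme line consists of an access pattern (6 fields), an
+ * action, a quota (6 fields), and watermarks (5 fields).
+ */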
+ ret = sscanf(&str[pos],
+ "%lu %lu %u %u %u %u %u %lu %lu %lu %u %u %u %u %lu %lu %lu %lu%n",
+ &pattern.min_sz_region, &pattern.max_sz_region,
+ &pattern.min_nr_accesses,
+ &pattern.max_nr_accesses,
+ &pattern.min_age_region,
+ &pattern.max_age_region,
+ &action_input, &quota.ms,
+ &quota.sz, &quota.reset_interval,
+ &quota.weight_sz, &quota.weight_nr_accesses,
+ &quota.weight_age, &wmarks.metric,
+ &wmarks.interval, &wmarks.high, &wmarks.mid,
+ &wmarks.low, &parsed);
+ if (ret != 18)
+ break;
+ action = dbgfs_scheme_action_to_damos_action(action_input);
+ if ((int)action < 0)
+ goto fail;
+
+ if (pattern.min_sz_region > pattern.max_sz_region ||
+ pattern.min_nr_accesses > pattern.max_nr_accesses ||
+ pattern.min_age_region > pattern.max_age_region)
+ goto fail;
+
+ if (wmarks.high < wmarks.mid || wmarks.high < wmarks.low ||
+ wmarks.mid < wmarks.low)
+ goto fail;
+
+ pos += parsed;
+ scheme = damon_new_scheme(&pattern, action, &quota, &wmarks);
+ if (!scheme)
+ goto fail;
+
+ schemes[*nr_schemes] = scheme;
+ *nr_schemes += 1;
+ }
+ return schemes;
+fail:
+ free_schemes_arr(schemes, *nr_schemes);
+ return NULL;
+}
+
+static ssize_t dbgfs_schemes_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct damon_ctx *ctx = file->private_data;
+ char *kbuf;
+ struct damos **schemes;
+ ssize_t nr_schemes = 0, ret;
+
+ kbuf = user_input_str(buf, count, ppos);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
+
+ schemes = str_to_schemes(kbuf, count, &nr_schemes);
+ if (!schemes) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ mutex_lock(&ctx->kdamond_lock);
+ if (ctx->kdamond) {
+ ret = -EBUSY;
+ goto unlock_out;
+ }
+
+ damon_set_schemes(ctx, schemes, nr_schemes);
+ ret = count;
+ nr_schemes = 0;
+
+unlock_out:
+ mutex_unlock(&ctx->kdamond_lock);
+ free_schemes_arr(schemes, nr_schemes);
+out:
+ kfree(kbuf);
+ return ret;
+}
+
+static ssize_t sprint_target_ids(struct damon_ctx *ctx, char *buf, ssize_t len)
+{
+ struct damon_target *t;
+ int id;
+ int written = 0;
+ int rc;
+
+ damon_for_each_target(t, ctx) {
+ if (damon_target_has_pid(ctx))
+ /* Show pid numbers to debugfs users */
+ id = pid_vnr(t->pid);
+ else
+ /* Show 42 for physical address space, just for fun */
+ id = 42;
+
+ rc = scnprintf(&buf[written], len - written, "%d ", id);
+ if (!rc)
+ return -ENOMEM;
+ written += rc;
+ }
+ if (written)
+ written -= 1;
+ written += scnprintf(&buf[written], len - written, "\n");
+ return written;
+}
+
+static ssize_t dbgfs_target_ids_read(struct file *file,
+ char __user *buf, size_t count, loff_t *ppos)
+{
+ struct damon_ctx *ctx = file->private_data;
+ ssize_t len;
+ char ids_buf[320];
+
+ mutex_lock(&ctx->kdamond_lock);
+ len = sprint_target_ids(ctx, ids_buf, 320);
+ mutex_unlock(&ctx->kdamond_lock);
+ if (len < 0)
+ return len;
+
+ return simple_read_from_buffer(buf, count, ppos, ids_buf, len);
+}
+
+/*
+ * Convert a string into an array of integers.
+ *
+ * Returns the array of parsed integers on success, or NULL otherwise.
+ */
+static int *str_to_ints(const char *str, ssize_t len, ssize_t *nr_ints)
+{
+ int *array;
+ const int max_nr_ints = 32;
+ int nr;
+ int pos = 0, parsed, ret;
+
+ *nr_ints = 0;
+ array = kmalloc_array(max_nr_ints, sizeof(*array), GFP_KERNEL);
+ if (!array)
+ return NULL;
+ while (*nr_ints < max_nr_ints && pos < len) {
+ ret = sscanf(&str[pos], "%d%n", &nr, &parsed);
+ pos += parsed;
+ if (ret != 1)
+ break;
+ array[*nr_ints] = nr;
+ *nr_ints += 1;
+ }
+
+ return array;
+}
+
+static void dbgfs_put_pids(struct pid **pids, int nr_pids)
+{
+ int i;
+
+ for (i = 0; i < nr_pids; i++)
+ put_pid(pids[i]);
+}
+
+/*
+ * Convert a string into an array of struct pid pointers.
+ *
+ * Returns the array of struct pid pointers on success, or NULL otherwise.
+ */
+static struct pid **str_to_pids(const char *str, ssize_t len, ssize_t *nr_pids)
+{
+ int *ints;
+ ssize_t nr_ints;
+ struct pid **pids;
+
+ *nr_pids = 0;
+
+ ints = str_to_ints(str, len, &nr_ints);
+ if (!ints)
+ return NULL;
+
+ pids = kmalloc_array(nr_ints, sizeof(*pids), GFP_KERNEL);
+ if (!pids)
+ goto out;
+
+ for (; *nr_pids < nr_ints; (*nr_pids)++) {
+ pids[*nr_pids] = find_get_pid(ints[*nr_pids]);
+ if (!pids[*nr_pids]) {
+ dbgfs_put_pids(pids, *nr_pids);
+ kfree(ints);
+ kfree(pids);
+ return NULL;
+ }
+ }
+
+out:
+ kfree(ints);
+ return pids;
+}
+
+/*
+ * dbgfs_set_targets() - Set monitoring targets.
+ * @ctx: monitoring context
+ * @nr_targets: number of targets
+ * @pids: array of target pids (the size is the same as @nr_targets)
+ *
+ * This function should not be called while the kdamond is running. @pids is
+ * ignored if the context is not configured to have pid in each target. On
+ * failure, reference counts of all pids in @pids are decremented.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int dbgfs_set_targets(struct damon_ctx *ctx, ssize_t nr_targets,
+ struct pid **pids)
+{
+ ssize_t i;
+ struct damon_target *t, *next;
+
+ damon_for_each_target_safe(t, next, ctx) {
+ if (damon_target_has_pid(ctx))
+ put_pid(t->pid);
+ damon_destroy_target(t);
+ }
+
+ for (i = 0; i < nr_targets; i++) {
+ t = damon_new_target();
+ if (!t) {
+ damon_for_each_target_safe(t, next, ctx)
+ damon_destroy_target(t);
+ if (damon_target_has_pid(ctx))
+ dbgfs_put_pids(pids, nr_targets);
+ return -ENOMEM;
+ }
+ if (damon_target_has_pid(ctx))
+ t->pid = pids[i];
+ damon_add_target(ctx, t);
+ }
+
+ return 0;
+}
+
+static ssize_t dbgfs_target_ids_write(struct file *file,
+ const char __user *buf, size_t count, loff_t *ppos)
+{
+ struct damon_ctx *ctx = file->private_data;
+ bool id_is_pid = true;
+ char *kbuf;
+ struct pid **target_pids = NULL;
+ ssize_t nr_targets;
+ ssize_t ret;
+
+ kbuf = user_input_str(buf, count, ppos);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
+
+ if (!strncmp(kbuf, "paddr\n", count)) {
+ id_is_pid = false;
+ nr_targets = 1;
+ }
+
+ if (id_is_pid) {
+ target_pids = str_to_pids(kbuf, count, &nr_targets);
+ if (!target_pids) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ mutex_lock(&ctx->kdamond_lock);
+ if (ctx->kdamond) {
+ if (id_is_pid)
+ dbgfs_put_pids(target_pids, nr_targets);
+ ret = -EBUSY;
+ goto unlock_out;
+ }
+
+ /* remove previously set targets */
+ dbgfs_set_targets(ctx, 0, NULL);
+ if (!nr_targets) {
+ ret = count;
+ goto unlock_out;
+ }
+
+ /* Configure the context for the address space type */
+ if (id_is_pid)
+ ret = damon_select_ops(ctx, DAMON_OPS_VADDR);
+ else
+ ret = damon_select_ops(ctx, DAMON_OPS_PADDR);
+ if (ret)
+ goto unlock_out;
+
+ ret = dbgfs_set_targets(ctx, nr_targets, target_pids);
+ if (!ret)
+ ret = count;
+
+unlock_out:
+ mutex_unlock(&ctx->kdamond_lock);
+ kfree(target_pids);
+out:
+ kfree(kbuf);
+ return ret;
+}
+
+static ssize_t sprint_init_regions(struct damon_ctx *c, char *buf, ssize_t len)
+{
+ struct damon_target *t;
+ struct damon_region *r;
+ int target_idx = 0;
+ int written = 0;
+ int rc;
+
+ damon_for_each_target(t, c) {
+ damon_for_each_region(r, t) {
+ rc = scnprintf(&buf[written], len - written,
+ "%d %lu %lu\n",
+ target_idx, r->ar.start, r->ar.end);
+ if (!rc)
+ return -ENOMEM;
+ written += rc;
+ }
+ target_idx++;
+ }
+ return written;
+}
+
+static ssize_t dbgfs_init_regions_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct damon_ctx *ctx = file->private_data;
+ char *kbuf;
+ ssize_t len;
+
+ kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN);
+ if (!kbuf)
+ return -ENOMEM;
+
+ mutex_lock(&ctx->kdamond_lock);
+ if (ctx->kdamond) {
+ mutex_unlock(&ctx->kdamond_lock);
+ len = -EBUSY;
+ goto out;
+ }
+
+ len = sprint_init_regions(ctx, kbuf, count);
+ mutex_unlock(&ctx->kdamond_lock);
+ if (len < 0)
+ goto out;
+ len = simple_read_from_buffer(buf, count, ppos, kbuf, len);
+
+out:
+ kfree(kbuf);
+ return len;
+}
+
+static int add_init_region(struct damon_ctx *c, int target_idx,
+ struct damon_addr_range *ar)
+{
+ struct damon_target *t;
+ struct damon_region *r, *prev;
+ unsigned long idx = 0;
+ int rc = -EINVAL;
+
+ if (ar->start >= ar->end)
+ return -EINVAL;
+
+ damon_for_each_target(t, c) {
+ if (idx++ == target_idx) {
+ r = damon_new_region(ar->start, ar->end);
+ if (!r)
+ return -ENOMEM;
+ damon_add_region(r, t);
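+ /* The new region must not overlap the previously added one */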
+ if (damon_nr_regions(t) > 1) {
+ prev = damon_prev_region(r);
+ if (prev->ar.end > r->ar.start) {
+ damon_destroy_region(r, t);
+ return -EINVAL;
+ }
+ }
+ rc = 0;
+ }
+ }
+ return rc;
+}
+
+static int set_init_regions(struct damon_ctx *c, const char *str, ssize_t len)
+{
+ struct damon_target *t;
+ struct damon_region *r, *next;
+ int pos = 0, parsed, ret;
+ int target_idx;
+ struct damon_addr_range ar;
+ int err;
+
+ damon_for_each_target(t, c) {
+ damon_for_each_region_safe(r, next, t)
+ damon_destroy_region(r, t);
+ }
+
+ while (pos < len) {
+ ret = sscanf(&str[pos], "%d %lu %lu%n",
+ &target_idx, &ar.start, &ar.end, &parsed);
+ if (ret != 3)
+ break;
+ err = add_init_region(c, target_idx, &ar);
+ if (err)
+ goto fail;
+ pos += parsed;
+ }
+
+ return 0;
+
+fail:
+ damon_for_each_target(t, c) {
+ damon_for_each_region_safe(r, next, t)
+ damon_destroy_region(r, t);
+ }
+ return err;
+}
+
+static ssize_t dbgfs_init_regions_write(struct file *file,
+ const char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ struct damon_ctx *ctx = file->private_data;
+ char *kbuf;
+ ssize_t ret = count;
+ int err;
+
+ kbuf = user_input_str(buf, count, ppos);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
+
+ mutex_lock(&ctx->kdamond_lock);
+ if (ctx->kdamond) {
+ ret = -EBUSY;
+ goto unlock_out;
+ }
+
+ err = set_init_regions(ctx, kbuf, ret);
+ if (err)
+ ret = err;
+
+unlock_out:
+ mutex_unlock(&ctx->kdamond_lock);
+ kfree(kbuf);
+ return ret;
+}
+
+static ssize_t dbgfs_kdamond_pid_read(struct file *file,
+ char __user *buf, size_t count, loff_t *ppos)
+{
+ struct damon_ctx *ctx = file->private_data;
+ char *kbuf;
+ ssize_t len;
+
+ kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN);
+ if (!kbuf)
+ return -ENOMEM;
+
+ mutex_lock(&ctx->kdamond_lock);
+ if (ctx->kdamond)
+ len = scnprintf(kbuf, count, "%d\n", ctx->kdamond->pid);
+ else
+ len = scnprintf(kbuf, count, "none\n");
+ mutex_unlock(&ctx->kdamond_lock);
+ if (!len)
+ goto out;
+ len = simple_read_from_buffer(buf, count, ppos, kbuf, len);
+
+out:
+ kfree(kbuf);
+ return len;
+}
+
+static int damon_dbgfs_open(struct inode *inode, struct file *file)
+{
+ damon_dbgfs_warn_deprecation();
+
+ file->private_data = inode->i_private;
+
+ return nonseekable_open(inode, file);
+}
+
+static const struct file_operations attrs_fops = {
+ .open = damon_dbgfs_open,
+ .read = dbgfs_attrs_read,
+ .write = dbgfs_attrs_write,
+};
+
+static const struct file_operations schemes_fops = {
+ .open = damon_dbgfs_open,
+ .read = dbgfs_schemes_read,
+ .write = dbgfs_schemes_write,
+};
+
+static const struct file_operations target_ids_fops = {
+ .open = damon_dbgfs_open,
+ .read = dbgfs_target_ids_read,
+ .write = dbgfs_target_ids_write,
+};
+
+static const struct file_operations init_regions_fops = {
+ .open = damon_dbgfs_open,
+ .read = dbgfs_init_regions_read,
+ .write = dbgfs_init_regions_write,
+};
+
+static const struct file_operations kdamond_pid_fops = {
+ .open = damon_dbgfs_open,
+ .read = dbgfs_kdamond_pid_read,
+};
+
+static void dbgfs_fill_ctx_dir(struct dentry *dir, struct damon_ctx *ctx)
+{
+ const char * const file_names[] = {"attrs", "schemes", "target_ids",
+ "init_regions", "kdamond_pid"};
+ const struct file_operations *fops[] = {&attrs_fops, &schemes_fops,
+ &target_ids_fops, &init_regions_fops, &kdamond_pid_fops};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(file_names); i++)
+ debugfs_create_file(file_names[i], 0600, dir, ctx, fops[i]);
+}
+
+static void dbgfs_before_terminate(struct damon_ctx *ctx)
+{
+ struct damon_target *t, *next;
+
+ if (!damon_target_has_pid(ctx))
+ return;
+
+ mutex_lock(&ctx->kdamond_lock);
+ damon_for_each_target_safe(t, next, ctx) {
+ put_pid(t->pid);
+ damon_destroy_target(t);
+ }
+ mutex_unlock(&ctx->kdamond_lock);
+}
+
+static struct damon_ctx *dbgfs_new_ctx(void)
+{
+ struct damon_ctx *ctx;
+
+ ctx = damon_new_ctx();
+ if (!ctx)
+ return NULL;
+
+ if (damon_select_ops(ctx, DAMON_OPS_VADDR) &&
+ damon_select_ops(ctx, DAMON_OPS_PADDR)) {
+ damon_destroy_ctx(ctx);
+ return NULL;
+ }
+ ctx->callback.before_terminate = dbgfs_before_terminate;
+ return ctx;
+}
+
+static void dbgfs_destroy_ctx(struct damon_ctx *ctx)
+{
+ damon_destroy_ctx(ctx);
+}
+
+/*
+ * Make a context of @name and create a debugfs directory for it.
+ *
+ * This function should be called while holding damon_dbgfs_lock.
+ *
+ * Returns 0 on success, negative error code otherwise.
+ */
+static int dbgfs_mk_context(char *name)
+{
+ struct dentry *root, **new_dirs, *new_dir;
+ struct damon_ctx **new_ctxs, *new_ctx;
+
+ if (damon_nr_running_ctxs())
+ return -EBUSY;
+
+ new_ctxs = krealloc(dbgfs_ctxs, sizeof(*dbgfs_ctxs) *
+ (dbgfs_nr_ctxs + 1), GFP_KERNEL);
+ if (!new_ctxs)
+ return -ENOMEM;
+ dbgfs_ctxs = new_ctxs;
+
+ new_dirs = krealloc(dbgfs_dirs, sizeof(*dbgfs_dirs) *
+ (dbgfs_nr_ctxs + 1), GFP_KERNEL);
+ if (!new_dirs)
+ return -ENOMEM;
+ dbgfs_dirs = new_dirs;
+
+ root = dbgfs_dirs[0];
+ if (!root)
+ return -ENOENT;
+
+ new_dir = debugfs_create_dir(name, root);
+ /* Below check is required for a potential duplicated name case */
+ if (IS_ERR(new_dir))
+ return PTR_ERR(new_dir);
+ dbgfs_dirs[dbgfs_nr_ctxs] = new_dir;
+
+ new_ctx = dbgfs_new_ctx();
+ if (!new_ctx) {
+ debugfs_remove(new_dir);
+ dbgfs_dirs[dbgfs_nr_ctxs] = NULL;
+ return -ENOMEM;
+ }
+
+ dbgfs_ctxs[dbgfs_nr_ctxs] = new_ctx;
+ dbgfs_fill_ctx_dir(dbgfs_dirs[dbgfs_nr_ctxs],
+ dbgfs_ctxs[dbgfs_nr_ctxs]);
+ dbgfs_nr_ctxs++;
+
+ return 0;
+}
+
+static ssize_t dbgfs_mk_context_write(struct file *file,
+ const char __user *buf, size_t count, loff_t *ppos)
+{
+ char *kbuf;
+ char *ctx_name;
+ ssize_t ret;
+
+ kbuf = user_input_str(buf, count, ppos);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
+ ctx_name = kmalloc(count + 1, GFP_KERNEL);
+ if (!ctx_name) {
+ kfree(kbuf);
+ return -ENOMEM;
+ }
+
+ /* Trim white space */
+ if (sscanf(kbuf, "%s", ctx_name) != 1) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ mutex_lock(&damon_dbgfs_lock);
+ ret = dbgfs_mk_context(ctx_name);
+ if (!ret)
+ ret = count;
+ mutex_unlock(&damon_dbgfs_lock);
+
+out:
+ kfree(kbuf);
+ kfree(ctx_name);
+ return ret;
+}
+
+/*
+ * Remove a context of @name and its debugfs directory.
+ *
+ * This function should be called while holding damon_dbgfs_lock.
+ *
+ * Return 0 on success, negative error code otherwise.
+ */
+static int dbgfs_rm_context(char *name)
+{
+ struct dentry *root, *dir, **new_dirs;
+ struct inode *inode;
+ struct damon_ctx **new_ctxs;
+ int i, j;
+ int ret = 0;
+
+ if (damon_nr_running_ctxs())
+ return -EBUSY;
+
+ root = dbgfs_dirs[0];
+ if (!root)
+ return -ENOENT;
+
+ dir = debugfs_lookup(name, root);
+ if (!dir)
+ return -ENOENT;
+
+ inode = d_inode(dir);
+ if (!S_ISDIR(inode->i_mode)) {
+ ret = -EINVAL;
+ goto out_dput;
+ }
+
+ new_dirs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_dirs),
+ GFP_KERNEL);
+ if (!new_dirs) {
+ ret = -ENOMEM;
+ goto out_dput;
+ }
+
+ new_ctxs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_ctxs),
+ GFP_KERNEL);
+ if (!new_ctxs) {
+ ret = -ENOMEM;
+ goto out_new_dirs;
+ }
+
+ for (i = 0, j = 0; i < dbgfs_nr_ctxs; i++) {
+ if (dbgfs_dirs[i] == dir) {
+ debugfs_remove(dbgfs_dirs[i]);
+ dbgfs_destroy_ctx(dbgfs_ctxs[i]);
+ continue;
+ }
+ new_dirs[j] = dbgfs_dirs[i];
+ new_ctxs[j++] = dbgfs_ctxs[i];
+ }
+
+ kfree(dbgfs_dirs);
+ kfree(dbgfs_ctxs);
+
+ dbgfs_dirs = new_dirs;
+ dbgfs_ctxs = new_ctxs;
+ dbgfs_nr_ctxs--;
+
+ goto out_dput;
+
+out_new_dirs:
+ kfree(new_dirs);
+out_dput:
+ dput(dir);
+ return ret;
+}
+
+static ssize_t dbgfs_rm_context_write(struct file *file,
+ const char __user *buf, size_t count, loff_t *ppos)
+{
+ char *kbuf;
+ ssize_t ret;
+ char *ctx_name;
+
+ kbuf = user_input_str(buf, count, ppos);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
+ ctx_name = kmalloc(count + 1, GFP_KERNEL);
+ if (!ctx_name) {
+ kfree(kbuf);
+ return -ENOMEM;
+ }
+
+ /* Trim white space */
+ if (sscanf(kbuf, "%s", ctx_name) != 1) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ mutex_lock(&damon_dbgfs_lock);
+ ret = dbgfs_rm_context(ctx_name);
+ if (!ret)
+ ret = count;
+ mutex_unlock(&damon_dbgfs_lock);
+
+out:
+ kfree(kbuf);
+ kfree(ctx_name);
+ return ret;
+}
+
+static ssize_t dbgfs_monitor_on_read(struct file *file,
+ char __user *buf, size_t count, loff_t *ppos)
+{
+ char monitor_on_buf[5];
+ bool monitor_on = damon_nr_running_ctxs() != 0;
+ int len;
+
+ len = scnprintf(monitor_on_buf, 5, monitor_on ? "on\n" : "off\n");
+
+ return simple_read_from_buffer(buf, count, ppos, monitor_on_buf, len);
+}
+
+static ssize_t dbgfs_monitor_on_write(struct file *file,
+ const char __user *buf, size_t count, loff_t *ppos)
+{
+ ssize_t ret;
+ char *kbuf;
+
+ kbuf = user_input_str(buf, count, ppos);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
+
+ /* Remove white space */
+ if (sscanf(kbuf, "%s", kbuf) != 1) {
+ kfree(kbuf);
+ return -EINVAL;
+ }
+
+ mutex_lock(&damon_dbgfs_lock);
+ if (!strncmp(kbuf, "on", count)) {
+ int i;
+
+ for (i = 0; i < dbgfs_nr_ctxs; i++) {
+ if (damon_targets_empty(dbgfs_ctxs[i])) {
+ kfree(kbuf);
+ mutex_unlock(&damon_dbgfs_lock);
+ return -EINVAL;
+ }
+ }
+ ret = damon_start(dbgfs_ctxs, dbgfs_nr_ctxs, true);
+ } else if (!strncmp(kbuf, "off", count)) {
+ ret = damon_stop(dbgfs_ctxs, dbgfs_nr_ctxs);
+ } else {
+ ret = -EINVAL;
+ }
+ mutex_unlock(&damon_dbgfs_lock);
+
+ if (!ret)
+ ret = count;
+ kfree(kbuf);
+ return ret;
+}
+
+static int damon_dbgfs_static_file_open(struct inode *inode, struct file *file)
+{
+ damon_dbgfs_warn_deprecation();
+ return nonseekable_open(inode, file);
+}
+
+static const struct file_operations mk_contexts_fops = {
+ .open = damon_dbgfs_static_file_open,
+ .write = dbgfs_mk_context_write,
+};
+
+static const struct file_operations rm_contexts_fops = {
+ .open = damon_dbgfs_static_file_open,
+ .write = dbgfs_rm_context_write,
+};
+
+static const struct file_operations monitor_on_fops = {
+ .open = damon_dbgfs_static_file_open,
+ .read = dbgfs_monitor_on_read,
+ .write = dbgfs_monitor_on_write,
+};
+
+static int __init __damon_dbgfs_init(void)
+{
+ struct dentry *dbgfs_root;
+ const char * const file_names[] = {"mk_contexts", "rm_contexts",
+ "monitor_on"};
+ const struct file_operations *fops[] = {&mk_contexts_fops,
+ &rm_contexts_fops, &monitor_on_fops};
+ int i;
+
+ dbgfs_root = debugfs_create_dir("damon", NULL);
+
+ for (i = 0; i < ARRAY_SIZE(file_names); i++)
+ debugfs_create_file(file_names[i], 0600, dbgfs_root, NULL,
+ fops[i]);
+ dbgfs_fill_ctx_dir(dbgfs_root, dbgfs_ctxs[0]);
+
+ dbgfs_dirs = kmalloc(sizeof(dbgfs_root), GFP_KERNEL);
+ if (!dbgfs_dirs) {
+ debugfs_remove(dbgfs_root);
+ return -ENOMEM;
+ }
+ dbgfs_dirs[0] = dbgfs_root;
+
+ return 0;
+}
+
+/*
+ * Functions for the initialization
+ */
+
+static int __init damon_dbgfs_init(void)
+{
+ int rc = -ENOMEM;
+
+ mutex_lock(&damon_dbgfs_lock);
+ dbgfs_ctxs = kmalloc(sizeof(*dbgfs_ctxs), GFP_KERNEL);
+ if (!dbgfs_ctxs)
+ goto out;
+ dbgfs_ctxs[0] = dbgfs_new_ctx();
+ if (!dbgfs_ctxs[0]) {
+ kfree(dbgfs_ctxs);
+ goto out;
+ }
+ dbgfs_nr_ctxs = 1;
+
+ rc = __damon_dbgfs_init();
+ if (rc) {
+ kfree(dbgfs_ctxs[0]);
+ kfree(dbgfs_ctxs);
+ pr_err("%s: dbgfs init failed\n", __func__);
+ }
+
+out:
+ mutex_unlock(&damon_dbgfs_lock);
+ return rc;
+}
+
+module_init(damon_dbgfs_init);
+
+#include "dbgfs-test.h"
diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c
new file mode 100644
index 000000000..3071e08e8
--- /dev/null
+++ b/mm/damon/lru_sort.c
@@ -0,0 +1,321 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON-based LRU-lists Sorting
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#define pr_fmt(fmt) "damon-lru-sort: " fmt
+
+#include <linux/damon.h>
+#include <linux/kstrtox.h>
+#include <linux/module.h>
+
+#include "modules-common.h"
+
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "damon_lru_sort."
+
+/*
+ * Enable or disable DAMON_LRU_SORT.
+ *
+ * You can enable DAMON_LRU_SORT by setting the value of this parameter as
+ * ``Y``. Setting it as ``N`` disables DAMON_LRU_SORT. Note that
+ * DAMON_LRU_SORT could do no real monitoring and LRU-lists sorting due to the
+ * watermarks-based activation condition. Refer to the descriptions of the
+ * watermarks parameters below for this.
+ */
+static bool enabled __read_mostly;
+
+/*
+ * Make DAMON_LRU_SORT read the input parameters again, except ``enabled``.
+ *
+ * Input parameters that are updated while DAMON_LRU_SORT is running are not
+ * applied by default. Once this parameter is set as ``Y``, DAMON_LRU_SORT
+ * reads the values of the parameters except ``enabled`` again. Once the
+ * re-reading is done, this parameter is set back to ``N``. If invalid
+ * parameters are found during the re-reading, DAMON_LRU_SORT will be
+ * disabled.
+ */
+static bool commit_inputs __read_mostly;
+module_param(commit_inputs, bool, 0600);
+
+/*
+ * Access frequency threshold for hot memory regions identification, in permil.
+ *
+ * If a memory region is accessed at this frequency or higher, DAMON_LRU_SORT
+ * identifies the region as hot and marks it as accessed on the LRU list, so
+ * that it is less likely to be reclaimed under memory pressure. 50% (500
+ * permil) by default.
+ */
+static unsigned long hot_thres_access_freq = 500;
+module_param(hot_thres_access_freq, ulong, 0600);
+
+/*
+ * Time threshold for cold memory regions identification, in microseconds.
+ *
+ * If a memory region is not accessed for this amount of time or longer,
+ * DAMON_LRU_SORT identifies the region as cold and marks it as unaccessed on
+ * the LRU list, so that it can be reclaimed first under memory pressure.
+ * 120 seconds by default.
+ */
+static unsigned long cold_min_age __read_mostly = 120000000;
+module_param(cold_min_age, ulong, 0600);
+
+static struct damos_quota damon_lru_sort_quota = {
+ /* Use up to 10 ms per 1 sec, by default */
+ .ms = 10,
+ .sz = 0,
+ .reset_interval = 1000,
+ /* Within the quota, mark hotter regions accessed first. */
+ .weight_sz = 0,
+ .weight_nr_accesses = 1,
+ .weight_age = 0,
+};
+DEFINE_DAMON_MODULES_DAMOS_TIME_QUOTA(damon_lru_sort_quota);
+
+static struct damos_watermarks damon_lru_sort_wmarks = {
+ .metric = DAMOS_WMARK_FREE_MEM_RATE,
+ .interval = 5000000, /* 5 seconds */
+ .high = 200, /* 20 percent */
+ .mid = 150, /* 15 percent */
+ .low = 50, /* 5 percent */
+};
+DEFINE_DAMON_MODULES_WMARKS_PARAMS(damon_lru_sort_wmarks);
+
+static struct damon_attrs damon_lru_sort_mon_attrs = {
+ .sample_interval = 5000, /* 5 ms */
+ .aggr_interval = 100000, /* 100 ms */
+ .ops_update_interval = 0,
+ .min_nr_regions = 10,
+ .max_nr_regions = 1000,
+};
+DEFINE_DAMON_MODULES_MON_ATTRS_PARAMS(damon_lru_sort_mon_attrs);
+
+/*
+ * Start of the target memory region in physical address.
+ *
+ * The start physical address of memory region that DAMON_LRU_SORT will do work
+ * against. By default, biggest System RAM is used as the region.
+ */
+static unsigned long monitor_region_start __read_mostly;
+module_param(monitor_region_start, ulong, 0600);
+
+/*
+ * End of the target memory region in physical address.
+ *
+ * The end physical address of memory region that DAMON_LRU_SORT will do work
+ * against. By default, biggest System RAM is used as the region.
+ */
+static unsigned long monitor_region_end __read_mostly;
+module_param(monitor_region_end, ulong, 0600);
+
+/*
+ * PID of the DAMON thread
+ *
+ * If DAMON_LRU_SORT is enabled, this becomes the PID of the worker thread.
+ * Else, -1.
+ */
+static int kdamond_pid __read_mostly = -1;
+module_param(kdamond_pid, int, 0400);
+
+static struct damos_stat damon_lru_sort_hot_stat;
+DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_lru_sort_hot_stat,
+ lru_sort_tried_hot_regions, lru_sorted_hot_regions,
+ hot_quota_exceeds);
+
+static struct damos_stat damon_lru_sort_cold_stat;
+DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_lru_sort_cold_stat,
+ lru_sort_tried_cold_regions, lru_sorted_cold_regions,
+ cold_quota_exceeds);
+
+static struct damos_access_pattern damon_lru_sort_stub_pattern = {
+ /* Find regions having PAGE_SIZE or larger size */
+ .min_sz_region = PAGE_SIZE,
+ .max_sz_region = ULONG_MAX,
+ /* no matter its access frequency */
+ .min_nr_accesses = 0,
+ .max_nr_accesses = UINT_MAX,
+ /* no matter its age */
+ .min_age_region = 0,
+ .max_age_region = UINT_MAX,
+};
+
+static struct damon_ctx *ctx;
+static struct damon_target *target;
+
+static struct damos *damon_lru_sort_new_scheme(
+ struct damos_access_pattern *pattern, enum damos_action action)
+{
+ struct damos_quota quota = damon_lru_sort_quota;
+
+ /* Use half of total quota for hot/cold pages sorting */
+ quota.ms = quota.ms / 2;
+
+ return damon_new_scheme(
+ /* find the pattern, and */
+ pattern,
+ /* (de)prioritize on LRU-lists */
+ action,
+ /* under the quota. */
+ &quota,
+ /* (De)activate this according to the watermarks. */
+ &damon_lru_sort_wmarks);
+}
+
+/* Create a DAMON-based operation scheme for hot memory regions */
+static struct damos *damon_lru_sort_new_hot_scheme(unsigned int hot_thres)
+{
+ struct damos_access_pattern pattern = damon_lru_sort_stub_pattern;
+
+ pattern.min_nr_accesses = hot_thres;
+ return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_PRIO);
+}
+
+/* Create a DAMON-based operation scheme for cold memory regions */
+static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres)
+{
+ struct damos_access_pattern pattern = damon_lru_sort_stub_pattern;
+
+ pattern.max_nr_accesses = 0;
+ pattern.min_age_region = cold_thres;
+ return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_DEPRIO);
+}
+
+static int damon_lru_sort_apply_parameters(void)
+{
+ struct damos *scheme;
+ unsigned int hot_thres, cold_thres;
+ int err = 0;
+
+ err = damon_set_attrs(ctx, &damon_lru_sort_mon_attrs);
+ if (err)
+ return err;
+
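+ /* Convert the permil access frequency threshold into a nr_accesses value */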
+ hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) *
+ hot_thres_access_freq / 1000;
+ scheme = damon_lru_sort_new_hot_scheme(hot_thres);
+ if (!scheme)
+ return -ENOMEM;
+ damon_set_schemes(ctx, &scheme, 1);
+
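+ /* Convert the microseconds age threshold into a number of aggregation intervals */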
+ cold_thres = cold_min_age / damon_lru_sort_mon_attrs.aggr_interval;
+ scheme = damon_lru_sort_new_cold_scheme(cold_thres);
+ if (!scheme)
+ return -ENOMEM;
+ damon_add_scheme(ctx, scheme);
+
+ return damon_set_region_biggest_system_ram_default(target,
+ &monitor_region_start,
+ &monitor_region_end);
+}
+
+static int damon_lru_sort_turn(bool on)
+{
+ int err;
+
+ if (!on) {
+ err = damon_stop(&ctx, 1);
+ if (!err)
+ kdamond_pid = -1;
+ return err;
+ }
+
+ err = damon_lru_sort_apply_parameters();
+ if (err)
+ return err;
+
+ err = damon_start(&ctx, 1, true);
+ if (err)
+ return err;
+ kdamond_pid = ctx->kdamond->pid;
+ return 0;
+}
+
+static int damon_lru_sort_enabled_store(const char *val,
+ const struct kernel_param *kp)
+{
+ bool is_enabled = enabled;
+ bool enable;
+ int err;
+
+ err = kstrtobool(val, &enable);
+ if (err)
+ return err;
+
+ if (is_enabled == enable)
+ return 0;
+
+ /* Called before init function. The function will handle this. */
+ if (!ctx)
+ goto set_param_out;
+
+ err = damon_lru_sort_turn(enable);
+ if (err)
+ return err;
+
+set_param_out:
+ enabled = enable;
+ return err;
+}
+
+static const struct kernel_param_ops enabled_param_ops = {
+ .set = damon_lru_sort_enabled_store,
+ .get = param_get_bool,
+};
+
+module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
+MODULE_PARM_DESC(enabled,
+ "Enable or disable DAMON_LRU_SORT (default: disabled)");
+
+static int damon_lru_sort_handle_commit_inputs(void)
+{
+ int err;
+
+ if (!commit_inputs)
+ return 0;
+
+ err = damon_lru_sort_apply_parameters();
+ commit_inputs = false;
+ return err;
+}
+
+static int damon_lru_sort_after_aggregation(struct damon_ctx *c)
+{
+ struct damos *s;
+
+ /* Update the stats parameters */
+ damon_for_each_scheme(s, c) {
+ if (s->action == DAMOS_LRU_PRIO)
+ damon_lru_sort_hot_stat = s->stat;
+ else if (s->action == DAMOS_LRU_DEPRIO)
+ damon_lru_sort_cold_stat = s->stat;
+ }
+
+ return damon_lru_sort_handle_commit_inputs();
+}
+
+static int damon_lru_sort_after_wmarks_check(struct damon_ctx *c)
+{
+ return damon_lru_sort_handle_commit_inputs();
+}
+
+static int __init damon_lru_sort_init(void)
+{
+ int err = damon_modules_new_paddr_ctx_target(&ctx, &target);
+
+ if (err)
+ return err;
+
+ ctx->callback.after_wmarks_check = damon_lru_sort_after_wmarks_check;
+ ctx->callback.after_aggregation = damon_lru_sort_after_aggregation;
+
+ /* 'enabled' has been set before this function, probably via the command line */
+ if (enabled)
+ err = damon_lru_sort_turn(true);
+
+ return err;
+}
+
+module_init(damon_lru_sort_init);
diff --git a/mm/damon/modules-common.c b/mm/damon/modules-common.c
new file mode 100644
index 000000000..b2381a846
--- /dev/null
+++ b/mm/damon/modules-common.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common Primitives for DAMON Modules
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#include <linux/damon.h>
+
+#include "modules-common.h"
+
+/*
+ * Allocate, set, and return a DAMON context for the physical address space.
+ * @ctxp: Pointer for saving the pointer to the newly created context
+ * @targetp: Pointer for saving the pointer to the newly created target
+ */
+int damon_modules_new_paddr_ctx_target(struct damon_ctx **ctxp,
+ struct damon_target **targetp)
+{
+ struct damon_ctx *ctx;
+ struct damon_target *target;
+
+ ctx = damon_new_ctx();
+ if (!ctx)
+ return -ENOMEM;
+
+ if (damon_select_ops(ctx, DAMON_OPS_PADDR)) {
+ damon_destroy_ctx(ctx);
+ return -EINVAL;
+ }
+
+ target = damon_new_target();
+ if (!target) {
+ damon_destroy_ctx(ctx);
+ return -ENOMEM;
+ }
+ damon_add_target(ctx, target);
+
+ *ctxp = ctx;
+ *targetp = target;
+ return 0;
+}
diff --git a/mm/damon/modules-common.h b/mm/damon/modules-common.h
new file mode 100644
index 000000000..f49cdb417
--- /dev/null
+++ b/mm/damon/modules-common.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common Primitives for DAMON Modules
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/moduleparam.h>
+
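+/*
+ * The macros below expand to module_param_named() definitions, so that DAMON
+ * modules can expose their common structures (monitoring attributes, DAMOS
+ * quotas, watermarks, and statistics) as module parameters with consistent
+ * names.
+ */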
+#define DEFINE_DAMON_MODULES_MON_ATTRS_PARAMS(attrs) \
+ module_param_named(sample_interval, attrs.sample_interval, \
+ ulong, 0600); \
+ module_param_named(aggr_interval, attrs.aggr_interval, ulong, \
+ 0600); \
+ module_param_named(min_nr_regions, attrs.min_nr_regions, ulong, \
+ 0600); \
+ module_param_named(max_nr_regions, attrs.max_nr_regions, ulong, \
+ 0600);
+
+#define DEFINE_DAMON_MODULES_DAMOS_TIME_QUOTA(quota) \
+ module_param_named(quota_ms, quota.ms, ulong, 0600); \
+ module_param_named(quota_reset_interval_ms, \
+ quota.reset_interval, ulong, 0600);
+
+#define DEFINE_DAMON_MODULES_DAMOS_QUOTAS(quota) \
+ DEFINE_DAMON_MODULES_DAMOS_TIME_QUOTA(quota) \
+ module_param_named(quota_sz, quota.sz, ulong, 0600);
+
+#define DEFINE_DAMON_MODULES_WMARKS_PARAMS(wmarks) \
+ module_param_named(wmarks_interval, wmarks.interval, ulong, \
+ 0600); \
+ module_param_named(wmarks_high, wmarks.high, ulong, 0600); \
+ module_param_named(wmarks_mid, wmarks.mid, ulong, 0600); \
+ module_param_named(wmarks_low, wmarks.low, ulong, 0600);
+
+#define DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(stat, try_name, \
+ succ_name, qt_exceed_name) \
+ module_param_named(nr_##try_name, stat.nr_tried, ulong, 0400); \
+ module_param_named(bytes_##try_name, stat.sz_tried, ulong, \
+ 0400); \
+ module_param_named(nr_##succ_name, stat.nr_applied, ulong, \
+ 0400); \
+ module_param_named(bytes_##succ_name, stat.sz_applied, ulong, \
+ 0400); \
+ module_param_named(nr_##qt_exceed_name, stat.qt_exceeds, ulong, \
+ 0400);
+
+int damon_modules_new_paddr_ctx_target(struct damon_ctx **ctxp,
+ struct damon_target **targetp);
diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c
new file mode 100644
index 000000000..d25d99cb5
--- /dev/null
+++ b/mm/damon/ops-common.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common Primitives for Data Access Monitoring
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/mmu_notifier.h>
+#include <linux/page_idle.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+
+#include "ops-common.h"
+
+/*
+ * Get the folio of a pfn if the page is online and on an LRU list. Otherwise,
+ * returns NULL.
+ *
+ * The body of this function is stolen from 'page_idle_get_folio()'. We steal
+ * rather than reuse it because the code is quite simple.
+ */
+struct folio *damon_get_folio(unsigned long pfn)
+{
+ struct page *page = pfn_to_online_page(pfn);
+ struct folio *folio;
+
+ if (!page || PageTail(page))
+ return NULL;
+
+ folio = page_folio(page);
+ if (!folio_test_lru(folio) || !folio_try_get(folio))
+ return NULL;
+ if (unlikely(page_folio(page) != folio || !folio_test_lru(folio))) {
+ folio_put(folio);
+ folio = NULL;
+ }
+ return folio;
+}
+
+void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr)
+{
+ struct folio *folio = damon_get_folio(pte_pfn(ptep_get(pte)));
+
+ if (!folio)
+ return;
+
+ if (ptep_clear_young_notify(vma, addr, pte))
+ folio_set_young(folio);
+
+ folio_set_idle(folio);
+ folio_put(folio);
+}
+
+void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ struct folio *folio = damon_get_folio(pmd_pfn(pmdp_get(pmd)));
+
+ if (!folio)
+ return;
+
+ if (pmdp_clear_young_notify(vma, addr, pmd))
+ folio_set_young(folio);
+
+ folio_set_idle(folio);
+ folio_put(folio);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+}
+
+#define DAMON_MAX_SUBSCORE (100)
+#define DAMON_MAX_AGE_IN_LOG (32)
+
+int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
+ struct damos *s)
+{
+ int freq_subscore;
+ unsigned int age_in_sec;
+ int age_in_log, age_subscore;
+ unsigned int freq_weight = s->quota.weight_nr_accesses;
+ unsigned int age_weight = s->quota.weight_age;
+ int hotness;
+
+ freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE /
+ damon_max_nr_accesses(&c->attrs);
+
+ age_in_sec = (unsigned long)r->age * c->attrs.aggr_interval / 1000000;
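+ /* age_in_log becomes roughly log2(age_in_sec), capped at DAMON_MAX_AGE_IN_LOG */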
+ for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec;
+ age_in_log++, age_in_sec >>= 1)
+ ;
+
+ /* If frequency is 0, higher age means it's colder */
+ if (freq_subscore == 0)
+ age_in_log *= -1;
+
+ /*
+ * Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG].
+ * Scale it to be in [0, 100] and set it as age subscore.
+ */
+ age_in_log += DAMON_MAX_AGE_IN_LOG;
+ age_subscore = age_in_log * DAMON_MAX_SUBSCORE /
+ DAMON_MAX_AGE_IN_LOG / 2;
+
+ hotness = (freq_weight * freq_subscore + age_weight * age_subscore);
+ if (freq_weight + age_weight)
+ hotness /= freq_weight + age_weight;
+ /*
+ * Transform it to fit in [0, DAMOS_MAX_SCORE]
+ */
+ hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE;
+
+ return hotness;
+}
+
+int damon_cold_score(struct damon_ctx *c, struct damon_region *r,
+ struct damos *s)
+{
+ int hotness = damon_hot_score(c, r, s);
+
+ /* Return coldness of the region */
+ return DAMOS_MAX_SCORE - hotness;
+}
diff --git a/mm/damon/ops-common.h b/mm/damon/ops-common.h
new file mode 100644
index 000000000..18d837d11
--- /dev/null
+++ b/mm/damon/ops-common.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common Primitives for Data Access Monitoring
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/damon.h>
+
+struct folio *damon_get_folio(unsigned long pfn);
+
+void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr);
+void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr);
+
+int damon_cold_score(struct damon_ctx *c, struct damon_region *r,
+ struct damos *s);
+int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
+ struct damos *s);
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
new file mode 100644
index 000000000..909db25ef
--- /dev/null
+++ b/mm/damon/paddr.c
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON Primitives for The Physical Address Space
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#define pr_fmt(fmt) "damon-pa: " fmt
+
+#include <linux/mmu_notifier.h>
+#include <linux/page_idle.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#include <linux/swap.h>
+
+#include "../internal.h"
+#include "ops-common.h"
+
+static bool __damon_pa_mkold(struct folio *folio, struct vm_area_struct *vma,
+ unsigned long addr, void *arg)
+{
+ DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
+
+ while (page_vma_mapped_walk(&pvmw)) {
+ addr = pvmw.address;
+ if (pvmw.pte)
+ damon_ptep_mkold(pvmw.pte, vma, addr);
+ else
+ damon_pmdp_mkold(pvmw.pmd, vma, addr);
+ }
+ return true;
+}
+
+static void damon_pa_mkold(unsigned long paddr)
+{
+ struct folio *folio = damon_get_folio(PHYS_PFN(paddr));
+ struct rmap_walk_control rwc = {
+ .rmap_one = __damon_pa_mkold,
+ .anon_lock = folio_lock_anon_vma_read,
+ };
+ bool need_lock;
+
+ if (!folio)
+ return;
+
+ if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
+ folio_set_idle(folio);
+ goto out;
+ }
+
+ need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
+ if (need_lock && !folio_trylock(folio))
+ goto out;
+
+ rmap_walk(folio, &rwc);
+
+ if (need_lock)
+ folio_unlock(folio);
+
+out:
+ folio_put(folio);
+}
+
+static void __damon_pa_prepare_access_check(struct damon_region *r)
+{
+ r->sampling_addr = damon_rand(r->ar.start, r->ar.end);
+
+ damon_pa_mkold(r->sampling_addr);
+}
+
+static void damon_pa_prepare_access_checks(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ struct damon_region *r;
+
+ damon_for_each_target(t, ctx) {
+ damon_for_each_region(r, t)
+ __damon_pa_prepare_access_check(r);
+ }
+}
+
+static bool __damon_pa_young(struct folio *folio, struct vm_area_struct *vma,
+ unsigned long addr, void *arg)
+{
+ bool *accessed = arg;
+ DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
+
+ *accessed = false;
+ while (page_vma_mapped_walk(&pvmw)) {
+ addr = pvmw.address;
+ if (pvmw.pte) {
+ *accessed = pte_young(ptep_get(pvmw.pte)) ||
+ !folio_test_idle(folio) ||
+ mmu_notifier_test_young(vma->vm_mm, addr);
+ } else {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ *accessed = pmd_young(pmdp_get(pvmw.pmd)) ||
+ !folio_test_idle(folio) ||
+ mmu_notifier_test_young(vma->vm_mm, addr);
+#else
+ WARN_ON_ONCE(1);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+ }
+ if (*accessed) {
+ page_vma_mapped_walk_done(&pvmw);
+ break;
+ }
+ }
+
+ /* If accessed, stop walking */
+ return *accessed == false;
+}
+
+static bool damon_pa_young(unsigned long paddr, unsigned long *folio_sz)
+{
+ struct folio *folio = damon_get_folio(PHYS_PFN(paddr));
+ bool accessed = false;
+ struct rmap_walk_control rwc = {
+ .arg = &accessed,
+ .rmap_one = __damon_pa_young,
+ .anon_lock = folio_lock_anon_vma_read,
+ };
+ bool need_lock;
+
+ if (!folio)
+ return false;
+
+ if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
+ if (folio_test_idle(folio))
+ accessed = false;
+ else
+ accessed = true;
+ goto out;
+ }
+
+ need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
+ if (need_lock && !folio_trylock(folio))
+ goto out;
+
+ rmap_walk(folio, &rwc);
+
+ if (need_lock)
+ folio_unlock(folio);
+
+out:
+ *folio_sz = folio_size(folio);
+ folio_put(folio);
+ return accessed;
+}
+
+static void __damon_pa_check_access(struct damon_region *r)
+{
+ static unsigned long last_addr;
+ static unsigned long last_folio_sz = PAGE_SIZE;
+ static bool last_accessed;
+
+ /* If the region is in the last checked page, reuse the result */
+ if (ALIGN_DOWN(last_addr, last_folio_sz) ==
+ ALIGN_DOWN(r->sampling_addr, last_folio_sz)) {
+ if (last_accessed)
+ r->nr_accesses++;
+ return;
+ }
+
+ last_accessed = damon_pa_young(r->sampling_addr, &last_folio_sz);
+ if (last_accessed)
+ r->nr_accesses++;
+
+ last_addr = r->sampling_addr;
+}
+
+static unsigned int damon_pa_check_accesses(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ struct damon_region *r;
+ unsigned int max_nr_accesses = 0;
+
+ damon_for_each_target(t, ctx) {
+ damon_for_each_region(r, t) {
+ __damon_pa_check_access(r);
+ max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
+ }
+ }
+
+ return max_nr_accesses;
+}
+
+static bool __damos_pa_filter_out(struct damos_filter *filter,
+ struct folio *folio)
+{
+ bool matched = false;
+ struct mem_cgroup *memcg;
+
+ switch (filter->type) {
+ case DAMOS_FILTER_TYPE_ANON:
+ matched = folio_test_anon(folio);
+ break;
+ case DAMOS_FILTER_TYPE_MEMCG:
+ rcu_read_lock();
+ memcg = folio_memcg_check(folio);
+ if (!memcg)
+ matched = false;
+ else
+ matched = filter->memcg_id == mem_cgroup_id(memcg);
+ rcu_read_unlock();
+ break;
+ default:
+ break;
+ }
+
+ return matched == filter->matching;
+}
+
+/*
+ * damos_pa_filter_out - Return true if the page should be filtered out.
+ */
+static bool damos_pa_filter_out(struct damos *scheme, struct folio *folio)
+{
+ struct damos_filter *filter;
+
+ damos_for_each_filter(filter, scheme) {
+ if (__damos_pa_filter_out(filter, folio))
+ return true;
+ }
+ return false;
+}
+
+static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s)
+{
+ unsigned long addr, applied;
+ LIST_HEAD(folio_list);
+
+ for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) {
+ struct folio *folio = damon_get_folio(PHYS_PFN(addr));
+
+ if (!folio)
+ continue;
+
+ if (damos_pa_filter_out(s, folio))
+ goto put_folio;
+
+ folio_clear_referenced(folio);
+ folio_test_clear_young(folio);
+ if (!folio_isolate_lru(folio))
+ goto put_folio;
+ if (folio_test_unevictable(folio))
+ folio_putback_lru(folio);
+ else
+ list_add(&folio->lru, &folio_list);
+put_folio:
+ folio_put(folio);
+ }
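+ /* Reclaim the isolated folios in a single batch */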
+ applied = reclaim_pages(&folio_list);
+ cond_resched();
+ return applied * PAGE_SIZE;
+}
+
+static inline unsigned long damon_pa_mark_accessed_or_deactivate(
+ struct damon_region *r, struct damos *s, bool mark_accessed)
+{
+ unsigned long addr, applied = 0;
+
+ for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) {
+ struct folio *folio = damon_get_folio(PHYS_PFN(addr));
+
+ if (!folio)
+ continue;
+
+ if (damos_pa_filter_out(s, folio))
+ goto put_folio;
+
+ if (mark_accessed)
+ folio_mark_accessed(folio);
+ else
+ folio_deactivate(folio);
+ applied += folio_nr_pages(folio);
+put_folio:
+ folio_put(folio);
+ }
+ return applied * PAGE_SIZE;
+}
+
+static unsigned long damon_pa_mark_accessed(struct damon_region *r,
+ struct damos *s)
+{
+ return damon_pa_mark_accessed_or_deactivate(r, s, true);
+}
+
+static unsigned long damon_pa_deactivate_pages(struct damon_region *r,
+ struct damos *s)
+{
+ return damon_pa_mark_accessed_or_deactivate(r, s, false);
+}
+
+static unsigned long damon_pa_apply_scheme(struct damon_ctx *ctx,
+ struct damon_target *t, struct damon_region *r,
+ struct damos *scheme)
+{
+ switch (scheme->action) {
+ case DAMOS_PAGEOUT:
+ return damon_pa_pageout(r, scheme);
+ case DAMOS_LRU_PRIO:
+ return damon_pa_mark_accessed(r, scheme);
+ case DAMOS_LRU_DEPRIO:
+ return damon_pa_deactivate_pages(r, scheme);
+ case DAMOS_STAT:
+ break;
+ default:
+ /* DAMOS actions that are not yet supported by 'paddr'. */
+ break;
+ }
+ return 0;
+}
+
+static int damon_pa_scheme_score(struct damon_ctx *context,
+ struct damon_target *t, struct damon_region *r,
+ struct damos *scheme)
+{
+ switch (scheme->action) {
+ case DAMOS_PAGEOUT:
+ return damon_cold_score(context, r, scheme);
+ case DAMOS_LRU_PRIO:
+ return damon_hot_score(context, r, scheme);
+ case DAMOS_LRU_DEPRIO:
+ return damon_cold_score(context, r, scheme);
+ default:
+ break;
+ }
+
+ return DAMOS_MAX_SCORE;
+}
+
+static int __init damon_pa_initcall(void)
+{
+ struct damon_operations ops = {
+ .id = DAMON_OPS_PADDR,
+ .init = NULL,
+ .update = NULL,
+ .prepare_access_checks = damon_pa_prepare_access_checks,
+ .check_accesses = damon_pa_check_accesses,
+ .reset_aggregated = NULL,
+ .target_valid = NULL,
+ .cleanup = NULL,
+ .apply_scheme = damon_pa_apply_scheme,
+ .get_scheme_score = damon_pa_scheme_score,
+ };
+
+ return damon_register_ops(&ops);
+};
+
+subsys_initcall(damon_pa_initcall);
diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
new file mode 100644
index 000000000..648d2a855
--- /dev/null
+++ b/mm/damon/reclaim.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON-based page reclamation
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#define pr_fmt(fmt) "damon-reclaim: " fmt
+
+#include <linux/damon.h>
+#include <linux/kstrtox.h>
+#include <linux/module.h>
+
+#include "modules-common.h"
+
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "damon_reclaim."
+
+/*
+ * Enable or disable DAMON_RECLAIM.
+ *
+ * You can enable DAMON_RECLAIM by setting the value of this parameter as
+ * ``Y``. Setting it as ``N`` disables DAMON_RECLAIM. Note that DAMON_RECLAIM
+ * could do no real monitoring and reclamation due to the watermarks-based
+ * activation condition. Refer to the descriptions of the watermarks
+ * parameters below.
+ */
+static bool enabled __read_mostly;
+
+/*
+ * Make DAMON_RECLAIM read the input parameters again, except ``enabled``.
+ *
+ * Input parameters that are updated while DAMON_RECLAIM is running are not
+ * applied by default. Once this parameter is set as ``Y``, DAMON_RECLAIM
+ * reads the values of the parameters except ``enabled`` again. Once the
+ * re-reading is done, this parameter is set as ``N``. If invalid parameters
+ * are found during the re-reading, DAMON_RECLAIM will be disabled.
+ */
+static bool commit_inputs __read_mostly;
+module_param(commit_inputs, bool, 0600);
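+
+/*
+ * A minimal usage sketch (assuming this file is built in, so the parameters
+ * appear under /sys/module/damon_reclaim/parameters/): update a parameter and
+ * then commit it while DAMON_RECLAIM is running, e.g.:
+ *
+ *   # echo 60000000 > /sys/module/damon_reclaim/parameters/min_age
+ *   # echo Y > /sys/module/damon_reclaim/parameters/commit_inputs
+ */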
+
+/*
+ * Time threshold for cold memory regions identification in microseconds.
+ *
+ * If a memory region is not accessed for this amount of time or longer,
+ * DAMON_RECLAIM identifies the region as cold and reclaims it. 120 seconds
+ * by default.
+ */
+static unsigned long min_age __read_mostly = 120000000;
+module_param(min_age, ulong, 0600);
+
+static struct damos_quota damon_reclaim_quota = {
+ /* use up to 10 ms time, reclaim up to 128 MiB per 1 sec by default */
+ .ms = 10,
+ .sz = 128 * 1024 * 1024,
+ .reset_interval = 1000,
+ /* Within the quota, page out older regions first. */
+ .weight_sz = 0,
+ .weight_nr_accesses = 0,
+ .weight_age = 1
+};
+DEFINE_DAMON_MODULES_DAMOS_QUOTAS(damon_reclaim_quota);
+
+static struct damos_watermarks damon_reclaim_wmarks = {
+ .metric = DAMOS_WMARK_FREE_MEM_RATE,
+ .interval = 5000000, /* 5 seconds */
+ .high = 500, /* 50 percent */
+ .mid = 400, /* 40 percent */
+ .low = 200, /* 20 percent */
+};
+DEFINE_DAMON_MODULES_WMARKS_PARAMS(damon_reclaim_wmarks);
+
+static struct damon_attrs damon_reclaim_mon_attrs = {
+ .sample_interval = 5000, /* 5 ms */
+ .aggr_interval = 100000, /* 100 ms */
+ .ops_update_interval = 0,
+ .min_nr_regions = 10,
+ .max_nr_regions = 1000,
+};
+DEFINE_DAMON_MODULES_MON_ATTRS_PARAMS(damon_reclaim_mon_attrs);
+
+/*
+ * Start of the target memory region in physical address.
+ *
+ * The start physical address of the memory region that DAMON_RECLAIM will do
+ * work against. By default, the biggest System RAM region is used.
+ */
+static unsigned long monitor_region_start __read_mostly;
+module_param(monitor_region_start, ulong, 0600);
+
+/*
+ * End of the target memory region in physical address.
+ *
+ * The end physical address of the memory region that DAMON_RECLAIM will do
+ * work against. By default, the biggest System RAM region is used.
+ */
+static unsigned long monitor_region_end __read_mostly;
+module_param(monitor_region_end, ulong, 0600);
+
+/*
+ * Skip anonymous pages reclamation.
+ *
+ * If this parameter is set as ``Y``, DAMON_RECLAIM does not reclaim anonymous
+ * pages. By default, ``N``.
+ */
+static bool skip_anon __read_mostly;
+module_param(skip_anon, bool, 0600);
+
+/*
+ * PID of the DAMON thread
+ *
+ * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread.
+ * Else, -1.
+ */
+static int kdamond_pid __read_mostly = -1;
+module_param(kdamond_pid, int, 0400);
+
+static struct damos_stat damon_reclaim_stat;
+DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_reclaim_stat,
+ reclaim_tried_regions, reclaimed_regions, quota_exceeds);
+
+static struct damon_ctx *ctx;
+static struct damon_target *target;
+
+static struct damos *damon_reclaim_new_scheme(void)
+{
+ struct damos_access_pattern pattern = {
+ /* Find regions having PAGE_SIZE or larger size */
+ .min_sz_region = PAGE_SIZE,
+ .max_sz_region = ULONG_MAX,
+ /* and not accessed at all */
+ .min_nr_accesses = 0,
+ .max_nr_accesses = 0,
+ /* for min_age or more micro-seconds */
+ .min_age_region = min_age /
+ damon_reclaim_mon_attrs.aggr_interval,
+ .max_age_region = UINT_MAX,
+ };
+
+ return damon_new_scheme(
+ &pattern,
+ /* page out those, as soon as found */
+ DAMOS_PAGEOUT,
+ /* under the quota. */
+ &damon_reclaim_quota,
+ /* (De)activate this according to the watermarks. */
+ &damon_reclaim_wmarks);
+}
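+
+/*
+ * A worked example of the conversion above, assuming the default values: with
+ * min_age of 120000000 us and an aggregation interval of 100000 us,
+ * min_age_region becomes 120000000 / 100000 = 1200 aggregation intervals.
+ */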
+
+static int damon_reclaim_apply_parameters(void)
+{
+ struct damos *scheme;
+ struct damos_filter *filter;
+ int err = 0;
+
+ err = damon_set_attrs(ctx, &damon_reclaim_mon_attrs);
+ if (err)
+ return err;
+
+ /* Will be freed by next 'damon_set_schemes()' below */
+ scheme = damon_reclaim_new_scheme();
+ if (!scheme)
+ return -ENOMEM;
+ if (skip_anon) {
+ filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true);
+ if (!filter) {
+ /* The scheme is not yet given to the context, so destroy it here. */
+ damon_destroy_scheme(scheme);
+ return -ENOMEM;
+ }
+ damos_add_filter(scheme, filter);
+ }
+ damon_set_schemes(ctx, &scheme, 1);
+
+ return damon_set_region_biggest_system_ram_default(target,
+ &monitor_region_start,
+ &monitor_region_end);
+}
+
+static int damon_reclaim_turn(bool on)
+{
+ int err;
+
+ if (!on) {
+ err = damon_stop(&ctx, 1);
+ if (!err)
+ kdamond_pid = -1;
+ return err;
+ }
+
+ err = damon_reclaim_apply_parameters();
+ if (err)
+ return err;
+
+ err = damon_start(&ctx, 1, true);
+ if (err)
+ return err;
+ kdamond_pid = ctx->kdamond->pid;
+ return 0;
+}
+
+static int damon_reclaim_enabled_store(const char *val,
+ const struct kernel_param *kp)
+{
+ bool is_enabled = enabled;
+ bool enable;
+ int err;
+
+ err = kstrtobool(val, &enable);
+ if (err)
+ return err;
+
+ if (is_enabled == enable)
+ return 0;
+
+ /* Called before the init function; it will handle this case. */
+ if (!ctx)
+ goto set_param_out;
+
+ err = damon_reclaim_turn(enable);
+ if (err)
+ return err;
+
+set_param_out:
+ enabled = enable;
+ return err;
+}
+
+static const struct kernel_param_ops enabled_param_ops = {
+ .set = damon_reclaim_enabled_store,
+ .get = param_get_bool,
+};
+
+module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
+MODULE_PARM_DESC(enabled,
+ "Enable or disable DAMON_RECLAIM (default: disabled)");
+
+static int damon_reclaim_handle_commit_inputs(void)
+{
+ int err;
+
+ if (!commit_inputs)
+ return 0;
+
+ err = damon_reclaim_apply_parameters();
+ commit_inputs = false;
+ return err;
+}
+
+static int damon_reclaim_after_aggregation(struct damon_ctx *c)
+{
+ struct damos *s;
+
+ /* update the stats parameter */
+ damon_for_each_scheme(s, c)
+ damon_reclaim_stat = s->stat;
+
+ return damon_reclaim_handle_commit_inputs();
+}
+
+static int damon_reclaim_after_wmarks_check(struct damon_ctx *c)
+{
+ return damon_reclaim_handle_commit_inputs();
+}
+
+static int __init damon_reclaim_init(void)
+{
+ int err = damon_modules_new_paddr_ctx_target(&ctx, &target);
+
+ if (err)
+ return err;
+
+ ctx->callback.after_wmarks_check = damon_reclaim_after_wmarks_check;
+ ctx->callback.after_aggregation = damon_reclaim_after_aggregation;
+
+ /* 'enabled' may have been set before this function, e.g. via the command line */
+ if (enabled)
+ err = damon_reclaim_turn(true);
+
+ return err;
+}
+
+module_init(damon_reclaim_init);
diff --git a/mm/damon/sysfs-common.c b/mm/damon/sysfs-common.c
new file mode 100644
index 000000000..70edf45c2
--- /dev/null
+++ b/mm/damon/sysfs-common.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common Primitives for DAMON Sysfs Interface
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/slab.h>
+
+#include "sysfs-common.h"
+
+DEFINE_MUTEX(damon_sysfs_lock);
+
+/*
+ * unsigned long range directory
+ */
+
+struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc(
+ unsigned long min,
+ unsigned long max)
+{
+ struct damon_sysfs_ul_range *range = kmalloc(sizeof(*range),
+ GFP_KERNEL);
+
+ if (!range)
+ return NULL;
+ range->kobj = (struct kobject){};
+ range->min = min;
+ range->max = max;
+
+ return range;
+}
+
+static ssize_t min_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_ul_range *range = container_of(kobj,
+ struct damon_sysfs_ul_range, kobj);
+
+ return sysfs_emit(buf, "%lu\n", range->min);
+}
+
+static ssize_t min_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_ul_range *range = container_of(kobj,
+ struct damon_sysfs_ul_range, kobj);
+ unsigned long min;
+ int err;
+
+ err = kstrtoul(buf, 0, &min);
+ if (err)
+ return err;
+
+ range->min = min;
+ return count;
+}
+
+static ssize_t max_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_ul_range *range = container_of(kobj,
+ struct damon_sysfs_ul_range, kobj);
+
+ return sysfs_emit(buf, "%lu\n", range->max);
+}
+
+static ssize_t max_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_ul_range *range = container_of(kobj,
+ struct damon_sysfs_ul_range, kobj);
+ unsigned long max;
+ int err;
+
+ err = kstrtoul(buf, 0, &max);
+ if (err)
+ return err;
+
+ range->max = max;
+ return count;
+}
+
+void damon_sysfs_ul_range_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_ul_range, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_ul_range_min_attr =
+ __ATTR_RW_MODE(min, 0600);
+
+static struct kobj_attribute damon_sysfs_ul_range_max_attr =
+ __ATTR_RW_MODE(max, 0600);
+
+static struct attribute *damon_sysfs_ul_range_attrs[] = {
+ &damon_sysfs_ul_range_min_attr.attr,
+ &damon_sysfs_ul_range_max_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_ul_range);
+
+const struct kobj_type damon_sysfs_ul_range_ktype = {
+ .release = damon_sysfs_ul_range_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_ul_range_groups,
+};
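+
+/*
+ * A usage sketch (assuming the usual DAMON sysfs layout): this directory type
+ * backs range directories such as schemes/<N>/access_pattern/sz/, so user
+ * space could set a range with, e.g.:
+ *
+ *   # echo 4096 > .../schemes/0/access_pattern/sz/min
+ *   # echo 1073741824 > .../schemes/0/access_pattern/sz/max
+ */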
+
diff --git a/mm/damon/sysfs-common.h b/mm/damon/sysfs-common.h
new file mode 100644
index 000000000..fd482a063
--- /dev/null
+++ b/mm/damon/sysfs-common.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common Primitives for DAMON Sysfs Interface
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/damon.h>
+#include <linux/kobject.h>
+
+extern struct mutex damon_sysfs_lock;
+
+struct damon_sysfs_ul_range {
+ struct kobject kobj;
+ unsigned long min;
+ unsigned long max;
+};
+
+struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc(
+ unsigned long min,
+ unsigned long max);
+void damon_sysfs_ul_range_release(struct kobject *kobj);
+
+extern const struct kobj_type damon_sysfs_ul_range_ktype;
+
+/*
+ * schemes directory
+ */
+
+struct damon_sysfs_schemes {
+ struct kobject kobj;
+ struct damon_sysfs_scheme **schemes_arr;
+ int nr;
+};
+
+struct damon_sysfs_schemes *damon_sysfs_schemes_alloc(void);
+void damon_sysfs_schemes_rm_dirs(struct damon_sysfs_schemes *schemes);
+
+extern const struct kobj_type damon_sysfs_schemes_ktype;
+
+int damon_sysfs_set_schemes(struct damon_ctx *ctx,
+ struct damon_sysfs_schemes *sysfs_schemes);
+
+void damon_sysfs_schemes_update_stats(
+ struct damon_sysfs_schemes *sysfs_schemes,
+ struct damon_ctx *ctx);
+
+int damon_sysfs_schemes_update_regions_start(
+ struct damon_sysfs_schemes *sysfs_schemes,
+ struct damon_ctx *ctx, bool total_bytes_only);
+
+int damon_sysfs_schemes_update_regions_stop(struct damon_ctx *ctx);
+
+int damon_sysfs_schemes_clear_regions(
+ struct damon_sysfs_schemes *sysfs_schemes,
+ struct damon_ctx *ctx);
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
new file mode 100644
index 000000000..36dcd881a
--- /dev/null
+++ b/mm/damon/sysfs-schemes.c
@@ -0,0 +1,1817 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON sysfs Interface
+ *
+ * Copyright (c) 2022 SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/slab.h>
+
+#include "sysfs-common.h"
+
+/*
+ * scheme region directory
+ */
+
+struct damon_sysfs_scheme_region {
+ struct kobject kobj;
+ struct damon_addr_range ar;
+ unsigned int nr_accesses;
+ unsigned int age;
+ struct list_head list;
+};
+
+static struct damon_sysfs_scheme_region *damon_sysfs_scheme_region_alloc(
+ struct damon_region *region)
+{
+ struct damon_sysfs_scheme_region *sysfs_region = kmalloc(
+ sizeof(*sysfs_region), GFP_KERNEL);
+
+ if (!sysfs_region)
+ return NULL;
+ sysfs_region->kobj = (struct kobject){};
+ sysfs_region->ar = region->ar;
+ sysfs_region->nr_accesses = region->nr_accesses;
+ sysfs_region->age = region->age;
+ INIT_LIST_HEAD(&sysfs_region->list);
+ return sysfs_region;
+}
+
+static ssize_t start_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_scheme_region *region = container_of(kobj,
+ struct damon_sysfs_scheme_region, kobj);
+
+ return sysfs_emit(buf, "%lu\n", region->ar.start);
+}
+
+static ssize_t end_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_scheme_region *region = container_of(kobj,
+ struct damon_sysfs_scheme_region, kobj);
+
+ return sysfs_emit(buf, "%lu\n", region->ar.end);
+}
+
+static ssize_t nr_accesses_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_region *region = container_of(kobj,
+ struct damon_sysfs_scheme_region, kobj);
+
+ return sysfs_emit(buf, "%u\n", region->nr_accesses);
+}
+
+static ssize_t age_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_scheme_region *region = container_of(kobj,
+ struct damon_sysfs_scheme_region, kobj);
+
+ return sysfs_emit(buf, "%u\n", region->age);
+}
+
+static void damon_sysfs_scheme_region_release(struct kobject *kobj)
+{
+ struct damon_sysfs_scheme_region *region = container_of(kobj,
+ struct damon_sysfs_scheme_region, kobj);
+
+ list_del(&region->list);
+ kfree(region);
+}
+
+static struct kobj_attribute damon_sysfs_scheme_region_start_attr =
+ __ATTR_RO_MODE(start, 0400);
+
+static struct kobj_attribute damon_sysfs_scheme_region_end_attr =
+ __ATTR_RO_MODE(end, 0400);
+
+static struct kobj_attribute damon_sysfs_scheme_region_nr_accesses_attr =
+ __ATTR_RO_MODE(nr_accesses, 0400);
+
+static struct kobj_attribute damon_sysfs_scheme_region_age_attr =
+ __ATTR_RO_MODE(age, 0400);
+
+static struct attribute *damon_sysfs_scheme_region_attrs[] = {
+ &damon_sysfs_scheme_region_start_attr.attr,
+ &damon_sysfs_scheme_region_end_attr.attr,
+ &damon_sysfs_scheme_region_nr_accesses_attr.attr,
+ &damon_sysfs_scheme_region_age_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_scheme_region);
+
+static const struct kobj_type damon_sysfs_scheme_region_ktype = {
+ .release = damon_sysfs_scheme_region_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_scheme_region_groups,
+};
+
+/*
+ * scheme regions directory
+ */
+
+struct damon_sysfs_scheme_regions {
+ struct kobject kobj;
+ struct list_head regions_list;
+ int nr_regions;
+ unsigned long total_bytes;
+};
+
+static struct damon_sysfs_scheme_regions *
+damon_sysfs_scheme_regions_alloc(void)
+{
+ struct damon_sysfs_scheme_regions *regions = kmalloc(sizeof(*regions),
+ GFP_KERNEL);
+
+ if (!regions)
+ return NULL;
+
+ regions->kobj = (struct kobject){};
+ INIT_LIST_HEAD(&regions->regions_list);
+ regions->nr_regions = 0;
+ regions->total_bytes = 0;
+ return regions;
+}
+
+static ssize_t total_bytes_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_regions *regions = container_of(kobj,
+ struct damon_sysfs_scheme_regions, kobj);
+
+ return sysfs_emit(buf, "%lu\n", regions->total_bytes);
+}
+
+static void damon_sysfs_scheme_regions_rm_dirs(
+ struct damon_sysfs_scheme_regions *regions)
+{
+ struct damon_sysfs_scheme_region *r, *next;
+
+ list_for_each_entry_safe(r, next, &regions->regions_list, list) {
+ /* release function deletes it from the list */
+ kobject_put(&r->kobj);
+ regions->nr_regions--;
+ }
+}
+
+static void damon_sysfs_scheme_regions_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_scheme_regions, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_scheme_regions_total_bytes_attr =
+ __ATTR_RO_MODE(total_bytes, 0400);
+
+static struct attribute *damon_sysfs_scheme_regions_attrs[] = {
+ &damon_sysfs_scheme_regions_total_bytes_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_scheme_regions);
+
+static const struct kobj_type damon_sysfs_scheme_regions_ktype = {
+ .release = damon_sysfs_scheme_regions_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_scheme_regions_groups,
+};
+
+/*
+ * schemes/stats directory
+ */
+
+struct damon_sysfs_stats {
+ struct kobject kobj;
+ unsigned long nr_tried;
+ unsigned long sz_tried;
+ unsigned long nr_applied;
+ unsigned long sz_applied;
+ unsigned long qt_exceeds;
+};
+
+static struct damon_sysfs_stats *damon_sysfs_stats_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_stats), GFP_KERNEL);
+}
+
+static ssize_t nr_tried_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_stats *stats = container_of(kobj,
+ struct damon_sysfs_stats, kobj);
+
+ return sysfs_emit(buf, "%lu\n", stats->nr_tried);
+}
+
+static ssize_t sz_tried_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_stats *stats = container_of(kobj,
+ struct damon_sysfs_stats, kobj);
+
+ return sysfs_emit(buf, "%lu\n", stats->sz_tried);
+}
+
+static ssize_t nr_applied_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_stats *stats = container_of(kobj,
+ struct damon_sysfs_stats, kobj);
+
+ return sysfs_emit(buf, "%lu\n", stats->nr_applied);
+}
+
+static ssize_t sz_applied_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_stats *stats = container_of(kobj,
+ struct damon_sysfs_stats, kobj);
+
+ return sysfs_emit(buf, "%lu\n", stats->sz_applied);
+}
+
+static ssize_t qt_exceeds_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_stats *stats = container_of(kobj,
+ struct damon_sysfs_stats, kobj);
+
+ return sysfs_emit(buf, "%lu\n", stats->qt_exceeds);
+}
+
+static void damon_sysfs_stats_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_stats, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_stats_nr_tried_attr =
+ __ATTR_RO_MODE(nr_tried, 0400);
+
+static struct kobj_attribute damon_sysfs_stats_sz_tried_attr =
+ __ATTR_RO_MODE(sz_tried, 0400);
+
+static struct kobj_attribute damon_sysfs_stats_nr_applied_attr =
+ __ATTR_RO_MODE(nr_applied, 0400);
+
+static struct kobj_attribute damon_sysfs_stats_sz_applied_attr =
+ __ATTR_RO_MODE(sz_applied, 0400);
+
+static struct kobj_attribute damon_sysfs_stats_qt_exceeds_attr =
+ __ATTR_RO_MODE(qt_exceeds, 0400);
+
+static struct attribute *damon_sysfs_stats_attrs[] = {
+ &damon_sysfs_stats_nr_tried_attr.attr,
+ &damon_sysfs_stats_sz_tried_attr.attr,
+ &damon_sysfs_stats_nr_applied_attr.attr,
+ &damon_sysfs_stats_sz_applied_attr.attr,
+ &damon_sysfs_stats_qt_exceeds_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_stats);
+
+static const struct kobj_type damon_sysfs_stats_ktype = {
+ .release = damon_sysfs_stats_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_stats_groups,
+};
+
+/*
+ * filter directory
+ */
+
+struct damon_sysfs_scheme_filter {
+ struct kobject kobj;
+ enum damos_filter_type type;
+ bool matching;
+ char *memcg_path;
+ struct damon_addr_range addr_range;
+ int target_idx;
+};
+
+static struct damon_sysfs_scheme_filter *damon_sysfs_scheme_filter_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_scheme_filter), GFP_KERNEL);
+}
+
+/* Should match with enum damos_filter_type */
+static const char * const damon_sysfs_scheme_filter_type_strs[] = {
+ "anon",
+ "memcg",
+ "addr",
+ "target",
+};
+
+static ssize_t type_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+
+ return sysfs_emit(buf, "%s\n",
+ damon_sysfs_scheme_filter_type_strs[filter->type]);
+}
+
+static ssize_t type_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+ enum damos_filter_type type;
+ ssize_t ret = -EINVAL;
+
+ for (type = 0; type < NR_DAMOS_FILTER_TYPES; type++) {
+ if (sysfs_streq(buf, damon_sysfs_scheme_filter_type_strs[
+ type])) {
+ filter->type = type;
+ ret = count;
+ break;
+ }
+ }
+ return ret;
+}
+
+static ssize_t matching_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+
+ return sysfs_emit(buf, "%c\n", filter->matching ? 'Y' : 'N');
+}
+
+static ssize_t matching_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+ bool matching;
+ int err = kstrtobool(buf, &matching);
+
+ if (err)
+ return err;
+
+ filter->matching = matching;
+ return count;
+}
+
+static ssize_t memcg_path_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+
+ return sysfs_emit(buf, "%s\n",
+ filter->memcg_path ? filter->memcg_path : "");
+}
+
+static ssize_t memcg_path_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+ char *path = kmalloc(sizeof(*path) * (count + 1), GFP_KERNEL);
+
+ if (!path)
+ return -ENOMEM;
+
+ strscpy(path, buf, count + 1);
+ /* Free any previously written path so repeated writes don't leak it. */
+ kfree(filter->memcg_path);
+ filter->memcg_path = path;
+ return count;
+}
+
+static ssize_t addr_start_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+
+ return sysfs_emit(buf, "%lu\n", filter->addr_range.start);
+}
+
+static ssize_t addr_start_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+ int err = kstrtoul(buf, 0, &filter->addr_range.start);
+
+ return err ? err : count;
+}
+
+static ssize_t addr_end_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+
+ return sysfs_emit(buf, "%lu\n", filter->addr_range.end);
+}
+
+static ssize_t addr_end_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+ int err = kstrtoul(buf, 0, &filter->addr_range.end);
+
+ return err ? err : count;
+}
+
+static ssize_t damon_target_idx_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+
+ return sysfs_emit(buf, "%d\n", filter->target_idx);
+}
+
+static ssize_t damon_target_idx_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+ int err = kstrtoint(buf, 0, &filter->target_idx);
+
+ return err ? err : count;
+}
+
+static void damon_sysfs_scheme_filter_release(struct kobject *kobj)
+{
+ struct damon_sysfs_scheme_filter *filter = container_of(kobj,
+ struct damon_sysfs_scheme_filter, kobj);
+
+ kfree(filter->memcg_path);
+ kfree(filter);
+}
+
+static struct kobj_attribute damon_sysfs_scheme_filter_type_attr =
+ __ATTR_RW_MODE(type, 0600);
+
+static struct kobj_attribute damon_sysfs_scheme_filter_matching_attr =
+ __ATTR_RW_MODE(matching, 0600);
+
+static struct kobj_attribute damon_sysfs_scheme_filter_memcg_path_attr =
+ __ATTR_RW_MODE(memcg_path, 0600);
+
+static struct kobj_attribute damon_sysfs_scheme_filter_addr_start_attr =
+ __ATTR_RW_MODE(addr_start, 0600);
+
+static struct kobj_attribute damon_sysfs_scheme_filter_addr_end_attr =
+ __ATTR_RW_MODE(addr_end, 0600);
+
+static struct kobj_attribute damon_sysfs_scheme_filter_damon_target_idx_attr =
+ __ATTR_RW_MODE(damon_target_idx, 0600);
+
+static struct attribute *damon_sysfs_scheme_filter_attrs[] = {
+ &damon_sysfs_scheme_filter_type_attr.attr,
+ &damon_sysfs_scheme_filter_matching_attr.attr,
+ &damon_sysfs_scheme_filter_memcg_path_attr.attr,
+ &damon_sysfs_scheme_filter_addr_start_attr.attr,
+ &damon_sysfs_scheme_filter_addr_end_attr.attr,
+ &damon_sysfs_scheme_filter_damon_target_idx_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_scheme_filter);
+
+static const struct kobj_type damon_sysfs_scheme_filter_ktype = {
+ .release = damon_sysfs_scheme_filter_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_scheme_filter_groups,
+};
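+
+/*
+ * A usage sketch (assuming the usual DAMON sysfs layout): a filter that makes
+ * a scheme skip anonymous pages could be set up from user space with, e.g.:
+ *
+ *   # echo 1 > .../schemes/0/filters/nr_filters
+ *   # echo anon > .../schemes/0/filters/0/type
+ *   # echo Y > .../schemes/0/filters/0/matching
+ */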
+
+/*
+ * filters directory
+ */
+
+struct damon_sysfs_scheme_filters {
+ struct kobject kobj;
+ struct damon_sysfs_scheme_filter **filters_arr;
+ int nr;
+};
+
+static struct damon_sysfs_scheme_filters *
+damon_sysfs_scheme_filters_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_scheme_filters), GFP_KERNEL);
+}
+
+static void damon_sysfs_scheme_filters_rm_dirs(
+ struct damon_sysfs_scheme_filters *filters)
+{
+ struct damon_sysfs_scheme_filter **filters_arr = filters->filters_arr;
+ int i;
+
+ for (i = 0; i < filters->nr; i++)
+ kobject_put(&filters_arr[i]->kobj);
+ filters->nr = 0;
+ kfree(filters_arr);
+ filters->filters_arr = NULL;
+}
+
+static int damon_sysfs_scheme_filters_add_dirs(
+ struct damon_sysfs_scheme_filters *filters, int nr_filters)
+{
+ struct damon_sysfs_scheme_filter **filters_arr, *filter;
+ int err, i;
+
+ damon_sysfs_scheme_filters_rm_dirs(filters);
+ if (!nr_filters)
+ return 0;
+
+ filters_arr = kmalloc_array(nr_filters, sizeof(*filters_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!filters_arr)
+ return -ENOMEM;
+ filters->filters_arr = filters_arr;
+
+ for (i = 0; i < nr_filters; i++) {
+ filter = damon_sysfs_scheme_filter_alloc();
+ if (!filter) {
+ damon_sysfs_scheme_filters_rm_dirs(filters);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&filter->kobj,
+ &damon_sysfs_scheme_filter_ktype,
+ &filters->kobj, "%d", i);
+ if (err) {
+ kobject_put(&filter->kobj);
+ damon_sysfs_scheme_filters_rm_dirs(filters);
+ return err;
+ }
+
+ filters_arr[i] = filter;
+ filters->nr++;
+ }
+ return 0;
+}
+
+static ssize_t nr_filters_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme_filters *filters = container_of(kobj,
+ struct damon_sysfs_scheme_filters, kobj);
+
+ return sysfs_emit(buf, "%d\n", filters->nr);
+}
+
+static ssize_t nr_filters_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme_filters *filters;
+ int nr, err = kstrtoint(buf, 0, &nr);
+
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ filters = container_of(kobj, struct damon_sysfs_scheme_filters, kobj);
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_scheme_filters_add_dirs(filters, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static void damon_sysfs_scheme_filters_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_scheme_filters, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_scheme_filters_nr_attr =
+ __ATTR_RW_MODE(nr_filters, 0600);
+
+static struct attribute *damon_sysfs_scheme_filters_attrs[] = {
+ &damon_sysfs_scheme_filters_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_scheme_filters);
+
+static const struct kobj_type damon_sysfs_scheme_filters_ktype = {
+ .release = damon_sysfs_scheme_filters_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_scheme_filters_groups,
+};
+
+/*
+ * watermarks directory
+ */
+
+struct damon_sysfs_watermarks {
+ struct kobject kobj;
+ enum damos_wmark_metric metric;
+ unsigned long interval_us;
+ unsigned long high;
+ unsigned long mid;
+ unsigned long low;
+};
+
+static struct damon_sysfs_watermarks *damon_sysfs_watermarks_alloc(
+ enum damos_wmark_metric metric, unsigned long interval_us,
+ unsigned long high, unsigned long mid, unsigned long low)
+{
+ struct damon_sysfs_watermarks *watermarks = kmalloc(
+ sizeof(*watermarks), GFP_KERNEL);
+
+ if (!watermarks)
+ return NULL;
+ watermarks->kobj = (struct kobject){};
+ watermarks->metric = metric;
+ watermarks->interval_us = interval_us;
+ watermarks->high = high;
+ watermarks->mid = mid;
+ watermarks->low = low;
+ return watermarks;
+}
+
+/* Should match with enum damos_wmark_metric */
+static const char * const damon_sysfs_wmark_metric_strs[] = {
+ "none",
+ "free_mem_rate",
+};
+
+static ssize_t metric_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+
+ return sysfs_emit(buf, "%s\n",
+ damon_sysfs_wmark_metric_strs[watermarks->metric]);
+}
+
+static ssize_t metric_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+ enum damos_wmark_metric metric;
+
+ for (metric = 0; metric < NR_DAMOS_WMARK_METRICS; metric++) {
+ if (sysfs_streq(buf, damon_sysfs_wmark_metric_strs[metric])) {
+ watermarks->metric = metric;
+ return count;
+ }
+ }
+ return -EINVAL;
+}
+
+static ssize_t interval_us_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+
+ return sysfs_emit(buf, "%lu\n", watermarks->interval_us);
+}
+
+static ssize_t interval_us_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+ int err = kstrtoul(buf, 0, &watermarks->interval_us);
+
+ return err ? err : count;
+}
+
+static ssize_t high_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+
+ return sysfs_emit(buf, "%lu\n", watermarks->high);
+}
+
+static ssize_t high_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+ int err = kstrtoul(buf, 0, &watermarks->high);
+
+ return err ? err : count;
+}
+
+static ssize_t mid_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+
+ return sysfs_emit(buf, "%lu\n", watermarks->mid);
+}
+
+static ssize_t mid_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+ int err = kstrtoul(buf, 0, &watermarks->mid);
+
+ return err ? err : count;
+}
+
+static ssize_t low_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+
+ return sysfs_emit(buf, "%lu\n", watermarks->low);
+}
+
+static ssize_t low_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+ struct damon_sysfs_watermarks, kobj);
+ int err = kstrtoul(buf, 0, &watermarks->low);
+
+ return err ? err : count;
+}
+
+static void damon_sysfs_watermarks_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_watermarks, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_watermarks_metric_attr =
+ __ATTR_RW_MODE(metric, 0600);
+
+static struct kobj_attribute damon_sysfs_watermarks_interval_us_attr =
+ __ATTR_RW_MODE(interval_us, 0600);
+
+static struct kobj_attribute damon_sysfs_watermarks_high_attr =
+ __ATTR_RW_MODE(high, 0600);
+
+static struct kobj_attribute damon_sysfs_watermarks_mid_attr =
+ __ATTR_RW_MODE(mid, 0600);
+
+static struct kobj_attribute damon_sysfs_watermarks_low_attr =
+ __ATTR_RW_MODE(low, 0600);
+
+static struct attribute *damon_sysfs_watermarks_attrs[] = {
+ &damon_sysfs_watermarks_metric_attr.attr,
+ &damon_sysfs_watermarks_interval_us_attr.attr,
+ &damon_sysfs_watermarks_high_attr.attr,
+ &damon_sysfs_watermarks_mid_attr.attr,
+ &damon_sysfs_watermarks_low_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_watermarks);
+
+static const struct kobj_type damon_sysfs_watermarks_ktype = {
+ .release = damon_sysfs_watermarks_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_watermarks_groups,
+};
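+
+/*
+ * A usage sketch (assuming the usual DAMON sysfs layout): the watermark
+ * values are in per-thousand of the chosen metric. Roughly, the setup below
+ * activates the scheme when the free memory rate drops under 40% (mid), and
+ * deactivates it when the rate goes over 50% (high) or under 20% (low):
+ *
+ *   # echo free_mem_rate > .../schemes/0/watermarks/metric
+ *   # echo 5000000 > .../schemes/0/watermarks/interval_us
+ *   # echo 500 > .../schemes/0/watermarks/high
+ *   # echo 400 > .../schemes/0/watermarks/mid
+ *   # echo 200 > .../schemes/0/watermarks/low
+ */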
+
+/*
+ * scheme/weights directory
+ */
+
+struct damon_sysfs_weights {
+ struct kobject kobj;
+ unsigned int sz;
+ unsigned int nr_accesses;
+ unsigned int age;
+};
+
+static struct damon_sysfs_weights *damon_sysfs_weights_alloc(unsigned int sz,
+ unsigned int nr_accesses, unsigned int age)
+{
+ struct damon_sysfs_weights *weights = kmalloc(sizeof(*weights),
+ GFP_KERNEL);
+
+ if (!weights)
+ return NULL;
+ weights->kobj = (struct kobject){};
+ weights->sz = sz;
+ weights->nr_accesses = nr_accesses;
+ weights->age = age;
+ return weights;
+}
+
+static ssize_t sz_permil_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+
+ return sysfs_emit(buf, "%u\n", weights->sz);
+}
+
+static ssize_t sz_permil_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+ int err = kstrtouint(buf, 0, &weights->sz);
+
+ return err ? err : count;
+}
+
+static ssize_t nr_accesses_permil_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+
+ return sysfs_emit(buf, "%u\n", weights->nr_accesses);
+}
+
+static ssize_t nr_accesses_permil_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+ int err = kstrtouint(buf, 0, &weights->nr_accesses);
+
+ return err ? err : count;
+}
+
+static ssize_t age_permil_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+
+ return sysfs_emit(buf, "%u\n", weights->age);
+}
+
+static ssize_t age_permil_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_weights *weights = container_of(kobj,
+ struct damon_sysfs_weights, kobj);
+ int err = kstrtouint(buf, 0, &weights->age);
+
+ return err ? err : count;
+}
+
+static void damon_sysfs_weights_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_weights, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_weights_sz_attr =
+ __ATTR_RW_MODE(sz_permil, 0600);
+
+static struct kobj_attribute damon_sysfs_weights_nr_accesses_attr =
+ __ATTR_RW_MODE(nr_accesses_permil, 0600);
+
+static struct kobj_attribute damon_sysfs_weights_age_attr =
+ __ATTR_RW_MODE(age_permil, 0600);
+
+static struct attribute *damon_sysfs_weights_attrs[] = {
+ &damon_sysfs_weights_sz_attr.attr,
+ &damon_sysfs_weights_nr_accesses_attr.attr,
+ &damon_sysfs_weights_age_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_weights);
+
+static const struct kobj_type damon_sysfs_weights_ktype = {
+ .release = damon_sysfs_weights_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_weights_groups,
+};
+
+/*
+ * quotas directory
+ */
+
+struct damon_sysfs_quotas {
+ struct kobject kobj;
+ struct damon_sysfs_weights *weights;
+ unsigned long ms;
+ unsigned long sz;
+ unsigned long reset_interval_ms;
+};
+
+static struct damon_sysfs_quotas *damon_sysfs_quotas_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_quotas), GFP_KERNEL);
+}
+
+static int damon_sysfs_quotas_add_dirs(struct damon_sysfs_quotas *quotas)
+{
+ struct damon_sysfs_weights *weights;
+ int err;
+
+ weights = damon_sysfs_weights_alloc(0, 0, 0);
+ if (!weights)
+ return -ENOMEM;
+
+ err = kobject_init_and_add(&weights->kobj, &damon_sysfs_weights_ktype,
+ &quotas->kobj, "weights");
+ if (err)
+ kobject_put(&weights->kobj);
+ else
+ quotas->weights = weights;
+ return err;
+}
+
+static void damon_sysfs_quotas_rm_dirs(struct damon_sysfs_quotas *quotas)
+{
+ kobject_put(&quotas->weights->kobj);
+}
+
+static ssize_t ms_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+
+ return sysfs_emit(buf, "%lu\n", quotas->ms);
+}
+
+static ssize_t ms_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+ int err = kstrtoul(buf, 0, &quotas->ms);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t bytes_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+
+ return sysfs_emit(buf, "%lu\n", quotas->sz);
+}
+
+static ssize_t bytes_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+ int err = kstrtoul(buf, 0, &quotas->sz);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t reset_interval_ms_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+
+ return sysfs_emit(buf, "%lu\n", quotas->reset_interval_ms);
+}
+
+static ssize_t reset_interval_ms_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_quotas *quotas = container_of(kobj,
+ struct damon_sysfs_quotas, kobj);
+ int err = kstrtoul(buf, 0, &quotas->reset_interval_ms);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static void damon_sysfs_quotas_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_quotas, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_quotas_ms_attr =
+ __ATTR_RW_MODE(ms, 0600);
+
+static struct kobj_attribute damon_sysfs_quotas_sz_attr =
+ __ATTR_RW_MODE(bytes, 0600);
+
+static struct kobj_attribute damon_sysfs_quotas_reset_interval_ms_attr =
+ __ATTR_RW_MODE(reset_interval_ms, 0600);
+
+static struct attribute *damon_sysfs_quotas_attrs[] = {
+ &damon_sysfs_quotas_ms_attr.attr,
+ &damon_sysfs_quotas_sz_attr.attr,
+ &damon_sysfs_quotas_reset_interval_ms_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_quotas);
+
+static const struct kobj_type damon_sysfs_quotas_ktype = {
+ .release = damon_sysfs_quotas_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_quotas_groups,
+};
+
+/*
+ * access_pattern directory
+ */
+
+struct damon_sysfs_access_pattern {
+ struct kobject kobj;
+ struct damon_sysfs_ul_range *sz;
+ struct damon_sysfs_ul_range *nr_accesses;
+ struct damon_sysfs_ul_range *age;
+};
+
+static
+struct damon_sysfs_access_pattern *damon_sysfs_access_pattern_alloc(void)
+{
+ struct damon_sysfs_access_pattern *access_pattern =
+ kmalloc(sizeof(*access_pattern), GFP_KERNEL);
+
+ if (!access_pattern)
+ return NULL;
+ access_pattern->kobj = (struct kobject){};
+ return access_pattern;
+}
+
+static int damon_sysfs_access_pattern_add_range_dir(
+ struct damon_sysfs_access_pattern *access_pattern,
+ struct damon_sysfs_ul_range **range_dir_ptr,
+ char *name)
+{
+ struct damon_sysfs_ul_range *range = damon_sysfs_ul_range_alloc(0, 0);
+ int err;
+
+ if (!range)
+ return -ENOMEM;
+ err = kobject_init_and_add(&range->kobj, &damon_sysfs_ul_range_ktype,
+ &access_pattern->kobj, name);
+ if (err)
+ kobject_put(&range->kobj);
+ else
+ *range_dir_ptr = range;
+ return err;
+}
+
+static int damon_sysfs_access_pattern_add_dirs(
+ struct damon_sysfs_access_pattern *access_pattern)
+{
+ int err;
+
+ err = damon_sysfs_access_pattern_add_range_dir(access_pattern,
+ &access_pattern->sz, "sz");
+ if (err)
+ goto put_sz_out;
+
+ err = damon_sysfs_access_pattern_add_range_dir(access_pattern,
+ &access_pattern->nr_accesses, "nr_accesses");
+ if (err)
+ goto put_nr_accesses_sz_out;
+
+ err = damon_sysfs_access_pattern_add_range_dir(access_pattern,
+ &access_pattern->age, "age");
+ if (err)
+ goto put_age_nr_accesses_sz_out;
+ return 0;
+
+put_age_nr_accesses_sz_out:
+ kobject_put(&access_pattern->age->kobj);
+ access_pattern->age = NULL;
+put_nr_accesses_sz_out:
+ kobject_put(&access_pattern->nr_accesses->kobj);
+ access_pattern->nr_accesses = NULL;
+put_sz_out:
+ kobject_put(&access_pattern->sz->kobj);
+ access_pattern->sz = NULL;
+ return err;
+}
+
+static void damon_sysfs_access_pattern_rm_dirs(
+ struct damon_sysfs_access_pattern *access_pattern)
+{
+ kobject_put(&access_pattern->sz->kobj);
+ kobject_put(&access_pattern->nr_accesses->kobj);
+ kobject_put(&access_pattern->age->kobj);
+}
+
+static void damon_sysfs_access_pattern_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_access_pattern, kobj));
+}
+
+static struct attribute *damon_sysfs_access_pattern_attrs[] = {
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_access_pattern);
+
+static const struct kobj_type damon_sysfs_access_pattern_ktype = {
+ .release = damon_sysfs_access_pattern_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_access_pattern_groups,
+};
+
+/*
+ * scheme directory
+ */
+
+struct damon_sysfs_scheme {
+ struct kobject kobj;
+ enum damos_action action;
+ struct damon_sysfs_access_pattern *access_pattern;
+ struct damon_sysfs_quotas *quotas;
+ struct damon_sysfs_watermarks *watermarks;
+ struct damon_sysfs_scheme_filters *filters;
+ struct damon_sysfs_stats *stats;
+ struct damon_sysfs_scheme_regions *tried_regions;
+};
+
+/* This should match with enum damos_action */
+static const char * const damon_sysfs_damos_action_strs[] = {
+ "willneed",
+ "cold",
+ "pageout",
+ "hugepage",
+ "nohugepage",
+ "lru_prio",
+ "lru_deprio",
+ "stat",
+};
+
+static struct damon_sysfs_scheme *damon_sysfs_scheme_alloc(
+ enum damos_action action)
+{
+ struct damon_sysfs_scheme *scheme = kmalloc(sizeof(*scheme),
+ GFP_KERNEL);
+
+ if (!scheme)
+ return NULL;
+ scheme->kobj = (struct kobject){};
+ scheme->action = action;
+ return scheme;
+}
+
+static int damon_sysfs_scheme_set_access_pattern(
+ struct damon_sysfs_scheme *scheme)
+{
+ struct damon_sysfs_access_pattern *access_pattern;
+ int err;
+
+ access_pattern = damon_sysfs_access_pattern_alloc();
+ if (!access_pattern)
+ return -ENOMEM;
+ err = kobject_init_and_add(&access_pattern->kobj,
+ &damon_sysfs_access_pattern_ktype, &scheme->kobj,
+ "access_pattern");
+ if (err)
+ goto out;
+ err = damon_sysfs_access_pattern_add_dirs(access_pattern);
+ if (err)
+ goto out;
+ scheme->access_pattern = access_pattern;
+ return 0;
+
+out:
+ kobject_put(&access_pattern->kobj);
+ return err;
+}
+
+static int damon_sysfs_scheme_set_quotas(struct damon_sysfs_scheme *scheme)
+{
+ struct damon_sysfs_quotas *quotas = damon_sysfs_quotas_alloc();
+ int err;
+
+ if (!quotas)
+ return -ENOMEM;
+ err = kobject_init_and_add(&quotas->kobj, &damon_sysfs_quotas_ktype,
+ &scheme->kobj, "quotas");
+ if (err)
+ goto out;
+ err = damon_sysfs_quotas_add_dirs(quotas);
+ if (err)
+ goto out;
+ scheme->quotas = quotas;
+ return 0;
+
+out:
+ kobject_put(&quotas->kobj);
+ return err;
+}
+
+static int damon_sysfs_scheme_set_watermarks(struct damon_sysfs_scheme *scheme)
+{
+ struct damon_sysfs_watermarks *watermarks =
+ damon_sysfs_watermarks_alloc(DAMOS_WMARK_NONE, 0, 0, 0, 0);
+ int err;
+
+ if (!watermarks)
+ return -ENOMEM;
+ err = kobject_init_and_add(&watermarks->kobj,
+ &damon_sysfs_watermarks_ktype, &scheme->kobj,
+ "watermarks");
+ if (err)
+ kobject_put(&watermarks->kobj);
+ else
+ scheme->watermarks = watermarks;
+ return err;
+}
+
+static int damon_sysfs_scheme_set_filters(struct damon_sysfs_scheme *scheme)
+{
+ struct damon_sysfs_scheme_filters *filters =
+ damon_sysfs_scheme_filters_alloc();
+ int err;
+
+ if (!filters)
+ return -ENOMEM;
+ err = kobject_init_and_add(&filters->kobj,
+ &damon_sysfs_scheme_filters_ktype, &scheme->kobj,
+ "filters");
+ if (err)
+ kobject_put(&filters->kobj);
+ else
+ scheme->filters = filters;
+ return err;
+}
+
+static int damon_sysfs_scheme_set_stats(struct damon_sysfs_scheme *scheme)
+{
+ struct damon_sysfs_stats *stats = damon_sysfs_stats_alloc();
+ int err;
+
+ if (!stats)
+ return -ENOMEM;
+ err = kobject_init_and_add(&stats->kobj, &damon_sysfs_stats_ktype,
+ &scheme->kobj, "stats");
+ if (err)
+ kobject_put(&stats->kobj);
+ else
+ scheme->stats = stats;
+ return err;
+}
+
+static int damon_sysfs_scheme_set_tried_regions(
+ struct damon_sysfs_scheme *scheme)
+{
+ struct damon_sysfs_scheme_regions *tried_regions =
+ damon_sysfs_scheme_regions_alloc();
+ int err;
+
+ if (!tried_regions)
+ return -ENOMEM;
+ err = kobject_init_and_add(&tried_regions->kobj,
+ &damon_sysfs_scheme_regions_ktype, &scheme->kobj,
+ "tried_regions");
+ if (err)
+ kobject_put(&tried_regions->kobj);
+ else
+ scheme->tried_regions = tried_regions;
+ return err;
+}
+
+static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme)
+{
+ int err;
+
+ err = damon_sysfs_scheme_set_access_pattern(scheme);
+ if (err)
+ return err;
+ err = damon_sysfs_scheme_set_quotas(scheme);
+ if (err)
+ goto put_access_pattern_out;
+ err = damon_sysfs_scheme_set_watermarks(scheme);
+ if (err)
+ goto put_quotas_access_pattern_out;
+ err = damon_sysfs_scheme_set_filters(scheme);
+ if (err)
+ goto put_watermarks_quotas_access_pattern_out;
+ err = damon_sysfs_scheme_set_stats(scheme);
+ if (err)
+ goto put_filters_watermarks_quotas_access_pattern_out;
+ err = damon_sysfs_scheme_set_tried_regions(scheme);
+ if (err)
+ goto put_tried_regions_out;
+ return 0;
+
+put_tried_regions_out:
+ kobject_put(&scheme->tried_regions->kobj);
+ scheme->tried_regions = NULL;
+put_filters_watermarks_quotas_access_pattern_out:
+ kobject_put(&scheme->filters->kobj);
+ scheme->filters = NULL;
+put_watermarks_quotas_access_pattern_out:
+ kobject_put(&scheme->watermarks->kobj);
+ scheme->watermarks = NULL;
+put_quotas_access_pattern_out:
+ kobject_put(&scheme->quotas->kobj);
+ scheme->quotas = NULL;
+put_access_pattern_out:
+ kobject_put(&scheme->access_pattern->kobj);
+ scheme->access_pattern = NULL;
+ return err;
+}
+
+static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme)
+{
+ damon_sysfs_access_pattern_rm_dirs(scheme->access_pattern);
+ kobject_put(&scheme->access_pattern->kobj);
+ damon_sysfs_quotas_rm_dirs(scheme->quotas);
+ kobject_put(&scheme->quotas->kobj);
+ kobject_put(&scheme->watermarks->kobj);
+ damon_sysfs_scheme_filters_rm_dirs(scheme->filters);
+ kobject_put(&scheme->filters->kobj);
+ kobject_put(&scheme->stats->kobj);
+ damon_sysfs_scheme_regions_rm_dirs(scheme->tried_regions);
+ kobject_put(&scheme->tried_regions->kobj);
+}
+
+static ssize_t action_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_scheme *scheme = container_of(kobj,
+ struct damon_sysfs_scheme, kobj);
+
+ return sysfs_emit(buf, "%s\n",
+ damon_sysfs_damos_action_strs[scheme->action]);
+}
+
+static ssize_t action_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme *scheme = container_of(kobj,
+ struct damon_sysfs_scheme, kobj);
+ enum damos_action action;
+
+ for (action = 0; action < NR_DAMOS_ACTIONS; action++) {
+ if (sysfs_streq(buf, damon_sysfs_damos_action_strs[action])) {
+ scheme->action = action;
+ return count;
+ }
+ }
+ return -EINVAL;
+}
+
+static void damon_sysfs_scheme_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_scheme, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_scheme_action_attr =
+ __ATTR_RW_MODE(action, 0600);
+
+static struct attribute *damon_sysfs_scheme_attrs[] = {
+ &damon_sysfs_scheme_action_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_scheme);
+
+static const struct kobj_type damon_sysfs_scheme_ktype = {
+ .release = damon_sysfs_scheme_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_scheme_groups,
+};
+
+/*
+ * schemes directory
+ */
+
+struct damon_sysfs_schemes *damon_sysfs_schemes_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_schemes), GFP_KERNEL);
+}
+
+void damon_sysfs_schemes_rm_dirs(struct damon_sysfs_schemes *schemes)
+{
+ struct damon_sysfs_scheme **schemes_arr = schemes->schemes_arr;
+ int i;
+
+ for (i = 0; i < schemes->nr; i++) {
+ damon_sysfs_scheme_rm_dirs(schemes_arr[i]);
+ kobject_put(&schemes_arr[i]->kobj);
+ }
+ schemes->nr = 0;
+ kfree(schemes_arr);
+ schemes->schemes_arr = NULL;
+}
+
+static int damon_sysfs_schemes_add_dirs(struct damon_sysfs_schemes *schemes,
+ int nr_schemes)
+{
+ struct damon_sysfs_scheme **schemes_arr, *scheme;
+ int err, i;
+
+ damon_sysfs_schemes_rm_dirs(schemes);
+ if (!nr_schemes)
+ return 0;
+
+ schemes_arr = kmalloc_array(nr_schemes, sizeof(*schemes_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!schemes_arr)
+ return -ENOMEM;
+ schemes->schemes_arr = schemes_arr;
+
+ for (i = 0; i < nr_schemes; i++) {
+ scheme = damon_sysfs_scheme_alloc(DAMOS_STAT);
+ if (!scheme) {
+ damon_sysfs_schemes_rm_dirs(schemes);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&scheme->kobj,
+ &damon_sysfs_scheme_ktype, &schemes->kobj,
+ "%d", i);
+ if (err)
+ goto out;
+ err = damon_sysfs_scheme_add_dirs(scheme);
+ if (err)
+ goto out;
+
+ schemes_arr[i] = scheme;
+ schemes->nr++;
+ }
+ return 0;
+
+out:
+ damon_sysfs_schemes_rm_dirs(schemes);
+ kobject_put(&scheme->kobj);
+ return err;
+}
+
+static ssize_t nr_schemes_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_schemes *schemes = container_of(kobj,
+ struct damon_sysfs_schemes, kobj);
+
+ return sysfs_emit(buf, "%d\n", schemes->nr);
+}
+
+static ssize_t nr_schemes_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_schemes *schemes;
+ int nr, err = kstrtoint(buf, 0, &nr);
+
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ schemes = container_of(kobj, struct damon_sysfs_schemes, kobj);
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_schemes_add_dirs(schemes, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+ return count;
+}
+
+static void damon_sysfs_schemes_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_schemes, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_schemes_nr_attr =
+ __ATTR_RW_MODE(nr_schemes, 0600);
+
+static struct attribute *damon_sysfs_schemes_attrs[] = {
+ &damon_sysfs_schemes_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_schemes);
+
+const struct kobj_type damon_sysfs_schemes_ktype = {
+ .release = damon_sysfs_schemes_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_schemes_groups,
+};
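+
+/*
+ * A minimal usage sketch (assuming the usual DAMON sysfs layout under
+ * /sys/kernel/mm/damon/admin/): creating one scheme through this directory
+ * could look like:
+ *
+ *   # cd /sys/kernel/mm/damon/admin/kdamonds/0/contexts/0/schemes
+ *   # echo 1 > nr_schemes
+ *   # echo pageout > 0/action
+ */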
+
+static bool damon_sysfs_memcg_path_eq(struct mem_cgroup *memcg,
+ char *memcg_path_buf, char *path)
+{
+#ifdef CONFIG_MEMCG
+ cgroup_path(memcg->css.cgroup, memcg_path_buf, PATH_MAX);
+ if (sysfs_streq(memcg_path_buf, path))
+ return true;
+#endif /* CONFIG_MEMCG */
+ return false;
+}
+
+static int damon_sysfs_memcg_path_to_id(char *memcg_path, unsigned short *id)
+{
+ struct mem_cgroup *memcg;
+ char *path;
+ bool found = false;
+
+ if (!memcg_path)
+ return -EINVAL;
+
+ path = kmalloc(sizeof(*path) * PATH_MAX, GFP_KERNEL);
+ if (!path)
+ return -ENOMEM;
+
+ for (memcg = mem_cgroup_iter(NULL, NULL, NULL); memcg;
+ memcg = mem_cgroup_iter(NULL, memcg, NULL)) {
+ /* skip removed memcg */
+ if (!mem_cgroup_id(memcg))
+ continue;
+ if (damon_sysfs_memcg_path_eq(memcg, path, memcg_path)) {
+ *id = mem_cgroup_id(memcg);
+ found = true;
+ break;
+ }
+ }
+
+ kfree(path);
+ return found ? 0 : -EINVAL;
+}
+
+static int damon_sysfs_set_scheme_filters(struct damos *scheme,
+ struct damon_sysfs_scheme_filters *sysfs_filters)
+{
+ int i;
+ struct damos_filter *filter, *next;
+
+ damos_for_each_filter_safe(filter, next, scheme)
+ damos_destroy_filter(filter);
+
+ for (i = 0; i < sysfs_filters->nr; i++) {
+ struct damon_sysfs_scheme_filter *sysfs_filter =
+ sysfs_filters->filters_arr[i];
+ struct damos_filter *filter =
+ damos_new_filter(sysfs_filter->type,
+ sysfs_filter->matching);
+ int err;
+
+ if (!filter)
+ return -ENOMEM;
+ if (filter->type == DAMOS_FILTER_TYPE_MEMCG) {
+ err = damon_sysfs_memcg_path_to_id(
+ sysfs_filter->memcg_path,
+ &filter->memcg_id);
+ if (err) {
+ damos_destroy_filter(filter);
+ return err;
+ }
+ } else if (filter->type == DAMOS_FILTER_TYPE_ADDR) {
+ if (sysfs_filter->addr_range.end <
+ sysfs_filter->addr_range.start) {
+ damos_destroy_filter(filter);
+ return -EINVAL;
+ }
+ filter->addr_range = sysfs_filter->addr_range;
+ } else if (filter->type == DAMOS_FILTER_TYPE_TARGET) {
+ filter->target_idx = sysfs_filter->target_idx;
+ }
+
+ damos_add_filter(scheme, filter);
+ }
+ return 0;
+}
+
+static struct damos *damon_sysfs_mk_scheme(
+ struct damon_sysfs_scheme *sysfs_scheme)
+{
+ struct damon_sysfs_access_pattern *access_pattern =
+ sysfs_scheme->access_pattern;
+ struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas;
+ struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights;
+ struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks;
+ struct damon_sysfs_scheme_filters *sysfs_filters =
+ sysfs_scheme->filters;
+ struct damos *scheme;
+ int err;
+
+ struct damos_access_pattern pattern = {
+ .min_sz_region = access_pattern->sz->min,
+ .max_sz_region = access_pattern->sz->max,
+ .min_nr_accesses = access_pattern->nr_accesses->min,
+ .max_nr_accesses = access_pattern->nr_accesses->max,
+ .min_age_region = access_pattern->age->min,
+ .max_age_region = access_pattern->age->max,
+ };
+ struct damos_quota quota = {
+ .ms = sysfs_quotas->ms,
+ .sz = sysfs_quotas->sz,
+ .reset_interval = sysfs_quotas->reset_interval_ms,
+ .weight_sz = sysfs_weights->sz,
+ .weight_nr_accesses = sysfs_weights->nr_accesses,
+ .weight_age = sysfs_weights->age,
+ };
+ struct damos_watermarks wmarks = {
+ .metric = sysfs_wmarks->metric,
+ .interval = sysfs_wmarks->interval_us,
+ .high = sysfs_wmarks->high,
+ .mid = sysfs_wmarks->mid,
+ .low = sysfs_wmarks->low,
+ };
+
+ scheme = damon_new_scheme(&pattern, sysfs_scheme->action, &quota,
+ &wmarks);
+ if (!scheme)
+ return NULL;
+
+ err = damon_sysfs_set_scheme_filters(scheme, sysfs_filters);
+ if (err) {
+ damon_destroy_scheme(scheme);
+ return NULL;
+ }
+ return scheme;
+}
+
+static void damon_sysfs_update_scheme(struct damos *scheme,
+ struct damon_sysfs_scheme *sysfs_scheme)
+{
+ struct damon_sysfs_access_pattern *access_pattern =
+ sysfs_scheme->access_pattern;
+ struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas;
+ struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights;
+ struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks;
+ int err;
+
+ scheme->pattern.min_sz_region = access_pattern->sz->min;
+ scheme->pattern.max_sz_region = access_pattern->sz->max;
+ scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min;
+ scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max;
+ scheme->pattern.min_age_region = access_pattern->age->min;
+ scheme->pattern.max_age_region = access_pattern->age->max;
+
+ scheme->action = sysfs_scheme->action;
+
+ scheme->quota.ms = sysfs_quotas->ms;
+ scheme->quota.sz = sysfs_quotas->sz;
+ scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms;
+ scheme->quota.weight_sz = sysfs_weights->sz;
+ scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses;
+ scheme->quota.weight_age = sysfs_weights->age;
+
+ scheme->wmarks.metric = sysfs_wmarks->metric;
+ scheme->wmarks.interval = sysfs_wmarks->interval_us;
+ scheme->wmarks.high = sysfs_wmarks->high;
+ scheme->wmarks.mid = sysfs_wmarks->mid;
+ scheme->wmarks.low = sysfs_wmarks->low;
+
+ err = damon_sysfs_set_scheme_filters(scheme, sysfs_scheme->filters);
+ if (err)
+ damon_destroy_scheme(scheme);
+}
+
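+/*
+ * Apply @sysfs_schemes to @ctx: existing schemes of @ctx are updated in place
+ * from the corresponding sysfs entries, surplus schemes are destroyed, and
+ * newly added sysfs entries get new schemes created and appended.
+ */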
+int damon_sysfs_set_schemes(struct damon_ctx *ctx,
+ struct damon_sysfs_schemes *sysfs_schemes)
+{
+ struct damos *scheme, *next;
+ int i = 0;
+
+ damon_for_each_scheme_safe(scheme, next, ctx) {
+ if (i < sysfs_schemes->nr)
+ damon_sysfs_update_scheme(scheme,
+ sysfs_schemes->schemes_arr[i]);
+ else
+ damon_destroy_scheme(scheme);
+ i++;
+ }
+
+ for (; i < sysfs_schemes->nr; i++) {
+ struct damos *scheme, *next;
+
+ scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]);
+ if (!scheme) {
+ damon_for_each_scheme_safe(scheme, next, ctx)
+ damon_destroy_scheme(scheme);
+ return -ENOMEM;
+ }
+ damon_add_scheme(ctx, scheme);
+ }
+ return 0;
+}
+
+void damon_sysfs_schemes_update_stats(
+ struct damon_sysfs_schemes *sysfs_schemes,
+ struct damon_ctx *ctx)
+{
+ struct damos *scheme;
+ int schemes_idx = 0;
+
+ damon_for_each_scheme(scheme, ctx) {
+ struct damon_sysfs_stats *sysfs_stats;
+
+ /* user could have removed the scheme sysfs dir */
+ if (schemes_idx >= sysfs_schemes->nr)
+ break;
+
+ sysfs_stats = sysfs_schemes->schemes_arr[schemes_idx++]->stats;
+ sysfs_stats->nr_tried = scheme->stat.nr_tried;
+ sysfs_stats->sz_tried = scheme->stat.sz_tried;
+ sysfs_stats->nr_applied = scheme->stat.nr_applied;
+ sysfs_stats->sz_applied = scheme->stat.sz_applied;
+ sysfs_stats->qt_exceeds = scheme->stat.qt_exceeds;
+ }
+}
+
+/*
+ * The damon_sysfs_schemes that needs its schemes regions directories
+ * updated. Protected by damon_sysfs_lock.
+ */
+static struct damon_sysfs_schemes *damon_sysfs_schemes_for_damos_callback;
+static int damon_sysfs_schemes_region_idx;
+static bool damos_regions_upd_total_bytes_only;
+
+/*
+ * DAMON callback that is called before damos apply. While this callback is
+ * registered, damon_sysfs_lock should be held to ensure the regions
+ * directories exist.
+ */
+static int damon_sysfs_before_damos_apply(struct damon_ctx *ctx,
+ struct damon_target *t, struct damon_region *r,
+ struct damos *s)
+{
+ struct damos *scheme;
+ struct damon_sysfs_scheme_regions *sysfs_regions;
+ struct damon_sysfs_scheme_region *region;
+ struct damon_sysfs_schemes *sysfs_schemes =
+ damon_sysfs_schemes_for_damos_callback;
+ int schemes_idx = 0;
+
+ damon_for_each_scheme(scheme, ctx) {
+ if (scheme == s)
+ break;
+ schemes_idx++;
+ }
+
+ /* user could have removed the scheme sysfs dir */
+ if (schemes_idx >= sysfs_schemes->nr)
+ return 0;
+
+ sysfs_regions = sysfs_schemes->schemes_arr[schemes_idx]->tried_regions;
+ sysfs_regions->total_bytes += r->ar.end - r->ar.start;
+ if (damos_regions_upd_total_bytes_only)
+ return 0;
+
+ region = damon_sysfs_scheme_region_alloc(r);
+ if (!region)
+ return 0;
+ list_add_tail(&region->list, &sysfs_regions->regions_list);
+ sysfs_regions->nr_regions++;
+ if (kobject_init_and_add(&region->kobj,
+ &damon_sysfs_scheme_region_ktype,
+ &sysfs_regions->kobj, "%d",
+ damon_sysfs_schemes_region_idx++)) {
+ kobject_put(&region->kobj);
+ }
+ return 0;
+}
+
+/* Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock */
+int damon_sysfs_schemes_clear_regions(
+ struct damon_sysfs_schemes *sysfs_schemes,
+ struct damon_ctx *ctx)
+{
+ struct damos *scheme;
+ int schemes_idx = 0;
+
+ damon_for_each_scheme(scheme, ctx) {
+ struct damon_sysfs_scheme *sysfs_scheme;
+
+ /* user could have removed the scheme sysfs dir */
+ if (schemes_idx >= sysfs_schemes->nr)
+ break;
+
+ sysfs_scheme = sysfs_schemes->schemes_arr[schemes_idx++];
+ damon_sysfs_scheme_regions_rm_dirs(
+ sysfs_scheme->tried_regions);
+ sysfs_scheme->tried_regions->total_bytes = 0;
+ }
+ return 0;
+}
+
+/* Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock */
+int damon_sysfs_schemes_update_regions_start(
+ struct damon_sysfs_schemes *sysfs_schemes,
+ struct damon_ctx *ctx, bool total_bytes_only)
+{
+ damon_sysfs_schemes_clear_regions(sysfs_schemes, ctx);
+ damon_sysfs_schemes_for_damos_callback = sysfs_schemes;
+ damos_regions_upd_total_bytes_only = total_bytes_only;
+ ctx->callback.before_damos_apply = damon_sysfs_before_damos_apply;
+ return 0;
+}
+
+/*
+ * Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock. Caller
+ * should unlock damon_sysfs_lock, which was taken before
+ * damon_sysfs_schemes_update_regions_start().
+ */
+int damon_sysfs_schemes_update_regions_stop(struct damon_ctx *ctx)
+{
+ damon_sysfs_schemes_for_damos_callback = NULL;
+ ctx->callback.before_damos_apply = NULL;
+ damon_sysfs_schemes_region_idx = 0;
+ return 0;
+}
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
new file mode 100644
index 000000000..b317f51dc
--- /dev/null
+++ b/mm/damon/sysfs.c
@@ -0,0 +1,1829 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON sysfs Interface
+ *
+ * Copyright (c) 2022 SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/pid.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "sysfs-common.h"
+
+/*
+ * init region directory
+ */
+
+struct damon_sysfs_region {
+ struct kobject kobj;
+ struct damon_addr_range ar;
+};
+
+static struct damon_sysfs_region *damon_sysfs_region_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_region), GFP_KERNEL);
+}
+
+static ssize_t start_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_region *region = container_of(kobj,
+ struct damon_sysfs_region, kobj);
+
+ return sysfs_emit(buf, "%lu\n", region->ar.start);
+}
+
+static ssize_t start_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_region *region = container_of(kobj,
+ struct damon_sysfs_region, kobj);
+ int err = kstrtoul(buf, 0, &region->ar.start);
+
+ return err ? err : count;
+}
+
+static ssize_t end_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_region *region = container_of(kobj,
+ struct damon_sysfs_region, kobj);
+
+ return sysfs_emit(buf, "%lu\n", region->ar.end);
+}
+
+static ssize_t end_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_region *region = container_of(kobj,
+ struct damon_sysfs_region, kobj);
+ int err = kstrtoul(buf, 0, &region->ar.end);
+
+ return err ? err : count;
+}
+
+static void damon_sysfs_region_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_region, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_region_start_attr =
+ __ATTR_RW_MODE(start, 0600);
+
+static struct kobj_attribute damon_sysfs_region_end_attr =
+ __ATTR_RW_MODE(end, 0600);
+
+static struct attribute *damon_sysfs_region_attrs[] = {
+ &damon_sysfs_region_start_attr.attr,
+ &damon_sysfs_region_end_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_region);
+
+static const struct kobj_type damon_sysfs_region_ktype = {
+ .release = damon_sysfs_region_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_region_groups,
+};
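+
+/*
+ * Example usage (a sketch, assuming the interface is mounted at the default
+ * /sys/kernel/mm/damon/admin location): an initial monitoring region of the
+ * first target of the first context could be set with something like
+ *
+ *   # echo 1 > kdamonds/0/contexts/0/targets/0/regions/nr_regions
+ *   # echo 4096 > kdamonds/0/contexts/0/targets/0/regions/0/start
+ *   # echo 16384 > kdamonds/0/contexts/0/targets/0/regions/0/end
+ *
+ * The addresses above are arbitrary illustration values.
+ */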
+
+/*
+ * init_regions directory
+ */
+
+struct damon_sysfs_regions {
+ struct kobject kobj;
+ struct damon_sysfs_region **regions_arr;
+ int nr;
+};
+
+static struct damon_sysfs_regions *damon_sysfs_regions_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_regions), GFP_KERNEL);
+}
+
+static void damon_sysfs_regions_rm_dirs(struct damon_sysfs_regions *regions)
+{
+ struct damon_sysfs_region **regions_arr = regions->regions_arr;
+ int i;
+
+ for (i = 0; i < regions->nr; i++)
+ kobject_put(&regions_arr[i]->kobj);
+ regions->nr = 0;
+ kfree(regions_arr);
+ regions->regions_arr = NULL;
+}
+
+static int damon_sysfs_regions_add_dirs(struct damon_sysfs_regions *regions,
+ int nr_regions)
+{
+ struct damon_sysfs_region **regions_arr, *region;
+ int err, i;
+
+ damon_sysfs_regions_rm_dirs(regions);
+ if (!nr_regions)
+ return 0;
+
+ regions_arr = kmalloc_array(nr_regions, sizeof(*regions_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!regions_arr)
+ return -ENOMEM;
+ regions->regions_arr = regions_arr;
+
+ for (i = 0; i < nr_regions; i++) {
+ region = damon_sysfs_region_alloc();
+ if (!region) {
+ damon_sysfs_regions_rm_dirs(regions);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&region->kobj,
+ &damon_sysfs_region_ktype, &regions->kobj,
+ "%d", i);
+ if (err) {
+ kobject_put(&region->kobj);
+ damon_sysfs_regions_rm_dirs(regions);
+ return err;
+ }
+
+ regions_arr[i] = region;
+ regions->nr++;
+ }
+ return 0;
+}
+
+static ssize_t nr_regions_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_regions *regions = container_of(kobj,
+ struct damon_sysfs_regions, kobj);
+
+ return sysfs_emit(buf, "%d\n", regions->nr);
+}
+
+static ssize_t nr_regions_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_regions *regions;
+ int nr, err = kstrtoint(buf, 0, &nr);
+
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ regions = container_of(kobj, struct damon_sysfs_regions, kobj);
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_regions_add_dirs(regions, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static void damon_sysfs_regions_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_regions, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_regions_nr_attr =
+ __ATTR_RW_MODE(nr_regions, 0600);
+
+static struct attribute *damon_sysfs_regions_attrs[] = {
+ &damon_sysfs_regions_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_regions);
+
+static const struct kobj_type damon_sysfs_regions_ktype = {
+ .release = damon_sysfs_regions_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_regions_groups,
+};
+
+/*
+ * target directory
+ */
+
+struct damon_sysfs_target {
+ struct kobject kobj;
+ struct damon_sysfs_regions *regions;
+ int pid;
+};
+
+static struct damon_sysfs_target *damon_sysfs_target_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_target), GFP_KERNEL);
+}
+
+static int damon_sysfs_target_add_dirs(struct damon_sysfs_target *target)
+{
+ struct damon_sysfs_regions *regions = damon_sysfs_regions_alloc();
+ int err;
+
+ if (!regions)
+ return -ENOMEM;
+
+ err = kobject_init_and_add(&regions->kobj, &damon_sysfs_regions_ktype,
+ &target->kobj, "regions");
+ if (err)
+ kobject_put(&regions->kobj);
+ else
+ target->regions = regions;
+ return err;
+}
+
+static void damon_sysfs_target_rm_dirs(struct damon_sysfs_target *target)
+{
+ damon_sysfs_regions_rm_dirs(target->regions);
+ kobject_put(&target->regions->kobj);
+}
+
+static ssize_t pid_target_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_target *target = container_of(kobj,
+ struct damon_sysfs_target, kobj);
+
+ return sysfs_emit(buf, "%d\n", target->pid);
+}
+
+static ssize_t pid_target_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_target *target = container_of(kobj,
+ struct damon_sysfs_target, kobj);
+ int err = kstrtoint(buf, 0, &target->pid);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static void damon_sysfs_target_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_target, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_target_pid_attr =
+ __ATTR_RW_MODE(pid_target, 0600);
+
+static struct attribute *damon_sysfs_target_attrs[] = {
+ &damon_sysfs_target_pid_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_target);
+
+static const struct kobj_type damon_sysfs_target_ktype = {
+ .release = damon_sysfs_target_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_target_groups,
+};
+
+/*
+ * targets directory
+ */
+
+struct damon_sysfs_targets {
+ struct kobject kobj;
+ struct damon_sysfs_target **targets_arr;
+ int nr;
+};
+
+static struct damon_sysfs_targets *damon_sysfs_targets_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_targets), GFP_KERNEL);
+}
+
+static void damon_sysfs_targets_rm_dirs(struct damon_sysfs_targets *targets)
+{
+ struct damon_sysfs_target **targets_arr = targets->targets_arr;
+ int i;
+
+ for (i = 0; i < targets->nr; i++) {
+ damon_sysfs_target_rm_dirs(targets_arr[i]);
+ kobject_put(&targets_arr[i]->kobj);
+ }
+ targets->nr = 0;
+ kfree(targets_arr);
+ targets->targets_arr = NULL;
+}
+
+static int damon_sysfs_targets_add_dirs(struct damon_sysfs_targets *targets,
+ int nr_targets)
+{
+ struct damon_sysfs_target **targets_arr, *target;
+ int err, i;
+
+ damon_sysfs_targets_rm_dirs(targets);
+ if (!nr_targets)
+ return 0;
+
+ targets_arr = kmalloc_array(nr_targets, sizeof(*targets_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!targets_arr)
+ return -ENOMEM;
+ targets->targets_arr = targets_arr;
+
+ for (i = 0; i < nr_targets; i++) {
+ target = damon_sysfs_target_alloc();
+ if (!target) {
+ damon_sysfs_targets_rm_dirs(targets);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&target->kobj,
+ &damon_sysfs_target_ktype, &targets->kobj,
+ "%d", i);
+ if (err)
+ goto out;
+
+ err = damon_sysfs_target_add_dirs(target);
+ if (err)
+ goto out;
+
+ targets_arr[i] = target;
+ targets->nr++;
+ }
+ return 0;
+
+out:
+ damon_sysfs_targets_rm_dirs(targets);
+ kobject_put(&target->kobj);
+ return err;
+}
+
+static ssize_t nr_targets_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_targets *targets = container_of(kobj,
+ struct damon_sysfs_targets, kobj);
+
+ return sysfs_emit(buf, "%d\n", targets->nr);
+}
+
+static ssize_t nr_targets_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_targets *targets;
+ int nr, err = kstrtoint(buf, 0, &nr);
+
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ targets = container_of(kobj, struct damon_sysfs_targets, kobj);
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_targets_add_dirs(targets, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static void damon_sysfs_targets_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_targets, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_targets_nr_attr =
+ __ATTR_RW_MODE(nr_targets, 0600);
+
+static struct attribute *damon_sysfs_targets_attrs[] = {
+ &damon_sysfs_targets_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_targets);
+
+static const struct kobj_type damon_sysfs_targets_ktype = {
+ .release = damon_sysfs_targets_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_targets_groups,
+};
+
+/*
+ * intervals directory
+ */
+
+struct damon_sysfs_intervals {
+ struct kobject kobj;
+ unsigned long sample_us;
+ unsigned long aggr_us;
+ unsigned long update_us;
+};
+
+static struct damon_sysfs_intervals *damon_sysfs_intervals_alloc(
+ unsigned long sample_us, unsigned long aggr_us,
+ unsigned long update_us)
+{
+ struct damon_sysfs_intervals *intervals = kmalloc(sizeof(*intervals),
+ GFP_KERNEL);
+
+ if (!intervals)
+ return NULL;
+
+ intervals->kobj = (struct kobject){};
+ intervals->sample_us = sample_us;
+ intervals->aggr_us = aggr_us;
+ intervals->update_us = update_us;
+ return intervals;
+}
+
+static ssize_t sample_us_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+
+ return sysfs_emit(buf, "%lu\n", intervals->sample_us);
+}
+
+static ssize_t sample_us_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+ unsigned long us;
+ int err = kstrtoul(buf, 0, &us);
+
+ if (err)
+ return err;
+
+ intervals->sample_us = us;
+ return count;
+}
+
+static ssize_t aggr_us_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+
+ return sysfs_emit(buf, "%lu\n", intervals->aggr_us);
+}
+
+static ssize_t aggr_us_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+ unsigned long us;
+ int err = kstrtoul(buf, 0, &us);
+
+ if (err)
+ return err;
+
+ intervals->aggr_us = us;
+ return count;
+}
+
+static ssize_t update_us_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+
+ return sysfs_emit(buf, "%lu\n", intervals->update_us);
+}
+
+static ssize_t update_us_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_intervals *intervals = container_of(kobj,
+ struct damon_sysfs_intervals, kobj);
+ unsigned long us;
+ int err = kstrtoul(buf, 0, &us);
+
+ if (err)
+ return err;
+
+ intervals->update_us = us;
+ return count;
+}
+
+static void damon_sysfs_intervals_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_intervals, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_intervals_sample_us_attr =
+ __ATTR_RW_MODE(sample_us, 0600);
+
+static struct kobj_attribute damon_sysfs_intervals_aggr_us_attr =
+ __ATTR_RW_MODE(aggr_us, 0600);
+
+static struct kobj_attribute damon_sysfs_intervals_update_us_attr =
+ __ATTR_RW_MODE(update_us, 0600);
+
+static struct attribute *damon_sysfs_intervals_attrs[] = {
+ &damon_sysfs_intervals_sample_us_attr.attr,
+ &damon_sysfs_intervals_aggr_us_attr.attr,
+ &damon_sysfs_intervals_update_us_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_intervals);
+
+static const struct kobj_type damon_sysfs_intervals_ktype = {
+ .release = damon_sysfs_intervals_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_intervals_groups,
+};
+
+/*
+ * monitoring_attrs directory
+ */
+
+struct damon_sysfs_attrs {
+ struct kobject kobj;
+ struct damon_sysfs_intervals *intervals;
+ struct damon_sysfs_ul_range *nr_regions_range;
+};
+
+static struct damon_sysfs_attrs *damon_sysfs_attrs_alloc(void)
+{
+ struct damon_sysfs_attrs *attrs = kmalloc(sizeof(*attrs), GFP_KERNEL);
+
+ if (!attrs)
+ return NULL;
+ attrs->kobj = (struct kobject){};
+ return attrs;
+}
+
+static int damon_sysfs_attrs_add_dirs(struct damon_sysfs_attrs *attrs)
+{
+ struct damon_sysfs_intervals *intervals;
+ struct damon_sysfs_ul_range *nr_regions_range;
+ int err;
+
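+ /* default intervals: 5ms sampling, 100ms aggregation, 60s ops update */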
+ intervals = damon_sysfs_intervals_alloc(5000, 100000, 60000000);
+ if (!intervals)
+ return -ENOMEM;
+
+ err = kobject_init_and_add(&intervals->kobj,
+ &damon_sysfs_intervals_ktype, &attrs->kobj,
+ "intervals");
+ if (err)
+ goto put_intervals_out;
+ attrs->intervals = intervals;
+
+ nr_regions_range = damon_sysfs_ul_range_alloc(10, 1000);
+ if (!nr_regions_range) {
+ err = -ENOMEM;
+ goto put_intervals_out;
+ }
+
+ err = kobject_init_and_add(&nr_regions_range->kobj,
+ &damon_sysfs_ul_range_ktype, &attrs->kobj,
+ "nr_regions");
+ if (err)
+ goto put_nr_regions_intervals_out;
+ attrs->nr_regions_range = nr_regions_range;
+ return 0;
+
+put_nr_regions_intervals_out:
+ kobject_put(&nr_regions_range->kobj);
+ attrs->nr_regions_range = NULL;
+put_intervals_out:
+ kobject_put(&intervals->kobj);
+ attrs->intervals = NULL;
+ return err;
+}
+
+static void damon_sysfs_attrs_rm_dirs(struct damon_sysfs_attrs *attrs)
+{
+ kobject_put(&attrs->nr_regions_range->kobj);
+ kobject_put(&attrs->intervals->kobj);
+}
+
+static void damon_sysfs_attrs_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_attrs, kobj));
+}
+
+static struct attribute *damon_sysfs_attrs_attrs[] = {
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_attrs);
+
+static const struct kobj_type damon_sysfs_attrs_ktype = {
+ .release = damon_sysfs_attrs_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_attrs_groups,
+};
+
+/*
+ * context directory
+ */
+
+/* This should match with enum damon_ops_id */
+static const char * const damon_sysfs_ops_strs[] = {
+ "vaddr",
+ "fvaddr",
+ "paddr",
+};
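+
+/*
+ * A usage sketch (assuming the default /sys/kernel/mm/damon/admin layout):
+ * the monitoring operations set of a context is selected by writing one of
+ * the strings above to its 'operations' file, e.g.,
+ *
+ *   # echo vaddr > kdamonds/0/contexts/0/operations
+ */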
+
+struct damon_sysfs_context {
+ struct kobject kobj;
+ enum damon_ops_id ops_id;
+ struct damon_sysfs_attrs *attrs;
+ struct damon_sysfs_targets *targets;
+ struct damon_sysfs_schemes *schemes;
+};
+
+static struct damon_sysfs_context *damon_sysfs_context_alloc(
+ enum damon_ops_id ops_id)
+{
+ struct damon_sysfs_context *context = kmalloc(sizeof(*context),
+ GFP_KERNEL);
+
+ if (!context)
+ return NULL;
+ context->kobj = (struct kobject){};
+ context->ops_id = ops_id;
+ return context;
+}
+
+static int damon_sysfs_context_set_attrs(struct damon_sysfs_context *context)
+{
+ struct damon_sysfs_attrs *attrs = damon_sysfs_attrs_alloc();
+ int err;
+
+ if (!attrs)
+ return -ENOMEM;
+ err = kobject_init_and_add(&attrs->kobj, &damon_sysfs_attrs_ktype,
+ &context->kobj, "monitoring_attrs");
+ if (err)
+ goto out;
+ err = damon_sysfs_attrs_add_dirs(attrs);
+ if (err)
+ goto out;
+ context->attrs = attrs;
+ return 0;
+
+out:
+ kobject_put(&attrs->kobj);
+ return err;
+}
+
+static int damon_sysfs_context_set_targets(struct damon_sysfs_context *context)
+{
+ struct damon_sysfs_targets *targets = damon_sysfs_targets_alloc();
+ int err;
+
+ if (!targets)
+ return -ENOMEM;
+ err = kobject_init_and_add(&targets->kobj, &damon_sysfs_targets_ktype,
+ &context->kobj, "targets");
+ if (err) {
+ kobject_put(&targets->kobj);
+ return err;
+ }
+ context->targets = targets;
+ return 0;
+}
+
+static int damon_sysfs_context_set_schemes(struct damon_sysfs_context *context)
+{
+ struct damon_sysfs_schemes *schemes = damon_sysfs_schemes_alloc();
+ int err;
+
+ if (!schemes)
+ return -ENOMEM;
+ err = kobject_init_and_add(&schemes->kobj, &damon_sysfs_schemes_ktype,
+ &context->kobj, "schemes");
+ if (err) {
+ kobject_put(&schemes->kobj);
+ return err;
+ }
+ context->schemes = schemes;
+ return 0;
+}
+
+static int damon_sysfs_context_add_dirs(struct damon_sysfs_context *context)
+{
+ int err;
+
+ err = damon_sysfs_context_set_attrs(context);
+ if (err)
+ return err;
+
+ err = damon_sysfs_context_set_targets(context);
+ if (err)
+ goto put_attrs_out;
+
+ err = damon_sysfs_context_set_schemes(context);
+ if (err)
+ goto put_targets_attrs_out;
+ return 0;
+
+put_targets_attrs_out:
+ kobject_put(&context->targets->kobj);
+ context->targets = NULL;
+put_attrs_out:
+ kobject_put(&context->attrs->kobj);
+ context->attrs = NULL;
+ return err;
+}
+
+static void damon_sysfs_context_rm_dirs(struct damon_sysfs_context *context)
+{
+ damon_sysfs_attrs_rm_dirs(context->attrs);
+ kobject_put(&context->attrs->kobj);
+ damon_sysfs_targets_rm_dirs(context->targets);
+ kobject_put(&context->targets->kobj);
+ damon_sysfs_schemes_rm_dirs(context->schemes);
+ kobject_put(&context->schemes->kobj);
+}
+
+static ssize_t avail_operations_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ enum damon_ops_id id;
+ int len = 0;
+
+ for (id = 0; id < NR_DAMON_OPS; id++) {
+ if (!damon_is_registered_ops(id))
+ continue;
+ len += sysfs_emit_at(buf, len, "%s\n",
+ damon_sysfs_ops_strs[id]);
+ }
+ return len;
+}
+
+static ssize_t operations_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_context *context = container_of(kobj,
+ struct damon_sysfs_context, kobj);
+
+ return sysfs_emit(buf, "%s\n", damon_sysfs_ops_strs[context->ops_id]);
+}
+
+static ssize_t operations_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_context *context = container_of(kobj,
+ struct damon_sysfs_context, kobj);
+ enum damon_ops_id id;
+
+ for (id = 0; id < NR_DAMON_OPS; id++) {
+ if (sysfs_streq(buf, damon_sysfs_ops_strs[id])) {
+ context->ops_id = id;
+ return count;
+ }
+ }
+ return -EINVAL;
+}
+
+static void damon_sysfs_context_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_context, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_context_avail_operations_attr =
+ __ATTR_RO_MODE(avail_operations, 0400);
+
+static struct kobj_attribute damon_sysfs_context_operations_attr =
+ __ATTR_RW_MODE(operations, 0600);
+
+static struct attribute *damon_sysfs_context_attrs[] = {
+ &damon_sysfs_context_avail_operations_attr.attr,
+ &damon_sysfs_context_operations_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_context);
+
+static const struct kobj_type damon_sysfs_context_ktype = {
+ .release = damon_sysfs_context_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_context_groups,
+};
+
+/*
+ * contexts directory
+ */
+
+struct damon_sysfs_contexts {
+ struct kobject kobj;
+ struct damon_sysfs_context **contexts_arr;
+ int nr;
+};
+
+static struct damon_sysfs_contexts *damon_sysfs_contexts_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_contexts), GFP_KERNEL);
+}
+
+static void damon_sysfs_contexts_rm_dirs(struct damon_sysfs_contexts *contexts)
+{
+ struct damon_sysfs_context **contexts_arr = contexts->contexts_arr;
+ int i;
+
+ for (i = 0; i < contexts->nr; i++) {
+ damon_sysfs_context_rm_dirs(contexts_arr[i]);
+ kobject_put(&contexts_arr[i]->kobj);
+ }
+ contexts->nr = 0;
+ kfree(contexts_arr);
+ contexts->contexts_arr = NULL;
+}
+
+static int damon_sysfs_contexts_add_dirs(struct damon_sysfs_contexts *contexts,
+ int nr_contexts)
+{
+ struct damon_sysfs_context **contexts_arr, *context;
+ int err, i;
+
+ damon_sysfs_contexts_rm_dirs(contexts);
+ if (!nr_contexts)
+ return 0;
+
+ contexts_arr = kmalloc_array(nr_contexts, sizeof(*contexts_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!contexts_arr)
+ return -ENOMEM;
+ contexts->contexts_arr = contexts_arr;
+
+ for (i = 0; i < nr_contexts; i++) {
+ context = damon_sysfs_context_alloc(DAMON_OPS_VADDR);
+ if (!context) {
+ damon_sysfs_contexts_rm_dirs(contexts);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&context->kobj,
+ &damon_sysfs_context_ktype, &contexts->kobj,
+ "%d", i);
+ if (err)
+ goto out;
+
+ err = damon_sysfs_context_add_dirs(context);
+ if (err)
+ goto out;
+
+ contexts_arr[i] = context;
+ contexts->nr++;
+ }
+ return 0;
+
+out:
+ damon_sysfs_contexts_rm_dirs(contexts);
+ kobject_put(&context->kobj);
+ return err;
+}
+
+static ssize_t nr_contexts_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_contexts *contexts = container_of(kobj,
+ struct damon_sysfs_contexts, kobj);
+
+ return sysfs_emit(buf, "%d\n", contexts->nr);
+}
+
+static ssize_t nr_contexts_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_contexts *contexts;
+ int nr, err;
+
+ err = kstrtoint(buf, 0, &nr);
+ if (err)
+ return err;
+ /* TODO: support multiple contexts per kdamond */
+ if (nr < 0 || 1 < nr)
+ return -EINVAL;
+
+ contexts = container_of(kobj, struct damon_sysfs_contexts, kobj);
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_contexts_add_dirs(contexts, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static void damon_sysfs_contexts_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_contexts, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_contexts_nr_attr
+ = __ATTR_RW_MODE(nr_contexts, 0600);
+
+static struct attribute *damon_sysfs_contexts_attrs[] = {
+ &damon_sysfs_contexts_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_contexts);
+
+static const struct kobj_type damon_sysfs_contexts_ktype = {
+ .release = damon_sysfs_contexts_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_contexts_groups,
+};
+
+/*
+ * kdamond directory
+ */
+
+struct damon_sysfs_kdamond {
+ struct kobject kobj;
+ struct damon_sysfs_contexts *contexts;
+ struct damon_ctx *damon_ctx;
+};
+
+static struct damon_sysfs_kdamond *damon_sysfs_kdamond_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_kdamond), GFP_KERNEL);
+}
+
+static int damon_sysfs_kdamond_add_dirs(struct damon_sysfs_kdamond *kdamond)
+{
+ struct damon_sysfs_contexts *contexts;
+ int err;
+
+ contexts = damon_sysfs_contexts_alloc();
+ if (!contexts)
+ return -ENOMEM;
+
+ err = kobject_init_and_add(&contexts->kobj,
+ &damon_sysfs_contexts_ktype, &kdamond->kobj,
+ "contexts");
+ if (err) {
+ kobject_put(&contexts->kobj);
+ return err;
+ }
+ kdamond->contexts = contexts;
+
+ return err;
+}
+
+static void damon_sysfs_kdamond_rm_dirs(struct damon_sysfs_kdamond *kdamond)
+{
+ damon_sysfs_contexts_rm_dirs(kdamond->contexts);
+ kobject_put(&kdamond->contexts->kobj);
+}
+
+static bool damon_sysfs_ctx_running(struct damon_ctx *ctx)
+{
+ bool running;
+
+ mutex_lock(&ctx->kdamond_lock);
+ running = ctx->kdamond != NULL;
+ mutex_unlock(&ctx->kdamond_lock);
+ return running;
+}
+
+/*
+ * enum damon_sysfs_cmd - Commands for a specific kdamond.
+ */
+enum damon_sysfs_cmd {
+ /* @DAMON_SYSFS_CMD_ON: Turn the kdamond on. */
+ DAMON_SYSFS_CMD_ON,
+ /* @DAMON_SYSFS_CMD_OFF: Turn the kdamond off. */
+ DAMON_SYSFS_CMD_OFF,
+ /* @DAMON_SYSFS_CMD_COMMIT: Update kdamond inputs. */
+ DAMON_SYSFS_CMD_COMMIT,
+ /*
+ * @DAMON_SYSFS_CMD_UPDATE_SCHEMES_STATS: Update scheme stats sysfs
+ * files.
+ */
+ DAMON_SYSFS_CMD_UPDATE_SCHEMES_STATS,
+ /*
+ * @DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES: Update
+ * tried_regions/total_bytes sysfs files for each scheme.
+ */
+ DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES,
+ /*
+ * @DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS: Update schemes tried
+ * regions
+ */
+ DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS,
+ /*
+ * @DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS: Clear schemes tried
+ * regions
+ */
+ DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS,
+ /*
+ * @NR_DAMON_SYSFS_CMDS: Total number of DAMON sysfs commands.
+ */
+ NR_DAMON_SYSFS_CMDS,
+};
+
+/* Should match with enum damon_sysfs_cmd */
+static const char * const damon_sysfs_cmd_strs[] = {
+ "on",
+ "off",
+ "commit",
+ "update_schemes_stats",
+ "update_schemes_tried_bytes",
+ "update_schemes_tried_regions",
+ "clear_schemes_tried_regions",
+};
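+
+/*
+ * A usage sketch (assuming the default /sys/kernel/mm/damon/admin layout):
+ * each command is requested by writing its keyword to the 'state' file of
+ * the kdamond, e.g.,
+ *
+ *   # echo on > kdamonds/0/state
+ *   # echo update_schemes_stats > kdamonds/0/state
+ */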
+
+/*
+ * struct damon_sysfs_cmd_request - A request to the DAMON callback.
+ * @cmd: The command that needs to be handled by the callback.
+ * @kdamond: The kobject wrapper that is associated with the kdamond thread.
+ *
+ * This structure represents a sysfs command request that needs to access some
+ * DAMON context-internal data. Because DAMON context-internal data can be
+ * safely accessed from DAMON callbacks without additional synchronization, the
+ * request will be handled by the DAMON callback. Non-``NULL`` @kdamond means
+ * the request is valid.
+ */
+struct damon_sysfs_cmd_request {
+ enum damon_sysfs_cmd cmd;
+ struct damon_sysfs_kdamond *kdamond;
+};
+
+/* Current DAMON callback request. Protected by damon_sysfs_lock. */
+static struct damon_sysfs_cmd_request damon_sysfs_cmd_request;
+
+static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_kdamond *kdamond = container_of(kobj,
+ struct damon_sysfs_kdamond, kobj);
+ struct damon_ctx *ctx = kdamond->damon_ctx;
+ bool running;
+
+ if (!ctx)
+ running = false;
+ else
+ running = damon_sysfs_ctx_running(ctx);
+
+ return sysfs_emit(buf, "%s\n", running ?
+ damon_sysfs_cmd_strs[DAMON_SYSFS_CMD_ON] :
+ damon_sysfs_cmd_strs[DAMON_SYSFS_CMD_OFF]);
+}
+
+static int damon_sysfs_set_attrs(struct damon_ctx *ctx,
+ struct damon_sysfs_attrs *sys_attrs)
+{
+ struct damon_sysfs_intervals *sys_intervals = sys_attrs->intervals;
+ struct damon_sysfs_ul_range *sys_nr_regions =
+ sys_attrs->nr_regions_range;
+ struct damon_attrs attrs = {
+ .sample_interval = sys_intervals->sample_us,
+ .aggr_interval = sys_intervals->aggr_us,
+ .ops_update_interval = sys_intervals->update_us,
+ .min_nr_regions = sys_nr_regions->min,
+ .max_nr_regions = sys_nr_regions->max,
+ };
+ return damon_set_attrs(ctx, &attrs);
+}
+
+static void damon_sysfs_destroy_targets(struct damon_ctx *ctx)
+{
+ struct damon_target *t, *next;
+ bool has_pid = damon_target_has_pid(ctx);
+
+ damon_for_each_target_safe(t, next, ctx) {
+ if (has_pid)
+ put_pid(t->pid);
+ damon_destroy_target(t);
+ }
+}
+
+static int damon_sysfs_set_regions(struct damon_target *t,
+ struct damon_sysfs_regions *sysfs_regions)
+{
+ struct damon_addr_range *ranges = kmalloc_array(sysfs_regions->nr,
+ sizeof(*ranges), GFP_KERNEL | __GFP_NOWARN);
+ int i, err = -EINVAL;
+
+ if (!ranges)
+ return -ENOMEM;
+ for (i = 0; i < sysfs_regions->nr; i++) {
+ struct damon_sysfs_region *sys_region =
+ sysfs_regions->regions_arr[i];
+
+ if (sys_region->ar.start > sys_region->ar.end)
+ goto out;
+
+ ranges[i].start = sys_region->ar.start;
+ ranges[i].end = sys_region->ar.end;
+ if (i == 0)
+ continue;
+ if (ranges[i - 1].end > ranges[i].start)
+ goto out;
+ }
+ err = damon_set_regions(t, ranges, sysfs_regions->nr);
+out:
+ kfree(ranges);
+ return err;
+}
+
+static int damon_sysfs_add_target(struct damon_sysfs_target *sys_target,
+ struct damon_ctx *ctx)
+{
+ struct damon_target *t = damon_new_target();
+ int err = -EINVAL;
+
+ if (!t)
+ return -ENOMEM;
+ damon_add_target(ctx, t);
+ if (damon_target_has_pid(ctx)) {
+ t->pid = find_get_pid(sys_target->pid);
+ if (!t->pid)
+ goto destroy_targets_out;
+ }
+ err = damon_sysfs_set_regions(t, sys_target->regions);
+ if (err)
+ goto destroy_targets_out;
+ return 0;
+
+destroy_targets_out:
+ damon_sysfs_destroy_targets(ctx);
+ return err;
+}
+
+static int damon_sysfs_update_target_pid(struct damon_target *target, int pid)
+{
+ struct pid *pid_new;
+
+ pid_new = find_get_pid(pid);
+ if (!pid_new)
+ return -EINVAL;
+
+ if (pid_new == target->pid) {
+ put_pid(pid_new);
+ return 0;
+ }
+
+ put_pid(target->pid);
+ target->pid = pid_new;
+ return 0;
+}
+
+static int damon_sysfs_update_target(struct damon_target *target,
+ struct damon_ctx *ctx,
+ struct damon_sysfs_target *sys_target)
+{
+ int err = 0;
+
+ if (damon_target_has_pid(ctx)) {
+ err = damon_sysfs_update_target_pid(target, sys_target->pid);
+ if (err)
+ return err;
+ }
+
+ /*
+ * Update the monitoring target region boundaries only if the user has
+ * set one or more regions. This makes it easier to keep the current
+ * monitoring results and regions, especially for ops like 'vaddr' that
+ * update the target regions dynamically.
+ */
+ if (sys_target->regions->nr)
+ err = damon_sysfs_set_regions(target, sys_target->regions);
+ return err;
+}
+
+static int damon_sysfs_set_targets(struct damon_ctx *ctx,
+ struct damon_sysfs_targets *sysfs_targets)
+{
+ struct damon_target *t, *next;
+ int i = 0, err;
+
+ /* Multiple physical address space monitoring targets make no sense */
+ if (ctx->ops.id == DAMON_OPS_PADDR && sysfs_targets->nr > 1)
+ return -EINVAL;
+
+ damon_for_each_target_safe(t, next, ctx) {
+ if (i < sysfs_targets->nr) {
+ err = damon_sysfs_update_target(t, ctx,
+ sysfs_targets->targets_arr[i]);
+ if (err)
+ return err;
+ } else {
+ if (damon_target_has_pid(ctx))
+ put_pid(t->pid);
+ damon_destroy_target(t);
+ }
+ i++;
+ }
+
+ for (; i < sysfs_targets->nr; i++) {
+ struct damon_sysfs_target *st = sysfs_targets->targets_arr[i];
+
+ err = damon_sysfs_add_target(st, ctx);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static bool damon_sysfs_schemes_regions_updating;
+
+static void damon_sysfs_before_terminate(struct damon_ctx *ctx)
+{
+ struct damon_target *t, *next;
+ struct damon_sysfs_kdamond *kdamond;
+ enum damon_sysfs_cmd cmd;
+
+ /* damon_sysfs_schemes_update_regions_stop() might not have been called yet */
+ kdamond = damon_sysfs_cmd_request.kdamond;
+ cmd = damon_sysfs_cmd_request.cmd;
+ if (kdamond && ctx == kdamond->damon_ctx &&
+ (cmd == DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS ||
+ cmd == DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES) &&
+ damon_sysfs_schemes_regions_updating) {
+ damon_sysfs_schemes_update_regions_stop(ctx);
+ damon_sysfs_schemes_regions_updating = false;
+ mutex_unlock(&damon_sysfs_lock);
+ }
+
+ if (!damon_target_has_pid(ctx))
+ return;
+
+ mutex_lock(&ctx->kdamond_lock);
+ damon_for_each_target_safe(t, next, ctx) {
+ put_pid(t->pid);
+ damon_destroy_target(t);
+ }
+ mutex_unlock(&ctx->kdamond_lock);
+}
+
+/*
+ * damon_sysfs_upd_schemes_stats() - Update schemes stats sysfs files.
+ * @kdamond: The kobject wrapper that is associated with the kdamond thread.
+ *
+ * This function reads the schemes stats of the specific kdamond and updates
+ * the related values of the sysfs files. This function should be called from
+ * DAMON callbacks while holding ``damon_sysfs_lock``, to safely access the
+ * DAMON context-internal data and DAMON sysfs variables.
+ */
+static int damon_sysfs_upd_schemes_stats(struct damon_sysfs_kdamond *kdamond)
+{
+ struct damon_ctx *ctx = kdamond->damon_ctx;
+
+ if (!ctx)
+ return -EINVAL;
+ damon_sysfs_schemes_update_stats(
+ kdamond->contexts->contexts_arr[0]->schemes, ctx);
+ return 0;
+}
+
+static int damon_sysfs_upd_schemes_regions_start(
+ struct damon_sysfs_kdamond *kdamond, bool total_bytes_only)
+{
+ struct damon_ctx *ctx = kdamond->damon_ctx;
+
+ if (!ctx)
+ return -EINVAL;
+ return damon_sysfs_schemes_update_regions_start(
+ kdamond->contexts->contexts_arr[0]->schemes, ctx,
+ total_bytes_only);
+}
+
+static int damon_sysfs_upd_schemes_regions_stop(
+ struct damon_sysfs_kdamond *kdamond)
+{
+ struct damon_ctx *ctx = kdamond->damon_ctx;
+
+ if (!ctx)
+ return -EINVAL;
+ return damon_sysfs_schemes_update_regions_stop(ctx);
+}
+
+static int damon_sysfs_clear_schemes_regions(
+ struct damon_sysfs_kdamond *kdamond)
+{
+ struct damon_ctx *ctx = kdamond->damon_ctx;
+
+ if (!ctx)
+ return -EINVAL;
+ return damon_sysfs_schemes_clear_regions(
+ kdamond->contexts->contexts_arr[0]->schemes, ctx);
+}
+
+static inline bool damon_sysfs_kdamond_running(
+ struct damon_sysfs_kdamond *kdamond)
+{
+ return kdamond->damon_ctx &&
+ damon_sysfs_ctx_running(kdamond->damon_ctx);
+}
+
+static int damon_sysfs_apply_inputs(struct damon_ctx *ctx,
+ struct damon_sysfs_context *sys_ctx)
+{
+ int err;
+
+ err = damon_select_ops(ctx, sys_ctx->ops_id);
+ if (err)
+ return err;
+ err = damon_sysfs_set_attrs(ctx, sys_ctx->attrs);
+ if (err)
+ return err;
+ err = damon_sysfs_set_targets(ctx, sys_ctx->targets);
+ if (err)
+ return err;
+ return damon_sysfs_set_schemes(ctx, sys_ctx->schemes);
+}
+
+/*
+ * damon_sysfs_commit_input() - Commit user inputs to a running kdamond.
+ * @kdamond: The kobject wrapper for the associated kdamond.
+ *
+ * If the sysfs input is wrong, the kdamond will be terminated.
+ */
+static int damon_sysfs_commit_input(struct damon_sysfs_kdamond *kdamond)
+{
+ if (!damon_sysfs_kdamond_running(kdamond))
+ return -EINVAL;
+ /* TODO: Support multiple contexts per kdamond */
+ if (kdamond->contexts->nr != 1)
+ return -EINVAL;
+
+ return damon_sysfs_apply_inputs(kdamond->damon_ctx,
+ kdamond->contexts->contexts_arr[0]);
+}
+
+/*
+ * damon_sysfs_cmd_request_callback() - DAMON callback for handling requests.
+ * @c: The DAMON context of the callback.
+ *
+ * This function is periodically called back from the kdamond thread for @c.
+ * Then, it checks if there is a waiting DAMON sysfs request and handles it.
+ */
+static int damon_sysfs_cmd_request_callback(struct damon_ctx *c)
+{
+ struct damon_sysfs_kdamond *kdamond;
+ bool total_bytes_only = false;
+ int err = 0;
+
+ /* avoid deadlock due to concurrent state_store('off') */
+ if (!damon_sysfs_schemes_regions_updating &&
+ !mutex_trylock(&damon_sysfs_lock))
+ return 0;
+ kdamond = damon_sysfs_cmd_request.kdamond;
+ if (!kdamond || kdamond->damon_ctx != c)
+ goto out;
+ switch (damon_sysfs_cmd_request.cmd) {
+ case DAMON_SYSFS_CMD_UPDATE_SCHEMES_STATS:
+ err = damon_sysfs_upd_schemes_stats(kdamond);
+ break;
+ case DAMON_SYSFS_CMD_COMMIT:
+ err = damon_sysfs_commit_input(kdamond);
+ break;
+ case DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES:
+ total_bytes_only = true;
+ fallthrough;
+ case DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS:
+ if (!damon_sysfs_schemes_regions_updating) {
+ err = damon_sysfs_upd_schemes_regions_start(kdamond,
+ total_bytes_only);
+ if (!err) {
+ damon_sysfs_schemes_regions_updating = true;
+ goto keep_lock_out;
+ }
+ } else {
+ err = damon_sysfs_upd_schemes_regions_stop(kdamond);
+ damon_sysfs_schemes_regions_updating = false;
+ }
+ break;
+ case DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS:
+ err = damon_sysfs_clear_schemes_regions(kdamond);
+ break;
+ default:
+ break;
+ }
+ /* Mark the request as invalid now. */
+ damon_sysfs_cmd_request.kdamond = NULL;
+out:
+ if (!damon_sysfs_schemes_regions_updating)
+ mutex_unlock(&damon_sysfs_lock);
+keep_lock_out:
+ return err;
+}
+
+static struct damon_ctx *damon_sysfs_build_ctx(
+ struct damon_sysfs_context *sys_ctx)
+{
+ struct damon_ctx *ctx = damon_new_ctx();
+ int err;
+
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+
+ err = damon_sysfs_apply_inputs(ctx, sys_ctx);
+ if (err) {
+ damon_destroy_ctx(ctx);
+ return ERR_PTR(err);
+ }
+
+ ctx->callback.after_wmarks_check = damon_sysfs_cmd_request_callback;
+ ctx->callback.after_aggregation = damon_sysfs_cmd_request_callback;
+ ctx->callback.before_terminate = damon_sysfs_before_terminate;
+ return ctx;
+}
+
+static int damon_sysfs_turn_damon_on(struct damon_sysfs_kdamond *kdamond)
+{
+ struct damon_ctx *ctx;
+ int err;
+
+ if (damon_sysfs_kdamond_running(kdamond))
+ return -EBUSY;
+ if (damon_sysfs_cmd_request.kdamond == kdamond)
+ return -EBUSY;
+ /* TODO: support multiple contexts per kdamond */
+ if (kdamond->contexts->nr != 1)
+ return -EINVAL;
+
+ if (kdamond->damon_ctx)
+ damon_destroy_ctx(kdamond->damon_ctx);
+ kdamond->damon_ctx = NULL;
+
+ ctx = damon_sysfs_build_ctx(kdamond->contexts->contexts_arr[0]);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+ err = damon_start(&ctx, 1, false);
+ if (err) {
+ damon_destroy_ctx(ctx);
+ return err;
+ }
+ kdamond->damon_ctx = ctx;
+ return err;
+}
+
+static int damon_sysfs_turn_damon_off(struct damon_sysfs_kdamond *kdamond)
+{
+ if (!kdamond->damon_ctx)
+ return -EINVAL;
+ return damon_stop(&kdamond->damon_ctx, 1);
+ /*
+ * To let users read the final monitoring results of the already turned-off
+ * DAMON, we free kdamond->damon_ctx in the next
+ * damon_sysfs_turn_damon_on() or kdamonds_nr_store().
+ */
+}
+
+/*
+ * damon_sysfs_handle_cmd() - Handle a command for a specific kdamond.
+ * @cmd: The command to handle.
+ * @kdamond: The kobject wrapper for the associated kdamond.
+ *
+ * This function handles a DAMON sysfs command for a kdamond. For commands
+ * that need to access running DAMON context-internal data, it requests the
+ * DAMON callback (damon_sysfs_cmd_request_callback()) to handle the command,
+ * and waits until the request is properly handled or the context is completed.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int damon_sysfs_handle_cmd(enum damon_sysfs_cmd cmd,
+ struct damon_sysfs_kdamond *kdamond)
+{
+ bool need_wait = true;
+
+ /* Handle commands that don't access DAMON context-internal data */
+ switch (cmd) {
+ case DAMON_SYSFS_CMD_ON:
+ return damon_sysfs_turn_damon_on(kdamond);
+ case DAMON_SYSFS_CMD_OFF:
+ return damon_sysfs_turn_damon_off(kdamond);
+ default:
+ break;
+ }
+
+ /* Pass the command to DAMON callback for safe DAMON context access */
+ if (damon_sysfs_cmd_request.kdamond)
+ return -EBUSY;
+ if (!damon_sysfs_kdamond_running(kdamond))
+ return -EINVAL;
+ damon_sysfs_cmd_request.cmd = cmd;
+ damon_sysfs_cmd_request.kdamond = kdamond;
+
+ /*
+ * Wait until damon_sysfs_cmd_request_callback() handles the request
+ * from the kdamond context.
+ */
+ mutex_unlock(&damon_sysfs_lock);
+ while (need_wait) {
+ schedule_timeout_idle(msecs_to_jiffies(100));
+ if (!mutex_trylock(&damon_sysfs_lock))
+ continue;
+ if (!damon_sysfs_cmd_request.kdamond) {
+ /* damon_sysfs_cmd_request_callback() handled */
+ need_wait = false;
+ } else if (!damon_sysfs_kdamond_running(kdamond)) {
+ /* kdamond has already finished */
+ need_wait = false;
+ damon_sysfs_cmd_request.kdamond = NULL;
+ }
+ mutex_unlock(&damon_sysfs_lock);
+ }
+ mutex_lock(&damon_sysfs_lock);
+ return 0;
+}
+
+static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_kdamond *kdamond = container_of(kobj,
+ struct damon_sysfs_kdamond, kobj);
+ enum damon_sysfs_cmd cmd;
+ ssize_t ret = -EINVAL;
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ for (cmd = 0; cmd < NR_DAMON_SYSFS_CMDS; cmd++) {
+ if (sysfs_streq(buf, damon_sysfs_cmd_strs[cmd])) {
+ ret = damon_sysfs_handle_cmd(cmd, kdamond);
+ break;
+ }
+ }
+ mutex_unlock(&damon_sysfs_lock);
+ if (!ret)
+ ret = count;
+ return ret;
+}
+
+static ssize_t pid_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_kdamond *kdamond = container_of(kobj,
+ struct damon_sysfs_kdamond, kobj);
+ struct damon_ctx *ctx;
+ int pid = -1;
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ ctx = kdamond->damon_ctx;
+ if (!ctx)
+ goto out;
+
+ mutex_lock(&ctx->kdamond_lock);
+ if (ctx->kdamond)
+ pid = ctx->kdamond->pid;
+ mutex_unlock(&ctx->kdamond_lock);
+out:
+ mutex_unlock(&damon_sysfs_lock);
+ return sysfs_emit(buf, "%d\n", pid);
+}
+
+static void damon_sysfs_kdamond_release(struct kobject *kobj)
+{
+ struct damon_sysfs_kdamond *kdamond = container_of(kobj,
+ struct damon_sysfs_kdamond, kobj);
+
+ if (kdamond->damon_ctx)
+ damon_destroy_ctx(kdamond->damon_ctx);
+ kfree(kdamond);
+}
+
+static struct kobj_attribute damon_sysfs_kdamond_state_attr =
+ __ATTR_RW_MODE(state, 0600);
+
+static struct kobj_attribute damon_sysfs_kdamond_pid_attr =
+ __ATTR_RO_MODE(pid, 0400);
+
+static struct attribute *damon_sysfs_kdamond_attrs[] = {
+ &damon_sysfs_kdamond_state_attr.attr,
+ &damon_sysfs_kdamond_pid_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_kdamond);
+
+static const struct kobj_type damon_sysfs_kdamond_ktype = {
+ .release = damon_sysfs_kdamond_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_kdamond_groups,
+};
+
+/*
+ * kdamonds directory
+ */
+
+struct damon_sysfs_kdamonds {
+ struct kobject kobj;
+ struct damon_sysfs_kdamond **kdamonds_arr;
+ int nr;
+};
+
+static struct damon_sysfs_kdamonds *damon_sysfs_kdamonds_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_kdamonds), GFP_KERNEL);
+}
+
+static void damon_sysfs_kdamonds_rm_dirs(struct damon_sysfs_kdamonds *kdamonds)
+{
+ struct damon_sysfs_kdamond **kdamonds_arr = kdamonds->kdamonds_arr;
+ int i;
+
+ for (i = 0; i < kdamonds->nr; i++) {
+ damon_sysfs_kdamond_rm_dirs(kdamonds_arr[i]);
+ kobject_put(&kdamonds_arr[i]->kobj);
+ }
+ kdamonds->nr = 0;
+ kfree(kdamonds_arr);
+ kdamonds->kdamonds_arr = NULL;
+}
+
+static bool damon_sysfs_kdamonds_busy(struct damon_sysfs_kdamond **kdamonds,
+ int nr_kdamonds)
+{
+ int i;
+
+ for (i = 0; i < nr_kdamonds; i++) {
+ if (damon_sysfs_kdamond_running(kdamonds[i]) ||
+ damon_sysfs_cmd_request.kdamond == kdamonds[i])
+ return true;
+ }
+
+ return false;
+}
+
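+/*
+ * Replace the kdamond directories with @nr_kdamonds fresh ones, named '0' to
+ * '@nr_kdamonds - 1'. Fails with -EBUSY if any existing kdamond is running
+ * or has a pending sysfs command request.
+ */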
+static int damon_sysfs_kdamonds_add_dirs(struct damon_sysfs_kdamonds *kdamonds,
+ int nr_kdamonds)
+{
+ struct damon_sysfs_kdamond **kdamonds_arr, *kdamond;
+ int err, i;
+
+ if (damon_sysfs_kdamonds_busy(kdamonds->kdamonds_arr, kdamonds->nr))
+ return -EBUSY;
+
+ damon_sysfs_kdamonds_rm_dirs(kdamonds);
+ if (!nr_kdamonds)
+ return 0;
+
+ kdamonds_arr = kmalloc_array(nr_kdamonds, sizeof(*kdamonds_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!kdamonds_arr)
+ return -ENOMEM;
+ kdamonds->kdamonds_arr = kdamonds_arr;
+
+ for (i = 0; i < nr_kdamonds; i++) {
+ kdamond = damon_sysfs_kdamond_alloc();
+ if (!kdamond) {
+ damon_sysfs_kdamonds_rm_dirs(kdamonds);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&kdamond->kobj,
+ &damon_sysfs_kdamond_ktype, &kdamonds->kobj,
+ "%d", i);
+ if (err)
+ goto out;
+
+ err = damon_sysfs_kdamond_add_dirs(kdamond);
+ if (err)
+ goto out;
+
+ kdamonds_arr[i] = kdamond;
+ kdamonds->nr++;
+ }
+ return 0;
+
+out:
+ damon_sysfs_kdamonds_rm_dirs(kdamonds);
+ kobject_put(&kdamond->kobj);
+ return err;
+}
+
+static ssize_t nr_kdamonds_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_kdamonds *kdamonds = container_of(kobj,
+ struct damon_sysfs_kdamonds, kobj);
+
+ return sysfs_emit(buf, "%d\n", kdamonds->nr);
+}
+
+static ssize_t nr_kdamonds_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_kdamonds *kdamonds;
+ int nr, err;
+
+ err = kstrtoint(buf, 0, &nr);
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ kdamonds = container_of(kobj, struct damon_sysfs_kdamonds, kobj);
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_kdamonds_add_dirs(kdamonds, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static void damon_sysfs_kdamonds_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_kdamonds, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_kdamonds_nr_attr =
+ __ATTR_RW_MODE(nr_kdamonds, 0600);
+
+static struct attribute *damon_sysfs_kdamonds_attrs[] = {
+ &damon_sysfs_kdamonds_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_kdamonds);
+
+static const struct kobj_type damon_sysfs_kdamonds_ktype = {
+ .release = damon_sysfs_kdamonds_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_kdamonds_groups,
+};
+
+/*
+ * damon user interface directory
+ */
+
+struct damon_sysfs_ui_dir {
+ struct kobject kobj;
+ struct damon_sysfs_kdamonds *kdamonds;
+};
+
+static struct damon_sysfs_ui_dir *damon_sysfs_ui_dir_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_ui_dir), GFP_KERNEL);
+}
+
+static int damon_sysfs_ui_dir_add_dirs(struct damon_sysfs_ui_dir *ui_dir)
+{
+ struct damon_sysfs_kdamonds *kdamonds;
+ int err;
+
+ kdamonds = damon_sysfs_kdamonds_alloc();
+ if (!kdamonds)
+ return -ENOMEM;
+
+ err = kobject_init_and_add(&kdamonds->kobj,
+ &damon_sysfs_kdamonds_ktype, &ui_dir->kobj,
+ "kdamonds");
+ if (err) {
+ kobject_put(&kdamonds->kobj);
+ return err;
+ }
+ ui_dir->kdamonds = kdamonds;
+ return err;
+}
+
+static void damon_sysfs_ui_dir_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_ui_dir, kobj));
+}
+
+static struct attribute *damon_sysfs_ui_dir_attrs[] = {
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_ui_dir);
+
+static const struct kobj_type damon_sysfs_ui_dir_ktype = {
+ .release = damon_sysfs_ui_dir_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_ui_dir_groups,
+};
+
+static int __init damon_sysfs_init(void)
+{
+ struct kobject *damon_sysfs_root;
+ struct damon_sysfs_ui_dir *admin;
+ int err;
+
+ damon_sysfs_root = kobject_create_and_add("damon", mm_kobj);
+ if (!damon_sysfs_root)
+ return -ENOMEM;
+
+ admin = damon_sysfs_ui_dir_alloc();
+ if (!admin) {
+ kobject_put(damon_sysfs_root);
+ return -ENOMEM;
+ }
+ err = kobject_init_and_add(&admin->kobj, &damon_sysfs_ui_dir_ktype,
+ damon_sysfs_root, "admin");
+ if (err)
+ goto out;
+ err = damon_sysfs_ui_dir_add_dirs(admin);
+ if (err)
+ goto out;
+ return 0;
+
+out:
+ kobject_put(&admin->kobj);
+ kobject_put(damon_sysfs_root);
+ return err;
+}
+subsys_initcall(damon_sysfs_init);
diff --git a/mm/damon/vaddr-test.h b/mm/damon/vaddr-test.h
new file mode 100644
index 000000000..dcf1ca6b3
--- /dev/null
+++ b/mm/damon/vaddr-test.h
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Data Access Monitor Unit Tests
+ *
+ * Copyright 2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
+
+#ifndef _DAMON_VADDR_TEST_H
+#define _DAMON_VADDR_TEST_H
+
+#include <kunit/test.h>
+
+static int __link_vmas(struct maple_tree *mt, struct vm_area_struct *vmas,
+ ssize_t nr_vmas)
+{
+ int i, ret = -ENOMEM;
+ MA_STATE(mas, mt, 0, 0);
+
+ if (!nr_vmas)
+ return 0;
+
+ mas_lock(&mas);
+ for (i = 0; i < nr_vmas; i++) {
+ mas_set_range(&mas, vmas[i].vm_start, vmas[i].vm_end - 1);
+ if (mas_store_gfp(&mas, &vmas[i], GFP_KERNEL))
+ goto failed;
+ }
+
+ ret = 0;
+failed:
+ mas_unlock(&mas);
+ return ret;
+}
+
+/*
+ * Test __damon_va_three_regions() function
+ *
+ * In the case of virtual address space monitoring, DAMON converts the
+ * complex and dynamic memory mappings of each target task to three
+ * discontiguous regions which cover every mapped area. However, the three
+ * regions should not include the two biggest unmapped areas in the original
+ * mapping, because the two biggest areas are normally the areas between 1)
+ * the heap and the mmap()-ed regions, and 2) the mmap()-ed regions and the
+ * stack. Because these two unmapped areas are very large but obviously never
+ * accessed, covering them is just a waste.
+ *
+ * __damon_va_three_regions() receives an address space of a process. It
+ * first identifies the start of mappings, end of mappings, and the two biggest
+ * unmapped areas. After that, based on the information, it constructs the
+ * three regions and returns. For more detail, refer to the comment of the
+ * 'damon_init_regions_of()' function definition in the 'mm/damon.c' file.
+ *
+ * For example, suppose virtual address ranges of 10-20, 20-25, 200-210,
+ * 210-220, 300-305, and 307-330 (other comments represent these mappings in
+ * the shorter form 10-20-25, 200-210-220, 300-305, 307-330) of a process are
+ * mapped. To cover every mapping, the three regions should start at 10 and
+ * end at 330. The process also has three unmapped areas, 25-200, 220-300,
+ * and 305-307. Among those, 25-200 and 220-300 are the biggest two unmapped
+ * areas, and thus the mappings should be converted to the three regions
+ * 10-25, 200-220, and 300-330.
+ */
+static void damon_test_three_regions_in_vmas(struct kunit *test)
+{
+ static struct mm_struct mm;
+ struct damon_addr_range regions[3] = {0,};
+ /* 10-20-25, 200-210-220, 300-305, 307-330 */
+ struct vm_area_struct vmas[] = {
+ (struct vm_area_struct) {.vm_start = 10, .vm_end = 20},
+ (struct vm_area_struct) {.vm_start = 20, .vm_end = 25},
+ (struct vm_area_struct) {.vm_start = 200, .vm_end = 210},
+ (struct vm_area_struct) {.vm_start = 210, .vm_end = 220},
+ (struct vm_area_struct) {.vm_start = 300, .vm_end = 305},
+ (struct vm_area_struct) {.vm_start = 307, .vm_end = 330},
+ };
+
+ mt_init_flags(&mm.mm_mt, MM_MT_FLAGS);
+ if (__link_vmas(&mm.mm_mt, vmas, ARRAY_SIZE(vmas)))
+ kunit_skip(test, "Failed to create VMA tree");
+
+ __damon_va_three_regions(&mm, regions);
+
+ KUNIT_EXPECT_EQ(test, 10ul, regions[0].start);
+ KUNIT_EXPECT_EQ(test, 25ul, regions[0].end);
+ KUNIT_EXPECT_EQ(test, 200ul, regions[1].start);
+ KUNIT_EXPECT_EQ(test, 220ul, regions[1].end);
+ KUNIT_EXPECT_EQ(test, 300ul, regions[2].start);
+ KUNIT_EXPECT_EQ(test, 330ul, regions[2].end);
+}
+
+static struct damon_region *__nth_region_of(struct damon_target *t, int idx)
+{
+ struct damon_region *r;
+ unsigned int i = 0;
+
+ damon_for_each_region(r, t) {
+ if (i++ == idx)
+ return r;
+ }
+
+ return NULL;
+}
+
+/*
+ * Test 'damon_set_regions()'
+ *
+ * test kunit object
+ * regions an array containing start/end addresses of current
+ * monitoring target regions
+ * nr_regions the number of the addresses in 'regions'
+ * three_regions The three regions that need to be applied now
+ * expected start/end addresses of monitoring target regions that
+ * 'three_regions' are applied
+ * nr_expected the number of addresses in 'expected'
+ *
+ * The memory mapping of the target processes changes dynamically. To follow
+ * the change, DAMON periodically reads the mappings, simplifies it to the
+ * three regions, and updates the monitoring target regions to fit in the three
+ * regions. The update of current target regions is the role of
+ * 'damon_set_regions()'.
+ *
+ * This test passes the given target regions and the new three regions that
+ * need to be applied to the function, and checks whether it updates the regions
+ * as expected.
+ */
+static void damon_do_test_apply_three_regions(struct kunit *test,
+ unsigned long *regions, int nr_regions,
+ struct damon_addr_range *three_regions,
+ unsigned long *expected, int nr_expected)
+{
+ struct damon_target *t;
+ struct damon_region *r;
+ int i;
+
+ t = damon_new_target();
+ for (i = 0; i < nr_regions / 2; i++) {
+ r = damon_new_region(regions[i * 2], regions[i * 2 + 1]);
+ damon_add_region(r, t);
+ }
+
+ damon_set_regions(t, three_regions, 3);
+
+ for (i = 0; i < nr_expected / 2; i++) {
+ r = __nth_region_of(t, i);
+ KUNIT_EXPECT_EQ(test, r->ar.start, expected[i * 2]);
+ KUNIT_EXPECT_EQ(test, r->ar.end, expected[i * 2 + 1]);
+ }
+
+ damon_destroy_target(t);
+}
+
+/*
+ * This function tests the most common case, where the three big regions are
+ * only slightly changed. Target regions should adjust their boundaries
+ * (10-20-30, 50-55, 70-80, 90-100) to fit with the new big regions, or remove
+ * target regions (57-79) that are now out of the three regions.
+ */
+static void damon_test_apply_three_regions1(struct kunit *test)
+{
+ /* 10-20-30, 50-55-57-59, 70-80-90-100 */
+ unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59,
+ 70, 80, 80, 90, 90, 100};
+ /* 5-27, 45-55, 73-104 */
+ struct damon_addr_range new_three_regions[3] = {
+ (struct damon_addr_range){.start = 5, .end = 27},
+ (struct damon_addr_range){.start = 45, .end = 55},
+ (struct damon_addr_range){.start = 73, .end = 104} };
+ /* 5-20-27, 45-55, 73-80-90-104 */
+ unsigned long expected[] = {5, 20, 20, 27, 45, 55,
+ 73, 80, 80, 90, 90, 104};
+
+ damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions),
+ new_three_regions, expected, ARRAY_SIZE(expected));
+}
+
+/*
+ * Test a slightly bigger change. Similar to the above, but the second big
+ * region now requires two target regions (50-55, 57-59) to be removed.
+ */
+static void damon_test_apply_three_regions2(struct kunit *test)
+{
+ /* 10-20-30, 50-55-57-59, 70-80-90-100 */
+ unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59,
+ 70, 80, 80, 90, 90, 100};
+ /* 5-27, 56-57, 65-104 */
+ struct damon_addr_range new_three_regions[3] = {
+ (struct damon_addr_range){.start = 5, .end = 27},
+ (struct damon_addr_range){.start = 56, .end = 57},
+ (struct damon_addr_range){.start = 65, .end = 104} };
+ /* 5-20-27, 56-57, 65-80-90-104 */
+ unsigned long expected[] = {5, 20, 20, 27, 56, 57,
+ 65, 80, 80, 90, 90, 104};
+
+ damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions),
+ new_three_regions, expected, ARRAY_SIZE(expected));
+}
+
+/*
+ * Test a big change. The second big region has been totally freed and mapped
+ * to a different area (50-59 -> 61-63). The target regions which were in the
+ * old second big region (50-55-57-59) should be removed, and a new target
+ * region covering the new second big region (61-63) should be created.
+ */
+static void damon_test_apply_three_regions3(struct kunit *test)
+{
+ /* 10-20-30, 50-55-57-59, 70-80-90-100 */
+ unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59,
+ 70, 80, 80, 90, 90, 100};
+ /* 5-27, 61-63, 65-104 */
+ struct damon_addr_range new_three_regions[3] = {
+ (struct damon_addr_range){.start = 5, .end = 27},
+ (struct damon_addr_range){.start = 61, .end = 63},
+ (struct damon_addr_range){.start = 65, .end = 104} };
+ /* 5-20-27, 61-63, 65-80-90-104 */
+ unsigned long expected[] = {5, 20, 20, 27, 61, 63,
+ 65, 80, 80, 90, 90, 104};
+
+ damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions),
+ new_three_regions, expected, ARRAY_SIZE(expected));
+}
+
+/*
+ * Test another big change. Both the second and third big regions (50-59 and
+ * 70-100) have been totally freed and mapped to different areas (30-32 and
+ * 65-68). The target regions which were in the old second and third big
+ * regions should now be removed, and new target regions covering the new
+ * second and third big regions should be created.
+ */
+static void damon_test_apply_three_regions4(struct kunit *test)
+{
+ /* 10-20-30, 50-55-57-59, 70-80-90-100 */
+ unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59,
+ 70, 80, 80, 90, 90, 100};
+ /* 5-7, 30-32, 65-68 */
+ struct damon_addr_range new_three_regions[3] = {
+ (struct damon_addr_range){.start = 5, .end = 7},
+ (struct damon_addr_range){.start = 30, .end = 32},
+ (struct damon_addr_range){.start = 65, .end = 68} };
+ /* expect 5-7, 30-32, 65-68 */
+ unsigned long expected[] = {5, 7, 30, 32, 65, 68};
+
+ damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions),
+ new_three_regions, expected, ARRAY_SIZE(expected));
+}
+
+static void damon_test_split_evenly_fail(struct kunit *test,
+ unsigned long start, unsigned long end, unsigned int nr_pieces)
+{
+ struct damon_target *t = damon_new_target();
+ struct damon_region *r = damon_new_region(start, end);
+
+ damon_add_region(r, t);
+ KUNIT_EXPECT_EQ(test,
+ damon_va_evenly_split_region(t, r, nr_pieces), -EINVAL);
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1u);
+
+ damon_for_each_region(r, t) {
+ KUNIT_EXPECT_EQ(test, r->ar.start, start);
+ KUNIT_EXPECT_EQ(test, r->ar.end, end);
+ }
+
+ damon_free_target(t);
+}
+
+static void damon_test_split_evenly_succ(struct kunit *test,
+ unsigned long start, unsigned long end, unsigned int nr_pieces)
+{
+ struct damon_target *t = damon_new_target();
+ struct damon_region *r = damon_new_region(start, end);
+ unsigned long expected_width = (end - start) / nr_pieces;
+ unsigned long i = 0;
+
+ damon_add_region(r, t);
+ KUNIT_EXPECT_EQ(test,
+ damon_va_evenly_split_region(t, r, nr_pieces), 0);
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), nr_pieces);
+
+ damon_for_each_region(r, t) {
+ if (i == nr_pieces - 1) {
+ KUNIT_EXPECT_EQ(test,
+ r->ar.start, start + i * expected_width);
+ KUNIT_EXPECT_EQ(test, r->ar.end, end);
+ break;
+ }
+ KUNIT_EXPECT_EQ(test,
+ r->ar.start, start + i++ * expected_width);
+ KUNIT_EXPECT_EQ(test, r->ar.end, start + i * expected_width);
+ }
+ damon_free_target(t);
+}
+
+static void damon_test_split_evenly(struct kunit *test)
+{
+ KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(NULL, NULL, 5),
+ -EINVAL);
+
+ damon_test_split_evenly_fail(test, 0, 100, 0);
+ damon_test_split_evenly_succ(test, 0, 100, 10);
+ damon_test_split_evenly_succ(test, 5, 59, 5);
+ damon_test_split_evenly_fail(test, 5, 6, 2);
+}
+
+static struct kunit_case damon_test_cases[] = {
+ KUNIT_CASE(damon_test_three_regions_in_vmas),
+ KUNIT_CASE(damon_test_apply_three_regions1),
+ KUNIT_CASE(damon_test_apply_three_regions2),
+ KUNIT_CASE(damon_test_apply_three_regions3),
+ KUNIT_CASE(damon_test_apply_three_regions4),
+ KUNIT_CASE(damon_test_split_evenly),
+ {},
+};
+
+static struct kunit_suite damon_test_suite = {
+ .name = "damon-operations",
+ .test_cases = damon_test_cases,
+};
+kunit_test_suite(damon_test_suite);
+
+#endif /* _DAMON_VADDR_TEST_H */
+
+#endif /* CONFIG_DAMON_VADDR_KUNIT_TEST */
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
new file mode 100644
index 000000000..cf8a9fc5c
--- /dev/null
+++ b/mm/damon/vaddr.c
@@ -0,0 +1,729 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON Primitives for Virtual Address Spaces
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#define pr_fmt(fmt) "damon-va: " fmt
+
+#include <asm-generic/mman-common.h>
+#include <linux/highmem.h>
+#include <linux/hugetlb.h>
+#include <linux/mmu_notifier.h>
+#include <linux/page_idle.h>
+#include <linux/pagewalk.h>
+#include <linux/sched/mm.h>
+
+#include "ops-common.h"
+
+#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
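+/* Let the KUnit tests below use small, easy to follow addresses */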
+#undef DAMON_MIN_REGION
+#define DAMON_MIN_REGION 1
+#endif
+
+/*
+ * 't->pid' should be the pointer to the relevant 'struct pid' having reference
+ * count. Caller must put the returned task, unless it is NULL.
+ */
+static inline struct task_struct *damon_get_task_struct(struct damon_target *t)
+{
+ return get_pid_task(t->pid, PIDTYPE_PID);
+}
+
+/*
+ * Get the mm_struct of the given target
+ *
+ * Caller _must_ put the mm_struct after use, unless it is NULL.
+ *
+ * Returns the mm_struct of the target on success, NULL on failure
+ */
+static struct mm_struct *damon_get_mm(struct damon_target *t)
+{
+ struct task_struct *task;
+ struct mm_struct *mm;
+
+ task = damon_get_task_struct(t);
+ if (!task)
+ return NULL;
+
+ mm = get_task_mm(task);
+ put_task_struct(task);
+ return mm;
+}
+
+/*
+ * Functions for the initial monitoring target regions construction
+ */
+
+/*
+ * Size-evenly split a region into 'nr_pieces' small regions
+ *
+ * Returns 0 on success, or negative error code otherwise.
+ */
+static int damon_va_evenly_split_region(struct damon_target *t,
+ struct damon_region *r, unsigned int nr_pieces)
+{
+ unsigned long sz_orig, sz_piece, orig_end;
+ struct damon_region *n = NULL, *next;
+ unsigned long start;
+
+ if (!r || !nr_pieces)
+ return -EINVAL;
+
+ orig_end = r->ar.end;
+ sz_orig = damon_sz_region(r);
+ sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION);
+
+ if (!sz_piece)
+ return -EINVAL;
+
+ r->ar.end = r->ar.start + sz_piece;
+ next = damon_next_region(r);
+ for (start = r->ar.end; start + sz_piece <= orig_end;
+ start += sz_piece) {
+ n = damon_new_region(start, start + sz_piece);
+ if (!n)
+ return -ENOMEM;
+ damon_insert_region(n, r, next, t);
+ r = n;
+ }
+ /* complement last region for possible rounding error */
+ if (n)
+ n->ar.end = orig_end;
+
+ return 0;
+}
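+/*
+ * Example (assuming DAMON_MIN_REGION == 1, as in the KUnit test build above):
+ * evenly splitting the region [5, 59) into five pieces produces [5, 15),
+ * [15, 25), [25, 35), [35, 45) and [45, 59); the last piece absorbs the
+ * rounding remainder.
+ */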
+
+static unsigned long sz_range(struct damon_addr_range *r)
+{
+ return r->end - r->start;
+}
+
+/*
+ * Find three regions separated by the two biggest unmapped regions
+ *
+ * mm the mm_struct of the target address space
+ * regions an array of three address ranges in which the results are saved
+ *
+ * This function receives an address space and finds three regions in it that
+ * are separated by the two biggest unmapped regions in the space. Please
+ * refer to the comments of the '__damon_va_init_regions()' function below to
+ * know why this is necessary.
+ *
+ * Returns 0 on success, or a negative error code otherwise.
+ */
+static int __damon_va_three_regions(struct mm_struct *mm,
+ struct damon_addr_range regions[3])
+{
+ struct damon_addr_range first_gap = {0}, second_gap = {0};
+ VMA_ITERATOR(vmi, mm, 0);
+ struct vm_area_struct *vma, *prev = NULL;
+ unsigned long start;
+
+ /*
+ * Find the two biggest gaps so that first_gap > second_gap > others.
+ * If this is too slow, it can be optimised to examine the maple
+ * tree gaps.
+ */
+ for_each_vma(vmi, vma) {
+ unsigned long gap;
+
+ if (!prev) {
+ start = vma->vm_start;
+ goto next;
+ }
+ gap = vma->vm_start - prev->vm_end;
+
+ if (gap > sz_range(&first_gap)) {
+ second_gap = first_gap;
+ first_gap.start = prev->vm_end;
+ first_gap.end = vma->vm_start;
+ } else if (gap > sz_range(&second_gap)) {
+ second_gap.start = prev->vm_end;
+ second_gap.end = vma->vm_start;
+ }
+next:
+ prev = vma;
+ }
+
+ if (!sz_range(&second_gap) || !sz_range(&first_gap))
+ return -EINVAL;
+
+ /* Sort the two biggest gaps by address */
+ if (first_gap.start > second_gap.start)
+ swap(first_gap, second_gap);
+
+ /* Store the result */
+ regions[0].start = ALIGN(start, DAMON_MIN_REGION);
+ regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION);
+ regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION);
+ regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION);
+ regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION);
+ regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION);
+
+ return 0;
+}
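+/*
+ * Worked example (mirroring damon_test_three_regions_in_vmas() in
+ * vaddr-test.h, where DAMON_MIN_REGION is 1): for mappings 10-20, 20-25,
+ * 200-210, 210-220, 300-305 and 307-330, the two biggest gaps are 25-200 and
+ * 220-300, so the resulting three regions are 10-25, 200-220 and 300-330.
+ */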
+
+/*
+ * Get the three regions in the given target (task)
+ *
+ * Returns 0 on success, negative error code otherwise.
+ */
+static int damon_va_three_regions(struct damon_target *t,
+ struct damon_addr_range regions[3])
+{
+ struct mm_struct *mm;
+ int rc;
+
+ mm = damon_get_mm(t);
+ if (!mm)
+ return -EINVAL;
+
+ mmap_read_lock(mm);
+ rc = __damon_va_three_regions(mm, regions);
+ mmap_read_unlock(mm);
+
+ mmput(mm);
+ return rc;
+}
+
+/*
+ * Initialize the monitoring target regions for the given target (task)
+ *
+ * t the given target
+ *
+ * Because only a few small portions of the entire address space are actually
+ * mapped to memory and accessed, monitoring the unmapped regions is wasteful.
+ * However, because DAMON can deal with some level of noise, tracking every
+ * mapping is not strictly required; it could even incur a high overhead if
+ * the mappings change frequently or the number of mappings is high. The
+ * adaptive regions adjustment mechanism further helps to deal with the noise
+ * by simply identifying the unmapped areas as regions that receive no access.
+ * Moreover, applying the real mappings, which would have many unmapped areas
+ * inside, would make the adaptive mechanism quite complex. Nevertheless,
+ * excessively large unmapped areas inside the monitoring target should be
+ * removed so that the adaptive mechanism does not waste time on them.
+ *
+ * For this reason, we convert the complex mappings to three distinct regions
+ * that cover every mapped area of the address space. The two gaps between
+ * the three regions are the two biggest unmapped areas in the given address
+ * space. In detail, this function first identifies the start and the end of
+ * the mappings and the two biggest unmapped areas of the address space.
+ * Then, it constructs the three regions as below:
+ *
+ * [mappings[0]->start, big_two_unmapped_areas[0]->start)
+ * [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start)
+ * [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end)
+ *
+ * As the usual memory map of processes looks like the diagram below, the gap
+ * between the heap and the uppermost mmap()-ed region, and the gap between
+ * the lowermost mmap()-ed region and the stack, will be the two biggest
+ * unmapped regions. Because these gaps are exceptionally huge in a usual
+ * address space, excluding only these two biggest unmapped regions is a
+ * sufficient trade-off.
+ *
+ * <heap>
+ * <BIG UNMAPPED REGION 1>
+ * <uppermost mmap()-ed region>
+ * (other mmap()-ed regions and small unmapped regions)
+ * <lowermost mmap()-ed region>
+ * <BIG UNMAPPED REGION 2>
+ * <stack>
+ */
+static void __damon_va_init_regions(struct damon_ctx *ctx,
+ struct damon_target *t)
+{
+ struct damon_target *ti;
+ struct damon_region *r;
+ struct damon_addr_range regions[3];
+ unsigned long sz = 0, nr_pieces;
+ int i, tidx = 0;
+
+ if (damon_va_three_regions(t, regions)) {
+ damon_for_each_target(ti, ctx) {
+ if (ti == t)
+ break;
+ tidx++;
+ }
+ pr_debug("Failed to get three regions of %dth target\n", tidx);
+ return;
+ }
+
+ for (i = 0; i < 3; i++)
+ sz += regions[i].end - regions[i].start;
+ if (ctx->attrs.min_nr_regions)
+ sz /= ctx->attrs.min_nr_regions;
+ if (sz < DAMON_MIN_REGION)
+ sz = DAMON_MIN_REGION;
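+ /*
+ * 'sz' is now the desired size of each initial sub-region: the three
+ * regions are split so that the total number of regions roughly matches
+ * 'min_nr_regions', while never going below DAMON_MIN_REGION.
+ */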
+
+ /* Set the initial three regions of the target */
+ for (i = 0; i < 3; i++) {
+ r = damon_new_region(regions[i].start, regions[i].end);
+ if (!r) {
+ pr_err("%d'th init region creation failed\n", i);
+ return;
+ }
+ damon_add_region(r, t);
+
+ nr_pieces = (regions[i].end - regions[i].start) / sz;
+ damon_va_evenly_split_region(t, r, nr_pieces);
+ }
+}
+
+/* Initialize '->regions_list' of every target (task) */
+static void damon_va_init(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+
+ damon_for_each_target(t, ctx) {
+ /* the user may set the target regions as they want */
+ if (!damon_nr_regions(t))
+ __damon_va_init_regions(ctx, t);
+ }
+}
+
+/*
+ * Update regions for current memory mappings
+ */
+static void damon_va_update(struct damon_ctx *ctx)
+{
+ struct damon_addr_range three_regions[3];
+ struct damon_target *t;
+
+ damon_for_each_target(t, ctx) {
+ if (damon_va_three_regions(t, three_regions))
+ continue;
+ damon_set_regions(t, three_regions, 3);
+ }
+}
+
+static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pte_t *pte;
+ pmd_t pmde;
+ spinlock_t *ptl;
+
+ if (pmd_trans_huge(pmdp_get(pmd))) {
+ ptl = pmd_lock(walk->mm, pmd);
+ pmde = pmdp_get(pmd);
+
+ if (!pmd_present(pmde)) {
+ spin_unlock(ptl);
+ return 0;
+ }
+
+ if (pmd_trans_huge(pmde)) {
+ damon_pmdp_mkold(pmd, walk->vma, addr);
+ spin_unlock(ptl);
+ return 0;
+ }
+ spin_unlock(ptl);
+ }
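+ /*
+ * Either the mapping was not a transparent huge page in the first
+ * place, or the huge page was split while we were waiting for the PMD
+ * lock; fall through to the regular PTE handling below.
+ */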
+
+ pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (!pte) {
+ walk->action = ACTION_AGAIN;
+ return 0;
+ }
+ if (!pte_present(ptep_get(pte)))
+ goto out;
+ damon_ptep_mkold(pte, walk->vma, addr);
+out:
+ pte_unmap_unlock(pte, ptl);
+ return 0;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
+ struct vm_area_struct *vma, unsigned long addr)
+{
+ bool referenced = false;
+ pte_t entry = huge_ptep_get(pte);
+ struct folio *folio = pfn_folio(pte_pfn(entry));
+ unsigned long psize = huge_page_size(hstate_vma(vma));
+
+ folio_get(folio);
+
+ if (pte_young(entry)) {
+ referenced = true;
+ entry = pte_mkold(entry);
+ set_huge_pte_at(mm, addr, pte, entry, psize);
+ }
+
+#ifdef CONFIG_MMU_NOTIFIER
+ if (mmu_notifier_clear_young(mm, addr,
+ addr + huge_page_size(hstate_vma(vma))))
+ referenced = true;
+#endif /* CONFIG_MMU_NOTIFIER */
+
+ if (referenced)
+ folio_set_young(folio);
+
+ folio_set_idle(folio);
+ folio_put(folio);
+}
+
+static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
+ unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct hstate *h = hstate_vma(walk->vma);
+ spinlock_t *ptl;
+ pte_t entry;
+
+ ptl = huge_pte_lock(h, walk->mm, pte);
+ entry = huge_ptep_get(pte);
+ if (!pte_present(entry))
+ goto out;
+
+ damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr);
+
+out:
+ spin_unlock(ptl);
+ return 0;
+}
+#else
+#define damon_mkold_hugetlb_entry NULL
+#endif /* CONFIG_HUGETLB_PAGE */
+
+static const struct mm_walk_ops damon_mkold_ops = {
+ .pmd_entry = damon_mkold_pmd_entry,
+ .hugetlb_entry = damon_mkold_hugetlb_entry,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
+static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
+{
+ mmap_read_lock(mm);
+ walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL);
+ mmap_read_unlock(mm);
+}
+
+/*
+ * Functions for the access checking of the regions
+ */
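+/*
+ * The check works in two steps. __damon_va_prepare_access_check() picks a
+ * random sampling address within each region, and damon_va_mkold() clears the
+ * page table Accessed bit for it, marks the folio idle and, where available,
+ * clears the secondary MMU young state via the mmu_notifier. One sampling
+ * interval later, __damon_va_check_access() re-reads that information using
+ * damon_va_young() and counts the region as accessed if any of it indicates
+ * an access in the meantime.
+ */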
+
+static void __damon_va_prepare_access_check(struct mm_struct *mm,
+ struct damon_region *r)
+{
+ r->sampling_addr = damon_rand(r->ar.start, r->ar.end);
+
+ damon_va_mkold(mm, r->sampling_addr);
+}
+
+static void damon_va_prepare_access_checks(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ struct mm_struct *mm;
+ struct damon_region *r;
+
+ damon_for_each_target(t, ctx) {
+ mm = damon_get_mm(t);
+ if (!mm)
+ continue;
+ damon_for_each_region(r, t)
+ __damon_va_prepare_access_check(mm, r);
+ mmput(mm);
+ }
+}
+
+struct damon_young_walk_private {
+ /* size of the folio for the access checked virtual memory address */
+ unsigned long *folio_sz;
+ bool young;
+};
+
+static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pte_t *pte;
+ pte_t ptent;
+ spinlock_t *ptl;
+ struct folio *folio;
+ struct damon_young_walk_private *priv = walk->private;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ if (pmd_trans_huge(pmdp_get(pmd))) {
+ pmd_t pmde;
+
+ ptl = pmd_lock(walk->mm, pmd);
+ pmde = pmdp_get(pmd);
+
+ if (!pmd_present(pmde)) {
+ spin_unlock(ptl);
+ return 0;
+ }
+
+ if (!pmd_trans_huge(pmde)) {
+ spin_unlock(ptl);
+ goto regular_page;
+ }
+ folio = damon_get_folio(pmd_pfn(pmde));
+ if (!folio)
+ goto huge_out;
+ if (pmd_young(pmde) || !folio_test_idle(folio) ||
+ mmu_notifier_test_young(walk->mm,
+ addr))
+ priv->young = true;
+ *priv->folio_sz = HPAGE_PMD_SIZE;
+ folio_put(folio);
+huge_out:
+ spin_unlock(ptl);
+ return 0;
+ }
+
+regular_page:
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+ pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (!pte) {
+ walk->action = ACTION_AGAIN;
+ return 0;
+ }
+ ptent = ptep_get(pte);
+ if (!pte_present(ptent))
+ goto out;
+ folio = damon_get_folio(pte_pfn(ptent));
+ if (!folio)
+ goto out;
+ if (pte_young(ptent) || !folio_test_idle(folio) ||
+ mmu_notifier_test_young(walk->mm, addr))
+ priv->young = true;
+ *priv->folio_sz = folio_size(folio);
+ folio_put(folio);
+out:
+ pte_unmap_unlock(pte, ptl);
+ return 0;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
+ unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct damon_young_walk_private *priv = walk->private;
+ struct hstate *h = hstate_vma(walk->vma);
+ struct folio *folio;
+ spinlock_t *ptl;
+ pte_t entry;
+
+ ptl = huge_pte_lock(h, walk->mm, pte);
+ entry = huge_ptep_get(pte);
+ if (!pte_present(entry))
+ goto out;
+
+ folio = pfn_folio(pte_pfn(entry));
+ folio_get(folio);
+
+ if (pte_young(entry) || !folio_test_idle(folio) ||
+ mmu_notifier_test_young(walk->mm, addr))
+ priv->young = true;
+ *priv->folio_sz = huge_page_size(h);
+
+ folio_put(folio);
+
+out:
+ spin_unlock(ptl);
+ return 0;
+}
+#else
+#define damon_young_hugetlb_entry NULL
+#endif /* CONFIG_HUGETLB_PAGE */
+
+static const struct mm_walk_ops damon_young_ops = {
+ .pmd_entry = damon_young_pmd_entry,
+ .hugetlb_entry = damon_young_hugetlb_entry,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
+static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
+ unsigned long *folio_sz)
+{
+ struct damon_young_walk_private arg = {
+ .folio_sz = folio_sz,
+ .young = false,
+ };
+
+ mmap_read_lock(mm);
+ walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg);
+ mmap_read_unlock(mm);
+ return arg.young;
+}
+
+/*
+ * Check whether the region was accessed after the last preparation
+ *
+ * mm 'mm_struct' for the given virtual address space
+ * r the region to be checked
+ * same_target whether 'r' belongs to the same target as the previously
+ * checked region
+ */
+static void __damon_va_check_access(struct mm_struct *mm,
+ struct damon_region *r, bool same_target)
+{
+ static unsigned long last_addr;
+ static unsigned long last_folio_sz = PAGE_SIZE;
+ static bool last_accessed;
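+ /*
+ * Cache the result of the previous check so that consecutive regions of
+ * the same target whose sampling addresses fall into the same (possibly
+ * huge) folio need only a single page table walk.
+ */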
+
+ /* If the region is in the last checked page, reuse the result */
+ if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) ==
+ ALIGN_DOWN(r->sampling_addr, last_folio_sz))) {
+ if (last_accessed)
+ r->nr_accesses++;
+ return;
+ }
+
+ last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz);
+ if (last_accessed)
+ r->nr_accesses++;
+
+ last_addr = r->sampling_addr;
+}
+
+static unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ struct mm_struct *mm;
+ struct damon_region *r;
+ unsigned int max_nr_accesses = 0;
+ bool same_target;
+
+ damon_for_each_target(t, ctx) {
+ mm = damon_get_mm(t);
+ if (!mm)
+ continue;
+ same_target = false;
+ damon_for_each_region(r, t) {
+ __damon_va_check_access(mm, r, same_target);
+ max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
+ same_target = true;
+ }
+ mmput(mm);
+ }
+
+ return max_nr_accesses;
+}
+
+/*
+ * Functions for the target validity check and cleanup
+ */
+
+static bool damon_va_target_valid(struct damon_target *t)
+{
+ struct task_struct *task;
+
+ task = damon_get_task_struct(t);
+ if (task) {
+ put_task_struct(task);
+ return true;
+ }
+
+ return false;
+}
+
+#ifndef CONFIG_ADVISE_SYSCALLS
+static unsigned long damos_madvise(struct damon_target *target,
+ struct damon_region *r, int behavior)
+{
+ return 0;
+}
+#else
+static unsigned long damos_madvise(struct damon_target *target,
+ struct damon_region *r, int behavior)
+{
+ struct mm_struct *mm;
+ unsigned long start = PAGE_ALIGN(r->ar.start);
+ unsigned long len = PAGE_ALIGN(damon_sz_region(r));
+ unsigned long applied;
+
+ mm = damon_get_mm(target);
+ if (!mm)
+ return 0;
+
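+ /* do_madvise() returns 0 on success; count the whole range as applied then */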
+ applied = do_madvise(mm, start, len, behavior) ? 0 : len;
+ mmput(mm);
+
+ return applied;
+}
+#endif /* CONFIG_ADVISE_SYSCALLS */
+
+static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
+ struct damon_target *t, struct damon_region *r,
+ struct damos *scheme)
+{
+ int madv_action;
+
+ switch (scheme->action) {
+ case DAMOS_WILLNEED:
+ madv_action = MADV_WILLNEED;
+ break;
+ case DAMOS_COLD:
+ madv_action = MADV_COLD;
+ break;
+ case DAMOS_PAGEOUT:
+ madv_action = MADV_PAGEOUT;
+ break;
+ case DAMOS_HUGEPAGE:
+ madv_action = MADV_HUGEPAGE;
+ break;
+ case DAMOS_NOHUGEPAGE:
+ madv_action = MADV_NOHUGEPAGE;
+ break;
+ case DAMOS_STAT:
+ return 0;
+ default:
+ /*
+ * DAMOS actions that are not yet supported by 'vaddr'.
+ */
+ return 0;
+ }
+
+ return damos_madvise(t, r, madv_action);
+}
+
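+/*
+ * Return the priority score (0 to DAMOS_MAX_SCORE) of a region for the given
+ * scheme. Only DAMOS_PAGEOUT gets a differentiated score here, via
+ * damon_cold_score(), so that colder regions tend to be paged out first when
+ * the scheme is under quota pressure; every other action gets the maximum
+ * score.
+ */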
+static int damon_va_scheme_score(struct damon_ctx *context,
+ struct damon_target *t, struct damon_region *r,
+ struct damos *scheme)
+{
+
+ switch (scheme->action) {
+ case DAMOS_PAGEOUT:
+ return damon_cold_score(context, r, scheme);
+ default:
+ break;
+ }
+
+ return DAMOS_MAX_SCORE;
+}
+
+static int __init damon_va_initcall(void)
+{
+ struct damon_operations ops = {
+ .id = DAMON_OPS_VADDR,
+ .init = damon_va_init,
+ .update = damon_va_update,
+ .prepare_access_checks = damon_va_prepare_access_checks,
+ .check_accesses = damon_va_check_accesses,
+ .reset_aggregated = NULL,
+ .target_valid = damon_va_target_valid,
+ .cleanup = NULL,
+ .apply_scheme = damon_va_apply_scheme,
+ .get_scheme_score = damon_va_scheme_score,
+ };
+ /* ops for fixed virtual address ranges */
+ struct damon_operations ops_fvaddr = ops;
+ int err;
+
+ /* Don't set the monitoring target regions for the entire mapping */
+ ops_fvaddr.id = DAMON_OPS_FVADDR;
+ ops_fvaddr.init = NULL;
+ ops_fvaddr.update = NULL;
+
+ err = damon_register_ops(&ops);
+ if (err)
+ return err;
+ return damon_register_ops(&ops_fvaddr);
+}
+
+subsys_initcall(damon_va_initcall);
+
+#include "vaddr-test.h"