From dc50eab76b709d68175a358d6e23a5a3890764d3 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 19:39:57 +0200 Subject: Merging upstream version 6.7.7. Signed-off-by: Daniel Baumann --- Documentation/trace/coresight/coresight.rst | 2 +- Documentation/trace/fprobetrace.rst | 8 +++-- Documentation/trace/kprobes.rst | 1 - Documentation/trace/kprobetrace.rst | 8 +++-- .../trace/postprocess/trace-vmscan-postprocess.pl | 42 ++++++++++------------ Documentation/trace/user_events.rst | 21 +++++++++-- 6 files changed, 49 insertions(+), 33 deletions(-) (limited to 'Documentation/trace') diff --git a/Documentation/trace/coresight/coresight.rst b/Documentation/trace/coresight/coresight.rst index 4a71ea6cb3..826e59a698 100644 --- a/Documentation/trace/coresight/coresight.rst +++ b/Documentation/trace/coresight/coresight.rst @@ -130,7 +130,7 @@ Misc: Device Tree Bindings -------------------- -See Documentation/devicetree/bindings/arm/arm,coresight-\*.yaml for details. +See ``Documentation/devicetree/bindings/arm/arm,coresight-*.yaml`` for details. As of this writing drivers for ITM, STMs and CTIs are not provided but are expected to be added as the solution matures. diff --git a/Documentation/trace/fprobetrace.rst b/Documentation/trace/fprobetrace.rst index 8e9bebcf0a..e35e6b18df 100644 --- a/Documentation/trace/fprobetrace.rst +++ b/Documentation/trace/fprobetrace.rst @@ -59,8 +59,12 @@ Synopsis of fprobe-events and bitfield are supported. (\*1) This is available only when BTF is enabled. - (\*2) only for the probe on function entry (offs == 0). - (\*3) only for return probe. + (\*2) only for the probe on function entry (offs == 0). Note, this argument access + is best effort, because depending on the argument type, it may be passed on + the stack. But this only support the arguments via registers. + (\*3) only for return probe. Note that this is also best effort. Depending on the + return value type, it might be passed via a pair of registers. But this only + accesses one register. (\*4) this is useful for fetching a field of data structures. (\*5) "u" means user-space dereference. diff --git a/Documentation/trace/kprobes.rst b/Documentation/trace/kprobes.rst index fc7ce76eab..f825970a14 100644 --- a/Documentation/trace/kprobes.rst +++ b/Documentation/trace/kprobes.rst @@ -315,7 +315,6 @@ architectures: - i386 (Supports jump optimization) - x86_64 (AMD-64, EM64T) (Supports jump optimization) - ppc64 -- ia64 (Does not support probes on instruction slot1.) - sparc64 (Return probes not yet implemented.) - arm - ppc diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index 8a2dfee381..bf9cecb69f 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -61,8 +61,12 @@ Synopsis of kprobe_events (x8/x16/x32/x64), "char", "string", "ustring", "symbol", "symstr" and bitfield are supported. - (\*1) only for the probe on function entry (offs == 0). - (\*2) only for return probe. + (\*1) only for the probe on function entry (offs == 0). Note, this argument access + is best effort, because depending on the argument type, it may be passed on + the stack. But this only support the arguments via registers. + (\*2) only for return probe. Note that this is also best effort. Depending on the + return value type, it might be passed via a pair of registers. But this only + accesses one register. (\*3) this is useful for fetching a field of data structures. (\*4) "u" means user-space dereference. See :ref:`user_mem_access`. diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl index e24c009789..048dc0dbce 100644 --- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl +++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl @@ -107,14 +107,14 @@ GetOptions( ); # Defaults for dynamically discovered regex's -my $regex_direct_begin_default = 'order=([0-9]*) may_writepage=([0-9]*) gfp_flags=([A-Z_|]*)'; +my $regex_direct_begin_default = 'order=([0-9]*) gfp_flags=([A-Z_|]*)'; my $regex_direct_end_default = 'nr_reclaimed=([0-9]*)'; my $regex_kswapd_wake_default = 'nid=([0-9]*) order=([0-9]*)'; my $regex_kswapd_sleep_default = 'nid=([0-9]*)'; -my $regex_wakeup_kswapd_default = 'nid=([0-9]*) zid=([0-9]*) order=([0-9]*) gfp_flags=([A-Z_|]*)'; -my $regex_lru_isolate_default = 'isolate_mode=([0-9]*) classzone_idx=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_skipped=([0-9]*) nr_taken=([0-9]*) lru=([a-z_]*)'; +my $regex_wakeup_kswapd_default = 'nid=([0-9]*) order=([0-9]*) gfp_flags=([A-Z_|]*)'; +my $regex_lru_isolate_default = 'classzone=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_skipped=([0-9]*) nr_taken=([0-9]*) lru=([a-z_]*)'; my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) nr_dirty=([0-9]*) nr_writeback=([0-9]*) nr_congested=([0-9]*) nr_immediate=([0-9]*) nr_activate_anon=([0-9]*) nr_activate_file=([0-9]*) nr_ref_keep=([0-9]*) nr_unmap_fail=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)'; -my $regex_lru_shrink_active_default = 'lru=([A-Z_]*) nr_scanned=([0-9]*) nr_rotated=([0-9]*) priority=([0-9]*)'; +my $regex_lru_shrink_active_default = 'lru=([A-Z_]*) nr_taken=([0-9]*) nr_active=([0-9]*) nr_deactivated=([0-9]*) nr_referenced=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)' ; my $regex_writepage_default = 'page=([0-9a-f]*) pfn=([0-9]*) flags=([A-Z_|]*)'; # Dyanically discovered regex @@ -184,8 +184,7 @@ sub generate_traceevent_regex { $regex_direct_begin = generate_traceevent_regex( "vmscan/mm_vmscan_direct_reclaim_begin", $regex_direct_begin_default, - "order", "may_writepage", - "gfp_flags"); + "order", "gfp_flags"); $regex_direct_end = generate_traceevent_regex( "vmscan/mm_vmscan_direct_reclaim_end", $regex_direct_end_default, @@ -201,11 +200,11 @@ $regex_kswapd_sleep = generate_traceevent_regex( $regex_wakeup_kswapd = generate_traceevent_regex( "vmscan/mm_vmscan_wakeup_kswapd", $regex_wakeup_kswapd_default, - "nid", "zid", "order", "gfp_flags"); + "nid", "order", "gfp_flags"); $regex_lru_isolate = generate_traceevent_regex( "vmscan/mm_vmscan_lru_isolate", $regex_lru_isolate_default, - "isolate_mode", "classzone_idx", "order", + "classzone", "order", "nr_requested", "nr_scanned", "nr_skipped", "nr_taken", "lru"); $regex_lru_shrink_inactive = generate_traceevent_regex( @@ -218,11 +217,10 @@ $regex_lru_shrink_inactive = generate_traceevent_regex( $regex_lru_shrink_active = generate_traceevent_regex( "vmscan/mm_vmscan_lru_shrink_active", $regex_lru_shrink_active_default, - "nid", "zid", - "lru", - "nr_scanned", "nr_rotated", "priority"); + "nid", "nr_taken", "nr_active", "nr_deactivated", "nr_referenced", + "priority", "flags"); $regex_writepage = generate_traceevent_regex( - "vmscan/mm_vmscan_writepage", + "vmscan/mm_vmscan_write_folio", $regex_writepage_default, "page", "pfn", "flags"); @@ -371,7 +369,7 @@ EVENT_PROCESS: print " $regex_wakeup_kswapd\n"; next; } - my $order = $3; + my $order = $2; $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD_PERORDER}[$order]++; } elsif ($tracepoint eq "mm_vmscan_lru_isolate") { $details = $6; @@ -381,18 +379,14 @@ EVENT_PROCESS: print " $regex_lru_isolate/o\n"; next; } - my $isolate_mode = $1; - my $nr_scanned = $5; - my $file = $8; - - # To closer match vmstat scanning statistics, only count isolate_both - # and isolate_inactive as scanning. isolate_active is rotation - # isolate_inactive == 1 - # isolate_active == 2 - # isolate_both == 3 - if ($isolate_mode != 2) { + my $nr_scanned = $4; + my $lru = $7; + + # To closer match vmstat scanning statistics, only count + # inactive lru as scanning + if ($lru =~ /inactive_/) { $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned; - if ($file =~ /_file/) { + if ($lru =~ /_file/) { $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED} += $nr_scanned; } else { $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED} += $nr_scanned; diff --git a/Documentation/trace/user_events.rst b/Documentation/trace/user_events.rst index f9530d0ac5..d8f12442aa 100644 --- a/Documentation/trace/user_events.rst +++ b/Documentation/trace/user_events.rst @@ -14,6 +14,11 @@ Programs can view status of the events via /sys/kernel/tracing/user_events_status and can both register and write data out via /sys/kernel/tracing/user_events_data. +Programs can also use /sys/kernel/tracing/dynamic_events to register and +delete user based events via the u: prefix. The format of the command to +dynamic_events is the same as the ioctl with the u: prefix applied. This +requires CAP_PERFMON due to the event persisting, otherwise -EPERM is returned. + Typically programs will register a set of events that they wish to expose to tools that can read trace_events (such as ftrace and perf). The registration process tells the kernel which address and bit to reflect if any tool has @@ -45,7 +50,7 @@ This command takes a packed struct user_reg as an argument:: /* Input: Enable size in bytes at address */ __u8 enable_size; - /* Input: Flags for future use, set to 0 */ + /* Input: Flags to use, if any */ __u16 flags; /* Input: Address to update when enabled */ @@ -69,7 +74,7 @@ The struct user_reg requires all the above inputs to be set appropriately. This must be 4 (32-bit) or 8 (64-bit). 64-bit values are only allowed to be used on 64-bit kernels, however, 32-bit can be used on all kernels. -+ flags: The flags to use, if any. For the initial version this must be 0. ++ flags: The flags to use, if any. Callers should first attempt to use flags and retry without flags to ensure support for lower versions of the kernel. If a flag is not supported -EINVAL is returned. @@ -80,6 +85,13 @@ The struct user_reg requires all the above inputs to be set appropriately. + name_args: The name and arguments to describe the event, see command format for details. +The following flags are currently supported. + ++ USER_EVENT_REG_PERSIST: The event will not delete upon the last reference + closing. Callers may use this if an event should exist even after the + process closes or unregisters the event. Requires CAP_PERFMON otherwise + -EPERM is returned. + Upon successful registration the following is set. + write_index: The index to use for this file descriptor that represents this @@ -141,7 +153,10 @@ event (in both user and kernel space). User programs should use a separate file to request deletes than the one used for registration due to this. **NOTE:** By default events will auto-delete when there are no references left -to the event. Flags in the future may change this logic. +to the event. If programs do not want auto-delete, they must use the +USER_EVENT_REG_PERSIST flag when registering the event. Once that flag is used +the event exists until DIAG_IOCSDEL is invoked. Both register and delete of an +event that persists requires CAP_PERFMON, otherwise -EPERM is returned. Unregistering ------------- -- cgit v1.2.3