summaryrefslogtreecommitdiffstats
path: root/bin/xcapture-bpf
blob: e4920947383180bfae0da4f236e05966107b89fc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
#!/usr/bin/env python3

#  xcapture-bpf -- Always-on profiling of Linux thread activity, by Tanel Poder [https://tanelpoder.com]
#  Copyright 2024 Tanel Poder
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
#  SPDX-License-Identifier: GPL-2.0-or-later

__version__      = "2.0.3"
__author__       = "Tanel Poder"
__date__         = "2024-06-27"
__description__  = "Always-on profiling of Linux thread activity using eBPF."
__url__          = "https://0x.tools"

DEFAULT_GROUP_BY = "st,username,comm,syscall" # for xtop mode
DECODE_CHARSET   = "utf-8"
XTOP_MAX_LINES   = 25 # how many top output lines to print
BLOCK_CHARS      = ['▏', '▎', '▍', '▌', '▋', '▊', '▉', '█'] # for fancy viz

import os, sys, io, pwd, time, ctypes, platform, re, shutil, argparse, signal
from collections import defaultdict
from datetime import datetime
from bcc import BPF, PerfType, PerfSWConfig

# distro package might not be present
try:
    import distro
except ImportError:
    distro = None
    pass

# all available fields with descriptions (if you add more fields to thread_state_t in BPF/C, add them here)
available_fdescr = [
    ('timestamp', 'sample timestamp'),
    ('st',        'short thread state'),
    ('tid',       'thread/task id'),
    ('pid',       'process/thread group id'),
    ('username',  'username or user id if not found'),
    ('comm',      'task comm digits deduplicated'),
    ('comm2',     'task comm actual'),
    ('syscall',   'system call'),
    ('cmdline',   'argv0 command line digits deduplicated'),
    ('cmdline2',  'argv0 command line actual'),
    ('offcpu_u',  'user stack id when thread went off CPU'),
    ('offcpu_k',  'kernel stack id when thread went off CPU'),
    ('oncpu_u',   'recent user stack id if the thread was on CPU'),
    ('oncpu_k',   'recent kernel stack id if the thread was on CPU'),
    ('waker_tid', 'thread ID that woke up this thread last'),
    ('sch',       'thread state flags for scheduler nerds'),
]

# just the field names, preserving the declaration order above
available_fields = [field_name for field_name, _descr in available_fdescr]

# default output fields for ungrouped full detail output
output_fields = [ 'timestamp', 'st', 'tid', 'pid', 'username', 'comm', 'syscall', 'cmdline'
                , 'offcpu_u', 'offcpu_k', 'oncpu_u', 'oncpu_k', 'waker_tid', 'sch' ]


# syscall id to name translation (todo: fix aarch64 include file lookup)
def extract_system_call_ids(unistd_64_fh):
    """Parse a unistd header file and return a {syscall_id: syscall_name} dict.

    Only lines of the exact shape "#define __NR_xxx <number>" (3 tokens) are
    used; the __NR_ / __NR3264_ prefixes are stripped from the names.

    Bug fix: the file contents are read once up front. The previous version
    called readlines() inside the prefix loop, so the file was already
    exhausted on the second pass and no __NR3264_* syscalls were ever
    collected.
    """
    syscall_id_to_name = {}
    lines = unistd_64_fh.readlines()  # read once, reuse for every prefix pass

    # strip 3264bit prefixes from syscall names
    for name_prefix in ['__NR_', '__NR3264_']:
        for line in lines:
            tokens = line.split()
            if tokens and len(tokens) == 3 and tokens[0] == '#define' and tokens[2].isnumeric() is True:
                _, s_name, s_id = tokens
                s_id = int(s_id)
                if s_name.startswith(name_prefix):
                    s_name = s_name[len(name_prefix):]
                    syscall_id_to_name[s_id] = s_name

    return syscall_id_to_name

def get_system_call_names():
    """Locate a usable unistd header on this machine and return the
    {syscall_id: name} mapping parsed from it.

    Tries a list of well-known header locations (plus bundled fallbacks next
    to this script) and returns the first one that opens successfully.

    Raises:
        Exception: if no candidate header file could be opened, with
            distro-specific package installation hints.
    """
    psn_dir = os.path.dirname(os.path.realpath(__file__))
    kernel_ver = platform.release().split('-')[0]

    # this probably needs to be improved for better platform support
    if platform.machine() == 'aarch64':
        unistd_64_paths = ['/usr/include/asm-generic/unistd.h']
    else:
        unistd_64_paths = [  '/usr/include/asm/unistd_64.h', '/usr/include/x86_64-linux-gnu/asm/unistd_64.h'
                           , '/usr/include/asm-x86_64/unistd.h', '/usr/include/asm/unistd.h'
                           , psn_dir+'/syscall_64_'+kernel_ver+'.h', psn_dir+'/syscall_64.h']

    for path in unistd_64_paths:
        try:
            with open(path) as f:
                return extract_system_call_ids(f)
        except IOError:
            pass  # try the next candidate path

    # bug fix: added missing space after "in" so the first path no longer fuses with the message
    raise Exception('unistd_64.h not found in ' + ' or '.join(unistd_64_paths) + '.\n' +
                    '           You may need to "dnf install kernel-headers" or "apt-get install libc6-dev"\n')

# syscall lookup table (built once at startup from the local unistd header)
syscall_id_to_name = get_system_call_names()


# task states (bit flags matching the Linux kernel's task->__state / exit_state values)
TASK_RUNNING           =   0x00000000
TASK_INTERRUPTIBLE     =   0x00000001
TASK_UNINTERRUPTIBLE   =   0x00000002
TASK_STOPPED           =   0x00000004
TASK_TRACED            =   0x00000008

EXIT_DEAD              =   0x00000010
EXIT_ZOMBIE            =   0x00000020
EXIT_TRACE             =   (EXIT_ZOMBIE | EXIT_DEAD)

TASK_PARKED            =   0x00000040
TASK_DEAD              =   0x00000080
TASK_WAKEKILL          =   0x00000100
TASK_WAKING            =   0x00000200
TASK_NOLOAD            =   0x00000400
TASK_NEW               =   0x00000800
TASK_RTLOCK_WAIT       =   0x00001000
TASK_FREEZABLE         =   0x00002000
TASK_FREEZABLE_UNSAFE  =   0x00004000 # depends on: IS_ENABLED(CONFIG_LOCKDEP)
TASK_FROZEN            =   0x00008000
TASK_STATE_MAX         =   0x00010000 # as of linux kernel 6.9

##define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN"

# state bit -> short display label (single letters follow the kernel's
# TASK_STATE_TO_CHAR_STR convention; two-letter codes are xcapture additions)
task_states = {
    0x00000000: "R", # "RUNNING",
    0x00000001: "S", # "INTERRUPTIBLE",
    0x00000002: "D", # UNINTERRUPTIBLE",
    0x00000004: "T", # "STOPPED",
    0x00000008: "t", # "TRACED",
    0x00000010: "X", # "EXIT_DEAD",
    0x00000020: "Z", # "EXIT_ZOMBIE",
    0x00000040: "P", # "PARKED",
    0x00000080: "dd",# "DEAD",
    0x00000100: "wk",# "WAKEKILL",
    0x00000200: "wg",# "WAKING",
    0x00000400: "I", # "NOLOAD",
    0x00000800: "N", # "NEW",
    0x00001000: "rt",# "RTLOCK_WAIT",
    0x00002000: "fe",# "FREEZABLE",
    0x00004000: "fu",# "__TASK_FREEZABLE_UNSAFE = (0x00004000 * IS_ENABLED(CONFIG_LOCKDEP))"
    0x00008000: "fo",# "FROZEN"
}


def get_task_state_name(task_state):
    """Translate a raw kernel task state bitmap into a short display string.

    State 0 is running/runnable ("R"); an idle kernel thread waiting for
    work reports just "I"; any other combination joins the matching labels
    from task_states with "+" (e.g. "D+wk").
    """
    if task_state == 0:
        return "R"
    if task_state & TASK_NOLOAD:  # idle kthread waiting for work
        return "I"

    labels = [label for bit, label in task_states.items() if task_state & bit]
    return "+".join(labels)
    

# is task state interesting ("active") according to your rules
#   mode=active: any states that should be captured and printed out (including perf/on-cpu samples)
#   mode=offcpu: states that are relevant for offcpu stack printing (the BPF program doesn't clear up previous offcpu stackids)
#   mode=oncpu:  states that are relevant for on-cpu stack printing (don't print previous oncpu stacks if a task sample is not on CPU)
def is_interesting(st, syscall, comm, mode="active"):
    """Decide whether a sampled task state should be reported for the given mode.

    Args:
        st:      short state string, e.g. "R", "RQ", "D+wk"
        syscall: current syscall name ('-' if none)
        comm:    task comm string
        mode:    "active", "offcpu" or "oncpu" (see comment block above)

    Bug fix: previously the body compared the global variable current_syscall
    instead of the syscall parameter, so the argument was silently ignored
    (it only worked because the main loop happened to pass that same global).
    """
    if mode == "active":
        if st[0] in ['R', 'D', 'T', 't']:
            return True
        if st[0] == 'S':
            # special case: Oracle processes sleeping in io_getevents are doing async I/O
            if syscall == 'io_getevents' and comm.startswith('ora'):
                return True

    if mode == "offcpu":
        if st[0] in ['D', 'T', 't'] or st.startswith('RQ'): # there may be occasinal states like "D+wk" reported
            return True
        if st[0] == 'S':
            if syscall == 'io_getevents' and comm.startswith('ora'):
                return True

    if mode == "oncpu":
        if st[0] == 'R':
            return True

    return False

# translate uid to username (no container/uid namespace support right now)
def get_username(uid):
    """Resolve a numeric uid to its username; fall back to the uid as a string."""
    try:
        return pwd.getpwuid(uid).pw_name
    except KeyError:
        # uid has no passwd entry on this host
        return str(uid)



def print_fields(rows, columns, linelimit=0):
    """Print rows as an auto-width-sized, pipe-separated terminal table.

    rows is a list of dicts, columns the ordered list of keys to print
    (names may carry trailing spaces as a minimum-width hint). Numeric
    metric columns (seconds/samples/avg_thr) are right-aligned with two
    decimals; everything else is left-aligned. linelimit=0 prints all rows.
    """
    columns = [col.rstrip() for col in columns] # strip as colname might have extra spaces passed in for width/formatting
    col_widths = {}
    # column width auto-sizing: widest of header text and any value in that column
    for col in columns:
        col_length = len(col) # the col may have extra trailing spaces as a formatting directive
        max_value_length = max((len(str(row[col])) for row in rows if col in row), default=0)
        col_widths[col] = max(col_length, max_value_length)

    header1 = "=== Active Threads "
    header2 = " | ".join(f"{col:<{col_widths[col]}}" for col in columns)

    print(header1 + "=" * (len(header2) - len(header1)) + "\n")
    print(header2)
    print("-" * len(header2))

    for i, row in enumerate(rows):
        # per-cell: float-format metrics, left-pad missing columns with blanks
        line = " | ".join(
            f"{row[col]:>{col_widths[col]}.2f}" if col in ["seconds", "samples", "avg_thr"] 
                                                else f"{str(row[col]):<{col_widths[col]}}"
            if col in row else ' ' * col_widths[col] for col in columns
        )
        print(line)

        # dont break out if linelimit is at its default 0
        if linelimit and i >= linelimit - 1:
            break

def print_header_csv(columns):
    """Print the CSV header line: column names upper-cased, comma-separated."""
    print(",".join(col.upper() for col in columns))

def print_fields_csv(rows, columns):
    """Print every row as one comma-separated line of the given columns.

    Bug fix: print() was previously outside the loop, so only the last row
    was ever emitted (and an empty rows list raised NameError on the unbound
    line variable).
    """
    for row in rows:
        print(",".join(f"{row[col]}" for col in columns))

def get_ustack_traces(ustack_traces, ignore_ustacks=None, strip_args=True):
    """Symbolize the userspace stacks seen in this sampling round.

    Walks the global output_ustack map of (stack_id, pid) keys collected by
    the main loop (ustack_traces is the BPF stack table used for walking and
    pid-scoped symbol resolution via the global BPF handle b). Returns one
    "ustack <id> ->frame->frame..." line per unique stack id, sorted by id.

    Args:
        ustack_traces:  BPF stack trace table (supports .walk(stack_id))
        ignore_ustacks: stack ids to skip (already written out in -o mode)
        strip_args:     drop C++ template/call argument suffixes from names

    Bug fix: ignore_ustacks previously defaulted to a shared mutable {}.
    """
    if ignore_ustacks is None:
        ignore_ustacks = {}
    exclusions = ['__GI___clone3']
    dedup_map = {}
    lines = []

    for stack_id, pid in output_ustack:
        if stack_id and stack_id >= 0 and stack_id not in ignore_ustacks:   # todo: find why we have Null/none stackids in this map
            line = f"ustack {stack_id:6} "
            stack = list(ustack_traces.walk(stack_id))
            for addr in reversed(stack):  # print root frame first
                func_name = b.sym(addr, pid).decode(DECODE_CHARSET, 'replace')
                if func_name not in exclusions:
                    if strip_args:
                        func_name = re.split('[<(]', func_name)[0]
                    # unresolved symbols are printed as raw hex addresses
                    line += "->" + (func_name if func_name != '[unknown]' else '{:x}'.format(addr))

            dedup_map[stack_id] = line

    for stack_id in sorted(dedup_map):
        lines.append(dedup_map[stack_id])

    return lines

def get_kstack_traces(kstack_traces, ignore_kstacks=None):
    """Symbolize the kernel stacks referenced by this sampling round.

    Iterates the BPF kstack table and formats every stack id that the main
    loop recorded in the global output_kstack map as one
    "kstack <id> ->frame->frame..." line (root frame first). Common
    syscall-entry scaffolding frames and bpf_* frames are filtered out.

    Args:
        kstack_traces:  BPF stack trace table (.items(), .walk(stack_id))
        ignore_kstacks: stack ids to skip (already written out in -o mode)

    Bug fixes: ignore_kstacks previously defaulted to a shared mutable {};
    the symbol was also resolved twice per frame (two b.ksym calls).
    """
    if ignore_kstacks is None:
        ignore_kstacks = {}
    exclusions = ['entry_SYSCALL_64_after_hwframe', 'do_syscall_64', 'x64_sys_call'
                 , 'ret_from_fork_asm', 'ret_from_fork', '__bpf_trace_sched_switch', '__traceiter_sched_switch'
                 , 'el0t_64_sync', 'el0t_64_sync_handler', 'el0_svc', 'do_el0_svc', 'el0_svc_common', 'invoke_syscall' ]
    lines = []

    for k, v in kstack_traces.items():
        stack_id = k.value
        if stack_id in output_kstack and stack_id not in ignore_kstacks:
            line = f"kstack {stack_id:6} "
            if stack_id >= 0:
                stack = list(kstack_traces.walk(stack_id))

                for addr in reversed(stack):
                    func = b.ksym(addr).decode(DECODE_CHARSET, 'replace')
                    if func not in exclusions and not func.startswith('bpf_'):
                        line += "->" + func  # reuse the already-decoded symbol name

                lines.append(line)

    return lines


def pivot_stack_traces(traces):
    """Split '->'-joined stack strings into frame lists, right-padded with
    empty strings so every list has the same length (for columnar printing)."""
    split_traces = [trace.split("->") for trace in traces]
    deepest = max(len(frames) for frames in split_traces)
    return [frames + [""] * (deepest - len(frames)) for frames in split_traces]

def calculate_columns(pivoted_traces, max_line_length):
    """Return how many stack columns fit side by side in max_line_length,
    given the widest frame name plus the 3-char ' | ' separator (min 1)."""
    widest = max(len(frame) for frames in pivoted_traces for frame in frames)
    return max(1, max_line_length // (widest + 3))

def print_pivoted_dynamic(traces, max_line_length):
    """Print stack traces side by side ("stacktiles"), packing as many
    columns into max_line_length as the widest frame name allows.

    Greedily grows each group of traces one at a time until the pivoted
    layout no longer fits, then prints that group with frames aligned in
    rows and moves on to the next group.
    """
    num_traces = len(traces)
    start = 0
    
    while start < num_traces:
        # grow the candidate group until it no longer fits the line width
        end = start + 1
        while end <= num_traces:
            subset_traces = traces[start:end]
            pivoted_traces = pivot_stack_traces(subset_traces)
            num_columns = calculate_columns(pivoted_traces, max_line_length)
            
            if num_columns < end - start:
                break
            
            end += 1

        end -= 1  # step back to the last size that still fit
        subset_traces = traces[start:end]
        pivoted_traces = pivot_stack_traces(subset_traces)
        
        max_length = max(len(part) for trace in pivoted_traces for part in trace)
        
        print("-" * max_line_length)
        # each row holds one frame level across all traces in the group
        for row in zip(*pivoted_traces):
            print(" | ".join(f"{part:<{max_length}}" for part in row) + ' |')
        
        start = end

# stack printing and formatting choice driver function
def print_stacks_if_nerdmode():
    """Print collected stack traces according to the -n / -N CLI flags.

    -N (giant nerd mode) renders stacktiles sized to the terminal width;
    -n prints one wide "stack_id ->frames" line per stack.
    """
    if args.giant_nerd_mode and stackmap:
        # printing stacktiles first, so the task state info is in the bottom of terminal output
        term_width, _term_height = shutil.get_terminal_size()

        print_pivoted_dynamic(get_kstack_traces(stackmap), max_line_length=term_width)
        print()

        print_pivoted_dynamic(get_ustack_traces(stackmap), max_line_length=term_width)
        print()

    if args.nerd_mode:
        for trace_line in get_kstack_traces(stackmap):
            print(trace_line)
        print()
        for trace_line in get_ustack_traces(stackmap):
            print(trace_line)

# group by for reporting
def group_by(records, column_names, sample_attempts_in_set, time_range_in_set):
    """Aggregate sampled records by the given columns for xtop reporting.

    Adds per-group metrics: samples (count), avg_thr (average concurrent
    threads per sampling attempt), seconds (estimated time), and visual_pct
    (a unicode bar proportional to the group's share of all records).
    """
    total_records = len(records)
    grouped = defaultdict(lambda: {'samples': 0})

    for rec in records:
        key = tuple(rec[col] for col in column_names)
        if key not in grouped:
            grouped[key].update({col: rec[col] for col in column_names})
        grouped[key]['samples'] += 1

    result = list(grouped.values())

    for item in result:
        item['avg_thr'] = round(item['samples'] / sample_attempts_in_set, 2)
        item['seconds'] = round(item['samples'] * (time_range_in_set / sample_attempts_in_set), 2)

        # fancy viz: full blocks for each 10% share, one partial block for the remainder
        share = item['samples'] / total_records
        bar = '█' * int(share * 10)
        partial = (share * 80) % 8
        if partial > 0:
            bar += BLOCK_CHARS[int(partial)]
        item['visual_pct'] = bar
        #ascii also possible
        #item['visual_pct'] = '#' * int(share * 10)

    return result


# main() -- flat script entry: CLI parsing and option validation
signal.signal(signal.SIGPIPE, signal.SIG_DFL)  # die quietly when piped into head/less

# args 
parser = argparse.ArgumentParser(description=__description__)
parser.add_argument('-x', '--xtop', action='store_true', help='Run in aggregated top-thread-activity (xtop) mode')
parser.add_argument('-d', dest="report_seconds", metavar='report_seconds', type=int, default=5, help='xtop report printing interval (default: %(default)ds)')
parser.add_argument('-f', '--sample-hz', default=20, type=int, help='xtop sampling frequency in Hz (default: %(default)d)')
parser.add_argument('-g', '--group-by', metavar='csv-columns', default=DEFAULT_GROUP_BY, help='Full column list what to group by')
parser.add_argument('-G', '--append-group-by', metavar='append-csv-columns', default=None, help='List of additional columns to default cols what to group by')
parser.add_argument('-n', '--nerd-mode', action='store_true', help='Print out relevant stack traces as wide output lines')
parser.add_argument('-N', '--giant-nerd-mode', action='store_true', help='Print out relevant stack traces as stacktiles')
parser.add_argument('-c', '--clear-screen', action='store_true', help='Clear screen before printing next output')
parser.add_argument('-V', '--version', action='version', version=f"%(prog)s {__version__} by {__author__} [{__url__}]", help='Show the program version and exit')
parser.add_argument('-o', '--output-dir', type=str, default=None, help=f'Directory path where to write the output CSV files')
parser.add_argument('-l', '--list', default=None, action='store_true', help='list all available columns for display and grouping')

args = parser.parse_args()

# -l: print the column catalog and exit
if args.list:
    for f in available_fdescr:
        print(f'{f[0]:15} {f[1]}')
    sys.exit(0)

if args.clear_screen and args.output_dir:
    print("Error: --clear-screen (interactive) and --output-dir (continuous logging) are mutually exclusive, use only one option.")
    sys.exit(1)

# handle xtop -g and -G group by columns (and same -g/-G options work for non-xtop output col addition too)
# args.group_by defaults to DEFAULT_GROUP_BY
groupby_fields = args.group_by.split(',')

if args.xtop:
    groupby_fields = groupby_fields + args.append_group_by.split(',') if args.append_group_by else groupby_fields
    used_fields = groupby_fields # todo
else:
    output_fields = output_fields + args.append_group_by.split(',') if args.append_group_by else output_fields
    used_fields = output_fields

# reject any requested column that is not in the known field catalog
if set(used_fields) - set(available_fields):
    print("Error: incorrect group by field name specified, use --list option see allowed columns")
    exit(1)

# eBPF programs have be loaded as root
if os.geteuid() != 0:
    print("Error: you need to run this command as root")
    sys.exit(1)

# ready to go: banner info (distro fields stay empty if the distro package is missing)
progname  = "xtop" if args.xtop else "xcapture-bpf"
kernname  = platform.release().split('-')[0]
archname  = platform.machine()
distroid  = distro.id().title() if distro else ''
distrover = distro.version() if distro else ''
sf        = None # fd for separate stackfile in continuous csv sampling mode

print(f'=== [0x.tools] {progname} {__version__} BETA by {__author__}. {distroid} Linux {distrover} {kernname} {archname}')

# open and load the BPF instrumenter (C source lives next to this script)
with open(os.path.dirname(os.path.abspath(__file__)) + '/xcapture-bpf.c', 'r') as file:
    bpf_text = file.read()

# set up global variables for conditionally inserting stack capture code
offcpu_u = 'offcpu_u' in used_fields
offcpu_k = 'offcpu_k' in used_fields
offcpu_stacks = offcpu_u or offcpu_k
oncpu_stacks = ('oncpu_u' in used_fields or 'oncpu_k' in used_fields)
cmdline = ('cmdline' in used_fields or 'cmdline2' in used_fields)

# dynamic compilation of features that are needed: prepend #defines to the C source
ifdef = ''
if offcpu_u:
    ifdef += '#define OFFCPU_U 1\n'
if offcpu_k:
    ifdef += '#define OFFCPU_K 1\n'
if offcpu_stacks:
    ifdef += '#define OFFCPU_STACKS 1\n'
if oncpu_stacks:
    ifdef += '#define ONCPU_STACKS 1\n'
if cmdline:
    ifdef += '#define CMDLINE 1\n'


print('===  Loading BPF...')
b = BPF(text= ifdef + bpf_text)

# Software CPU_CLOCK is useful in cloud & VM environments where perf hardware events 
# are not available, but software clocks don't measure what happens when CPUs are in 
# critical sections when most interrupts are disabled
b.attach_perf_event(ev_type=PerfType.SOFTWARE, ev_config=PerfSWConfig.CPU_CLOCK
                    , fn_name="update_cpu_stack_profile"
                    , sample_freq=2) # args.sample_hz if args.xtop else 1

# start sampling the Task State Array
tsa = b.get_table("tsa")

# stackmap is only needed when some stack column was requested
if oncpu_stacks or offcpu_stacks:
    stackmap  = b.get_table("stackmap")
else:
    stackmap = {}

# get own pid so to not display it in output
mypid = os.getpid()
print(f"===  Ready (mypid {mypid})\n")

# regex for replacing digits in "comm" for better grouping and reporting (comm2 shows original)
trim_comm = re.compile(r'\d+')

written_kstacks = {} # stack ids already written to csv (in -o mode)
written_ustacks = {}

first_report_printed = False # show first xtop report quicker
csv_header_printed   = False

# main sampling loop: each iteration covers one reporting interval
while True:
    try:
        output_kstack = {} # map of stack_ids seen so far
        output_ustack = {}
        output_records = []
    
        sample_start = time.time()
        duration = (args.report_seconds if args.xtop and first_report_printed else 1)
        sample_end = sample_start + duration # todo: 1 Hz for raw/csv output for now
        first_report_printed = True
        samples_attempted = 0 # not all TSA samples contain active threads of interest, this tells us how many samples we really took
    
        while time.time() < sample_end:
            samples_attempted += 1
            ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
            i = tsa.items()[0]
    
            for i in tsa.items():
                save_record = True
                # extract python values from BPF ctypes, return '-' if there's no match
                fields_dict = {field[0]: getattr(i[1], field[0], '-') for field in i[1]._fields_}
    
                # skip our own sampling process
                if fields_dict['tid'] == mypid:
                    continue
    
                # additional fields for adding human readable info (not using None as it would be printed out as "None")
                fields_dict['st']          = ''
                fields_dict['sch']         = '' # for scheduler nerds
                fields_dict['state_flags'] = '' # full scheduler state bitmap
                fields_dict['username']    = ''
                fields_dict['syscall']     = ''
                fields_dict['comm2']       = ''
                fields_dict['cmdline2']    = ''
     
                current_syscall   = syscall_id_to_name.get(fields_dict['syscall_id'], '-') if fields_dict['syscall_set'] else '-'
                comm              = str(fields_dict['comm'], DECODE_CHARSET)

                in_sched_migrate  = fields_dict['in_sched_migrate']
                in_sched_wakeup   = fields_dict['in_sched_wakeup']
                in_sched_waking   = fields_dict['in_sched_waking']
                is_running_on_cpu = fields_dict['is_running_on_cpu']

                # we use state for conditionally printing out things like offcpu_stack etc
                state_suffix = ''
                state = get_task_state_name(fields_dict['state'])

                if state == 'R' and not is_running_on_cpu: # runnable on runqueue
                    state += 'Q'

                enriched_fields = {"timestamp": ts[:-3]}
    
                # enrich/translate each requested field for output
                for field_name in fields_dict:
                    if not field_name in used_fields:
                        continue

                    outv = None # enriched value
                    if field_name in ['state', 'st']:
                        if is_interesting(state, current_syscall, comm):
                            outv = state
                        else:
                            # uninteresting state: drop the whole record
                            save_record = False
                            break
    
                    elif field_name.startswith('comm'):
                        val = fields_dict['comm'] # source field is "comm" regardless of potential comm2 output field name
                        if isinstance(val, bytes):
                            outv = str(val, DECODE_CHARSET)
                        else:
                            outv = str(val)
                        if field_name == 'comm':  # only trim "comm", but not comm2 that is the unaltered string
                            outv = re.sub(trim_comm, '*', outv)
    
                    elif field_name.startswith('cmdline'):
                        val = fields_dict['cmdline']
                        if isinstance(val, bytes):
                            outv = str(val, DECODE_CHARSET)
                        else:
                            outv = str(val)
                        if field_name == 'cmdline':
                            outv = re.sub(trim_comm, '*', outv)
    
                    elif field_name == 'syscall':
                        outv = current_syscall 
    
                    elif field_name == 'username':
                        outv = get_username(fields_dict['uid']) 
    
                    elif field_name == ('offcpu_k'):   # kstack id
                        val = fields_dict[field_name]
                        # runnable state can be R or RQ: RQ is also off CPU, so will capture it
                        if is_interesting(state, current_syscall, comm, 'offcpu') and val > 0:  
                            outv = val
                            output_kstack[val] = True
                        else:
                            outv = '-'
    
                    elif field_name == ("offcpu_u"):   # ustack id
                        val = fields_dict[field_name]
                        if is_interesting(state, current_syscall, comm, 'offcpu') and val > 0:
                            outv = val
                            # using pid/tgid here, address space is same for all threads in a process
                            output_ustack[val, fields_dict['pid']] = True  
                        else:
                            outv = '-'

                    elif field_name == ('oncpu_k'):
                        val = fields_dict[field_name]
                        # only print the perf-cpu samples when actually caught on cpu (not runqueue) for now
                        if is_interesting(state, current_syscall, comm, 'oncpu') and val > 0: 
                            outv = val
                            output_kstack[val] = True
                        else:
                            outv = '-'

                    elif field_name == ("oncpu_u"):
                        val = fields_dict[field_name]
                        if is_interesting(state, current_syscall, comm, 'oncpu') and val > 0:
                            outv = val
                            # using pid/tgid here, address space is same for all threads in a process
                            output_ustack[val, fields_dict['pid']] = True  
                        else:
                            outv = '-'
    
                    elif field_name == 'sch': 
                        # (in_sched_waking, in_sched_wakeup, is_running_on_cpu)
                        outv  = '-' if in_sched_migrate  else '_'
                        outv += '-' if in_sched_waking   else '_'
                        outv += '-' if in_sched_wakeup   else '_'
                        outv += '-' if is_running_on_cpu else '_'
    
                    else:
                        # any other field: decode bytes, stringify the rest
                        val = fields_dict[field_name]
                        if isinstance(val, bytes):
                            outv = str(val, DECODE_CHARSET)
                        else:
                            outv = str(val)
                        
                    enriched_fields[field_name] = outv
    
                if save_record:
                    output_records.append(enriched_fields)
    
            time.sleep(1 / (args.sample_hz if args.xtop else 1))
    
        if output_records:
            # csv output mode will not do any terminal stuff
            if args.output_dir:
                # hourly csv file naming: threads_YYYY-MM-DD.HH.csv
                outfile = args.output_dir + '/threads_' + ts[:13].replace(' ', '.') + '.csv'

                if os.path.isfile(outfile):  # special case if xcapture-bpf has been restarted within the same hour
                    csv_header_printed = True

                if sys.stdout.name != outfile: # create a new output file when the hour changes
                    csv_header_printed = False # new file
                    sys.stdout = open(outfile, 'a')

                if not csv_header_printed:
                    print_header_csv(output_fields)
                    csv_header_printed = True
                    
                print_fields_csv(output_records, output_fields)

                # stackfile is created once and name doesn't change throughout xcapture process lifetime
                if not sf:
                    stackfile = args.output_dir + '/stacks_' + ts[:13].replace(' ', '.') + '.csv'
                    sf = open(stackfile, 'a')

                if sf:
                    # write only stacks not already written earlier (dedup across intervals)
                    for s in get_kstack_traces(stackmap, ignore_kstacks=written_kstacks):
                        print(s, file=sf)
                        written_kstacks[int(s.split()[1])] = True
                        #print(written_kstacks, file=sf)

                    for s in get_ustack_traces(stackmap, ignore_ustacks=written_ustacks):
                        print(s, file=sf)
                        written_ustacks[int(s.split()[1])] = True
                        #print(written_ustacks, file=sf)

                    sf.flush()

            else:
                if args.clear_screen:               # interactive (xtop): buffer output, then clear+print at once to reduce flicker
                    buffer = io.StringIO()
                    sys.stdout = buffer

                print_stacks_if_nerdmode()
                print()
                print()

                if args.xtop:
                    total_records = len(output_records)
                    # a new field "samples" shows up (count(*))
                    grouped_list = group_by(output_records, groupby_fields, samples_attempted, sample_end - sample_start) 
                    ordered_aggr = sorted(grouped_list, key=lambda x: x['samples'], reverse=True)
                    print_fields(ordered_aggr, ['seconds', 'avg_thr', 'visual_pct'] + groupby_fields, linelimit=XTOP_MAX_LINES)
        
                    print()
                    print()
                    print(f'sampled: {samples_attempted} times, avg_thr: {round(total_records / samples_attempted, 2)}')
                    print(f'start: {ts[:19]}, duration: {duration}s')
              
                    if args.clear_screen:
                        # terminal size may change over time 
                        (term_width, term_height) = shutil.get_terminal_size()
        
                        for x in range(1, term_height - min(len(ordered_aggr), XTOP_MAX_LINES) - 9): # header/footer lines
                            print()
                    else:
                        print()
        
                else: # wide raw terminal output
                    print_fields(output_records, output_fields) 
                    print()
                    print()
        
                if args.clear_screen:
                    os.system('clear')
                    output = buffer.getvalue()
                    sys.stdout = sys.__stdout__ 
                    print(output)

            sys.stdout.flush()

    except KeyboardInterrupt:
        exit(0)
        #signal.signal(signal.SIGINT, signal.SIG_IGN)


# That's all, folks!