From 5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 27 Apr 2024 12:05:51 +0200 Subject: Adding upstream version 5.10.209. Signed-off-by: Daniel Baumann --- samples/Kconfig | 219 +++ samples/Makefile | 31 + samples/auxdisplay/.gitignore | 2 + samples/auxdisplay/Makefile | 2 + samples/auxdisplay/cfag12864b-example.c | 267 ++++ samples/binderfs/.gitignore | 1 + samples/binderfs/Makefile | 4 + samples/binderfs/binderfs_example.c | 82 ++ samples/bpf/.gitignore | 54 + samples/bpf/Makefile | 329 +++++ samples/bpf/Makefile.target | 75 + samples/bpf/README.rst | 105 ++ samples/bpf/asm_goto_workaround.h | 28 + samples/bpf/bpf_insn.h | 217 +++ samples/bpf/bpf_load.c | 667 +++++++++ samples/bpf/bpf_load.h | 57 + samples/bpf/cookie_uid_helper_example.c | 323 ++++ samples/bpf/cpustat_kern.c | 281 ++++ samples/bpf/cpustat_user.c | 252 ++++ samples/bpf/do_hbm_test.sh | 442 ++++++ samples/bpf/fds_example.c | 193 +++ samples/bpf/hash_func01.h | 55 + samples/bpf/hbm.c | 500 +++++++ samples/bpf/hbm.h | 38 + samples/bpf/hbm_edt_kern.c | 168 +++ samples/bpf/hbm_kern.h | 217 +++ samples/bpf/hbm_out_kern.c | 179 +++ samples/bpf/ibumad_kern.c | 138 ++ samples/bpf/ibumad_user.c | 122 ++ samples/bpf/lathist_kern.c | 99 ++ samples/bpf/lathist_user.c | 130 ++ samples/bpf/lwt_len_hist.sh | 40 + samples/bpf/lwt_len_hist_kern.c | 82 ++ samples/bpf/lwt_len_hist_user.c | 77 + samples/bpf/map_perf_test_kern.c | 291 ++++ samples/bpf/map_perf_test_user.c | 507 +++++++ samples/bpf/offwaketime_kern.c | 157 ++ samples/bpf/offwaketime_user.c | 160 ++ samples/bpf/parse_ldabs.c | 43 + samples/bpf/parse_simple.c | 49 + samples/bpf/parse_varlen.c | 150 ++ samples/bpf/run_cookie_uid_helper_example.sh | 15 + samples/bpf/sampleip_kern.c | 39 + samples/bpf/sampleip_user.c | 227 +++ samples/bpf/sock_example.c | 106 ++ samples/bpf/sock_example.h | 35 + samples/bpf/sock_flags_kern.c | 49 + samples/bpf/sockex1_kern.c | 30 + samples/bpf/sockex1_user.c | 54 + samples/bpf/sockex2_kern.c | 223 +++ samples/bpf/sockex2_user.c | 57 + samples/bpf/sockex3_kern.c | 293 ++++ samples/bpf/sockex3_user.c | 106 ++ samples/bpf/spintest_kern.c | 69 + samples/bpf/spintest_user.c | 99 ++ samples/bpf/syscall_nrs.c | 19 + samples/bpf/syscall_tp_kern.c | 73 + samples/bpf/syscall_tp_user.c | 138 ++ samples/bpf/task_fd_query_kern.c | 19 + samples/bpf/task_fd_query_user.c | 383 +++++ samples/bpf/tc_l2_redirect.sh | 174 +++ samples/bpf/tc_l2_redirect_kern.c | 237 +++ samples/bpf/tc_l2_redirect_user.c | 70 + samples/bpf/tcbpf1_kern.c | 91 ++ samples/bpf/tcp_basertt_kern.c | 71 + samples/bpf/tcp_bpf.readme | 28 + samples/bpf/tcp_bufs_kern.c | 81 + samples/bpf/tcp_clamp_kern.c | 97 ++ samples/bpf/tcp_cong_kern.c | 78 + samples/bpf/tcp_dumpstats_kern.c | 68 + samples/bpf/tcp_iw_kern.c | 83 ++ samples/bpf/tcp_rwnd_kern.c | 64 + samples/bpf/tcp_synrto_kern.c | 64 + samples/bpf/tcp_tos_reflect_kern.c | 80 + samples/bpf/test_cgrp2_array_pin.c | 106 ++ samples/bpf/test_cgrp2_attach.c | 172 +++ samples/bpf/test_cgrp2_sock.c | 290 ++++ samples/bpf/test_cgrp2_sock.sh | 135 ++ samples/bpf/test_cgrp2_sock2.c | 68 + samples/bpf/test_cgrp2_sock2.sh | 85 ++ samples/bpf/test_cgrp2_tc.sh | 185 +++ samples/bpf/test_cgrp2_tc_kern.c | 70 + samples/bpf/test_cls_bpf.sh | 38 + samples/bpf/test_current_task_under_cgroup_kern.c | 44 + samples/bpf/test_current_task_under_cgroup_user.c | 113 ++ samples/bpf/test_ipip.sh | 179 +++ samples/bpf/test_lru_dist.c | 540 +++++++ samples/bpf/test_lwt_bpf.c | 253 ++++ samples/bpf/test_lwt_bpf.sh | 400 +++++ samples/bpf/test_map_in_map_kern.c | 176 +++ samples/bpf/test_map_in_map_user.c | 173 +++ samples/bpf/test_overhead_kprobe_kern.c | 48 + samples/bpf/test_overhead_raw_tp_kern.c | 17 + samples/bpf/test_overhead_tp_kern.c | 36 + samples/bpf/test_overhead_user.c | 182 +++ samples/bpf/test_override_return.sh | 16 + samples/bpf/test_probe_write_user_kern.c | 56 + samples/bpf/test_probe_write_user_user.c | 108 ++ samples/bpf/trace_common.h | 13 + samples/bpf/trace_event_kern.c | 80 + samples/bpf/trace_event_user.c | 354 +++++ samples/bpf/trace_output_kern.c | 31 + samples/bpf/trace_output_user.c | 107 ++ samples/bpf/tracex1_kern.c | 54 + samples/bpf/tracex1_user.c | 50 + samples/bpf/tracex2_kern.c | 102 ++ samples/bpf/tracex2_user.c | 193 +++ samples/bpf/tracex3_kern.c | 90 ++ samples/bpf/tracex3_user.c | 190 +++ samples/bpf/tracex4_kern.c | 55 + samples/bpf/tracex4_user.c | 103 ++ samples/bpf/tracex5_kern.c | 93 ++ samples/bpf/tracex5_user.c | 101 ++ samples/bpf/tracex6_kern.c | 82 ++ samples/bpf/tracex6_user.c | 226 +++ samples/bpf/tracex7_kern.c | 16 + samples/bpf/tracex7_user.c | 56 + samples/bpf/xdp1_kern.c | 93 ++ samples/bpf/xdp1_user.c | 167 +++ samples/bpf/xdp2_kern.c | 114 ++ samples/bpf/xdp2skb_meta.sh | 220 +++ samples/bpf/xdp2skb_meta_kern.c | 105 ++ samples/bpf/xdp_adjust_tail_kern.c | 155 ++ samples/bpf/xdp_adjust_tail_user.c | 198 +++ samples/bpf/xdp_fwd_kern.c | 158 ++ samples/bpf/xdp_fwd_user.c | 170 +++ samples/bpf/xdp_monitor_kern.c | 257 ++++ samples/bpf/xdp_monitor_user.c | 792 ++++++++++ samples/bpf/xdp_redirect_cpu_kern.c | 730 +++++++++ samples/bpf/xdp_redirect_cpu_user.c | 983 +++++++++++++ samples/bpf/xdp_redirect_kern.c | 90 ++ samples/bpf/xdp_redirect_map_kern.c | 92 ++ samples/bpf/xdp_redirect_map_user.c | 222 +++ samples/bpf/xdp_redirect_user.c | 223 +++ samples/bpf/xdp_router_ipv4_kern.c | 186 +++ samples/bpf/xdp_router_ipv4_user.c | 741 ++++++++++ samples/bpf/xdp_rxq_info_kern.c | 140 ++ samples/bpf/xdp_rxq_info_user.c | 605 ++++++++ samples/bpf/xdp_sample_pkts_kern.c | 57 + samples/bpf/xdp_sample_pkts_user.c | 202 +++ samples/bpf/xdp_tx_iptunnel_common.h | 34 + samples/bpf/xdp_tx_iptunnel_kern.c | 237 +++ samples/bpf/xdp_tx_iptunnel_user.c | 314 ++++ samples/bpf/xdpsock.h | 11 + samples/bpf/xdpsock_kern.c | 24 + samples/bpf/xdpsock_user.c | 1550 ++++++++++++++++++++ samples/bpf/xsk_fwd.c | 1085 ++++++++++++++ samples/configfs/Makefile | 3 + samples/configfs/configfs_sample.c | 369 +++++ samples/connector/.gitignore | 2 + samples/connector/Makefile | 6 + samples/connector/cn_test.c | 188 +++ samples/connector/ucon.c | 236 +++ samples/ftrace/Makefile | 8 + samples/ftrace/ftrace-direct-modify.c | 97 ++ samples/ftrace/ftrace-direct-too.c | 59 + samples/ftrace/ftrace-direct.c | 50 + samples/ftrace/sample-trace-array.c | 143 ++ samples/ftrace/sample-trace-array.h | 84 ++ samples/hidraw/.gitignore | 2 + samples/hidraw/Makefile | 4 + samples/hidraw/hid-example.c | 182 +++ samples/hw_breakpoint/Makefile | 2 + samples/hw_breakpoint/data_breakpoint.c | 84 ++ samples/kdb/Makefile | 2 + samples/kdb/kdb_hello.c | 60 + samples/kfifo/Makefile | 2 + samples/kfifo/bytestream-example.c | 195 +++ samples/kfifo/dma-example.c | 141 ++ samples/kfifo/inttype-example.c | 186 +++ samples/kfifo/record-example.c | 202 +++ samples/kmemleak/Makefile | 3 + samples/kmemleak/kmemleak-test.c | 99 ++ samples/kobject/Makefile | 2 + samples/kobject/kobject-example.c | 144 ++ samples/kobject/kset-example.c | 288 ++++ samples/kprobes/Makefile | 6 + samples/kprobes/kprobe_example.c | 120 ++ samples/kprobes/kretprobe_example.c | 108 ++ samples/livepatch/Makefile | 8 + samples/livepatch/livepatch-callbacks-busymod.c | 60 + samples/livepatch/livepatch-callbacks-demo.c | 196 +++ samples/livepatch/livepatch-callbacks-mod.c | 41 + samples/livepatch/livepatch-sample.c | 70 + samples/livepatch/livepatch-shadow-fix1.c | 173 +++ samples/livepatch/livepatch-shadow-fix2.c | 132 ++ samples/livepatch/livepatch-shadow-mod.c | 217 +++ samples/mei/.gitignore | 2 + samples/mei/Makefile | 5 + samples/mei/mei-amt-version.c | 479 ++++++ samples/nitro_enclaves/.gitignore | 2 + samples/nitro_enclaves/Makefile | 16 + samples/nitro_enclaves/ne_ioctl_sample.c | 883 +++++++++++ samples/pidfd/.gitignore | 2 + samples/pidfd/Makefile | 4 + samples/pidfd/pidfd-metadata.c | 120 ++ samples/pktgen/README.rst | 46 + samples/pktgen/functions.sh | 334 +++++ samples/pktgen/parameters.sh | 121 ++ .../pktgen/pktgen_bench_xmit_mode_netif_receive.sh | 105 ++ .../pktgen/pktgen_bench_xmit_mode_queue_xmit.sh | 85 ++ samples/pktgen/pktgen_sample01_simple.sh | 90 ++ samples/pktgen/pktgen_sample02_multiqueue.sh | 95 ++ .../pktgen/pktgen_sample03_burst_single_flow.sh | 101 ++ samples/pktgen/pktgen_sample04_many_flows.sh | 115 ++ samples/pktgen/pktgen_sample05_flow_per_thread.sh | 99 ++ ...tgen_sample06_numa_awared_queue_irq_affinity.sh | 113 ++ samples/qmi/Makefile | 2 + samples/qmi/qmi_sample_client.c | 622 ++++++++ samples/rpmsg/Makefile | 2 + samples/rpmsg/rpmsg_client_sample.c | 96 ++ samples/seccomp/.gitignore | 5 + samples/seccomp/Makefile | 6 + samples/seccomp/bpf-direct.c | 191 +++ samples/seccomp/bpf-fancy.c | 105 ++ samples/seccomp/bpf-helper.c | 96 ++ samples/seccomp/bpf-helper.h | 263 ++++ samples/seccomp/dropper.c | 72 + samples/seccomp/user-trap.c | 375 +++++ samples/timers/.gitignore | 2 + samples/timers/Makefile | 4 + samples/timers/hpet_example.c | 295 ++++ samples/trace_events/Makefile | 15 + samples/trace_events/trace-events-sample.c | 140 ++ samples/trace_events/trace-events-sample.h | 524 +++++++ samples/trace_printk/Makefile | 7 + samples/trace_printk/trace-printk.c | 58 + samples/uhid/.gitignore | 2 + samples/uhid/Makefile | 4 + samples/uhid/uhid-example.c | 465 ++++++ samples/v4l/Makefile | 2 + samples/v4l/v4l2-pci-skeleton.c | 915 ++++++++++++ samples/vfio-mdev/Makefile | 5 + samples/vfio-mdev/mbochs.c | 1485 +++++++++++++++++++ samples/vfio-mdev/mdpy-defs.h | 22 + samples/vfio-mdev/mdpy-fb.c | 243 +++ samples/vfio-mdev/mdpy.c | 807 ++++++++++ samples/vfio-mdev/mtty.c | 1491 +++++++++++++++++++ samples/vfs/.gitignore | 3 + samples/vfs/Makefile | 4 + samples/vfs/test-fsmount.c | 129 ++ samples/vfs/test-statx.c | 265 ++++ samples/watch_queue/.gitignore | 1 + samples/watch_queue/Makefile | 4 + samples/watch_queue/watch_test.c | 186 +++ samples/watchdog/.gitignore | 2 + samples/watchdog/Makefile | 2 + samples/watchdog/watchdog-simple.c | 25 + 248 files changed, 41327 insertions(+) create mode 100644 samples/Kconfig create mode 100644 samples/Makefile create mode 100644 samples/auxdisplay/.gitignore create mode 100644 samples/auxdisplay/Makefile create mode 100644 samples/auxdisplay/cfag12864b-example.c create mode 100644 samples/binderfs/.gitignore create mode 100644 samples/binderfs/Makefile create mode 100644 samples/binderfs/binderfs_example.c create mode 100644 samples/bpf/.gitignore create mode 100644 samples/bpf/Makefile create mode 100644 samples/bpf/Makefile.target create mode 100644 samples/bpf/README.rst create mode 100644 samples/bpf/asm_goto_workaround.h create mode 100644 samples/bpf/bpf_insn.h create mode 100644 samples/bpf/bpf_load.c create mode 100644 samples/bpf/bpf_load.h create mode 100644 samples/bpf/cookie_uid_helper_example.c create mode 100644 samples/bpf/cpustat_kern.c create mode 100644 samples/bpf/cpustat_user.c create mode 100755 samples/bpf/do_hbm_test.sh create mode 100644 samples/bpf/fds_example.c create mode 100644 samples/bpf/hash_func01.h create mode 100644 samples/bpf/hbm.c create mode 100644 samples/bpf/hbm.h create mode 100644 samples/bpf/hbm_edt_kern.c create mode 100644 samples/bpf/hbm_kern.h create mode 100644 samples/bpf/hbm_out_kern.c create mode 100644 samples/bpf/ibumad_kern.c create mode 100644 samples/bpf/ibumad_user.c create mode 100644 samples/bpf/lathist_kern.c create mode 100644 samples/bpf/lathist_user.c create mode 100755 samples/bpf/lwt_len_hist.sh create mode 100644 samples/bpf/lwt_len_hist_kern.c create mode 100644 samples/bpf/lwt_len_hist_user.c create mode 100644 samples/bpf/map_perf_test_kern.c create mode 100644 samples/bpf/map_perf_test_user.c create mode 100644 samples/bpf/offwaketime_kern.c create mode 100644 samples/bpf/offwaketime_user.c create mode 100644 samples/bpf/parse_ldabs.c create mode 100644 samples/bpf/parse_simple.c create mode 100644 samples/bpf/parse_varlen.c create mode 100755 samples/bpf/run_cookie_uid_helper_example.sh create mode 100644 samples/bpf/sampleip_kern.c create mode 100644 samples/bpf/sampleip_user.c create mode 100644 samples/bpf/sock_example.c create mode 100644 samples/bpf/sock_example.h create mode 100644 samples/bpf/sock_flags_kern.c create mode 100644 samples/bpf/sockex1_kern.c create mode 100644 samples/bpf/sockex1_user.c create mode 100644 samples/bpf/sockex2_kern.c create mode 100644 samples/bpf/sockex2_user.c create mode 100644 samples/bpf/sockex3_kern.c create mode 100644 samples/bpf/sockex3_user.c create mode 100644 samples/bpf/spintest_kern.c create mode 100644 samples/bpf/spintest_user.c create mode 100644 samples/bpf/syscall_nrs.c create mode 100644 samples/bpf/syscall_tp_kern.c create mode 100644 samples/bpf/syscall_tp_user.c create mode 100644 samples/bpf/task_fd_query_kern.c create mode 100644 samples/bpf/task_fd_query_user.c create mode 100755 samples/bpf/tc_l2_redirect.sh create mode 100644 samples/bpf/tc_l2_redirect_kern.c create mode 100644 samples/bpf/tc_l2_redirect_user.c create mode 100644 samples/bpf/tcbpf1_kern.c create mode 100644 samples/bpf/tcp_basertt_kern.c create mode 100644 samples/bpf/tcp_bpf.readme create mode 100644 samples/bpf/tcp_bufs_kern.c create mode 100644 samples/bpf/tcp_clamp_kern.c create mode 100644 samples/bpf/tcp_cong_kern.c create mode 100644 samples/bpf/tcp_dumpstats_kern.c create mode 100644 samples/bpf/tcp_iw_kern.c create mode 100644 samples/bpf/tcp_rwnd_kern.c create mode 100644 samples/bpf/tcp_synrto_kern.c create mode 100644 samples/bpf/tcp_tos_reflect_kern.c create mode 100644 samples/bpf/test_cgrp2_array_pin.c create mode 100644 samples/bpf/test_cgrp2_attach.c create mode 100644 samples/bpf/test_cgrp2_sock.c create mode 100755 samples/bpf/test_cgrp2_sock.sh create mode 100644 samples/bpf/test_cgrp2_sock2.c create mode 100755 samples/bpf/test_cgrp2_sock2.sh create mode 100755 samples/bpf/test_cgrp2_tc.sh create mode 100644 samples/bpf/test_cgrp2_tc_kern.c create mode 100755 samples/bpf/test_cls_bpf.sh create mode 100644 samples/bpf/test_current_task_under_cgroup_kern.c create mode 100644 samples/bpf/test_current_task_under_cgroup_user.c create mode 100755 samples/bpf/test_ipip.sh create mode 100644 samples/bpf/test_lru_dist.c create mode 100644 samples/bpf/test_lwt_bpf.c create mode 100755 samples/bpf/test_lwt_bpf.sh create mode 100644 samples/bpf/test_map_in_map_kern.c create mode 100644 samples/bpf/test_map_in_map_user.c create mode 100644 samples/bpf/test_overhead_kprobe_kern.c create mode 100644 samples/bpf/test_overhead_raw_tp_kern.c create mode 100644 samples/bpf/test_overhead_tp_kern.c create mode 100644 samples/bpf/test_overhead_user.c create mode 100755 samples/bpf/test_override_return.sh create mode 100644 samples/bpf/test_probe_write_user_kern.c create mode 100644 samples/bpf/test_probe_write_user_user.c create mode 100644 samples/bpf/trace_common.h create mode 100644 samples/bpf/trace_event_kern.c create mode 100644 samples/bpf/trace_event_user.c create mode 100644 samples/bpf/trace_output_kern.c create mode 100644 samples/bpf/trace_output_user.c create mode 100644 samples/bpf/tracex1_kern.c create mode 100644 samples/bpf/tracex1_user.c create mode 100644 samples/bpf/tracex2_kern.c create mode 100644 samples/bpf/tracex2_user.c create mode 100644 samples/bpf/tracex3_kern.c create mode 100644 samples/bpf/tracex3_user.c create mode 100644 samples/bpf/tracex4_kern.c create mode 100644 samples/bpf/tracex4_user.c create mode 100644 samples/bpf/tracex5_kern.c create mode 100644 samples/bpf/tracex5_user.c create mode 100644 samples/bpf/tracex6_kern.c create mode 100644 samples/bpf/tracex6_user.c create mode 100644 samples/bpf/tracex7_kern.c create mode 100644 samples/bpf/tracex7_user.c create mode 100644 samples/bpf/xdp1_kern.c create mode 100644 samples/bpf/xdp1_user.c create mode 100644 samples/bpf/xdp2_kern.c create mode 100755 samples/bpf/xdp2skb_meta.sh create mode 100644 samples/bpf/xdp2skb_meta_kern.c create mode 100644 samples/bpf/xdp_adjust_tail_kern.c create mode 100644 samples/bpf/xdp_adjust_tail_user.c create mode 100644 samples/bpf/xdp_fwd_kern.c create mode 100644 samples/bpf/xdp_fwd_user.c create mode 100644 samples/bpf/xdp_monitor_kern.c create mode 100644 samples/bpf/xdp_monitor_user.c create mode 100644 samples/bpf/xdp_redirect_cpu_kern.c create mode 100644 samples/bpf/xdp_redirect_cpu_user.c create mode 100644 samples/bpf/xdp_redirect_kern.c create mode 100644 samples/bpf/xdp_redirect_map_kern.c create mode 100644 samples/bpf/xdp_redirect_map_user.c create mode 100644 samples/bpf/xdp_redirect_user.c create mode 100644 samples/bpf/xdp_router_ipv4_kern.c create mode 100644 samples/bpf/xdp_router_ipv4_user.c create mode 100644 samples/bpf/xdp_rxq_info_kern.c create mode 100644 samples/bpf/xdp_rxq_info_user.c create mode 100644 samples/bpf/xdp_sample_pkts_kern.c create mode 100644 samples/bpf/xdp_sample_pkts_user.c create mode 100644 samples/bpf/xdp_tx_iptunnel_common.h create mode 100644 samples/bpf/xdp_tx_iptunnel_kern.c create mode 100644 samples/bpf/xdp_tx_iptunnel_user.c create mode 100644 samples/bpf/xdpsock.h create mode 100644 samples/bpf/xdpsock_kern.c create mode 100644 samples/bpf/xdpsock_user.c create mode 100644 samples/bpf/xsk_fwd.c create mode 100644 samples/configfs/Makefile create mode 100644 samples/configfs/configfs_sample.c create mode 100644 samples/connector/.gitignore create mode 100644 samples/connector/Makefile create mode 100644 samples/connector/cn_test.c create mode 100644 samples/connector/ucon.c create mode 100644 samples/ftrace/Makefile create mode 100644 samples/ftrace/ftrace-direct-modify.c create mode 100644 samples/ftrace/ftrace-direct-too.c create mode 100644 samples/ftrace/ftrace-direct.c create mode 100644 samples/ftrace/sample-trace-array.c create mode 100644 samples/ftrace/sample-trace-array.h create mode 100644 samples/hidraw/.gitignore create mode 100644 samples/hidraw/Makefile create mode 100644 samples/hidraw/hid-example.c create mode 100644 samples/hw_breakpoint/Makefile create mode 100644 samples/hw_breakpoint/data_breakpoint.c create mode 100644 samples/kdb/Makefile create mode 100644 samples/kdb/kdb_hello.c create mode 100644 samples/kfifo/Makefile create mode 100644 samples/kfifo/bytestream-example.c create mode 100644 samples/kfifo/dma-example.c create mode 100644 samples/kfifo/inttype-example.c create mode 100644 samples/kfifo/record-example.c create mode 100644 samples/kmemleak/Makefile create mode 100644 samples/kmemleak/kmemleak-test.c create mode 100644 samples/kobject/Makefile create mode 100644 samples/kobject/kobject-example.c create mode 100644 samples/kobject/kset-example.c create mode 100644 samples/kprobes/Makefile create mode 100644 samples/kprobes/kprobe_example.c create mode 100644 samples/kprobes/kretprobe_example.c create mode 100644 samples/livepatch/Makefile create mode 100644 samples/livepatch/livepatch-callbacks-busymod.c create mode 100644 samples/livepatch/livepatch-callbacks-demo.c create mode 100644 samples/livepatch/livepatch-callbacks-mod.c create mode 100644 samples/livepatch/livepatch-sample.c create mode 100644 samples/livepatch/livepatch-shadow-fix1.c create mode 100644 samples/livepatch/livepatch-shadow-fix2.c create mode 100644 samples/livepatch/livepatch-shadow-mod.c create mode 100644 samples/mei/.gitignore create mode 100644 samples/mei/Makefile create mode 100644 samples/mei/mei-amt-version.c create mode 100644 samples/nitro_enclaves/.gitignore create mode 100644 samples/nitro_enclaves/Makefile create mode 100644 samples/nitro_enclaves/ne_ioctl_sample.c create mode 100644 samples/pidfd/.gitignore create mode 100644 samples/pidfd/Makefile create mode 100644 samples/pidfd/pidfd-metadata.c create mode 100644 samples/pktgen/README.rst create mode 100644 samples/pktgen/functions.sh create mode 100644 samples/pktgen/parameters.sh create mode 100755 samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh create mode 100755 samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh create mode 100755 samples/pktgen/pktgen_sample01_simple.sh create mode 100755 samples/pktgen/pktgen_sample02_multiqueue.sh create mode 100755 samples/pktgen/pktgen_sample03_burst_single_flow.sh create mode 100755 samples/pktgen/pktgen_sample04_many_flows.sh create mode 100755 samples/pktgen/pktgen_sample05_flow_per_thread.sh create mode 100755 samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh create mode 100644 samples/qmi/Makefile create mode 100644 samples/qmi/qmi_sample_client.c create mode 100644 samples/rpmsg/Makefile create mode 100644 samples/rpmsg/rpmsg_client_sample.c create mode 100644 samples/seccomp/.gitignore create mode 100644 samples/seccomp/Makefile create mode 100644 samples/seccomp/bpf-direct.c create mode 100644 samples/seccomp/bpf-fancy.c create mode 100644 samples/seccomp/bpf-helper.c create mode 100644 samples/seccomp/bpf-helper.h create mode 100644 samples/seccomp/dropper.c create mode 100644 samples/seccomp/user-trap.c create mode 100644 samples/timers/.gitignore create mode 100644 samples/timers/Makefile create mode 100644 samples/timers/hpet_example.c create mode 100644 samples/trace_events/Makefile create mode 100644 samples/trace_events/trace-events-sample.c create mode 100644 samples/trace_events/trace-events-sample.h create mode 100644 samples/trace_printk/Makefile create mode 100644 samples/trace_printk/trace-printk.c create mode 100644 samples/uhid/.gitignore create mode 100644 samples/uhid/Makefile create mode 100644 samples/uhid/uhid-example.c create mode 100644 samples/v4l/Makefile create mode 100644 samples/v4l/v4l2-pci-skeleton.c create mode 100644 samples/vfio-mdev/Makefile create mode 100644 samples/vfio-mdev/mbochs.c create mode 100644 samples/vfio-mdev/mdpy-defs.h create mode 100644 samples/vfio-mdev/mdpy-fb.c create mode 100644 samples/vfio-mdev/mdpy.c create mode 100644 samples/vfio-mdev/mtty.c create mode 100644 samples/vfs/.gitignore create mode 100644 samples/vfs/Makefile create mode 100644 samples/vfs/test-fsmount.c create mode 100644 samples/vfs/test-statx.c create mode 100644 samples/watch_queue/.gitignore create mode 100644 samples/watch_queue/Makefile create mode 100644 samples/watch_queue/watch_test.c create mode 100644 samples/watchdog/.gitignore create mode 100644 samples/watchdog/Makefile create mode 100644 samples/watchdog/watchdog-simple.c (limited to 'samples') diff --git a/samples/Kconfig b/samples/Kconfig new file mode 100644 index 000000000..e76cdfc50 --- /dev/null +++ b/samples/Kconfig @@ -0,0 +1,219 @@ +# SPDX-License-Identifier: GPL-2.0-only +menuconfig SAMPLES + bool "Sample kernel code" + help + You can build and test sample kernel code here. + +if SAMPLES + +config SAMPLE_AUXDISPLAY + bool "auxdisplay sample" + depends on CC_CAN_LINK + +config SAMPLE_TRACE_EVENTS + tristate "Build trace_events examples -- loadable modules only" + depends on EVENT_TRACING && m + help + This build trace event example modules. + +config SAMPLE_TRACE_PRINTK + tristate "Build trace_printk module - tests various trace_printk formats" + depends on EVENT_TRACING && m + help + This builds a module that calls trace_printk() and can be used to + test various trace_printk() calls from a module. + +config SAMPLE_FTRACE_DIRECT + tristate "Build register_ftrace_direct() example" + depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS && m + depends on X86_64 # has x86_64 inlined asm + help + This builds an ftrace direct function example + that hooks to wake_up_process and prints the parameters. + +config SAMPLE_TRACE_ARRAY + tristate "Build sample module for kernel access to Ftrace instancess" + depends on EVENT_TRACING && m + help + This builds a module that demonstrates the use of various APIs to + access Ftrace instances from within the kernel. + +config SAMPLE_KOBJECT + tristate "Build kobject examples" + help + This config option will allow you to build a number of + different kobject sample modules showing how to use kobjects, + ksets, and ktypes properly. + + If in doubt, say "N" here. + +config SAMPLE_KPROBES + tristate "Build kprobes examples -- loadable modules only" + depends on KPROBES && m + help + This build several kprobes example modules. + +config SAMPLE_KRETPROBES + tristate "Build kretprobes example -- loadable modules only" + default m + depends on SAMPLE_KPROBES && KRETPROBES + +config SAMPLE_HW_BREAKPOINT + tristate "Build kernel hardware breakpoint examples -- loadable module only" + depends on HAVE_HW_BREAKPOINT && m + help + This builds kernel hardware breakpoint example modules. + +config SAMPLE_KFIFO + tristate "Build kfifo examples -- loadable modules only" + depends on m + help + This config option will allow you to build a number of + different kfifo sample modules showing how to use the + generic kfifo API. + + If in doubt, say "N" here. + +config SAMPLE_KDB + tristate "Build kdb command example -- loadable modules only" + depends on KGDB_KDB && m + help + Build an example of how to dynamically add the hello + command to the kdb shell. + +config SAMPLE_QMI_CLIENT + tristate "Build qmi client sample -- loadable modules only" + depends on m + depends on ARCH_QCOM + depends on NET + select QCOM_QMI_HELPERS + help + Build an QMI client sample driver, which demonstrates how to + communicate with a remote QRTR service, using QMI encoded messages. + +config SAMPLE_RPMSG_CLIENT + tristate "Build rpmsg client sample -- loadable modules only" + depends on RPMSG && m + help + Build an rpmsg client sample driver, which demonstrates how + to communicate with an AMP-configured remote processor over + the rpmsg bus. + +config SAMPLE_LIVEPATCH + tristate "Build live patching samples -- loadable modules only" + depends on LIVEPATCH && m + help + Build sample live patch demonstrations. + +config SAMPLE_CONFIGFS + tristate "Build configfs patching sample -- loadable modules only" + depends on CONFIGFS_FS && m + help + Builds a sample configfs interface. + +config SAMPLE_CONNECTOR + tristate "Build connector sample -- loadable modules only" + depends on CONNECTOR && HEADERS_INSTALL && m + help + When enabled, this builds both a sample kernel module for + the connector interface and a user space tool to communicate + with it. + See also Documentation/driver-api/connector.rst + +config SAMPLE_HIDRAW + bool "hidraw sample" + depends on CC_CAN_LINK && HEADERS_INSTALL + +config SAMPLE_PIDFD + bool "pidfd sample" + depends on CC_CAN_LINK && HEADERS_INSTALL + +config SAMPLE_SECCOMP + bool "Build seccomp sample code" + depends on SECCOMP_FILTER && CC_CAN_LINK && HEADERS_INSTALL + help + Build samples of seccomp filters using various methods of + BPF filter construction. + +config SAMPLE_TIMER + bool "Timer sample" + depends on CC_CAN_LINK && HEADERS_INSTALL + +config SAMPLE_UHID + bool "UHID sample" + depends on CC_CAN_LINK && HEADERS_INSTALL + help + Build UHID sample program. + +config SAMPLE_VFIO_MDEV_MTTY + tristate "Build VFIO mtty example mediated device sample code -- loadable modules only" + depends on VFIO_MDEV_DEVICE && m + help + Build a virtual tty sample driver for use as a VFIO + mediated device + +config SAMPLE_VFIO_MDEV_MDPY + tristate "Build VFIO mdpy example mediated device sample code -- loadable modules only" + depends on VFIO_MDEV_DEVICE && m + help + Build a virtual display sample driver for use as a VFIO + mediated device. It is a simple framebuffer and supports + the region display interface (VFIO_GFX_PLANE_TYPE_REGION). + +config SAMPLE_VFIO_MDEV_MDPY_FB + tristate "Build VFIO mdpy example guest fbdev driver -- loadable module only" + depends on FB && m + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + Guest fbdev driver for the virtual display sample driver. + +config SAMPLE_VFIO_MDEV_MBOCHS + tristate "Build VFIO mdpy example mediated device sample code -- loadable modules only" + depends on VFIO_MDEV_DEVICE && m + select DMA_SHARED_BUFFER + help + Build a virtual display sample driver for use as a VFIO + mediated device. It supports the region display interface + (VFIO_GFX_PLANE_TYPE_DMABUF). + Emulate enough of qemu stdvga to make bochs-drm.ko happy. + That is basically the vram memory bar and the bochs dispi + interface vbe registers in the mmio register bar. + Specifically it does *not* include any legacy vga stuff. + Device looks a lot like "qemu -device secondary-vga". + +config SAMPLE_ANDROID_BINDERFS + bool "Build Android binderfs example" + depends on CC_CAN_LINK && HEADERS_INSTALL + help + Builds a sample program to illustrate the use of the Android binderfs + filesystem. + +config SAMPLE_VFS + bool "Build example programs that use new VFS system calls" + depends on CC_CAN_LINK && HEADERS_INSTALL + help + Build example userspace programs that use new VFS system calls such + as mount API and statx(). Note that this is restricted to the x86 + arch whilst it accesses system calls that aren't yet in all arches. + +config SAMPLE_INTEL_MEI + bool "Build example program working with intel mei driver" + depends on INTEL_MEI + depends on CC_CAN_LINK && HEADERS_INSTALL + help + Build a sample program to work with mei device. + +config SAMPLE_WATCHDOG + bool "watchdog sample" + depends on CC_CAN_LINK + +config SAMPLE_WATCH_QUEUE + bool "Build example watch_queue notification API consumer" + depends on CC_CAN_LINK && HEADERS_INSTALL + help + Build example userspace program to use the new mount_notify(), + sb_notify() syscalls and the KEYCTL_WATCH_KEY keyctl() function. + +endif # SAMPLES diff --git a/samples/Makefile b/samples/Makefile new file mode 100644 index 000000000..c3392a595 --- /dev/null +++ b/samples/Makefile @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for Linux samples code + +subdir-$(CONFIG_SAMPLE_AUXDISPLAY) += auxdisplay +subdir-$(CONFIG_SAMPLE_ANDROID_BINDERFS) += binderfs +obj-$(CONFIG_SAMPLE_CONFIGFS) += configfs/ +obj-$(CONFIG_SAMPLE_CONNECTOR) += connector/ +subdir-$(CONFIG_SAMPLE_HIDRAW) += hidraw +obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += hw_breakpoint/ +obj-$(CONFIG_SAMPLE_KDB) += kdb/ +obj-$(CONFIG_SAMPLE_KFIFO) += kfifo/ +obj-$(CONFIG_SAMPLE_KOBJECT) += kobject/ +obj-$(CONFIG_SAMPLE_KPROBES) += kprobes/ +obj-$(CONFIG_SAMPLE_LIVEPATCH) += livepatch/ +subdir-$(CONFIG_SAMPLE_PIDFD) += pidfd +obj-$(CONFIG_SAMPLE_QMI_CLIENT) += qmi/ +obj-$(CONFIG_SAMPLE_RPMSG_CLIENT) += rpmsg/ +subdir-$(CONFIG_SAMPLE_SECCOMP) += seccomp +subdir-$(CONFIG_SAMPLE_TIMER) += timers +obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace_events/ +obj-$(CONFIG_SAMPLE_TRACE_PRINTK) += trace_printk/ +obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace/ +obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += ftrace/ +subdir-$(CONFIG_SAMPLE_UHID) += uhid +obj-$(CONFIG_VIDEO_PCI_SKELETON) += v4l/ +obj-y += vfio-mdev/ +subdir-$(CONFIG_SAMPLE_VFS) += vfs +obj-$(CONFIG_SAMPLE_INTEL_MEI) += mei/ +subdir-$(CONFIG_SAMPLE_WATCHDOG) += watchdog +subdir-$(CONFIG_SAMPLE_WATCH_QUEUE) += watch_queue +obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak/ diff --git a/samples/auxdisplay/.gitignore b/samples/auxdisplay/.gitignore new file mode 100644 index 000000000..2ed744c0e --- /dev/null +++ b/samples/auxdisplay/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +cfag12864b-example diff --git a/samples/auxdisplay/Makefile b/samples/auxdisplay/Makefile new file mode 100644 index 000000000..19d556893 --- /dev/null +++ b/samples/auxdisplay/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +userprogs-always-y += cfag12864b-example diff --git a/samples/auxdisplay/cfag12864b-example.c b/samples/auxdisplay/cfag12864b-example.c new file mode 100644 index 000000000..bfeab44f8 --- /dev/null +++ b/samples/auxdisplay/cfag12864b-example.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Filename: cfag12864b-example.c + * Version: 0.1.0 + * Description: cfag12864b LCD userspace example program + * + * Author: Copyright (C) Miguel Ojeda Sandonis + * Date: 2006-10-31 + */ + +/* + * ------------------------ + * start of cfag12864b code + * ------------------------ + */ + +#include +#include +#include +#include +#include +#include + +#define CFAG12864B_WIDTH (128) +#define CFAG12864B_HEIGHT (64) +#define CFAG12864B_SIZE (128 * 64 / 8) +#define CFAG12864B_BPB (8) +#define CFAG12864B_ADDRESS(x, y) ((y) * CFAG12864B_WIDTH / \ + CFAG12864B_BPB + (x) / CFAG12864B_BPB) +#define CFAG12864B_BIT(n) (((unsigned char) 1) << (n)) + +#undef CFAG12864B_DOCHECK +#ifdef CFAG12864B_DOCHECK + #define CFAG12864B_CHECK(x, y) ((x) < CFAG12864B_WIDTH && \ + (y) < CFAG12864B_HEIGHT) +#else + #define CFAG12864B_CHECK(x, y) (1) +#endif + +int cfag12864b_fd; +unsigned char * cfag12864b_mem; +unsigned char cfag12864b_buffer[CFAG12864B_SIZE]; + +/* + * init a cfag12864b framebuffer device + * + * No error: return = 0 + * Unable to open: return = -1 + * Unable to mmap: return = -2 + */ +static int cfag12864b_init(char *path) +{ + cfag12864b_fd = open(path, O_RDWR); + if (cfag12864b_fd == -1) + return -1; + + cfag12864b_mem = mmap(0, CFAG12864B_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, cfag12864b_fd, 0); + if (cfag12864b_mem == MAP_FAILED) { + close(cfag12864b_fd); + return -2; + } + + return 0; +} + +/* + * exit a cfag12864b framebuffer device + */ +static void cfag12864b_exit(void) +{ + munmap(cfag12864b_mem, CFAG12864B_SIZE); + close(cfag12864b_fd); +} + +/* + * set (x, y) pixel + */ +static void cfag12864b_set(unsigned char x, unsigned char y) +{ + if (CFAG12864B_CHECK(x, y)) + cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] |= + CFAG12864B_BIT(x % CFAG12864B_BPB); +} + +/* + * unset (x, y) pixel + */ +static void cfag12864b_unset(unsigned char x, unsigned char y) +{ + if (CFAG12864B_CHECK(x, y)) + cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] &= + ~CFAG12864B_BIT(x % CFAG12864B_BPB); +} + +/* + * is set (x, y) pixel? + * + * Pixel off: return = 0 + * Pixel on: return = 1 + */ +static unsigned char cfag12864b_isset(unsigned char x, unsigned char y) +{ + if (CFAG12864B_CHECK(x, y)) + if (cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] & + CFAG12864B_BIT(x % CFAG12864B_BPB)) + return 1; + + return 0; +} + +/* + * not (x, y) pixel + */ +static void cfag12864b_not(unsigned char x, unsigned char y) +{ + if (cfag12864b_isset(x, y)) + cfag12864b_unset(x, y); + else + cfag12864b_set(x, y); +} + +/* + * fill (set all pixels) + */ +static void cfag12864b_fill(void) +{ + unsigned short i; + + for (i = 0; i < CFAG12864B_SIZE; i++) + cfag12864b_buffer[i] = 0xFF; +} + +/* + * clear (unset all pixels) + */ +static void cfag12864b_clear(void) +{ + unsigned short i; + + for (i = 0; i < CFAG12864B_SIZE; i++) + cfag12864b_buffer[i] = 0; +} + +/* + * format a [128*64] matrix + * + * Pixel off: src[i] = 0 + * Pixel on: src[i] > 0 + */ +static void cfag12864b_format(unsigned char * matrix) +{ + unsigned char i, j, n; + + for (i = 0; i < CFAG12864B_HEIGHT; i++) + for (j = 0; j < CFAG12864B_WIDTH / CFAG12864B_BPB; j++) { + cfag12864b_buffer[i * CFAG12864B_WIDTH / CFAG12864B_BPB + + j] = 0; + for (n = 0; n < CFAG12864B_BPB; n++) + if (matrix[i * CFAG12864B_WIDTH + + j * CFAG12864B_BPB + n]) + cfag12864b_buffer[i * CFAG12864B_WIDTH / + CFAG12864B_BPB + j] |= + CFAG12864B_BIT(n); + } +} + +/* + * blit buffer to lcd + */ +static void cfag12864b_blit(void) +{ + memcpy(cfag12864b_mem, cfag12864b_buffer, CFAG12864B_SIZE); +} + +/* + * ---------------------- + * end of cfag12864b code + * ---------------------- + */ + +#include + +#define EXAMPLES 6 + +static void example(unsigned char n) +{ + unsigned short i, j; + unsigned char matrix[CFAG12864B_WIDTH * CFAG12864B_HEIGHT]; + + if (n > EXAMPLES) + return; + + printf("Example %i/%i - ", n, EXAMPLES); + + switch (n) { + case 1: + printf("Draw points setting bits"); + cfag12864b_clear(); + for (i = 0; i < CFAG12864B_WIDTH; i += 2) + for (j = 0; j < CFAG12864B_HEIGHT; j += 2) + cfag12864b_set(i, j); + break; + + case 2: + printf("Clear the LCD"); + cfag12864b_clear(); + break; + + case 3: + printf("Draw rows formatting a [128*64] matrix"); + memset(matrix, 0, CFAG12864B_WIDTH * CFAG12864B_HEIGHT); + for (i = 0; i < CFAG12864B_WIDTH; i++) + for (j = 0; j < CFAG12864B_HEIGHT; j += 2) + matrix[j * CFAG12864B_WIDTH + i] = 1; + cfag12864b_format(matrix); + break; + + case 4: + printf("Fill the lcd"); + cfag12864b_fill(); + break; + + case 5: + printf("Draw columns unsetting bits"); + for (i = 0; i < CFAG12864B_WIDTH; i += 2) + for (j = 0; j < CFAG12864B_HEIGHT; j++) + cfag12864b_unset(i, j); + break; + + case 6: + printf("Do negative not-ing all bits"); + for (i = 0; i < CFAG12864B_WIDTH; i++) + for (j = 0; j < CFAG12864B_HEIGHT; j ++) + cfag12864b_not(i, j); + break; + } + + puts(" - [Press Enter]"); +} + +int main(int argc, char *argv[]) +{ + unsigned char n; + + if (argc != 2) { + printf( + "Syntax: %s fbdev\n" + "Usually: /dev/fb0, /dev/fb1...\n", argv[0]); + return -1; + } + + if (cfag12864b_init(argv[1])) { + printf("Can't init %s fbdev\n", argv[1]); + return -2; + } + + for (n = 1; n <= EXAMPLES; n++) { + example(n); + cfag12864b_blit(); + while (getchar() != '\n'); + } + + cfag12864b_exit(); + + return 0; +} diff --git a/samples/binderfs/.gitignore b/samples/binderfs/.gitignore new file mode 100644 index 000000000..eb60241e8 --- /dev/null +++ b/samples/binderfs/.gitignore @@ -0,0 +1 @@ +binderfs_example diff --git a/samples/binderfs/Makefile b/samples/binderfs/Makefile new file mode 100644 index 000000000..629e43b9b --- /dev/null +++ b/samples/binderfs/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +userprogs-always-y += binderfs_example + +userccflags += -I usr/include diff --git a/samples/binderfs/binderfs_example.c b/samples/binderfs/binderfs_example.c new file mode 100644 index 000000000..0fd92cdda --- /dev/null +++ b/samples/binderfs/binderfs_example.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int main(int argc, char *argv[]) +{ + int fd, ret, saved_errno; + struct binderfs_device device = { 0 }; + + ret = unshare(CLONE_NEWNS); + if (ret < 0) { + fprintf(stderr, "%s - Failed to unshare mount namespace\n", + strerror(errno)); + exit(EXIT_FAILURE); + } + + ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); + if (ret < 0) { + fprintf(stderr, "%s - Failed to mount / as private\n", + strerror(errno)); + exit(EXIT_FAILURE); + } + + ret = mkdir("/dev/binderfs", 0755); + if (ret < 0 && errno != EEXIST) { + fprintf(stderr, "%s - Failed to create binderfs mountpoint\n", + strerror(errno)); + exit(EXIT_FAILURE); + } + + ret = mount(NULL, "/dev/binderfs", "binder", 0, 0); + if (ret < 0) { + fprintf(stderr, "%s - Failed to mount binderfs\n", + strerror(errno)); + exit(EXIT_FAILURE); + } + + memcpy(device.name, "my-binder", strlen("my-binder")); + + fd = open("/dev/binderfs/binder-control", O_RDONLY | O_CLOEXEC); + if (fd < 0) { + fprintf(stderr, "%s - Failed to open binder-control device\n", + strerror(errno)); + exit(EXIT_FAILURE); + } + + ret = ioctl(fd, BINDER_CTL_ADD, &device); + saved_errno = errno; + close(fd); + errno = saved_errno; + if (ret < 0) { + fprintf(stderr, "%s - Failed to allocate new binder device\n", + strerror(errno)); + exit(EXIT_FAILURE); + } + + printf("Allocated new binder device with major %d, minor %d, and name %s\n", + device.major, device.minor, device.name); + + ret = unlink("/dev/binderfs/my-binder"); + if (ret < 0) { + fprintf(stderr, "%s - Failed to delete binder device\n", + strerror(errno)); + exit(EXIT_FAILURE); + } + + /* Cleanup happens when the mount namespace dies. */ + exit(EXIT_SUCCESS); +} diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore new file mode 100644 index 000000000..b2f29bc8d --- /dev/null +++ b/samples/bpf/.gitignore @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: GPL-2.0-only +cpustat +fds_example +hbm +ibumad +lathist +lwt_len_hist +map_perf_test +offwaketime +per_socket_stats_example +sampleip +sock_example +sockex1 +sockex2 +sockex3 +spintest +syscall_nrs.h +syscall_tp +task_fd_query +tc_l2_redirect +test_cgrp2_array_pin +test_cgrp2_attach +test_cgrp2_attach2 +test_cgrp2_sock +test_cgrp2_sock2 +test_current_task_under_cgroup +test_lru_dist +test_map_in_map +test_overhead +test_probe_write_user +trace_event +trace_output +tracex1 +tracex2 +tracex3 +tracex4 +tracex5 +tracex6 +tracex7 +xdp1 +xdp2 +xdp_adjust_tail +xdp_fwd +xdp_monitor +xdp_redirect +xdp_redirect_cpu +xdp_redirect_map +xdp_router_ipv4 +xdp_rxq_info +xdp_sample_pkts +xdp_tx_iptunnel +xdpsock +xsk_fwd +testfile.img diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile new file mode 100644 index 000000000..aeebf5d12 --- /dev/null +++ b/samples/bpf/Makefile @@ -0,0 +1,329 @@ +# SPDX-License-Identifier: GPL-2.0 + +BPF_SAMPLES_PATH ?= $(abspath $(srctree)/$(src)) +TOOLS_PATH := $(BPF_SAMPLES_PATH)/../../tools + +# List of programs to build +tprogs-y := test_lru_dist +tprogs-y += sock_example +tprogs-y += fds_example +tprogs-y += sockex1 +tprogs-y += sockex2 +tprogs-y += sockex3 +tprogs-y += tracex1 +tprogs-y += tracex2 +tprogs-y += tracex3 +tprogs-y += tracex4 +tprogs-y += tracex5 +tprogs-y += tracex6 +tprogs-y += tracex7 +tprogs-y += test_probe_write_user +tprogs-y += trace_output +tprogs-y += lathist +tprogs-y += offwaketime +tprogs-y += spintest +tprogs-y += map_perf_test +tprogs-y += test_overhead +tprogs-y += test_cgrp2_array_pin +tprogs-y += test_cgrp2_attach +tprogs-y += test_cgrp2_sock +tprogs-y += test_cgrp2_sock2 +tprogs-y += xdp1 +tprogs-y += xdp2 +tprogs-y += xdp_router_ipv4 +tprogs-y += test_current_task_under_cgroup +tprogs-y += trace_event +tprogs-y += sampleip +tprogs-y += tc_l2_redirect +tprogs-y += lwt_len_hist +tprogs-y += xdp_tx_iptunnel +tprogs-y += test_map_in_map +tprogs-y += per_socket_stats_example +tprogs-y += xdp_redirect +tprogs-y += xdp_redirect_map +tprogs-y += xdp_redirect_cpu +tprogs-y += xdp_monitor +tprogs-y += xdp_rxq_info +tprogs-y += syscall_tp +tprogs-y += cpustat +tprogs-y += xdp_adjust_tail +tprogs-y += xdpsock +tprogs-y += xsk_fwd +tprogs-y += xdp_fwd +tprogs-y += task_fd_query +tprogs-y += xdp_sample_pkts +tprogs-y += ibumad +tprogs-y += hbm + +# Libbpf dependencies +LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a + +CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o +TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o + +fds_example-objs := fds_example.o +sockex1-objs := sockex1_user.o +sockex2-objs := sockex2_user.o +sockex3-objs := sockex3_user.o +tracex1-objs := tracex1_user.o $(TRACE_HELPERS) +tracex2-objs := tracex2_user.o +tracex3-objs := tracex3_user.o +tracex4-objs := tracex4_user.o +tracex5-objs := tracex5_user.o $(TRACE_HELPERS) +tracex6-objs := tracex6_user.o +tracex7-objs := tracex7_user.o +test_probe_write_user-objs := test_probe_write_user_user.o +trace_output-objs := trace_output_user.o $(TRACE_HELPERS) +lathist-objs := lathist_user.o +offwaketime-objs := offwaketime_user.o $(TRACE_HELPERS) +spintest-objs := spintest_user.o $(TRACE_HELPERS) +map_perf_test-objs := map_perf_test_user.o +test_overhead-objs := bpf_load.o test_overhead_user.o +test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o +test_cgrp2_attach-objs := test_cgrp2_attach.o +test_cgrp2_sock-objs := test_cgrp2_sock.o +test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o +xdp1-objs := xdp1_user.o +# reuse xdp1 source intentionally +xdp2-objs := xdp1_user.o +xdp_router_ipv4-objs := xdp_router_ipv4_user.o +test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \ + test_current_task_under_cgroup_user.o +trace_event-objs := trace_event_user.o $(TRACE_HELPERS) +sampleip-objs := sampleip_user.o $(TRACE_HELPERS) +tc_l2_redirect-objs := bpf_load.o tc_l2_redirect_user.o +lwt_len_hist-objs := bpf_load.o lwt_len_hist_user.o +xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o +test_map_in_map-objs := test_map_in_map_user.o +per_socket_stats_example-objs := cookie_uid_helper_example.o +xdp_redirect-objs := xdp_redirect_user.o +xdp_redirect_map-objs := xdp_redirect_map_user.o +xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o +xdp_monitor-objs := xdp_monitor_user.o +xdp_rxq_info-objs := xdp_rxq_info_user.o +syscall_tp-objs := syscall_tp_user.o +cpustat-objs := cpustat_user.o +xdp_adjust_tail-objs := xdp_adjust_tail_user.o +xdpsock-objs := xdpsock_user.o +xsk_fwd-objs := xsk_fwd.o +xdp_fwd-objs := xdp_fwd_user.o +task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) +xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) +ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS) +hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS) + +# Tell kbuild to always build the programs +always-y := $(tprogs-y) +always-y += sockex1_kern.o +always-y += sockex2_kern.o +always-y += sockex3_kern.o +always-y += tracex1_kern.o +always-y += tracex2_kern.o +always-y += tracex3_kern.o +always-y += tracex4_kern.o +always-y += tracex5_kern.o +always-y += tracex6_kern.o +always-y += tracex7_kern.o +always-y += sock_flags_kern.o +always-y += test_probe_write_user_kern.o +always-y += trace_output_kern.o +always-y += tcbpf1_kern.o +always-y += tc_l2_redirect_kern.o +always-y += lathist_kern.o +always-y += offwaketime_kern.o +always-y += spintest_kern.o +always-y += map_perf_test_kern.o +always-y += test_overhead_tp_kern.o +always-y += test_overhead_raw_tp_kern.o +always-y += test_overhead_kprobe_kern.o +always-y += parse_varlen.o parse_simple.o parse_ldabs.o +always-y += test_cgrp2_tc_kern.o +always-y += xdp1_kern.o +always-y += xdp2_kern.o +always-y += xdp_router_ipv4_kern.o +always-y += test_current_task_under_cgroup_kern.o +always-y += trace_event_kern.o +always-y += sampleip_kern.o +always-y += lwt_len_hist_kern.o +always-y += xdp_tx_iptunnel_kern.o +always-y += test_map_in_map_kern.o +always-y += tcp_synrto_kern.o +always-y += tcp_rwnd_kern.o +always-y += tcp_bufs_kern.o +always-y += tcp_cong_kern.o +always-y += tcp_iw_kern.o +always-y += tcp_clamp_kern.o +always-y += tcp_basertt_kern.o +always-y += tcp_tos_reflect_kern.o +always-y += tcp_dumpstats_kern.o +always-y += xdp_redirect_kern.o +always-y += xdp_redirect_map_kern.o +always-y += xdp_redirect_cpu_kern.o +always-y += xdp_monitor_kern.o +always-y += xdp_rxq_info_kern.o +always-y += xdp2skb_meta_kern.o +always-y += syscall_tp_kern.o +always-y += cpustat_kern.o +always-y += xdp_adjust_tail_kern.o +always-y += xdp_fwd_kern.o +always-y += task_fd_query_kern.o +always-y += xdp_sample_pkts_kern.o +always-y += ibumad_kern.o +always-y += hbm_out_kern.o +always-y += hbm_edt_kern.o +always-y += xdpsock_kern.o + +ifeq ($(ARCH), arm) +# Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux +# headers when arm instruction set identification is requested. +ARM_ARCH_SELECTOR := $(filter -D__LINUX_ARM_ARCH__%, $(KBUILD_CFLAGS)) +BPF_EXTRA_CFLAGS := $(ARM_ARCH_SELECTOR) +TPROGS_CFLAGS += $(ARM_ARCH_SELECTOR) +endif + +TPROGS_CFLAGS += -Wall -O2 +TPROGS_CFLAGS += -Wmissing-prototypes +TPROGS_CFLAGS += -Wstrict-prototypes + +TPROGS_CFLAGS += -I$(objtree)/usr/include +TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ +TPROGS_CFLAGS += -I$(srctree)/tools/lib/ +TPROGS_CFLAGS += -I$(srctree)/tools/include +TPROGS_CFLAGS += -I$(srctree)/tools/perf +TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0 + +ifdef SYSROOT +TPROGS_CFLAGS += --sysroot=$(SYSROOT) +TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib +endif + +TPROGCFLAGS_bpf_load.o += -Wno-unused-variable + +TPROGS_LDLIBS += $(LIBBPF) -lelf -lz +TPROGLDLIBS_tracex4 += -lrt +TPROGLDLIBS_trace_output += -lrt +TPROGLDLIBS_map_perf_test += -lrt +TPROGLDLIBS_test_overhead += -lrt +TPROGLDLIBS_xdpsock += -pthread +TPROGLDLIBS_xsk_fwd += -pthread + +# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: +# make M=samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang +LLC ?= llc +CLANG ?= clang +OPT ?= opt +LLVM_DIS ?= llvm-dis +LLVM_OBJCOPY ?= llvm-objcopy +BTF_PAHOLE ?= pahole + +# Detect that we're cross compiling and use the cross compiler +ifdef CROSS_COMPILE +CLANG_ARCH_ARGS = --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif + +# Don't evaluate probes and warnings if we need to run make recursively +ifneq ($(src),) +HDR_PROBE := $(shell printf "\#include \n struct list_head { int a; }; int main() { return 0; }" | \ + $(CC) $(TPROGS_CFLAGS) $(TPROGS_LDFLAGS) -x c - \ + -o /dev/null 2>/dev/null && echo okay) + +ifeq ($(HDR_PROBE),) +$(warning WARNING: Detected possible issues with include path.) +$(warning WARNING: Please install kernel headers locally (make headers_install).) +endif + +BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) +BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) +BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') +BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \ + $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \ + readelf -S ./llvm_btf_verify.o | grep BTF; \ + /bin/rm -f ./llvm_btf_verify.o) + +BPF_EXTRA_CFLAGS += -fno-stack-protector +ifneq ($(BTF_LLVM_PROBE),) + BPF_EXTRA_CFLAGS += -g +else +ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),) + BPF_EXTRA_CFLAGS += -g + LLC_FLAGS += -mattr=dwarfris + DWARF2BTF = y +endif +endif +endif + +# Trick to allow make to be run from this directory +all: + $(MAKE) -C ../../ M=$(CURDIR) BPF_SAMPLES_PATH=$(CURDIR) + +clean: + $(MAKE) -C ../../ M=$(CURDIR) clean + @find $(CURDIR) -type f -name '*~' -delete + +$(LIBBPF): FORCE +# Fix up variables inherited from Kbuild that tools/ build system won't like + $(MAKE) -C $(dir $@) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \ + LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ O= + +$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE + $(call filechk,offsets,__SYSCALL_NRS_H__) + +targets += syscall_nrs.s +clean-files += syscall_nrs.h + +FORCE: + + +# Verify LLVM compiler tools are available and bpf target is supported by llc +.PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC) + +verify_cmds: $(CLANG) $(LLC) + @for TOOL in $^ ; do \ + if ! (which -- "$${TOOL}" > /dev/null 2>&1); then \ + echo "*** ERROR: Cannot find LLVM tool $${TOOL}" ;\ + exit 1; \ + else true; fi; \ + done + +verify_target_bpf: verify_cmds + @if ! (${LLC} -march=bpf -mattr=help > /dev/null 2>&1); then \ + echo "*** ERROR: LLVM (${LLC}) does not support 'bpf' target" ;\ + echo " NOTICE: LLVM version >= 3.7.1 required" ;\ + exit 2; \ + else true; fi + +$(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF) +$(src)/*.c: verify_target_bpf $(LIBBPF) + +$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h +$(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h +$(obj)/hbm.o: $(src)/hbm.h +$(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h + +-include $(BPF_SAMPLES_PATH)/Makefile.target + +# asm/sysreg.h - inline assembly used by it is incompatible with llvm. +# But, there is no easy way to fix it, so just exclude it since it is +# useless for BPF samples. +# below we use long chain of commands, clang | opt | llvm-dis | llc, +# to generate final object file. 'clang' compiles the source into IR +# with native target, e.g., x64, arm64, etc. 'opt' does bpf CORE IR builtin +# processing (llvm12) and IR optimizations. 'llvm-dis' converts +# 'opt' output to IR, and finally 'llc' generates bpf byte code. +$(obj)/%.o: $(src)/%.c + @echo " CLANG-bpf " $@ + $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \ + -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \ + -I$(srctree)/tools/lib/ \ + -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \ + -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \ + -Wno-gnu-variable-sized-type-not-at-end \ + -Wno-address-of-packed-member -Wno-tautological-compare \ + -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \ + -I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \ + -O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \ + $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \ + $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@ +ifeq ($(DWARF2BTF),y) + $(BTF_PAHOLE) -J $@ +endif diff --git a/samples/bpf/Makefile.target b/samples/bpf/Makefile.target new file mode 100644 index 000000000..7621f55e2 --- /dev/null +++ b/samples/bpf/Makefile.target @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: GPL-2.0 +# ========================================================================== +# Building binaries on the host system +# Binaries are not used during the compilation of the kernel, and intended +# to be build for target board, target board can be host of course. Added to +# build binaries to run not on host system. +# +# Sample syntax +# tprogs-y := xsk_example +# Will compile xsk_example.c and create an executable named xsk_example +# +# tprogs-y := xdpsock +# xdpsock-objs := xdpsock_1.o xdpsock_2.o +# Will compile xdpsock_1.c and xdpsock_2.c, and then link the executable +# xdpsock, based on xdpsock_1.o and xdpsock_2.o +# +# Derived from scripts/Makefile.host +# +__tprogs := $(sort $(tprogs-y)) + +# C code +# Executables compiled from a single .c file +tprog-csingle := $(foreach m,$(__tprogs), \ + $(if $($(m)-objs),,$(m))) + +# C executables linked based on several .o files +tprog-cmulti := $(foreach m,$(__tprogs),\ + $(if $($(m)-objs),$(m))) + +# Object (.o) files compiled from .c files +tprog-cobjs := $(sort $(foreach m,$(__tprogs),$($(m)-objs))) + +tprog-csingle := $(addprefix $(obj)/,$(tprog-csingle)) +tprog-cmulti := $(addprefix $(obj)/,$(tprog-cmulti)) +tprog-cobjs := $(addprefix $(obj)/,$(tprog-cobjs)) + +##### +# Handle options to gcc. Support building with separate output directory + +_tprogc_flags = $(TPROGS_CFLAGS) \ + $(TPROGCFLAGS_$(basetarget).o) + +# $(objtree)/$(obj) for including generated headers from checkin source files +ifeq ($(KBUILD_EXTMOD),) +ifdef building_out_of_srctree +_tprogc_flags += -I $(objtree)/$(obj) +endif +endif + +tprogc_flags = -Wp,-MD,$(depfile) $(_tprogc_flags) + +# Create executable from a single .c file +# tprog-csingle -> Executable +quiet_cmd_tprog-csingle = CC $@ + cmd_tprog-csingle = $(CC) $(tprogc_flags) $(TPROGS_LDFLAGS) -o $@ $< \ + $(TPROGS_LDLIBS) $(TPROGLDLIBS_$(@F)) +$(tprog-csingle): $(obj)/%: $(src)/%.c FORCE + $(call if_changed_dep,tprog-csingle) + +# Link an executable based on list of .o files, all plain c +# tprog-cmulti -> executable +quiet_cmd_tprog-cmulti = LD $@ + cmd_tprog-cmulti = $(CC) $(tprogc_flags) $(TPROGS_LDFLAGS) -o $@ \ + $(addprefix $(obj)/,$($(@F)-objs)) \ + $(TPROGS_LDLIBS) $(TPROGLDLIBS_$(@F)) +$(tprog-cmulti): $(tprog-cobjs) FORCE + $(call if_changed,tprog-cmulti) +$(call multi_depend, $(tprog-cmulti), , -objs) + +# Create .o file from a single .c file +# tprog-cobjs -> .o +quiet_cmd_tprog-cobjs = CC $@ + cmd_tprog-cobjs = $(CC) $(tprogc_flags) -c -o $@ $< +$(tprog-cobjs): $(obj)/%.o: $(src)/%.c FORCE + $(call if_changed_dep,tprog-cobjs) diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst new file mode 100644 index 000000000..dd34b2d26 --- /dev/null +++ b/samples/bpf/README.rst @@ -0,0 +1,105 @@ +eBPF sample programs +==================== + +This directory contains a test stubs, verifier test-suite and examples +for using eBPF. The examples use libbpf from tools/lib/bpf. + +Build dependencies +================== + +Compiling requires having installed: + * clang >= version 3.4.0 + * llvm >= version 3.7.1 + +Note that LLVM's tool 'llc' must support target 'bpf', list version +and supported targets with command: ``llc --version`` + +Clean and configuration +----------------------- + +It can be needed to clean tools, samples or kernel before trying new arch or +after some changes (on demand):: + + make -C tools clean + make -C samples/bpf clean + make clean + +Configure kernel, defconfig for instance:: + + make defconfig + +Kernel headers +-------------- + +There are usually dependencies to header files of the current kernel. +To avoid installing devel kernel headers system wide, as a normal +user, simply call:: + + make headers_install + +This will creates a local "usr/include" directory in the git/build top +level directory, that the make system automatically pickup first. + +Compiling +========= + +For building the BPF samples, issue the below command from the kernel +top level directory:: + + make M=samples/bpf + +It is also possible to call make from this directory. This will just +hide the invocation of make as above. + +Manually compiling LLVM with 'bpf' support +------------------------------------------ + +Since version 3.7.0, LLVM adds a proper LLVM backend target for the +BPF bytecode architecture. + +By default llvm will build all non-experimental backends including bpf. +To generate a smaller llc binary one can use:: + + -DLLVM_TARGETS_TO_BUILD="BPF" + +Quick sniplet for manually compiling LLVM and clang +(build dependencies are cmake and gcc-c++):: + + $ git clone http://llvm.org/git/llvm.git + $ cd llvm/tools + $ git clone --depth 1 http://llvm.org/git/clang.git + $ cd ..; mkdir build; cd build + $ cmake .. -DLLVM_TARGETS_TO_BUILD="BPF;X86" + $ make -j $(getconf _NPROCESSORS_ONLN) + +It is also possible to point make to the newly compiled 'llc' or +'clang' command via redefining LLC or CLANG on the make command line:: + + make M=samples/bpf LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang + +Cross compiling samples +----------------------- +In order to cross-compile, say for arm64 targets, export CROSS_COMPILE and ARCH +environment variables before calling make. But do this before clean, +cofiguration and header install steps described above. This will direct make to +build samples for the cross target:: + + export ARCH=arm64 + export CROSS_COMPILE="aarch64-linux-gnu-" + +Headers can be also installed on RFS of target board if need to keep them in +sync (not necessarily and it creates a local "usr/include" directory also):: + + make INSTALL_HDR_PATH=~/some_sysroot/usr headers_install + +Pointing LLC and CLANG is not necessarily if it's installed on HOST and have +in its targets appropriate arm64 arch (usually it has several arches). +Build samples:: + + make M=samples/bpf + +Or build samples with SYSROOT if some header or library is absent in toolchain, +say libelf, providing address to file system containing headers and libs, +can be RFS of target board:: + + make M=samples/bpf SYSROOT=~/some_sysroot diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h new file mode 100644 index 000000000..7048bb359 --- /dev/null +++ b/samples/bpf/asm_goto_workaround.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Facebook */ +#ifndef __ASM_GOTO_WORKAROUND_H +#define __ASM_GOTO_WORKAROUND_H + +/* + * This will bring in asm_volatile_goto and asm_inline macro definitions + * if enabled by compiler and config options. + */ +#include + +#ifdef asm_volatile_goto +#undef asm_volatile_goto +#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto") +#endif + +/* + * asm_inline is defined as asm __inline in "include/linux/compiler_types.h" + * if supported by the kernel's CC (i.e CONFIG_CC_HAS_ASM_INLINE) which is not + * supported by CLANG. + */ +#ifdef asm_inline +#undef asm_inline +#define asm_inline asm +#endif + +#define volatile(x...) volatile("") +#endif diff --git a/samples/bpf/bpf_insn.h b/samples/bpf/bpf_insn.h new file mode 100644 index 000000000..544237980 --- /dev/null +++ b/samples/bpf/bpf_insn.h @@ -0,0 +1,217 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* eBPF instruction mini library */ +#ifndef __BPF_INSN_H +#define __BPF_INSN_H + +struct bpf_insn; + +/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ + +#define BPF_ALU64_REG(OP, DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_ALU32_REG(OP, DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ + +#define BPF_ALU64_IMM(OP, DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_ALU32_IMM(OP, DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Short form of mov, dst_reg = src_reg */ + +#define BPF_MOV64_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_MOV32_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +/* Short form of mov, dst_reg = imm32 */ + +#define BPF_MOV64_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_MOV32_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ +#define BPF_LD_IMM64(DST, IMM) \ + BPF_LD_IMM64_RAW(DST, 0, IMM) + +#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_DW | BPF_IMM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = (__u32) (IMM) }), \ + ((struct bpf_insn) { \ + .code = 0, /* zero is reserved opcode */ \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = ((__u64) (IMM)) >> 32 }) + +#ifndef BPF_PSEUDO_MAP_FD +# define BPF_PSEUDO_MAP_FD 1 +#endif + +/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ +#define BPF_LD_MAP_FD(DST, MAP_FD) \ + BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) + + +/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ + +#define BPF_LD_ABS(SIZE, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Memory load, dst_reg = *(uint *) (src_reg + off16) */ + +#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Memory store, *(uint *) (dst_reg + off16) = src_reg */ + +#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ + +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Memory store, *(uint *) (dst_reg + off16) = imm32 */ + +#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ + +#define BPF_JMP_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ + +#define BPF_JMP_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Raw code statement block */ + +#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ + ((struct bpf_insn) { \ + .code = CODE, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = IMM }) + +/* Program exit */ + +#define BPF_EXIT_INSN() \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_EXIT, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = 0 }) + +#endif diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c new file mode 100644 index 000000000..c5ad528f0 --- /dev/null +++ b/samples/bpf/bpf_load.c @@ -0,0 +1,667 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_load.h" +#include "perf-sys.h" + +#define DEBUGFS "/sys/kernel/debug/tracing/" + +static char license[128]; +static int kern_version; +static bool processed_sec[128]; +char bpf_log_buf[BPF_LOG_BUF_SIZE]; +int map_fd[MAX_MAPS]; +int prog_fd[MAX_PROGS]; +int event_fd[MAX_PROGS]; +int prog_cnt; +int prog_array_fd = -1; + +struct bpf_map_data map_data[MAX_MAPS]; +int map_data_count; + +static int populate_prog_array(const char *event, int prog_fd) +{ + int ind = atoi(event), err; + + err = bpf_map_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY); + if (err < 0) { + printf("failed to store prog_fd in prog_array\n"); + return -1; + } + return 0; +} + +static int write_kprobe_events(const char *val) +{ + int fd, ret, flags; + + if (val == NULL) + return -1; + else if (val[0] == '\0') + flags = O_WRONLY | O_TRUNC; + else + flags = O_WRONLY | O_APPEND; + + fd = open(DEBUGFS "kprobe_events", flags); + + ret = write(fd, val, strlen(val)); + close(fd); + + return ret; +} + +static int load_and_attach(const char *event, struct bpf_insn *prog, int size) +{ + bool is_socket = strncmp(event, "socket", 6) == 0; + bool is_kprobe = strncmp(event, "kprobe/", 7) == 0; + bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; + bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; + bool is_raw_tracepoint = strncmp(event, "raw_tracepoint/", 15) == 0; + bool is_xdp = strncmp(event, "xdp", 3) == 0; + bool is_perf_event = strncmp(event, "perf_event", 10) == 0; + bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; + bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; + bool is_sockops = strncmp(event, "sockops", 7) == 0; + bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0; + bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0; + size_t insns_cnt = size / sizeof(struct bpf_insn); + enum bpf_prog_type prog_type; + char buf[256]; + int fd, efd, err, id; + struct perf_event_attr attr = {}; + + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW; + attr.sample_period = 1; + attr.wakeup_events = 1; + + if (is_socket) { + prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + } else if (is_kprobe || is_kretprobe) { + prog_type = BPF_PROG_TYPE_KPROBE; + } else if (is_tracepoint) { + prog_type = BPF_PROG_TYPE_TRACEPOINT; + } else if (is_raw_tracepoint) { + prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT; + } else if (is_xdp) { + prog_type = BPF_PROG_TYPE_XDP; + } else if (is_perf_event) { + prog_type = BPF_PROG_TYPE_PERF_EVENT; + } else if (is_cgroup_skb) { + prog_type = BPF_PROG_TYPE_CGROUP_SKB; + } else if (is_cgroup_sk) { + prog_type = BPF_PROG_TYPE_CGROUP_SOCK; + } else if (is_sockops) { + prog_type = BPF_PROG_TYPE_SOCK_OPS; + } else if (is_sk_skb) { + prog_type = BPF_PROG_TYPE_SK_SKB; + } else if (is_sk_msg) { + prog_type = BPF_PROG_TYPE_SK_MSG; + } else { + printf("Unknown event '%s'\n", event); + return -1; + } + + if (prog_cnt == MAX_PROGS) + return -1; + + fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version, + bpf_log_buf, BPF_LOG_BUF_SIZE); + if (fd < 0) { + printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf); + return -1; + } + + prog_fd[prog_cnt++] = fd; + + if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk) + return 0; + + if (is_socket || is_sockops || is_sk_skb || is_sk_msg) { + if (is_socket) + event += 6; + else + event += 7; + if (*event != '/') + return 0; + event++; + if (!isdigit(*event)) { + printf("invalid prog number\n"); + return -1; + } + return populate_prog_array(event, fd); + } + + if (is_raw_tracepoint) { + efd = bpf_raw_tracepoint_open(event + 15, fd); + if (efd < 0) { + printf("tracepoint %s %s\n", event + 15, strerror(errno)); + return -1; + } + event_fd[prog_cnt - 1] = efd; + return 0; + } + + if (is_kprobe || is_kretprobe) { + bool need_normal_check = true; + const char *event_prefix = ""; + + if (is_kprobe) + event += 7; + else + event += 10; + + if (*event == 0) { + printf("event name cannot be empty\n"); + return -1; + } + + if (isdigit(*event)) + return populate_prog_array(event, fd); + +#ifdef __x86_64__ + if (strncmp(event, "sys_", 4) == 0) { + snprintf(buf, sizeof(buf), "%c:__x64_%s __x64_%s", + is_kprobe ? 'p' : 'r', event, event); + err = write_kprobe_events(buf); + if (err >= 0) { + need_normal_check = false; + event_prefix = "__x64_"; + } + } +#endif + if (need_normal_check) { + snprintf(buf, sizeof(buf), "%c:%s %s", + is_kprobe ? 'p' : 'r', event, event); + err = write_kprobe_events(buf); + if (err < 0) { + printf("failed to create kprobe '%s' error '%s'\n", + event, strerror(errno)); + return -1; + } + } + + strcpy(buf, DEBUGFS); + strcat(buf, "events/kprobes/"); + strcat(buf, event_prefix); + strcat(buf, event); + strcat(buf, "/id"); + } else if (is_tracepoint) { + event += 11; + + if (*event == 0) { + printf("event name cannot be empty\n"); + return -1; + } + strcpy(buf, DEBUGFS); + strcat(buf, "events/"); + strcat(buf, event); + strcat(buf, "/id"); + } + + efd = open(buf, O_RDONLY, 0); + if (efd < 0) { + printf("failed to open event %s\n", event); + return -1; + } + + err = read(efd, buf, sizeof(buf)); + if (err < 0 || err >= sizeof(buf)) { + printf("read from '%s' failed '%s'\n", event, strerror(errno)); + return -1; + } + + close(efd); + + buf[err] = 0; + id = atoi(buf); + attr.config = id; + + efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); + if (efd < 0) { + printf("event %d fd %d err %s\n", id, efd, strerror(errno)); + return -1; + } + event_fd[prog_cnt - 1] = efd; + err = ioctl(efd, PERF_EVENT_IOC_ENABLE, 0); + if (err < 0) { + printf("ioctl PERF_EVENT_IOC_ENABLE failed err %s\n", + strerror(errno)); + return -1; + } + err = ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd); + if (err < 0) { + printf("ioctl PERF_EVENT_IOC_SET_BPF failed err %s\n", + strerror(errno)); + return -1; + } + + return 0; +} + +static int load_maps(struct bpf_map_data *maps, int nr_maps, + fixup_map_cb fixup_map) +{ + int i, numa_node; + + for (i = 0; i < nr_maps; i++) { + if (fixup_map) { + fixup_map(&maps[i], i); + /* Allow userspace to assign map FD prior to creation */ + if (maps[i].fd != -1) { + map_fd[i] = maps[i].fd; + continue; + } + } + + numa_node = maps[i].def.map_flags & BPF_F_NUMA_NODE ? + maps[i].def.numa_node : -1; + + if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) { + int inner_map_fd = map_fd[maps[i].def.inner_map_idx]; + + map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type, + maps[i].name, + maps[i].def.key_size, + inner_map_fd, + maps[i].def.max_entries, + maps[i].def.map_flags, + numa_node); + } else { + map_fd[i] = bpf_create_map_node(maps[i].def.type, + maps[i].name, + maps[i].def.key_size, + maps[i].def.value_size, + maps[i].def.max_entries, + maps[i].def.map_flags, + numa_node); + } + if (map_fd[i] < 0) { + printf("failed to create map %d (%s): %d %s\n", + i, maps[i].name, errno, strerror(errno)); + return 1; + } + maps[i].fd = map_fd[i]; + + if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY) + prog_array_fd = map_fd[i]; + } + return 0; +} + +static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname, + GElf_Shdr *shdr, Elf_Data **data) +{ + Elf_Scn *scn; + + scn = elf_getscn(elf, i); + if (!scn) + return 1; + + if (gelf_getshdr(scn, shdr) != shdr) + return 2; + + *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name); + if (!*shname || !shdr->sh_size) + return 3; + + *data = elf_getdata(scn, 0); + if (!*data || elf_getdata(scn, *data) != NULL) + return 4; + + return 0; +} + +static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, + GElf_Shdr *shdr, struct bpf_insn *insn, + struct bpf_map_data *maps, int nr_maps) +{ + int i, nrels; + + nrels = shdr->sh_size / shdr->sh_entsize; + + for (i = 0; i < nrels; i++) { + GElf_Sym sym; + GElf_Rel rel; + unsigned int insn_idx; + bool match = false; + int j, map_idx; + + gelf_getrel(data, i, &rel); + + insn_idx = rel.r_offset / sizeof(struct bpf_insn); + + gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym); + + if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { + printf("invalid relo for insn[%d].code 0x%x\n", + insn_idx, insn[insn_idx].code); + return 1; + } + insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + + /* Match FD relocation against recorded map_data[] offset */ + for (map_idx = 0; map_idx < nr_maps; map_idx++) { + if (maps[map_idx].elf_offset == sym.st_value) { + match = true; + break; + } + } + if (match) { + insn[insn_idx].imm = maps[map_idx].fd; + } else { + printf("invalid relo for insn[%d] no map_data match\n", + insn_idx); + return 1; + } + } + + return 0; +} + +static int cmp_symbols(const void *l, const void *r) +{ + const GElf_Sym *lsym = (const GElf_Sym *)l; + const GElf_Sym *rsym = (const GElf_Sym *)r; + + if (lsym->st_value < rsym->st_value) + return -1; + else if (lsym->st_value > rsym->st_value) + return 1; + else + return 0; +} + +static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx, + Elf *elf, Elf_Data *symbols, int strtabidx) +{ + int map_sz_elf, map_sz_copy; + bool validate_zero = false; + Elf_Data *data_maps; + int i, nr_maps; + GElf_Sym *sym; + Elf_Scn *scn; + int copy_sz; + + if (maps_shndx < 0) + return -EINVAL; + if (!symbols) + return -EINVAL; + + /* Get data for maps section via elf index */ + scn = elf_getscn(elf, maps_shndx); + if (scn) + data_maps = elf_getdata(scn, NULL); + if (!scn || !data_maps) { + printf("Failed to get Elf_Data from maps section %d\n", + maps_shndx); + return -EINVAL; + } + + /* For each map get corrosponding symbol table entry */ + sym = calloc(MAX_MAPS+1, sizeof(GElf_Sym)); + for (i = 0, nr_maps = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { + assert(nr_maps < MAX_MAPS+1); + if (!gelf_getsym(symbols, i, &sym[nr_maps])) + continue; + if (sym[nr_maps].st_shndx != maps_shndx) + continue; + /* Only increment iif maps section */ + nr_maps++; + } + + /* Align to map_fd[] order, via sort on offset in sym.st_value */ + qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols); + + /* Keeping compatible with ELF maps section changes + * ------------------------------------------------ + * The program size of struct bpf_load_map_def is known by loader + * code, but struct stored in ELF file can be different. + * + * Unfortunately sym[i].st_size is zero. To calculate the + * struct size stored in the ELF file, assume all struct have + * the same size, and simply divide with number of map + * symbols. + */ + map_sz_elf = data_maps->d_size / nr_maps; + map_sz_copy = sizeof(struct bpf_load_map_def); + if (map_sz_elf < map_sz_copy) { + /* + * Backward compat, loading older ELF file with + * smaller struct, keeping remaining bytes zero. + */ + map_sz_copy = map_sz_elf; + } else if (map_sz_elf > map_sz_copy) { + /* + * Forward compat, loading newer ELF file with larger + * struct with unknown features. Assume zero means + * feature not used. Thus, validate rest of struct + * data is zero. + */ + validate_zero = true; + } + + /* Memcpy relevant part of ELF maps data to loader maps */ + for (i = 0; i < nr_maps; i++) { + struct bpf_load_map_def *def; + unsigned char *addr, *end; + const char *map_name; + size_t offset; + + map_name = elf_strptr(elf, strtabidx, sym[i].st_name); + maps[i].name = strdup(map_name); + if (!maps[i].name) { + printf("strdup(%s): %s(%d)\n", map_name, + strerror(errno), errno); + free(sym); + return -errno; + } + + /* Symbol value is offset into ELF maps section data area */ + offset = sym[i].st_value; + def = (struct bpf_load_map_def *)(data_maps->d_buf + offset); + maps[i].elf_offset = offset; + memset(&maps[i].def, 0, sizeof(struct bpf_load_map_def)); + memcpy(&maps[i].def, def, map_sz_copy); + + /* Verify no newer features were requested */ + if (validate_zero) { + addr = (unsigned char *) def + map_sz_copy; + end = (unsigned char *) def + map_sz_elf; + for (; addr < end; addr++) { + if (*addr != 0) { + free(sym); + return -EFBIG; + } + } + } + } + + free(sym); + return nr_maps; +} + +static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) +{ + int fd, i, ret, maps_shndx = -1, strtabidx = -1; + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr, shdr_prog; + Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL; + char *shname, *shname_prog; + int nr_maps = 0; + + /* reset global variables */ + kern_version = 0; + memset(license, 0, sizeof(license)); + memset(processed_sec, 0, sizeof(processed_sec)); + + if (elf_version(EV_CURRENT) == EV_NONE) + return 1; + + fd = open(path, O_RDONLY, 0); + if (fd < 0) + return 1; + + elf = elf_begin(fd, ELF_C_READ, NULL); + + if (!elf) + return 1; + + if (gelf_getehdr(elf, &ehdr) != &ehdr) + return 1; + + /* clear all kprobes */ + i = write_kprobe_events(""); + + /* scan over all elf sections to get license and map info */ + for (i = 1; i < ehdr.e_shnum; i++) { + + if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) + continue; + + if (0) /* helpful for llvm debugging */ + printf("section %d:%s data %p size %zd link %d flags %d\n", + i, shname, data->d_buf, data->d_size, + shdr.sh_link, (int) shdr.sh_flags); + + if (strcmp(shname, "license") == 0) { + processed_sec[i] = true; + memcpy(license, data->d_buf, data->d_size); + } else if (strcmp(shname, "version") == 0) { + processed_sec[i] = true; + if (data->d_size != sizeof(int)) { + printf("invalid size of version section %zd\n", + data->d_size); + return 1; + } + memcpy(&kern_version, data->d_buf, sizeof(int)); + } else if (strcmp(shname, "maps") == 0) { + int j; + + maps_shndx = i; + data_maps = data; + for (j = 0; j < MAX_MAPS; j++) + map_data[j].fd = -1; + } else if (shdr.sh_type == SHT_SYMTAB) { + strtabidx = shdr.sh_link; + symbols = data; + } + } + + ret = 1; + + if (!symbols) { + printf("missing SHT_SYMTAB section\n"); + goto done; + } + + if (data_maps) { + nr_maps = load_elf_maps_section(map_data, maps_shndx, + elf, symbols, strtabidx); + if (nr_maps < 0) { + printf("Error: Failed loading ELF maps (errno:%d):%s\n", + nr_maps, strerror(-nr_maps)); + goto done; + } + if (load_maps(map_data, nr_maps, fixup_map)) + goto done; + map_data_count = nr_maps; + + processed_sec[maps_shndx] = true; + } + + /* process all relo sections, and rewrite bpf insns for maps */ + for (i = 1; i < ehdr.e_shnum; i++) { + if (processed_sec[i]) + continue; + + if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) + continue; + + if (shdr.sh_type == SHT_REL) { + struct bpf_insn *insns; + + /* locate prog sec that need map fixup (relocations) */ + if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog, + &shdr_prog, &data_prog)) + continue; + + if (shdr_prog.sh_type != SHT_PROGBITS || + !(shdr_prog.sh_flags & SHF_EXECINSTR)) + continue; + + insns = (struct bpf_insn *) data_prog->d_buf; + processed_sec[i] = true; /* relo section */ + + if (parse_relo_and_apply(data, symbols, &shdr, insns, + map_data, nr_maps)) + continue; + } + } + + /* load programs */ + for (i = 1; i < ehdr.e_shnum; i++) { + + if (processed_sec[i]) + continue; + + if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) + continue; + + if (memcmp(shname, "kprobe/", 7) == 0 || + memcmp(shname, "kretprobe/", 10) == 0 || + memcmp(shname, "tracepoint/", 11) == 0 || + memcmp(shname, "raw_tracepoint/", 15) == 0 || + memcmp(shname, "xdp", 3) == 0 || + memcmp(shname, "perf_event", 10) == 0 || + memcmp(shname, "socket", 6) == 0 || + memcmp(shname, "cgroup/", 7) == 0 || + memcmp(shname, "sockops", 7) == 0 || + memcmp(shname, "sk_skb", 6) == 0 || + memcmp(shname, "sk_msg", 6) == 0) { + ret = load_and_attach(shname, data->d_buf, + data->d_size); + if (ret != 0) + goto done; + } + } + +done: + close(fd); + return ret; +} + +int load_bpf_file(char *path) +{ + return do_load_bpf_file(path, NULL); +} + +int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map) +{ + return do_load_bpf_file(path, fixup_map); +} diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h new file mode 100644 index 000000000..4fcd258c6 --- /dev/null +++ b/samples/bpf/bpf_load.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __BPF_LOAD_H +#define __BPF_LOAD_H + +#include + +#define MAX_MAPS 32 +#define MAX_PROGS 32 + +struct bpf_load_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; + unsigned int map_flags; + unsigned int inner_map_idx; + unsigned int numa_node; +}; + +struct bpf_map_data { + int fd; + char *name; + size_t elf_offset; + struct bpf_load_map_def def; +}; + +typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx); + +extern int prog_fd[MAX_PROGS]; +extern int event_fd[MAX_PROGS]; +extern char bpf_log_buf[BPF_LOG_BUF_SIZE]; +extern int prog_cnt; + +/* There is a one-to-one mapping between map_fd[] and map_data[]. + * The map_data[] just contains more rich info on the given map. + */ +extern int map_fd[MAX_MAPS]; +extern struct bpf_map_data map_data[MAX_MAPS]; +extern int map_data_count; + +/* parses elf file compiled by llvm .c->.o + * . parses 'maps' section and creates maps via BPF syscall + * . parses 'license' section and passes it to syscall + * . parses elf relocations for BPF maps and adjusts BPF_LD_IMM64 insns by + * storing map_fd into insn->imm and marking such insns as BPF_PSEUDO_MAP_FD + * . loads eBPF programs via BPF syscall + * + * One ELF file can contain multiple BPF programs which will be loaded + * and their FDs stored stored in prog_fd array + * + * returns zero on success + */ +int load_bpf_file(char *path); +int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map); + +int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); +#endif diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c new file mode 100644 index 000000000..deb0e3e03 --- /dev/null +++ b/samples/bpf/cookie_uid_helper_example.c @@ -0,0 +1,323 @@ +/* This test is a demo of using get_socket_uid and get_socket_cookie + * helper function to do per socket based network traffic monitoring. + * It requires iptables version higher then 1.6.1. to load pinned eBPF + * program into the xt_bpf match. + * + * TEST: + * ./run_cookie_uid_helper_example.sh -option + * option: + * -t: do traffic monitoring test, the program will continuously + * print out network traffic happens after program started A sample + * output is shown below: + * + * cookie: 877, uid: 0x3e8, Pakcet Count: 20, Bytes Count: 11058 + * cookie: 132, uid: 0x0, Pakcet Count: 2, Bytes Count: 286 + * cookie: 812, uid: 0x3e8, Pakcet Count: 3, Bytes Count: 1726 + * cookie: 802, uid: 0x3e8, Pakcet Count: 2, Bytes Count: 104 + * cookie: 877, uid: 0x3e8, Pakcet Count: 20, Bytes Count: 11058 + * cookie: 831, uid: 0x3e8, Pakcet Count: 2, Bytes Count: 104 + * cookie: 0, uid: 0x0, Pakcet Count: 6, Bytes Count: 712 + * cookie: 880, uid: 0xfffe, Pakcet Count: 1, Bytes Count: 70 + * + * -s: do getsockopt SO_COOKIE test, the program will set up a pair of + * UDP sockets and send packets between them. And read out the traffic data + * directly from the ebpf map based on the socket cookie. + * + * Clean up: if using shell script, the script file will delete the iptables + * rule and unmount the bpf program when exit. Else the iptables rule need + * to be deleted by hand, see run_cookie_uid_helper_example.sh for detail. + */ + +#define _GNU_SOURCE + +#define offsetof(type, member) __builtin_offsetof(type, member) +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_insn.h" + +#define PORT 8888 + +struct stats { + uint32_t uid; + uint64_t packets; + uint64_t bytes; +}; + +static int map_fd, prog_fd; + +static bool test_finish; + +static void maps_create(void) +{ + map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(uint32_t), + sizeof(struct stats), 100, 0); + if (map_fd < 0) + error(1, errno, "map create failed!\n"); +} + +static void prog_load(void) +{ + static char log_buf[1 << 16]; + + struct bpf_insn prog[] = { + /* + * Save sk_buff for future usage. value stored in R6 to R10 will + * not be reset after a bpf helper function call. + */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* + * pc1: BPF_FUNC_get_socket_cookie takes one parameter, + * R1: sk_buff + */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_socket_cookie), + /* pc2-4: save &socketCookie to r7 for future usage*/ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + /* + * pc5-8: set up the registers for BPF_FUNC_map_lookup_elem, + * it takes two parameters (R1: map_fd, R2: &socket_cookie) + */ + BPF_LD_MAP_FD(BPF_REG_1, map_fd), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* + * pc9. if r0 != 0x0, go to pc+14, since we have the cookie + * stored already + * Otherwise do pc10-22 to setup a new data entry. + */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 14), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_socket_uid), + /* + * Place a struct stats in the R10 stack and sequentially + * place the member value into the memory. Packets value + * is set by directly place a IMM value 1 into the stack. + */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, + -32 + (__s16)offsetof(struct stats, uid)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, + -32 + (__s16)offsetof(struct stats, packets), 1), + /* + * __sk_buff is a special struct used for eBPF program to + * directly access some sk_buff field. + */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, + -32 + (__s16)offsetof(struct stats, bytes)), + /* + * add new map entry using BPF_FUNC_map_update_elem, it takes + * 4 parameters (R1: map_fd, R2: &socket_cookie, R3: &stats, + * R4: flags) + */ + BPF_LD_MAP_FD(BPF_REG_1, map_fd), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -32), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_update_elem), + BPF_JMP_IMM(BPF_JA, 0, 0, 5), + /* + * pc24-30 update the packet info to a exist data entry, it can + * be done by directly write to pointers instead of using + * BPF_FUNC_map_update_elem helper function + */ + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_STX_XADD(BPF_DW, BPF_REG_9, BPF_REG_1, + offsetof(struct stats, packets)), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_STX_XADD(BPF_DW, BPF_REG_9, BPF_REG_1, + offsetof(struct stats, bytes)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }; + prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, + ARRAY_SIZE(prog), "GPL", 0, + log_buf, sizeof(log_buf)); + if (prog_fd < 0) + error(1, errno, "failed to load prog\n%s\n", log_buf); +} + +static void prog_attach_iptables(char *file) +{ + int ret; + char rules[100]; + + if (bpf_obj_pin(prog_fd, file)) + error(1, errno, "bpf_obj_pin"); + if (strlen(file) > 50) { + printf("file path too long: %s\n", file); + exit(1); + } + sprintf(rules, "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT", + file); + ret = system(rules); + if (ret < 0) { + printf("iptables rule update failed: %d/n", WEXITSTATUS(ret)); + exit(1); + } +} + +static void print_table(void) +{ + struct stats curEntry; + uint32_t curN = UINT32_MAX; + uint32_t nextN; + int res; + + while (bpf_map_get_next_key(map_fd, &curN, &nextN) > -1) { + curN = nextN; + res = bpf_map_lookup_elem(map_fd, &curN, &curEntry); + if (res < 0) { + error(1, errno, "fail to get entry value of Key: %u\n", + curN); + } else { + printf("cookie: %u, uid: 0x%x, Packet Count: %lu," + " Bytes Count: %lu\n", curN, curEntry.uid, + curEntry.packets, curEntry.bytes); + } + } +} + +static void udp_client(void) +{ + struct sockaddr_in si_other = {0}; + struct sockaddr_in si_me = {0}; + struct stats dataEntry; + int s_rcv, s_send, i, recv_len; + char message = 'a'; + char buf; + uint64_t cookie; + int res; + socklen_t cookie_len = sizeof(cookie); + socklen_t slen = sizeof(si_other); + + s_rcv = socket(PF_INET, SOCK_DGRAM, 0); + if (s_rcv < 0) + error(1, errno, "rcv socket creat failed!\n"); + si_other.sin_family = AF_INET; + si_other.sin_port = htons(PORT); + if (inet_aton("127.0.0.1", &si_other.sin_addr) == 0) + error(1, errno, "inet_aton\n"); + if (bind(s_rcv, (struct sockaddr *)&si_other, sizeof(si_other)) == -1) + error(1, errno, "bind\n"); + s_send = socket(PF_INET, SOCK_DGRAM, 0); + if (s_send < 0) + error(1, errno, "send socket creat failed!\n"); + res = getsockopt(s_send, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len); + if (res < 0) + printf("get cookie failed: %s\n", strerror(errno)); + res = bpf_map_lookup_elem(map_fd, &cookie, &dataEntry); + if (res != -1) + error(1, errno, "socket stat found while flow not active\n"); + for (i = 0; i < 10; i++) { + res = sendto(s_send, &message, sizeof(message), 0, + (struct sockaddr *)&si_other, slen); + if (res == -1) + error(1, errno, "send\n"); + if (res != sizeof(message)) + error(1, 0, "%uB != %luB\n", res, sizeof(message)); + recv_len = recvfrom(s_rcv, &buf, sizeof(buf), 0, + (struct sockaddr *)&si_me, &slen); + if (recv_len < 0) + error(1, errno, "receive\n"); + res = memcmp(&(si_other.sin_addr), &(si_me.sin_addr), + sizeof(si_me.sin_addr)); + if (res != 0) + error(1, EFAULT, "sender addr error: %d\n", res); + printf("Message received: %c\n", buf); + res = bpf_map_lookup_elem(map_fd, &cookie, &dataEntry); + if (res < 0) + error(1, errno, "lookup sk stat failed, cookie: %lu\n", + cookie); + printf("cookie: %lu, uid: 0x%x, Packet Count: %lu," + " Bytes Count: %lu\n\n", cookie, dataEntry.uid, + dataEntry.packets, dataEntry.bytes); + } + close(s_send); + close(s_rcv); +} + +static int usage(void) +{ + printf("Usage: ./run_cookie_uid_helper_example.sh" + " bpfObjName -option\n" + " -t traffic monitor test\n" + " -s getsockopt cookie test\n"); + return 1; +} + +static void finish(int ret) +{ + test_finish = true; +} + +int main(int argc, char *argv[]) +{ + int opt; + bool cfg_test_traffic = false; + bool cfg_test_cookie = false; + + if (argc != 3) + return usage(); + while ((opt = getopt(argc, argv, "ts")) != -1) { + switch (opt) { + case 't': + cfg_test_traffic = true; + break; + case 's': + cfg_test_cookie = true; + break; + + default: + printf("unknown option %c\n", opt); + usage(); + return -1; + } + } + maps_create(); + prog_load(); + prog_attach_iptables(argv[2]); + if (cfg_test_traffic) { + if (signal(SIGINT, finish) == SIG_ERR) + error(1, errno, "register SIGINT handler failed"); + if (signal(SIGTERM, finish) == SIG_ERR) + error(1, errno, "register SIGTERM handler failed"); + while (!test_finish) { + print_table(); + printf("\n"); + sleep(1); + }; + } else if (cfg_test_cookie) { + udp_client(); + } + close(prog_fd); + close(map_fd); + return 0; +} diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c new file mode 100644 index 000000000..5aefd19cd --- /dev/null +++ b/samples/bpf/cpustat_kern.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include + +/* + * The CPU number, cstate number and pstate number are based + * on 96boards Hikey with octa CA53 CPUs. + * + * Every CPU have three idle states for cstate: + * WFI, CPU_OFF, CLUSTER_OFF + * + * Every CPU have 5 operating points: + * 208MHz, 432MHz, 729MHz, 960MHz, 1200MHz + * + * This code is based on these assumption and other platforms + * need to adjust these definitions. + */ +#define MAX_CPU 8 +#define MAX_PSTATE_ENTRIES 5 +#define MAX_CSTATE_ENTRIES 3 + +static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 }; + +/* + * my_map structure is used to record cstate and pstate index and + * timestamp (Idx, Ts), when new event incoming we need to update + * combination for new state index and timestamp (Idx`, Ts`). + * + * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time + * interval for the previous state: Duration(Idx) = Ts` - Ts. + * + * Every CPU has one below array for recording state index and + * timestamp, and record for cstate and pstate saperately: + * + * +--------------------------+ + * | cstate timestamp | + * +--------------------------+ + * | cstate index | + * +--------------------------+ + * | pstate timestamp | + * +--------------------------+ + * | pstate index | + * +--------------------------+ + */ +#define MAP_OFF_CSTATE_TIME 0 +#define MAP_OFF_CSTATE_IDX 1 +#define MAP_OFF_PSTATE_TIME 2 +#define MAP_OFF_PSTATE_IDX 3 +#define MAP_OFF_NUM 4 + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_CPU * MAP_OFF_NUM); +} my_map SEC(".maps"); + +/* cstate_duration records duration time for every idle state per CPU */ +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES); +} cstate_duration SEC(".maps"); + +/* pstate_duration records duration time for every operating point per CPU */ +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES); +} pstate_duration SEC(".maps"); + +/* + * The trace events for cpu_idle and cpu_frequency are taken from: + * /sys/kernel/debug/tracing/events/power/cpu_idle/format + * /sys/kernel/debug/tracing/events/power/cpu_frequency/format + * + * These two events have same format, so define one common structure. + */ +struct cpu_args { + u64 pad; + u32 state; + u32 cpu_id; +}; + +/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */ +static u32 find_cpu_pstate_idx(u32 frequency) +{ + u32 i; + + for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) { + if (frequency == cpu_opps[i]) + return i; + } + + return i; +} + +SEC("tracepoint/power/cpu_idle") +int bpf_prog1(struct cpu_args *ctx) +{ + u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta; + u32 key, cpu, pstate_idx; + u64 *val; + + if (ctx->cpu_id > MAX_CPU) + return 0; + + cpu = ctx->cpu_id; + + key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME; + cts = bpf_map_lookup_elem(&my_map, &key); + if (!cts) + return 0; + + key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX; + cstate = bpf_map_lookup_elem(&my_map, &key); + if (!cstate) + return 0; + + key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME; + pts = bpf_map_lookup_elem(&my_map, &key); + if (!pts) + return 0; + + key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX; + pstate = bpf_map_lookup_elem(&my_map, &key); + if (!pstate) + return 0; + + prev_state = *cstate; + *cstate = ctx->state; + + if (!*cts) { + *cts = bpf_ktime_get_ns(); + return 0; + } + + cur_ts = bpf_ktime_get_ns(); + delta = cur_ts - *cts; + *cts = cur_ts; + + /* + * When state doesn't equal to (u32)-1, the cpu will enter + * one idle state; for this case we need to record interval + * for the pstate. + * + * OPP2 + * +---------------------+ + * OPP1 | | + * ---------+ | + * | Idle state + * +--------------- + * + * |<- pstate duration ->| + * ^ ^ + * pts cur_ts + */ + if (ctx->state != (u32)-1) { + + /* record pstate after have first cpu_frequency event */ + if (!*pts) + return 0; + + delta = cur_ts - *pts; + + pstate_idx = find_cpu_pstate_idx(*pstate); + if (pstate_idx >= MAX_PSTATE_ENTRIES) + return 0; + + key = cpu * MAX_PSTATE_ENTRIES + pstate_idx; + val = bpf_map_lookup_elem(&pstate_duration, &key); + if (val) + __sync_fetch_and_add((long *)val, delta); + + /* + * When state equal to (u32)-1, the cpu just exits from one + * specific idle state; for this case we need to record + * interval for the pstate. + * + * OPP2 + * -----------+ + * | OPP1 + * | +----------- + * | Idle state | + * +---------------------+ + * + * |<- cstate duration ->| + * ^ ^ + * cts cur_ts + */ + } else { + + key = cpu * MAX_CSTATE_ENTRIES + prev_state; + val = bpf_map_lookup_elem(&cstate_duration, &key); + if (val) + __sync_fetch_and_add((long *)val, delta); + } + + /* Update timestamp for pstate as new start time */ + if (*pts) + *pts = cur_ts; + + return 0; +} + +SEC("tracepoint/power/cpu_frequency") +int bpf_prog2(struct cpu_args *ctx) +{ + u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta; + u32 key, cpu, pstate_idx; + u64 *val; + + cpu = ctx->cpu_id; + + key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME; + pts = bpf_map_lookup_elem(&my_map, &key); + if (!pts) + return 0; + + key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX; + pstate = bpf_map_lookup_elem(&my_map, &key); + if (!pstate) + return 0; + + key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX; + cstate = bpf_map_lookup_elem(&my_map, &key); + if (!cstate) + return 0; + + prev_state = *pstate; + *pstate = ctx->state; + + if (!*pts) { + *pts = bpf_ktime_get_ns(); + return 0; + } + + cur_ts = bpf_ktime_get_ns(); + delta = cur_ts - *pts; + *pts = cur_ts; + + /* When CPU is in idle, bail out to skip pstate statistics */ + if (*cstate != (u32)(-1)) + return 0; + + /* + * The cpu changes to another different OPP (in below diagram + * change frequency from OPP3 to OPP1), need recording interval + * for previous frequency OPP3 and update timestamp as start + * time for new frequency OPP1. + * + * OPP3 + * +---------------------+ + * OPP2 | | + * ---------+ | + * | OPP1 + * +--------------- + * + * |<- pstate duration ->| + * ^ ^ + * pts cur_ts + */ + pstate_idx = find_cpu_pstate_idx(*pstate); + if (pstate_idx >= MAX_PSTATE_ENTRIES) + return 0; + + key = cpu * MAX_PSTATE_ENTRIES + pstate_idx; + val = bpf_map_lookup_elem(&pstate_duration, &key); + if (val) + __sync_fetch_and_add((long *)val, delta); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c new file mode 100644 index 000000000..96675985e --- /dev/null +++ b/samples/bpf/cpustat_user.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static int cstate_map_fd, pstate_map_fd; + +#define MAX_CPU 8 +#define MAX_PSTATE_ENTRIES 5 +#define MAX_CSTATE_ENTRIES 3 +#define MAX_STARS 40 + +#define CPUFREQ_MAX_SYSFS_PATH "/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq" +#define CPUFREQ_LOWEST_FREQ "208000" +#define CPUFREQ_HIGHEST_FREQ "12000000" + +struct cpu_stat_data { + unsigned long cstate[MAX_CSTATE_ENTRIES]; + unsigned long pstate[MAX_PSTATE_ENTRIES]; +}; + +static struct cpu_stat_data stat_data[MAX_CPU]; + +static void cpu_stat_print(void) +{ + int i, j; + char state_str[sizeof("cstate-9")]; + struct cpu_stat_data *data; + + /* Clear screen */ + printf("\033[2J"); + + /* Header */ + printf("\nCPU states statistics:\n"); + printf("%-10s ", "state(ms)"); + + for (i = 0; i < MAX_CSTATE_ENTRIES; i++) { + sprintf(state_str, "cstate-%d", i); + printf("%-11s ", state_str); + } + + for (i = 0; i < MAX_PSTATE_ENTRIES; i++) { + sprintf(state_str, "pstate-%d", i); + printf("%-11s ", state_str); + } + + printf("\n"); + + for (j = 0; j < MAX_CPU; j++) { + data = &stat_data[j]; + + printf("CPU-%-6d ", j); + for (i = 0; i < MAX_CSTATE_ENTRIES; i++) + printf("%-11ld ", data->cstate[i] / 1000000); + + for (i = 0; i < MAX_PSTATE_ENTRIES; i++) + printf("%-11ld ", data->pstate[i] / 1000000); + + printf("\n"); + } +} + +static void cpu_stat_update(int cstate_fd, int pstate_fd) +{ + unsigned long key, value; + int c, i; + + for (c = 0; c < MAX_CPU; c++) { + for (i = 0; i < MAX_CSTATE_ENTRIES; i++) { + key = c * MAX_CSTATE_ENTRIES + i; + bpf_map_lookup_elem(cstate_fd, &key, &value); + stat_data[c].cstate[i] = value; + } + + for (i = 0; i < MAX_PSTATE_ENTRIES; i++) { + key = c * MAX_PSTATE_ENTRIES + i; + bpf_map_lookup_elem(pstate_fd, &key, &value); + stat_data[c].pstate[i] = value; + } + } +} + +/* + * This function is copied from 'idlestat' tool function + * idlestat_wake_all() in idlestate.c. + * + * It sets the self running task affinity to cpus one by one so can wake up + * the specific CPU to handle scheduling; this results in all cpus can be + * waken up once and produce ftrace event 'trace_cpu_idle'. + */ +static int cpu_stat_inject_cpu_idle_event(void) +{ + int rcpu, i, ret; + cpu_set_t cpumask; + cpu_set_t original_cpumask; + + ret = sysconf(_SC_NPROCESSORS_CONF); + if (ret < 0) + return -1; + + rcpu = sched_getcpu(); + if (rcpu < 0) + return -1; + + /* Keep track of the CPUs we will run on */ + sched_getaffinity(0, sizeof(original_cpumask), &original_cpumask); + + for (i = 0; i < ret; i++) { + + /* Pointless to wake up ourself */ + if (i == rcpu) + continue; + + /* Pointless to wake CPUs we will not run on */ + if (!CPU_ISSET(i, &original_cpumask)) + continue; + + CPU_ZERO(&cpumask); + CPU_SET(i, &cpumask); + + sched_setaffinity(0, sizeof(cpumask), &cpumask); + } + + /* Enable all the CPUs of the original mask */ + sched_setaffinity(0, sizeof(original_cpumask), &original_cpumask); + return 0; +} + +/* + * It's possible to have no any frequency change for long time and cannot + * get ftrace event 'trace_cpu_frequency' for long period, this introduces + * big deviation for pstate statistics. + * + * To solve this issue, below code forces to set 'scaling_max_freq' to 208MHz + * for triggering ftrace event 'trace_cpu_frequency' and then recovery back to + * the maximum frequency value 1.2GHz. + */ +static int cpu_stat_inject_cpu_frequency_event(void) +{ + int len, fd; + + fd = open(CPUFREQ_MAX_SYSFS_PATH, O_WRONLY); + if (fd < 0) { + printf("failed to open scaling_max_freq, errno=%d\n", errno); + return fd; + } + + len = write(fd, CPUFREQ_LOWEST_FREQ, strlen(CPUFREQ_LOWEST_FREQ)); + if (len < 0) { + printf("failed to open scaling_max_freq, errno=%d\n", errno); + goto err; + } + + len = write(fd, CPUFREQ_HIGHEST_FREQ, strlen(CPUFREQ_HIGHEST_FREQ)); + if (len < 0) { + printf("failed to open scaling_max_freq, errno=%d\n", errno); + goto err; + } + +err: + close(fd); + return len; +} + +static void int_exit(int sig) +{ + cpu_stat_inject_cpu_idle_event(); + cpu_stat_inject_cpu_frequency_event(); + cpu_stat_update(cstate_map_fd, pstate_map_fd); + cpu_stat_print(); + exit(0); +} + +int main(int argc, char **argv) +{ + struct bpf_link *link = NULL; + struct bpf_program *prog; + struct bpf_object *obj; + char filename[256]; + int ret; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return 0; + } + + prog = bpf_object__find_program_by_name(obj, "bpf_prog1"); + if (!prog) { + printf("finding a prog in obj file failed\n"); + goto cleanup; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + cstate_map_fd = bpf_object__find_map_fd_by_name(obj, "cstate_duration"); + pstate_map_fd = bpf_object__find_map_fd_by_name(obj, "pstate_duration"); + if (cstate_map_fd < 0 || pstate_map_fd < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + link = bpf_program__attach(prog); + if (libbpf_get_error(link)) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + link = NULL; + goto cleanup; + } + + ret = cpu_stat_inject_cpu_idle_event(); + if (ret < 0) + return 1; + + ret = cpu_stat_inject_cpu_frequency_event(); + if (ret < 0) + return 1; + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + while (1) { + cpu_stat_update(cstate_map_fd, pstate_map_fd); + cpu_stat_print(); + sleep(5); + } + +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); + return 0; +} diff --git a/samples/bpf/do_hbm_test.sh b/samples/bpf/do_hbm_test.sh new file mode 100755 index 000000000..ffe4c0607 --- /dev/null +++ b/samples/bpf/do_hbm_test.sh @@ -0,0 +1,442 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2019 Facebook +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of version 2 of the GNU General Public +# License as published by the Free Software Foundation. + +Usage() { + echo "Script for testing HBM (Host Bandwidth Manager) framework." + echo "It creates a cgroup to use for testing and load a BPF program to limit" + echo "egress or ingress bandwidht. It then uses iperf3 or netperf to create" + echo "loads. The output is the goodput in Mbps (unless -D was used)." + echo "" + echo "USAGE: $name [out] [-b=|--bpf=] [-c=|--cc=]" + echo " [-D] [-d=|--delay=] [--debug] [-E] [--edt]" + echo " [-f=<#flows>|--flows=<#flows>] [-h] [-i=|--id=]" + echo " [-l] [-N] [--no_cn] [-p=|--port=] [-P]" + echo " [-q=] [-R] [-s=|--server=|--time=