summaryrefslogtreecommitdiffstats
path: root/src/spdk/dpdk/lib/librte_eal
diff options
context:
space:
mode:
Diffstat (limited to 'src/spdk/dpdk/lib/librte_eal')
-rw-r--r--src/spdk/dpdk/lib/librte_eal/Makefile12
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/meson.build30
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_atomic.h14
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_atomic_32.h44
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_atomic_64.h190
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_byteorder.h81
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_cpuflags.h14
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_cpuflags_32.h54
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_cpuflags_64.h36
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_cycles.h14
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_cycles_32.h93
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_cycles_64.h76
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_io.h22
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_io_64.h171
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_mcslock.h22
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_memcpy.h14
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_memcpy_32.h305
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_memcpy_64.h372
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_pause.h22
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_pause_32.h23
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_pause_64.h157
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_prefetch.h14
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_prefetch_32.h40
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_prefetch_64.h39
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_rwlock.h42
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_spinlock.h64
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_ticketlock.h22
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/include/rte_vect.h178
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/meson.build10
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/rte_cpuflags.c140
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/rte_cycles.c17
-rw-r--r--src/spdk/dpdk/lib/librte_eal/arm/rte_hypervisor.c11
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_bus.c279
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_class.c62
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_cpuflags.c39
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_dev.c793
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_devargs.c403
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_errno.c50
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_fbarray.c1510
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_hexdump.c77
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_hypervisor.c22
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_launch.c92
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_lcore.c211
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_log.c481
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_mcfg.c170
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_memalloc.c363
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_memory.c939
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_memzone.c420
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_options.c1861
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_proc.c1217
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_string_fns.c66
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_tailqs.c171
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_thread.c230
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_timer.c116
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_trace.c498
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_ctf.c488
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_points.c115
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_utils.c448
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_common_uuid.c167
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_filesystem.h107
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_hugepages.h40
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_internal_cfg.h91
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_memalloc.h96
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_memcfg.h98
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_options.h105
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_private.h423
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_thread.h60
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/eal_trace.h120
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.c465
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.h55
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/malloc_elem.c682
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/malloc_elem.h190
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/malloc_heap.c1367
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/malloc_heap.h107
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/malloc_mp.c751
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/malloc_mp.h86
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/meson.build58
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/rte_keepalive.c162
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/rte_malloc.c668
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/rte_random.c211
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/rte_reciprocal.c122
-rw-r--r--src/spdk/dpdk/lib/librte_eal/common/rte_service.c919
-rw-r--r--src/spdk/dpdk/lib/librte_eal/freebsd/Makefile96
-rw-r--r--src/spdk/dpdk/lib/librte_eal/freebsd/eal.c1105
-rw-r--r--src/spdk/dpdk/lib/librte_eal/freebsd/eal_alarm.c317
-rw-r--r--src/spdk/dpdk/lib/librte_eal/freebsd/eal_alarm_private.h19
-rw-r--r--src/spdk/dpdk/lib/librte_eal/freebsd/eal_cpuflags.c21
-rw-r--r--src/spdk/dpdk/lib/librte_eal/freebsd/eal_debug.c92
-rw-r--r--src/spdk/dpdk/lib/librte_eal/freebsd/eal_dev.c35
-rw-r--r--src/spdk/dpdk/lib/librte_eal/freebsd/eal_hugepage_info.c156
-rw-r--r--src/spdk/dpdk/lib/librte_eal/freebsd/eal_interrupts.c713
-rw-r--r--src/spdk/dpdk/lib/librte_eal/freebsd/eal_lcore.c52
-rw-r--r--src/spdk/dpdk/lib/librte_eal/freebsd/eal_memalloc.c81
-rw-r--r--src/spdk/dpdk/lib/librte_eal/freebsd/eal_memory.c536
-rw-r--r--src/spdk/dpdk/lib/librte_eal/freebsd/eal_thread.c194
-rw-r--r--src/spdk/dpdk/lib/librte_eal/freebsd/eal_timer.c64
-rw-r--r--src/spdk/dpdk/lib/librte_eal/freebsd/include/meson.build8
-rw-r--r--src/spdk/dpdk/lib/librte_eal/freebsd/include/rte_os.h52
-rw-r--r--src/spdk/dpdk/lib/librte_eal/freebsd/meson.build21
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/Makefile19
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/generic/rte_atomic.h1150
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/generic/rte_byteorder.h247
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/generic/rte_cpuflags.h79
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/generic/rte_cycles.h181
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/generic/rte_io.h350
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/generic/rte_mcslock.h179
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/generic/rte_memcpy.h112
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/generic/rte_pause.h128
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/generic/rte_prefetch.h54
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/generic/rte_rwlock.h239
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/generic/rte_spinlock.h305
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/generic/rte_ticketlock.h223
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/generic/rte_vect.h186
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/meson.build67
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_alarm.h77
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_bitmap.h575
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_branch_prediction.h41
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_bus.h389
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_class.h134
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_common.h842
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_compat.h35
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_debug.h82
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_dev.h518
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_devargs.h238
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_eal.h495
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_eal_interrupts.h238
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_eal_memconfig.h129
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_eal_trace.h278
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_errno.h66
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_fbarray.h565
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_function_versioning.h99
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_hexdump.h60
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_hypervisor.h33
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_interrupts.h145
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_keepalive.h142
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_launch.h148
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_lcore.h306
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_log.h383
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_malloc.h560
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_memory.h784
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_memzone.h320
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_pci_dev_feature_defs.h16
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_pci_dev_features.h15
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_per_lcore.h50
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_random.h74
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_reciprocal.h90
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_service.h422
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_service_component.h133
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_string_fns.h123
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_tailq.h140
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_test.h46
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_time.h101
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_trace.h141
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_trace_point.h408
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_trace_point_register.h41
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_uuid.h103
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_version.h68
-rw-r--r--src/spdk/dpdk/lib/librte_eal/include/rte_vfio.h360
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/Makefile103
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/eal.c1405
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/eal_alarm.c248
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/eal_cpuflags.c84
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/eal_debug.c92
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/eal_dev.c396
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/eal_hugepage_info.c547
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/eal_interrupts.c1521
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/eal_lcore.c81
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/eal_log.c62
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/eal_memalloc.c1607
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/eal_memory.c2481
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/eal_thread.c211
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/eal_timer.c232
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/eal_vfio.c2186
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/eal_vfio.h158
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/eal_vfio_mp_sync.c123
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/include/meson.build9
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/include/rte_kni_common.h137
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/include/rte_os.h33
-rw-r--r--src/spdk/dpdk/lib/librte_eal/linux/meson.build27
-rw-r--r--src/spdk/dpdk/lib/librte_eal/meson.build26
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/include/meson.build18
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/include/rte_altivec.h22
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/include/rte_atomic.h413
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/include/rte_byteorder.h120
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/include/rte_cpuflags.h61
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/include/rte_cycles.h46
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/include/rte_io.h18
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/include/rte_mcslock.h18
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/include/rte_memcpy.h209
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/include/rte_pause.h29
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/include/rte_prefetch.h41
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/include/rte_rwlock.h40
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/include/rte_spinlock.h88
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/include/rte_ticketlock.h18
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/include/rte_vect.h36
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/meson.build10
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/rte_cpuflags.c110
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/rte_cycles.c11
-rw-r--r--src/spdk/dpdk/lib/librte_eal/ppc/rte_hypervisor.c11
-rw-r--r--src/spdk/dpdk/lib/librte_eal/rte_eal_exports.def9
-rw-r--r--src/spdk/dpdk/lib/librte_eal/rte_eal_version.map389
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/eal.c276
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/eal_debug.c20
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/eal_lcore.c105
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/eal_log.c16
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/eal_thread.c166
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/eal_windows.h29
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/fnmatch.c172
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/getopt.c470
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/include/dirent.h664
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/include/fnmatch.h50
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/include/getopt.h96
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/include/meson.build9
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/include/pthread.h96
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/include/regex.h90
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/include/rte_os.h81
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/include/rte_windows.h41
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/include/sched.h92
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/include/sys/queue.h302
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/include/unistd.h12
-rw-r--r--src/spdk/dpdk/lib/librte_eal/windows/meson.build14
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/meson.build22
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_atomic.h270
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_atomic_32.h214
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_atomic_64.h218
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_byteorder.h99
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_byteorder_32.h29
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_byteorder_64.h30
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_cpuflags.h145
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_cycles.h66
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_io.h18
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_mcslock.h18
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_memcpy.h885
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_pause.h24
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_prefetch.h39
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_rtm.h62
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_rwlock.h53
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_spinlock.h181
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_ticketlock.h18
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/include/rte_vect.h97
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/meson.build11
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/rte_cpuflags.c179
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/rte_cpuid.h19
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/rte_cycles.c123
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/rte_hypervisor.c40
-rw-r--r--src/spdk/dpdk/lib/librte_eal/x86/rte_spinlock.c15
246 files changed, 56870 insertions, 0 deletions
diff --git a/src/spdk/dpdk/lib/librte_eal/Makefile b/src/spdk/dpdk/lib/librte_eal/Makefile
new file mode 100644
index 000000000..2fda40d23
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2014 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-y += include
+DIRS-$(CONFIG_RTE_EXEC_ENV_LINUX) += linux
+DEPDIRS-linux := include
+DIRS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += freebsd
+DEPDIRS-freebsd := include
+
+include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/meson.build b/src/spdk/dpdk/lib/librte_eal/arm/include/meson.build
new file mode 100644
index 000000000..73b750a18
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/meson.build
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation.
+
+arch_headers = files(
+ 'rte_atomic_32.h',
+ 'rte_atomic_64.h',
+ 'rte_atomic.h',
+ 'rte_byteorder.h',
+ 'rte_cpuflags_32.h',
+ 'rte_cpuflags_64.h',
+ 'rte_cpuflags.h',
+ 'rte_cycles_32.h',
+ 'rte_cycles_64.h',
+ 'rte_cycles.h',
+ 'rte_io_64.h',
+ 'rte_io.h',
+ 'rte_memcpy_32.h',
+ 'rte_memcpy_64.h',
+ 'rte_memcpy.h',
+ 'rte_pause_32.h',
+ 'rte_pause_64.h',
+ 'rte_pause.h',
+ 'rte_prefetch_32.h',
+ 'rte_prefetch_64.h',
+ 'rte_prefetch.h',
+ 'rte_rwlock.h',
+ 'rte_spinlock.h',
+ 'rte_vect.h',
+)
+install_headers(arch_headers, subdir: get_option('include_subdir_arch'))
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_atomic.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_atomic.h
new file mode 100644
index 000000000..40e14e56f
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_atomic.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ */
+
+#ifndef _RTE_ATOMIC_ARM_H_
+#define _RTE_ATOMIC_ARM_H_
+
+#ifdef RTE_ARCH_64
+#include <rte_atomic_64.h>
+#else
+#include <rte_atomic_32.h>
+#endif
+
+#endif /* _RTE_ATOMIC_ARM_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_atomic_32.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_atomic_32.h
new file mode 100644
index 000000000..7dc0d06d1
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_atomic_32.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ */
+
+#ifndef _RTE_ATOMIC_ARM32_H_
+#define _RTE_ATOMIC_ARM32_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_atomic.h"
+
+#define rte_mb() __sync_synchronize()
+
+#define rte_wmb() do { asm volatile ("dmb st" : : : "memory"); } while (0)
+
+#define rte_rmb() __sync_synchronize()
+
+#define rte_smp_mb() rte_mb()
+
+#define rte_smp_wmb() rte_wmb()
+
+#define rte_smp_rmb() rte_rmb()
+
+#define rte_io_mb() rte_mb()
+
+#define rte_io_wmb() rte_wmb()
+
+#define rte_io_rmb() rte_rmb()
+
+#define rte_cio_wmb() rte_wmb()
+
+#define rte_cio_rmb() rte_rmb()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_ARM32_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_atomic_64.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_atomic_64.h
new file mode 100644
index 000000000..7b7099cdc
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_atomic_64.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 Cavium, Inc
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_ATOMIC_ARM64_H_
+#define _RTE_ATOMIC_ARM64_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_atomic.h"
+#include <rte_branch_prediction.h>
+#include <rte_compat.h>
+#include <rte_debug.h>
+
+#define rte_mb() asm volatile("dsb sy" : : : "memory")
+
+#define rte_wmb() asm volatile("dsb st" : : : "memory")
+
+#define rte_rmb() asm volatile("dsb ld" : : : "memory")
+
+#define rte_smp_mb() asm volatile("dmb ish" : : : "memory")
+
+#define rte_smp_wmb() asm volatile("dmb ishst" : : : "memory")
+
+#define rte_smp_rmb() asm volatile("dmb ishld" : : : "memory")
+
+#define rte_io_mb() rte_mb()
+
+#define rte_io_wmb() rte_wmb()
+
+#define rte_io_rmb() rte_rmb()
+
+#define rte_cio_wmb() asm volatile("dmb oshst" : : : "memory")
+
+#define rte_cio_rmb() asm volatile("dmb oshld" : : : "memory")
+
+/*------------------------ 128 bit atomic operations -------------------------*/
+
+#if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS)
+#define __ATOMIC128_CAS_OP(cas_op_name, op_string) \
+static __rte_noinline rte_int128_t \
+cas_op_name(rte_int128_t *dst, rte_int128_t old, rte_int128_t updated) \
+{ \
+ /* caspX instructions register pair must start from even-numbered
+ * register at operand 1.
+ * So, specify registers for local variables here.
+ */ \
+ register uint64_t x0 __asm("x0") = (uint64_t)old.val[0]; \
+ register uint64_t x1 __asm("x1") = (uint64_t)old.val[1]; \
+ register uint64_t x2 __asm("x2") = (uint64_t)updated.val[0]; \
+ register uint64_t x3 __asm("x3") = (uint64_t)updated.val[1]; \
+ asm volatile( \
+ op_string " %[old0], %[old1], %[upd0], %[upd1], [%[dst]]" \
+ : [old0] "+r" (x0), \
+ [old1] "+r" (x1) \
+ : [upd0] "r" (x2), \
+ [upd1] "r" (x3), \
+ [dst] "r" (dst) \
+ : "memory"); \
+ old.val[0] = x0; \
+ old.val[1] = x1; \
+ return old; \
+}
+
+__ATOMIC128_CAS_OP(__cas_128_relaxed, "casp")
+__ATOMIC128_CAS_OP(__cas_128_acquire, "caspa")
+__ATOMIC128_CAS_OP(__cas_128_release, "caspl")
+__ATOMIC128_CAS_OP(__cas_128_acq_rel, "caspal")
+
+#undef __ATOMIC128_CAS_OP
+
+#endif
+
+__rte_experimental
+static inline int
+rte_atomic128_cmp_exchange(rte_int128_t *dst, rte_int128_t *exp,
+ const rte_int128_t *src, unsigned int weak, int success,
+ int failure)
+{
+ /* Always do strong CAS */
+ RTE_SET_USED(weak);
+ /* Ignore memory ordering for failure, memory order for
+ * success must be stronger or equal
+ */
+ RTE_SET_USED(failure);
+ /* Find invalid memory order */
+ RTE_ASSERT(success == __ATOMIC_RELAXED ||
+ success == __ATOMIC_ACQUIRE ||
+ success == __ATOMIC_RELEASE ||
+ success == __ATOMIC_ACQ_REL ||
+ success == __ATOMIC_SEQ_CST);
+
+ rte_int128_t expected = *exp;
+ rte_int128_t desired = *src;
+ rte_int128_t old;
+
+#if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS)
+ if (success == __ATOMIC_RELAXED)
+ old = __cas_128_relaxed(dst, expected, desired);
+ else if (success == __ATOMIC_ACQUIRE)
+ old = __cas_128_acquire(dst, expected, desired);
+ else if (success == __ATOMIC_RELEASE)
+ old = __cas_128_release(dst, expected, desired);
+ else
+ old = __cas_128_acq_rel(dst, expected, desired);
+#else
+#define __HAS_ACQ(mo) ((mo) != __ATOMIC_RELAXED && (mo) != __ATOMIC_RELEASE)
+#define __HAS_RLS(mo) ((mo) == __ATOMIC_RELEASE || (mo) == __ATOMIC_ACQ_REL || \
+ (mo) == __ATOMIC_SEQ_CST)
+
+ int ldx_mo = __HAS_ACQ(success) ? __ATOMIC_ACQUIRE : __ATOMIC_RELAXED;
+ int stx_mo = __HAS_RLS(success) ? __ATOMIC_RELEASE : __ATOMIC_RELAXED;
+
+#undef __HAS_ACQ
+#undef __HAS_RLS
+
+ uint32_t ret = 1;
+
+ /* ldx128 can not guarantee atomic,
+ * Must write back src or old to verify atomicity of ldx128;
+ */
+ do {
+
+#define __LOAD_128(op_string, src, dst) { \
+ asm volatile( \
+ op_string " %0, %1, %2" \
+ : "=&r" (dst.val[0]), \
+ "=&r" (dst.val[1]) \
+ : "Q" (src->val[0]) \
+ : "memory"); }
+
+ if (ldx_mo == __ATOMIC_RELAXED)
+ __LOAD_128("ldxp", dst, old)
+ else
+ __LOAD_128("ldaxp", dst, old)
+
+#undef __LOAD_128
+
+#define __STORE_128(op_string, dst, src, ret) { \
+ asm volatile( \
+ op_string " %w0, %1, %2, %3" \
+ : "=&r" (ret) \
+ : "r" (src.val[0]), \
+ "r" (src.val[1]), \
+ "Q" (dst->val[0]) \
+ : "memory"); }
+
+ if (likely(old.int128 == expected.int128)) {
+ if (stx_mo == __ATOMIC_RELAXED)
+ __STORE_128("stxp", dst, desired, ret)
+ else
+ __STORE_128("stlxp", dst, desired, ret)
+ } else {
+ /* In the failure case (since 'weak' is ignored and only
+ * weak == 0 is implemented), expected should contain
+ * the atomically read value of dst. This means, 'old'
+ * needs to be stored back to ensure it was read
+ * atomically.
+ */
+ if (stx_mo == __ATOMIC_RELAXED)
+ __STORE_128("stxp", dst, old, ret)
+ else
+ __STORE_128("stlxp", dst, old, ret)
+ }
+
+#undef __STORE_128
+
+ } while (unlikely(ret));
+#endif
+
+ /* Unconditionally updating expected removes an 'if' statement.
+ * expected should already be in register if not in the cache.
+ */
+ *exp = old;
+
+ return (old.int128 == expected.int128);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_ARM64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_byteorder.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_byteorder.h
new file mode 100644
index 000000000..9ec4a9750
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_byteorder.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ */
+
+#ifndef _RTE_BYTEORDER_ARM_H_
+#define _RTE_BYTEORDER_ARM_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <rte_common.h>
+#include "generic/rte_byteorder.h"
+
+/* fix missing __builtin_bswap16 for gcc older then 4.8 */
+#if !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+
+static inline uint16_t rte_arch_bswap16(uint16_t _x)
+{
+ uint16_t x = _x;
+
+ asm volatile ("rev16 %w0,%w1"
+ : "=r" (x)
+ : "r" (x)
+ );
+ return x;
+}
+
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap16(x) : \
+ rte_arch_bswap16(x)))
+#endif
+
+/* ARM architecture is bi-endian (both big and little). */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define rte_cpu_to_le_16(x) (x)
+#define rte_cpu_to_le_32(x) (x)
+#define rte_cpu_to_le_64(x) (x)
+
+#define rte_cpu_to_be_16(x) rte_bswap16(x)
+#define rte_cpu_to_be_32(x) rte_bswap32(x)
+#define rte_cpu_to_be_64(x) rte_bswap64(x)
+
+#define rte_le_to_cpu_16(x) (x)
+#define rte_le_to_cpu_32(x) (x)
+#define rte_le_to_cpu_64(x) (x)
+
+#define rte_be_to_cpu_16(x) rte_bswap16(x)
+#define rte_be_to_cpu_32(x) rte_bswap32(x)
+#define rte_be_to_cpu_64(x) rte_bswap64(x)
+
+#else /* RTE_BIG_ENDIAN */
+
+#define rte_cpu_to_le_16(x) rte_bswap16(x)
+#define rte_cpu_to_le_32(x) rte_bswap32(x)
+#define rte_cpu_to_le_64(x) rte_bswap64(x)
+
+#define rte_cpu_to_be_16(x) (x)
+#define rte_cpu_to_be_32(x) (x)
+#define rte_cpu_to_be_64(x) (x)
+
+#define rte_le_to_cpu_16(x) rte_bswap16(x)
+#define rte_le_to_cpu_32(x) rte_bswap32(x)
+#define rte_le_to_cpu_64(x) rte_bswap64(x)
+
+#define rte_be_to_cpu_16(x) (x)
+#define rte_be_to_cpu_32(x) (x)
+#define rte_be_to_cpu_64(x) (x)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BYTEORDER_ARM_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cpuflags.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cpuflags.h
new file mode 100644
index 000000000..022e7da55
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cpuflags.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ */
+
+#ifndef _RTE_CPUFLAGS_ARM_H_
+#define _RTE_CPUFLAGS_ARM_H_
+
+#ifdef RTE_ARCH_64
+#include <rte_cpuflags_64.h>
+#else
+#include <rte_cpuflags_32.h>
+#endif
+
+#endif /* _RTE_CPUFLAGS_ARM_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cpuflags_32.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cpuflags_32.h
new file mode 100644
index 000000000..b5347be1e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cpuflags_32.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ */
+
+#ifndef _RTE_CPUFLAGS_ARM32_H_
+#define _RTE_CPUFLAGS_ARM32_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t {
+ RTE_CPUFLAG_SWP = 0,
+ RTE_CPUFLAG_HALF,
+ RTE_CPUFLAG_THUMB,
+ RTE_CPUFLAG_A26BIT,
+ RTE_CPUFLAG_FAST_MULT,
+ RTE_CPUFLAG_FPA,
+ RTE_CPUFLAG_VFP,
+ RTE_CPUFLAG_EDSP,
+ RTE_CPUFLAG_JAVA,
+ RTE_CPUFLAG_IWMMXT,
+ RTE_CPUFLAG_CRUNCH,
+ RTE_CPUFLAG_THUMBEE,
+ RTE_CPUFLAG_NEON,
+ RTE_CPUFLAG_VFPv3,
+ RTE_CPUFLAG_VFPv3D16,
+ RTE_CPUFLAG_TLS,
+ RTE_CPUFLAG_VFPv4,
+ RTE_CPUFLAG_IDIVA,
+ RTE_CPUFLAG_IDIVT,
+ RTE_CPUFLAG_VFPD32,
+ RTE_CPUFLAG_LPAE,
+ RTE_CPUFLAG_EVTSTRM,
+ RTE_CPUFLAG_AES,
+ RTE_CPUFLAG_PMULL,
+ RTE_CPUFLAG_SHA1,
+ RTE_CPUFLAG_SHA2,
+ RTE_CPUFLAG_CRC32,
+ RTE_CPUFLAG_V7L,
+ /* The last item */
+ RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */
+};
+
+#include "generic/rte_cpuflags.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CPUFLAGS_ARM32_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cpuflags_64.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cpuflags_64.h
new file mode 100644
index 000000000..95cc01474
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cpuflags_64.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 Cavium, Inc
+ */
+
+#ifndef _RTE_CPUFLAGS_ARM64_H_
+#define _RTE_CPUFLAGS_ARM64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t {
+ RTE_CPUFLAG_FP = 0,
+ RTE_CPUFLAG_NEON,
+ RTE_CPUFLAG_EVTSTRM,
+ RTE_CPUFLAG_AES,
+ RTE_CPUFLAG_PMULL,
+ RTE_CPUFLAG_SHA1,
+ RTE_CPUFLAG_SHA2,
+ RTE_CPUFLAG_CRC32,
+ RTE_CPUFLAG_ATOMICS,
+ RTE_CPUFLAG_AARCH64,
+ /* The last item */
+ RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */
+};
+
+#include "generic/rte_cpuflags.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CPUFLAGS_ARM64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cycles.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cycles.h
new file mode 100644
index 000000000..e8ffa894b
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cycles.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ */
+
+#ifndef _RTE_CYCLES_ARM_H_
+#define _RTE_CYCLES_ARM_H_
+
+#ifdef RTE_ARCH_64
+#include <rte_cycles_64.h>
+#else
+#include <rte_cycles_32.h>
+#endif
+
+#endif /* _RTE_CYCLES_ARM_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cycles_32.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cycles_32.h
new file mode 100644
index 000000000..f79718ce8
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cycles_32.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ */
+
+#ifndef _RTE_CYCLES_ARM32_H_
+#define _RTE_CYCLES_ARM32_H_
+
+/* ARM v7 does not have suitable source of clock signals. The only clock counter
+ available in the core is 32 bit wide. Therefore it is unsuitable as the
+ counter overlaps every few seconds and probably is not accessible by
+ userspace programs. Therefore we use clock_gettime(CLOCK_MONOTONIC_RAW) to
+ simulate counter running at 1GHz.
+*/
+
+#include <time.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+/**
+ * Read the time base register.
+ *
+ * @return
+ * The time base for this lcore.
+ */
+#ifndef RTE_ARM_EAL_RDTSC_USE_PMU
+
+/**
+ * This call is easily portable to any architecture, however,
+ * it may require a system call and inprecise for some tasks.
+ */
+static inline uint64_t
+__rte_rdtsc_syscall(void)
+{
+ struct timespec val;
+ uint64_t v;
+
+ while (clock_gettime(CLOCK_MONOTONIC_RAW, &val) != 0)
+ /* no body */;
+
+ v = (uint64_t) val.tv_sec * 1000000000LL;
+ v += (uint64_t) val.tv_nsec;
+ return v;
+}
+#define rte_rdtsc __rte_rdtsc_syscall
+
+#else
+
+/**
+ * This function requires to configure the PMCCNTR and enable
+ * userspace access to it:
+ *
+ * asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r"(1));
+ * asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(29));
+ * asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(0x8000000f));
+ *
+ * which is possible only from the privileged mode (kernel space).
+ */
+static inline uint64_t
+__rte_rdtsc_pmccntr(void)
+{
+ unsigned tsc;
+ uint64_t final_tsc;
+
+ /* Read PMCCNTR */
+ asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(tsc));
+ /* 1 tick = 64 clocks */
+ final_tsc = ((uint64_t)tsc) << 6;
+
+ return (uint64_t)final_tsc;
+}
+#define rte_rdtsc __rte_rdtsc_pmccntr
+
+#endif /* RTE_ARM_EAL_RDTSC_USE_PMU */
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+ rte_mb();
+ return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_ARM32_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cycles_64.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cycles_64.h
new file mode 100644
index 000000000..da557b6a1
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_cycles_64.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 Cavium, Inc
+ */
+
+#ifndef _RTE_CYCLES_ARM64_H_
+#define _RTE_CYCLES_ARM64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+/**
+ * Read the time base register.
+ *
+ * @return
+ * The time base for this lcore.
+ */
+#ifndef RTE_ARM_EAL_RDTSC_USE_PMU
+/**
+ * This call is portable to any ARMv8 architecture, however, typically
+ * cntvct_el0 runs at <= 100MHz and it may be imprecise for some tasks.
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+ uint64_t tsc;
+
+ asm volatile("mrs %0, cntvct_el0" : "=r" (tsc));
+ return tsc;
+}
+#else
+/**
+ * This is an alternative method to enable rte_rdtsc() with high resolution
+ * PMU cycles counter.The cycle counter runs at cpu frequency and this scheme
+ * uses ARMv8 PMU subsystem to get the cycle counter at userspace, However,
+ * access to PMU cycle counter from user space is not enabled by default in
+ * arm64 linux kernel.
+ * It is possible to enable cycle counter at user space access by configuring
+ * the PMU from the privileged mode (kernel space).
+ *
+ * asm volatile("msr pmintenset_el1, %0" : : "r" ((u64)(0 << 31)));
+ * asm volatile("msr pmcntenset_el0, %0" :: "r" BIT(31));
+ * asm volatile("msr pmuserenr_el0, %0" : : "r"(BIT(0) | BIT(2)));
+ * asm volatile("mrs %0, pmcr_el0" : "=r" (val));
+ * val |= (BIT(0) | BIT(2));
+ * isb();
+ * asm volatile("msr pmcr_el0, %0" : : "r" (val));
+ *
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+ uint64_t tsc;
+
+ asm volatile("mrs %0, pmccntr_el0" : "=r"(tsc));
+ return tsc;
+}
+#endif
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+ asm volatile("isb" : : : "memory");
+ return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_ARM64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_io.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_io.h
new file mode 100644
index 000000000..f4e66e6ba
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_io.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Cavium, Inc
+ */
+
+#ifndef _RTE_IO_ARM_H_
+#define _RTE_IO_ARM_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef RTE_ARCH_64
+#include "rte_io_64.h"
+#else
+#include "generic/rte_io.h"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_IO_ARM_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_io_64.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_io_64.h
new file mode 100644
index 000000000..e5346240e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_io_64.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Cavium, Inc
+ */
+
+#ifndef _RTE_IO_ARM64_H_
+#define _RTE_IO_ARM64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#define RTE_OVERRIDE_IO_H
+
+#include "generic/rte_io.h"
+#include "rte_atomic_64.h"
+
+static __rte_always_inline uint8_t
+rte_read8_relaxed(const volatile void *addr)
+{
+ uint8_t val;
+
+ asm volatile(
+ "ldrb %w[val], [%x[addr]]"
+ : [val] "=r" (val)
+ : [addr] "r" (addr));
+ return val;
+}
+
+static __rte_always_inline uint16_t
+rte_read16_relaxed(const volatile void *addr)
+{
+ uint16_t val;
+
+ asm volatile(
+ "ldrh %w[val], [%x[addr]]"
+ : [val] "=r" (val)
+ : [addr] "r" (addr));
+ return val;
+}
+
+static __rte_always_inline uint32_t
+rte_read32_relaxed(const volatile void *addr)
+{
+ uint32_t val;
+
+ asm volatile(
+ "ldr %w[val], [%x[addr]]"
+ : [val] "=r" (val)
+ : [addr] "r" (addr));
+ return val;
+}
+
+static __rte_always_inline uint64_t
+rte_read64_relaxed(const volatile void *addr)
+{
+ uint64_t val;
+
+ asm volatile(
+ "ldr %x[val], [%x[addr]]"
+ : [val] "=r" (val)
+ : [addr] "r" (addr));
+ return val;
+}
+
+static __rte_always_inline void
+rte_write8_relaxed(uint8_t val, volatile void *addr)
+{
+ asm volatile(
+ "strb %w[val], [%x[addr]]"
+ :
+ : [val] "r" (val), [addr] "r" (addr));
+}
+
+static __rte_always_inline void
+rte_write16_relaxed(uint16_t val, volatile void *addr)
+{
+ asm volatile(
+ "strh %w[val], [%x[addr]]"
+ :
+ : [val] "r" (val), [addr] "r" (addr));
+}
+
+static __rte_always_inline void
+rte_write32_relaxed(uint32_t val, volatile void *addr)
+{
+ asm volatile(
+ "str %w[val], [%x[addr]]"
+ :
+ : [val] "r" (val), [addr] "r" (addr));
+}
+
+static __rte_always_inline void
+rte_write64_relaxed(uint64_t val, volatile void *addr)
+{
+ asm volatile(
+ "str %x[val], [%x[addr]]"
+ :
+ : [val] "r" (val), [addr] "r" (addr));
+}
+
+static __rte_always_inline uint8_t
+rte_read8(const volatile void *addr)
+{
+ uint8_t val;
+ val = rte_read8_relaxed(addr);
+ rte_io_rmb();
+ return val;
+}
+
+static __rte_always_inline uint16_t
+rte_read16(const volatile void *addr)
+{
+ uint16_t val;
+ val = rte_read16_relaxed(addr);
+ rte_io_rmb();
+ return val;
+}
+
+static __rte_always_inline uint32_t
+rte_read32(const volatile void *addr)
+{
+ uint32_t val;
+ val = rte_read32_relaxed(addr);
+ rte_io_rmb();
+ return val;
+}
+
+static __rte_always_inline uint64_t
+rte_read64(const volatile void *addr)
+{
+ uint64_t val;
+ val = rte_read64_relaxed(addr);
+ rte_io_rmb();
+ return val;
+}
+
+static __rte_always_inline void
+rte_write8(uint8_t value, volatile void *addr)
+{
+ rte_io_wmb();
+ rte_write8_relaxed(value, addr);
+}
+
+static __rte_always_inline void
+rte_write16(uint16_t value, volatile void *addr)
+{
+ rte_io_wmb();
+ rte_write16_relaxed(value, addr);
+}
+
+static __rte_always_inline void
+rte_write32(uint32_t value, volatile void *addr)
+{
+ rte_io_wmb();
+ rte_write32_relaxed(value, addr);
+}
+
+static __rte_always_inline void
+rte_write64(uint64_t value, volatile void *addr)
+{
+ rte_io_wmb();
+ rte_write64_relaxed(value, addr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_IO_ARM64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_mcslock.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_mcslock.h
new file mode 100644
index 000000000..dd1fe135b
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_mcslock.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_MCSLOCK_ARM_H_
+#define _RTE_MCSLOCK_ARM_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_mcslock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MCSLOCK_ARM_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_memcpy.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_memcpy.h
new file mode 100644
index 000000000..47dea9a8c
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_memcpy.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ */
+
+#ifndef _RTE_MEMCPY_ARM_H_
+#define _RTE_MEMCPY_ARM_H_
+
+#ifdef RTE_ARCH_64
+#include <rte_memcpy_64.h>
+#else
+#include <rte_memcpy_32.h>
+#endif
+
+#endif /* _RTE_MEMCPY_ARM_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_memcpy_32.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_memcpy_32.h
new file mode 100644
index 000000000..eb02c3b41
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_memcpy_32.h
@@ -0,0 +1,305 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ */
+
+#ifndef _RTE_MEMCPY_ARM32_H_
+#define _RTE_MEMCPY_ARM32_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcpy.h"
+
+#ifdef RTE_ARCH_ARM_NEON_MEMCPY
+
+#ifndef RTE_MACHINE_CPUFLAG_NEON
+#error "Cannot optimize memcpy by NEON as the CPU seems to not support this"
+#endif
+
+/* ARM NEON Intrinsics are used to copy data */
+#include <arm_neon.h>
+
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ vst1q_u8(dst, vld1q_u8(src));
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ asm volatile (
+ "vld1.8 {d0-d3}, [%0]\n\t"
+ "vst1.8 {d0-d3}, [%1]\n\t"
+ : "+r" (src), "+r" (dst)
+ : : "memory", "d0", "d1", "d2", "d3");
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+ asm volatile (
+ "vld1.8 {d0-d3}, [%0]!\n\t"
+ "vld1.8 {d4-d5}, [%0]\n\t"
+ "vst1.8 {d0-d3}, [%1]!\n\t"
+ "vst1.8 {d4-d5}, [%1]\n\t"
+ : "+r" (src), "+r" (dst)
+ :
+ : "memory", "d0", "d1", "d2", "d3", "d4", "d5");
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ asm volatile (
+ "vld1.8 {d0-d3}, [%0]!\n\t"
+ "vld1.8 {d4-d7}, [%0]\n\t"
+ "vst1.8 {d0-d3}, [%1]!\n\t"
+ "vst1.8 {d4-d7}, [%1]\n\t"
+ : "+r" (src), "+r" (dst)
+ :
+ : "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7");
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ asm volatile ("pld [%0, #64]" : : "r" (src));
+ asm volatile (
+ "vld1.8 {d0-d3}, [%0]!\n\t"
+ "vld1.8 {d4-d7}, [%0]!\n\t"
+ "vld1.8 {d8-d11}, [%0]!\n\t"
+ "vld1.8 {d12-d15}, [%0]\n\t"
+ "vst1.8 {d0-d3}, [%1]!\n\t"
+ "vst1.8 {d4-d7}, [%1]!\n\t"
+ "vst1.8 {d8-d11}, [%1]!\n\t"
+ "vst1.8 {d12-d15}, [%1]\n\t"
+ : "+r" (src), "+r" (dst)
+ :
+ : "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+ "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15");
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ asm volatile ("pld [%0, #64]" : : "r" (src));
+ asm volatile ("pld [%0, #128]" : : "r" (src));
+ asm volatile ("pld [%0, #192]" : : "r" (src));
+ asm volatile ("pld [%0, #256]" : : "r" (src));
+ asm volatile ("pld [%0, #320]" : : "r" (src));
+ asm volatile ("pld [%0, #384]" : : "r" (src));
+ asm volatile ("pld [%0, #448]" : : "r" (src));
+ asm volatile (
+ "vld1.8 {d0-d3}, [%0]!\n\t"
+ "vld1.8 {d4-d7}, [%0]!\n\t"
+ "vld1.8 {d8-d11}, [%0]!\n\t"
+ "vld1.8 {d12-d15}, [%0]!\n\t"
+ "vld1.8 {d16-d19}, [%0]!\n\t"
+ "vld1.8 {d20-d23}, [%0]!\n\t"
+ "vld1.8 {d24-d27}, [%0]!\n\t"
+ "vld1.8 {d28-d31}, [%0]\n\t"
+ "vst1.8 {d0-d3}, [%1]!\n\t"
+ "vst1.8 {d4-d7}, [%1]!\n\t"
+ "vst1.8 {d8-d11}, [%1]!\n\t"
+ "vst1.8 {d12-d15}, [%1]!\n\t"
+ "vst1.8 {d16-d19}, [%1]!\n\t"
+ "vst1.8 {d20-d23}, [%1]!\n\t"
+ "vst1.8 {d24-d27}, [%1]!\n\t"
+ "vst1.8 {d28-d31}, [%1]!\n\t"
+ : "+r" (src), "+r" (dst)
+ :
+ : "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+ "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
+ "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
+ "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31");
+}
+
+#define rte_memcpy(dst, src, n) \
+ __extension__ ({ \
+ (__builtin_constant_p(n)) ? \
+ memcpy((dst), (src), (n)) : \
+ rte_memcpy_func((dst), (src), (n)); })
+
+static inline void *
+rte_memcpy_func(void *dst, const void *src, size_t n)
+{
+ void *ret = dst;
+
+ /* We can't copy < 16 bytes using XMM registers so do it manually. */
+ if (n < 16) {
+ if (n & 0x01) {
+ *(uint8_t *)dst = *(const uint8_t *)src;
+ dst = (uint8_t *)dst + 1;
+ src = (const uint8_t *)src + 1;
+ }
+ if (n & 0x02) {
+ *(uint16_t *)dst = *(const uint16_t *)src;
+ dst = (uint16_t *)dst + 1;
+ src = (const uint16_t *)src + 1;
+ }
+ if (n & 0x04) {
+ *(uint32_t *)dst = *(const uint32_t *)src;
+ dst = (uint32_t *)dst + 1;
+ src = (const uint32_t *)src + 1;
+ }
+ if (n & 0x08) {
+ /* ARMv7 can not handle unaligned access to long long
+ * (uint64_t). Therefore two uint32_t operations are
+ * used.
+ */
+ *(uint32_t *)dst = *(const uint32_t *)src;
+ dst = (uint32_t *)dst + 1;
+ src = (const uint32_t *)src + 1;
+ *(uint32_t *)dst = *(const uint32_t *)src;
+ }
+ return ret;
+ }
+
+ /* Special fast cases for <= 128 bytes */
+ if (n <= 32) {
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst - 16 + n,
+ (const uint8_t *)src - 16 + n);
+ return ret;
+ }
+
+ if (n <= 64) {
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov32((uint8_t *)dst - 32 + n,
+ (const uint8_t *)src - 32 + n);
+ return ret;
+ }
+
+ if (n <= 128) {
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov64((uint8_t *)dst - 64 + n,
+ (const uint8_t *)src - 64 + n);
+ return ret;
+ }
+
+ /*
+ * For large copies > 128 bytes. This combination of 256, 64 and 16 byte
+ * copies was found to be faster than doing 128 and 32 byte copies as
+ * well.
+ */
+ for ( ; n >= 256; n -= 256) {
+ rte_mov256((uint8_t *)dst, (const uint8_t *)src);
+ dst = (uint8_t *)dst + 256;
+ src = (const uint8_t *)src + 256;
+ }
+
+ /*
+ * We split the remaining bytes (which will be less than 256) into
+ * 64byte (2^6) chunks.
+ * Using incrementing integers in the case labels of a switch statement
+ * encourages the compiler to use a jump table. To get incrementing
+ * integers, we shift the 2 relevant bits to the LSB position to first
+ * get decrementing integers, and then subtract.
+ */
+ switch (3 - (n >> 6)) {
+ case 0x00:
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ n -= 64;
+ dst = (uint8_t *)dst + 64;
+ src = (const uint8_t *)src + 64; /* fallthrough */
+ case 0x01:
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ n -= 64;
+ dst = (uint8_t *)dst + 64;
+ src = (const uint8_t *)src + 64; /* fallthrough */
+ case 0x02:
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ n -= 64;
+ dst = (uint8_t *)dst + 64;
+ src = (const uint8_t *)src + 64; /* fallthrough */
+ default:
+ break;
+ }
+
+ /*
+ * We split the remaining bytes (which will be less than 64) into
+ * 16byte (2^4) chunks, using the same switch structure as above.
+ */
+ switch (3 - (n >> 4)) {
+ case 0x00:
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ n -= 16;
+ dst = (uint8_t *)dst + 16;
+ src = (const uint8_t *)src + 16; /* fallthrough */
+ case 0x01:
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ n -= 16;
+ dst = (uint8_t *)dst + 16;
+ src = (const uint8_t *)src + 16; /* fallthrough */
+ case 0x02:
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ n -= 16;
+ dst = (uint8_t *)dst + 16;
+ src = (const uint8_t *)src + 16; /* fallthrough */
+ default:
+ break;
+ }
+
+ /* Copy any remaining bytes, without going beyond end of buffers */
+ if (n != 0)
+ rte_mov16((uint8_t *)dst - 16 + n,
+ (const uint8_t *)src - 16 + n);
+ return ret;
+}
+
+#else
+
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 16);
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 32);
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 48);
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 64);
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 128);
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 256);
+}
+
+static inline void *
+rte_memcpy(void *dst, const void *src, size_t n)
+{
+ return memcpy(dst, src, n);
+}
+
+#endif /* RTE_ARCH_ARM_NEON_MEMCPY */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCPY_ARM32_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_memcpy_64.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_memcpy_64.h
new file mode 100644
index 000000000..85ad587bd
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_memcpy_64.h
@@ -0,0 +1,372 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 Cavium, Inc
+ */
+
+#ifndef _RTE_MEMCPY_ARM64_H_
+#define _RTE_MEMCPY_ARM64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <string.h>
+
+#include "generic/rte_memcpy.h"
+
+#ifdef RTE_ARCH_ARM64_MEMCPY
+#include <rte_common.h>
+#include <rte_branch_prediction.h>
+
+/*
+ * The memory copy performance differs on different AArch64 micro-architectures.
+ * And the most recent glibc (e.g. 2.23 or later) can provide a better memcpy()
+ * performance compared to old glibc versions. It's always suggested to use a
+ * more recent glibc if possible, from which the entire system can get benefit.
+ *
+ * This implementation improves memory copy on some aarch64 micro-architectures,
+ * when an old glibc (e.g. 2.19, 2.17...) is being used. It is disabled by
+ * default and needs "RTE_ARCH_ARM64_MEMCPY" defined to activate. It's not
+ * always providing better performance than memcpy() so users need to run unit
+ * test "memcpy_perf_autotest" and customize parameters in customization section
+ * below for best performance.
+ *
+ * Compiler version will also impact the rte_memcpy() performance. It's observed
+ * on some platforms and with the same code, GCC 7.2.0 compiled binaries can
+ * provide better performance than GCC 4.8.5 compiled binaries.
+ */
+
+/**************************************
+ * Beginning of customization section
+ **************************************/
+#ifndef RTE_ARM64_MEMCPY_ALIGN_MASK
+#define RTE_ARM64_MEMCPY_ALIGN_MASK ((RTE_CACHE_LINE_SIZE >> 3) - 1)
+#endif
+
+#ifndef RTE_ARM64_MEMCPY_STRICT_ALIGN
+/* Only src unalignment will be treated as unaligned copy */
+#define RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src) \
+ ((uintptr_t)(src) & RTE_ARM64_MEMCPY_ALIGN_MASK)
+#else
+/* Both dst and src unalignment will be treated as unaligned copy */
+#define RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src) \
+ (((uintptr_t)(dst) | (uintptr_t)(src)) & RTE_ARM64_MEMCPY_ALIGN_MASK)
+#endif
+
+
+/*
+ * If copy size is larger than threshold, memcpy() will be used.
+ * Run "memcpy_perf_autotest" to determine the proper threshold.
+ */
+#ifdef RTE_ARM64_MEMCPY_ALIGNED_THRESHOLD
+#define USE_ALIGNED_RTE_MEMCPY(dst, src, n) \
+(!RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src) && \
+n <= (size_t)RTE_ARM64_MEMCPY_ALIGNED_THRESHOLD)
+#else
+#define USE_ALIGNED_RTE_MEMCPY(dst, src, n) \
+(!RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src))
+#endif
+#ifdef RTE_ARM64_MEMCPY_UNALIGNED_THRESHOLD
+#define USE_UNALIGNED_RTE_MEMCPY(dst, src, n) \
+(RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src) && \
+n <= (size_t)RTE_ARM64_MEMCPY_UNALIGNED_THRESHOLD)
+#else
+#define USE_UNALIGNED_RTE_MEMCPY(dst, src, n) \
+(RTE_ARM64_MEMCPY_IS_UNALIGNED_COPY(dst, src))
+#endif
+/*
+ * The logic of USE_RTE_MEMCPY() can also be modified to best fit platform.
+ */
+#if defined(RTE_ARM64_MEMCPY_ALIGNED_THRESHOLD) \
+|| defined(RTE_ARM64_MEMCPY_UNALIGNED_THRESHOLD)
+#define USE_RTE_MEMCPY(dst, src, n) \
+(USE_ALIGNED_RTE_MEMCPY(dst, src, n) || USE_UNALIGNED_RTE_MEMCPY(dst, src, n))
+#else
+#define USE_RTE_MEMCPY(dst, src, n) (1)
+#endif
+/**************************************
+ * End of customization section
+ **************************************/
+
+
+#if RTE_CC_IS_GNU && !defined RTE_ARM64_MEMCPY_SKIP_GCC_VER_CHECK
+#if (GCC_VERSION < 50400)
+#warning "The GCC version is quite old, which may result in sub-optimal \
+performance of the compiled code. It is suggested that at least GCC 5.4.0 \
+be used."
+#endif
+#endif
+
+static __rte_always_inline
+void rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ __uint128_t *dst128 = (__uint128_t *)dst;
+ const __uint128_t *src128 = (const __uint128_t *)src;
+ *dst128 = *src128;
+}
+
+static __rte_always_inline
+void rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ __uint128_t *dst128 = (__uint128_t *)dst;
+ const __uint128_t *src128 = (const __uint128_t *)src;
+ const __uint128_t x0 = src128[0], x1 = src128[1];
+ dst128[0] = x0;
+ dst128[1] = x1;
+}
+
+static __rte_always_inline
+void rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+ __uint128_t *dst128 = (__uint128_t *)dst;
+ const __uint128_t *src128 = (const __uint128_t *)src;
+ const __uint128_t x0 = src128[0], x1 = src128[1], x2 = src128[2];
+ dst128[0] = x0;
+ dst128[1] = x1;
+ dst128[2] = x2;
+}
+
+static __rte_always_inline
+void rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ __uint128_t *dst128 = (__uint128_t *)dst;
+ const __uint128_t *src128 = (const __uint128_t *)src;
+ const __uint128_t
+ x0 = src128[0], x1 = src128[1], x2 = src128[2], x3 = src128[3];
+ dst128[0] = x0;
+ dst128[1] = x1;
+ dst128[2] = x2;
+ dst128[3] = x3;
+}
+
+static __rte_always_inline
+void rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ __uint128_t *dst128 = (__uint128_t *)dst;
+ const __uint128_t *src128 = (const __uint128_t *)src;
+ /* Keep below declaration & copy sequence for optimized instructions */
+ const __uint128_t
+ x0 = src128[0], x1 = src128[1], x2 = src128[2], x3 = src128[3];
+ dst128[0] = x0;
+ __uint128_t x4 = src128[4];
+ dst128[1] = x1;
+ __uint128_t x5 = src128[5];
+ dst128[2] = x2;
+ __uint128_t x6 = src128[6];
+ dst128[3] = x3;
+ __uint128_t x7 = src128[7];
+ dst128[4] = x4;
+ dst128[5] = x5;
+ dst128[6] = x6;
+ dst128[7] = x7;
+}
+
+static __rte_always_inline
+void rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov128(dst, src);
+ rte_mov128(dst + 128, src + 128);
+}
+
+static __rte_always_inline void
+rte_memcpy_lt16(uint8_t *dst, const uint8_t *src, size_t n)
+{
+ if (n & 0x08) {
+ /* copy 8 ~ 15 bytes */
+ *(uint64_t *)dst = *(const uint64_t *)src;
+ *(uint64_t *)(dst - 8 + n) = *(const uint64_t *)(src - 8 + n);
+ } else if (n & 0x04) {
+ /* copy 4 ~ 7 bytes */
+ *(uint32_t *)dst = *(const uint32_t *)src;
+ *(uint32_t *)(dst - 4 + n) = *(const uint32_t *)(src - 4 + n);
+ } else if (n & 0x02) {
+ /* copy 2 ~ 3 bytes */
+ *(uint16_t *)dst = *(const uint16_t *)src;
+ *(uint16_t *)(dst - 2 + n) = *(const uint16_t *)(src - 2 + n);
+ } else if (n & 0x01) {
+ /* copy 1 byte */
+ *dst = *src;
+ }
+}
+
+static __rte_always_inline
+void rte_memcpy_ge16_lt128(uint8_t *dst, const uint8_t *src, size_t n)
+{
+ if (n < 64) {
+ if (n == 16) {
+ rte_mov16(dst, src);
+ } else if (n <= 32) {
+ rte_mov16(dst, src);
+ rte_mov16(dst - 16 + n, src - 16 + n);
+ } else if (n <= 48) {
+ rte_mov32(dst, src);
+ rte_mov16(dst - 16 + n, src - 16 + n);
+ } else {
+ rte_mov48(dst, src);
+ rte_mov16(dst - 16 + n, src - 16 + n);
+ }
+ } else {
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ if (n > 48 + 64)
+ rte_mov64(dst - 64 + n, src - 64 + n);
+ else if (n > 32 + 64)
+ rte_mov48(dst - 48 + n, src - 48 + n);
+ else if (n > 16 + 64)
+ rte_mov32(dst - 32 + n, src - 32 + n);
+ else if (n > 64)
+ rte_mov16(dst - 16 + n, src - 16 + n);
+ }
+}
+
+static __rte_always_inline
+void rte_memcpy_ge128(uint8_t *dst, const uint8_t *src, size_t n)
+{
+ do {
+ rte_mov128(dst, src);
+ src += 128;
+ dst += 128;
+ n -= 128;
+ } while (likely(n >= 128));
+
+ if (likely(n)) {
+ if (n <= 16)
+ rte_mov16(dst - 16 + n, src - 16 + n);
+ else if (n <= 32)
+ rte_mov32(dst - 32 + n, src - 32 + n);
+ else if (n <= 48)
+ rte_mov48(dst - 48 + n, src - 48 + n);
+ else if (n <= 64)
+ rte_mov64(dst - 64 + n, src - 64 + n);
+ else
+ rte_memcpy_ge16_lt128(dst, src, n);
+ }
+}
+
+static __rte_always_inline
+void rte_memcpy_ge16_lt64(uint8_t *dst, const uint8_t *src, size_t n)
+{
+ if (n == 16) {
+ rte_mov16(dst, src);
+ } else if (n <= 32) {
+ rte_mov16(dst, src);
+ rte_mov16(dst - 16 + n, src - 16 + n);
+ } else if (n <= 48) {
+ rte_mov32(dst, src);
+ rte_mov16(dst - 16 + n, src - 16 + n);
+ } else {
+ rte_mov48(dst, src);
+ rte_mov16(dst - 16 + n, src - 16 + n);
+ }
+}
+
+static __rte_always_inline
+void rte_memcpy_ge64(uint8_t *dst, const uint8_t *src, size_t n)
+{
+ do {
+ rte_mov64(dst, src);
+ src += 64;
+ dst += 64;
+ n -= 64;
+ } while (likely(n >= 64));
+
+ if (likely(n)) {
+ if (n <= 16)
+ rte_mov16(dst - 16 + n, src - 16 + n);
+ else if (n <= 32)
+ rte_mov32(dst - 32 + n, src - 32 + n);
+ else if (n <= 48)
+ rte_mov48(dst - 48 + n, src - 48 + n);
+ else
+ rte_mov64(dst - 64 + n, src - 64 + n);
+ }
+}
+
+#if RTE_CACHE_LINE_SIZE >= 128
+static __rte_always_inline
+void *rte_memcpy(void *dst, const void *src, size_t n)
+{
+ if (n < 16) {
+ rte_memcpy_lt16((uint8_t *)dst, (const uint8_t *)src, n);
+ return dst;
+ }
+ if (n < 128) {
+ rte_memcpy_ge16_lt128((uint8_t *)dst, (const uint8_t *)src, n);
+ return dst;
+ }
+ __builtin_prefetch(src, 0, 0);
+ __builtin_prefetch(dst, 1, 0);
+ if (likely(USE_RTE_MEMCPY(dst, src, n))) {
+ rte_memcpy_ge128((uint8_t *)dst, (const uint8_t *)src, n);
+ return dst;
+ } else
+ return memcpy(dst, src, n);
+}
+
+#else
+static __rte_always_inline
+void *rte_memcpy(void *dst, const void *src, size_t n)
+{
+ if (n < 16) {
+ rte_memcpy_lt16((uint8_t *)dst, (const uint8_t *)src, n);
+ return dst;
+ }
+ if (n < 64) {
+ rte_memcpy_ge16_lt64((uint8_t *)dst, (const uint8_t *)src, n);
+ return dst;
+ }
+ __builtin_prefetch(src, 0, 0);
+ __builtin_prefetch(dst, 1, 0);
+ if (likely(USE_RTE_MEMCPY(dst, src, n))) {
+ rte_memcpy_ge64((uint8_t *)dst, (const uint8_t *)src, n);
+ return dst;
+ } else
+ return memcpy(dst, src, n);
+}
+#endif /* RTE_CACHE_LINE_SIZE >= 128 */
+
+#else
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 16);
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 32);
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 48);
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 64);
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 128);
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ memcpy(dst, src, 256);
+}
+
+#define rte_memcpy(d, s, n) memcpy((d), (s), (n))
+
+#endif /* RTE_ARCH_ARM64_MEMCPY */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCPY_ARM_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_pause.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_pause.h
new file mode 100644
index 000000000..6c7002ad9
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_pause.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Cavium, Inc
+ */
+
+#ifndef _RTE_PAUSE_ARM_H_
+#define _RTE_PAUSE_ARM_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef RTE_ARCH_64
+#include <rte_pause_64.h>
+#else
+#include <rte_pause_32.h>
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PAUSE_ARM_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_pause_32.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_pause_32.h
new file mode 100644
index 000000000..d4768c7a9
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_pause_32.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Cavium, Inc
+ */
+
+#ifndef _RTE_PAUSE_ARM32_H_
+#define _RTE_PAUSE_ARM32_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_pause.h"
+
+static inline void rte_pause(void)
+{
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PAUSE_ARM32_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_pause_64.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_pause_64.h
new file mode 100644
index 000000000..e87d10b8c
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_pause_64.h
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Cavium, Inc
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_PAUSE_ARM64_H_
+#define _RTE_PAUSE_ARM64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+
+#ifdef RTE_ARM_USE_WFE
+#define RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED
+#endif
+
+#include "generic/rte_pause.h"
+
+static inline void rte_pause(void)
+{
+ asm volatile("yield" ::: "memory");
+}
+
+#ifdef RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED
+
+/* Send an event to quit WFE. */
+#define __SEVL() { asm volatile("sevl" : : : "memory"); }
+
+/* Put processor into low power WFE(Wait For Event) state. */
+#define __WFE() { asm volatile("wfe" : : : "memory"); }
+
+static __rte_always_inline void
+rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
+ int memorder)
+{
+ uint16_t value;
+
+ assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+ /*
+ * Atomic exclusive load from addr, it returns the 16-bit content of
+ * *addr while making it 'monitored',when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_16(src, dst, memorder) { \
+ if (memorder == __ATOMIC_RELAXED) { \
+ asm volatile("ldxrh %w[tmp], [%x[addr]]" \
+ : [tmp] "=&r" (dst) \
+ : [addr] "r"(src) \
+ : "memory"); \
+ } else { \
+ asm volatile("ldaxrh %w[tmp], [%x[addr]]" \
+ : [tmp] "=&r" (dst) \
+ : [addr] "r"(src) \
+ : "memory"); \
+ } }
+
+ __LOAD_EXC_16(addr, value, memorder)
+ if (value != expected) {
+ __SEVL()
+ do {
+ __WFE()
+ __LOAD_EXC_16(addr, value, memorder)
+ } while (value != expected);
+ }
+#undef __LOAD_EXC_16
+}
+
+static __rte_always_inline void
+rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
+ int memorder)
+{
+ uint32_t value;
+
+ assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+ /*
+ * Atomic exclusive load from addr, it returns the 32-bit content of
+ * *addr while making it 'monitored',when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_32(src, dst, memorder) { \
+ if (memorder == __ATOMIC_RELAXED) { \
+ asm volatile("ldxr %w[tmp], [%x[addr]]" \
+ : [tmp] "=&r" (dst) \
+ : [addr] "r"(src) \
+ : "memory"); \
+ } else { \
+ asm volatile("ldaxr %w[tmp], [%x[addr]]" \
+ : [tmp] "=&r" (dst) \
+ : [addr] "r"(src) \
+ : "memory"); \
+ } }
+
+ __LOAD_EXC_32(addr, value, memorder)
+ if (value != expected) {
+ __SEVL()
+ do {
+ __WFE()
+ __LOAD_EXC_32(addr, value, memorder)
+ } while (value != expected);
+ }
+#undef __LOAD_EXC_32
+}
+
+static __rte_always_inline void
+rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
+ int memorder)
+{
+ uint64_t value;
+
+ assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+ /*
+ * Atomic exclusive load from addr, it returns the 64-bit content of
+ * *addr while making it 'monitored',when it is written by someone
+ * else, the 'monitored' state is cleared and a event is generated
+ * implicitly to exit WFE.
+ */
+#define __LOAD_EXC_64(src, dst, memorder) { \
+ if (memorder == __ATOMIC_RELAXED) { \
+ asm volatile("ldxr %x[tmp], [%x[addr]]" \
+ : [tmp] "=&r" (dst) \
+ : [addr] "r"(src) \
+ : "memory"); \
+ } else { \
+ asm volatile("ldaxr %x[tmp], [%x[addr]]" \
+ : [tmp] "=&r" (dst) \
+ : [addr] "r"(src) \
+ : "memory"); \
+ } }
+
+ __LOAD_EXC_64(addr, value, memorder)
+ if (value != expected) {
+ __SEVL()
+ do {
+ __WFE()
+ __LOAD_EXC_64(addr, value, memorder)
+ } while (value != expected);
+ }
+}
+#undef __LOAD_EXC_64
+
+#undef __SEVL
+#undef __WFE
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PAUSE_ARM64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_prefetch.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_prefetch.h
new file mode 100644
index 000000000..27870c2a8
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_prefetch.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ */
+
+#ifndef _RTE_PREFETCH_ARM_H_
+#define _RTE_PREFETCH_ARM_H_
+
+#ifdef RTE_ARCH_64
+#include <rte_prefetch_64.h>
+#else
+#include <rte_prefetch_32.h>
+#endif
+
+#endif /* _RTE_PREFETCH_ARM_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_prefetch_32.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_prefetch_32.h
new file mode 100644
index 000000000..e53420a0b
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_prefetch_32.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ */
+
+#ifndef _RTE_PREFETCH_ARM32_H_
+#define _RTE_PREFETCH_ARM32_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_prefetch.h"
+
+static inline void rte_prefetch0(const volatile void *p)
+{
+ asm volatile ("pld [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+ asm volatile ("pld [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+ asm volatile ("pld [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ /* non-temporal version not available, fallback to rte_prefetch0 */
+ rte_prefetch0(p);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_ARM32_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_prefetch_64.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_prefetch_64.h
new file mode 100644
index 000000000..fc2b391aa
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_prefetch_64.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 Cavium, Inc
+ */
+
+#ifndef _RTE_PREFETCH_ARM_64_H_
+#define _RTE_PREFETCH_ARM_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_prefetch.h"
+
+static inline void rte_prefetch0(const volatile void *p)
+{
+ asm volatile ("PRFM PLDL1KEEP, [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+ asm volatile ("PRFM PLDL2KEEP, [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+ asm volatile ("PRFM PLDL3KEEP, [%0]" : : "r" (p));
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ asm volatile ("PRFM PLDL1STRM, [%0]" : : "r" (p));
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_ARM_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_rwlock.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_rwlock.h
new file mode 100644
index 000000000..18bb37b03
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_rwlock.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ */
+/* copied from ppc_64 */
+
+#ifndef _RTE_RWLOCK_ARM_H_
+#define _RTE_RWLOCK_ARM_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_rwlock.h"
+
+static inline void
+rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_read_lock(rwl);
+}
+
+static inline void
+rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_read_unlock(rwl);
+}
+
+static inline void
+rte_rwlock_write_lock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_write_lock(rwl);
+}
+
+static inline void
+rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_write_unlock(rwl);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RWLOCK_ARM_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_spinlock.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_spinlock.h
new file mode 100644
index 000000000..1a6916b6e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_spinlock.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ */
+
+#ifndef _RTE_SPINLOCK_ARM_H_
+#define _RTE_SPINLOCK_ARM_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_spinlock.h"
+
+static inline int rte_tm_supported(void)
+{
+ return 0;
+}
+
+static inline void
+rte_spinlock_lock_tm(rte_spinlock_t *sl)
+{
+ rte_spinlock_lock(sl); /* fall-back */
+}
+
+static inline int
+rte_spinlock_trylock_tm(rte_spinlock_t *sl)
+{
+ return rte_spinlock_trylock(sl);
+}
+
+static inline void
+rte_spinlock_unlock_tm(rte_spinlock_t *sl)
+{
+ rte_spinlock_unlock(sl);
+}
+
+static inline void
+rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr)
+{
+ rte_spinlock_recursive_lock(slr); /* fall-back */
+}
+
+static inline void
+rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr)
+{
+ rte_spinlock_recursive_unlock(slr);
+}
+
+static inline int
+rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
+{
+ return rte_spinlock_recursive_trylock(slr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SPINLOCK_ARM_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_ticketlock.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_ticketlock.h
new file mode 100644
index 000000000..e09fbd6a6
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_ticketlock.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_TICKETLOCK_ARM_H_
+#define _RTE_TICKETLOCK_ARM_H_
+
+#ifndef RTE_FORCE_INTRINSICS
+# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_ticketlock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_TICKETLOCK_ARM_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/include/rte_vect.h b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_vect.h
new file mode 100644
index 000000000..9287a1117
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/include/rte_vect.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 Cavium, Inc
+ */
+
+#ifndef _RTE_VECT_ARM_H_
+#define _RTE_VECT_ARM_H_
+
+#include <stdint.h>
+#include "generic/rte_vect.h"
+#include "rte_debug.h"
+#include "arm_neon.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int32x4_t xmm_t;
+
+#define XMM_SIZE (sizeof(xmm_t))
+#define XMM_MASK (XMM_SIZE - 1)
+
+typedef union rte_xmm {
+ xmm_t x;
+ uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
+ uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
+ uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
+ uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
+ double pd[XMM_SIZE / sizeof(double)];
+} __rte_aligned(16) rte_xmm_t;
+
+#ifdef RTE_ARCH_ARM
+/* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */
+static __inline uint8x16_t
+vqtbl1q_u8(uint8x16_t a, uint8x16_t b)
+{
+ uint8_t i, pos;
+ rte_xmm_t rte_a, rte_b, rte_ret;
+
+ vst1q_u8(rte_a.u8, a);
+ vst1q_u8(rte_b.u8, b);
+
+ for (i = 0; i < 16; i++) {
+ pos = rte_b.u8[i];
+ if (pos < 16)
+ rte_ret.u8[i] = rte_a.u8[pos];
+ else
+ rte_ret.u8[i] = 0;
+ }
+
+ return vld1q_u8(rte_ret.u8);
+}
+
+static inline uint16_t
+vaddvq_u16(uint16x8_t a)
+{
+ uint32x4_t m = vpaddlq_u16(a);
+ uint64x2_t n = vpaddlq_u32(m);
+ uint64x1_t o = vget_low_u64(n) + vget_high_u64(n);
+
+ return vget_lane_u32((uint32x2_t)o, 0);
+}
+
+#endif
+
+#if RTE_CC_IS_GNU && (GCC_VERSION < 70000)
+static inline uint32x4_t
+vcopyq_laneq_u32(uint32x4_t a, const int lane_a,
+ uint32x4_t b, const int lane_b)
+{
+ return vsetq_lane_u32(vgetq_lane_u32(b, lane_b), a, lane_a);
+}
+#endif
+
+#if defined(RTE_ARCH_ARM64)
+#if RTE_CC_IS_GNU && (GCC_VERSION < 70000)
+
+#if (GCC_VERSION < 40900)
+typedef uint64_t poly64_t;
+typedef uint64x2_t poly64x2_t;
+typedef uint8_t poly128_t __attribute__((vector_size(16), aligned(16)));
+
+static inline uint32x4_t
+vceqzq_u32(uint32x4_t a)
+{
+ return (a == 0);
+}
+#endif
+
+/* NEON intrinsic vreinterpretq_u64_p128() is supported since GCC version 7 */
+static inline uint64x2_t
+vreinterpretq_u64_p128(poly128_t x)
+{
+ return (uint64x2_t)x;
+}
+
+/* NEON intrinsic vreinterpretq_p64_u64() is supported since GCC version 7 */
+static inline poly64x2_t
+vreinterpretq_p64_u64(uint64x2_t x)
+{
+ return (poly64x2_t)x;
+}
+
+/* NEON intrinsic vgetq_lane_p64() is supported since GCC version 7 */
+static inline poly64_t
+vgetq_lane_p64(poly64x2_t x, const int lane)
+{
+ RTE_ASSERT(lane >= 0 && lane <= 1);
+
+ poly64_t *p = (poly64_t *)&x;
+
+ return p[lane];
+}
+#endif
+#endif
+
+/*
+ * If (0 <= index <= 15), then call the ASIMD ext instruction on the
+ * 128 bit regs v0 and v1 with the appropriate index.
+ *
+ * Else returns a zero vector.
+ */
+static inline uint8x16_t
+vextract(uint8x16_t v0, uint8x16_t v1, const int index)
+{
+ switch (index) {
+ case 0: return vextq_u8(v0, v1, 0);
+ case 1: return vextq_u8(v0, v1, 1);
+ case 2: return vextq_u8(v0, v1, 2);
+ case 3: return vextq_u8(v0, v1, 3);
+ case 4: return vextq_u8(v0, v1, 4);
+ case 5: return vextq_u8(v0, v1, 5);
+ case 6: return vextq_u8(v0, v1, 6);
+ case 7: return vextq_u8(v0, v1, 7);
+ case 8: return vextq_u8(v0, v1, 8);
+ case 9: return vextq_u8(v0, v1, 9);
+ case 10: return vextq_u8(v0, v1, 10);
+ case 11: return vextq_u8(v0, v1, 11);
+ case 12: return vextq_u8(v0, v1, 12);
+ case 13: return vextq_u8(v0, v1, 13);
+ case 14: return vextq_u8(v0, v1, 14);
+ case 15: return vextq_u8(v0, v1, 15);
+ }
+ return vdupq_n_u8(0);
+}
+
+/**
+ * Shifts right 128 bit register by specified number of bytes
+ *
+ * Value of shift parameter must be in range 0 - 16
+ */
+static inline uint64x2_t
+vshift_bytes_right(uint64x2_t reg, const unsigned int shift)
+{
+ return vreinterpretq_u64_u8(vextract(
+ vreinterpretq_u8_u64(reg),
+ vdupq_n_u8(0),
+ shift));
+}
+
+/**
+ * Shifts left 128 bit register by specified number of bytes
+ *
+ * Value of shift parameter must be in range 0 - 16
+ */
+static inline uint64x2_t
+vshift_bytes_left(uint64x2_t reg, const unsigned int shift)
+{
+ return vreinterpretq_u64_u8(vextract(
+ vdupq_n_u8(0),
+ vreinterpretq_u8_u64(reg),
+ 16 - shift));
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/meson.build b/src/spdk/dpdk/lib/librte_eal/arm/meson.build
new file mode 100644
index 000000000..d62875eba
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/meson.build
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation.
+
+subdir('include')
+
+sources += files(
+ 'rte_cpuflags.c',
+ 'rte_cycles.c',
+ 'rte_hypervisor.c',
+)
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/rte_cpuflags.c b/src/spdk/dpdk/lib/librte_eal/arm/rte_cpuflags.c
new file mode 100644
index 000000000..caf3dc83a
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/rte_cpuflags.c
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) Cavium, Inc. 2015.
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
+ */
+
+#include "rte_cpuflags.h"
+
+#include <elf.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <unistd.h>
+#include <string.h>
+
+#ifndef AT_HWCAP
+#define AT_HWCAP 16
+#endif
+
+#ifndef AT_HWCAP2
+#define AT_HWCAP2 26
+#endif
+
+#ifndef AT_PLATFORM
+#define AT_PLATFORM 15
+#endif
+
+enum cpu_register_t {
+ REG_NONE = 0,
+ REG_HWCAP,
+ REG_HWCAP2,
+ REG_PLATFORM,
+ REG_MAX
+};
+
+typedef uint32_t hwcap_registers_t[REG_MAX];
+
+/**
+ * Struct to hold a processor feature entry
+ */
+struct feature_entry {
+ uint32_t reg;
+ uint32_t bit;
+#define CPU_FLAG_NAME_MAX_LEN 64
+ char name[CPU_FLAG_NAME_MAX_LEN];
+};
+
+#define FEAT_DEF(name, reg, bit) \
+ [RTE_CPUFLAG_##name] = {reg, bit, #name},
+
+#ifdef RTE_ARCH_ARMv7
+#define PLATFORM_STR "v7l"
+typedef Elf32_auxv_t _Elfx_auxv_t;
+
+const struct feature_entry rte_cpu_feature_table[] = {
+ FEAT_DEF(SWP, REG_HWCAP, 0)
+ FEAT_DEF(HALF, REG_HWCAP, 1)
+ FEAT_DEF(THUMB, REG_HWCAP, 2)
+ FEAT_DEF(A26BIT, REG_HWCAP, 3)
+ FEAT_DEF(FAST_MULT, REG_HWCAP, 4)
+ FEAT_DEF(FPA, REG_HWCAP, 5)
+ FEAT_DEF(VFP, REG_HWCAP, 6)
+ FEAT_DEF(EDSP, REG_HWCAP, 7)
+ FEAT_DEF(JAVA, REG_HWCAP, 8)
+ FEAT_DEF(IWMMXT, REG_HWCAP, 9)
+ FEAT_DEF(CRUNCH, REG_HWCAP, 10)
+ FEAT_DEF(THUMBEE, REG_HWCAP, 11)
+ FEAT_DEF(NEON, REG_HWCAP, 12)
+ FEAT_DEF(VFPv3, REG_HWCAP, 13)
+ FEAT_DEF(VFPv3D16, REG_HWCAP, 14)
+ FEAT_DEF(TLS, REG_HWCAP, 15)
+ FEAT_DEF(VFPv4, REG_HWCAP, 16)
+ FEAT_DEF(IDIVA, REG_HWCAP, 17)
+ FEAT_DEF(IDIVT, REG_HWCAP, 18)
+ FEAT_DEF(VFPD32, REG_HWCAP, 19)
+ FEAT_DEF(LPAE, REG_HWCAP, 20)
+ FEAT_DEF(EVTSTRM, REG_HWCAP, 21)
+ FEAT_DEF(AES, REG_HWCAP2, 0)
+ FEAT_DEF(PMULL, REG_HWCAP2, 1)
+ FEAT_DEF(SHA1, REG_HWCAP2, 2)
+ FEAT_DEF(SHA2, REG_HWCAP2, 3)
+ FEAT_DEF(CRC32, REG_HWCAP2, 4)
+ FEAT_DEF(V7L, REG_PLATFORM, 0)
+};
+
+#elif defined RTE_ARCH_ARM64
+#define PLATFORM_STR "aarch64"
+typedef Elf64_auxv_t _Elfx_auxv_t;
+
+const struct feature_entry rte_cpu_feature_table[] = {
+ FEAT_DEF(FP, REG_HWCAP, 0)
+ FEAT_DEF(NEON, REG_HWCAP, 1)
+ FEAT_DEF(EVTSTRM, REG_HWCAP, 2)
+ FEAT_DEF(AES, REG_HWCAP, 3)
+ FEAT_DEF(PMULL, REG_HWCAP, 4)
+ FEAT_DEF(SHA1, REG_HWCAP, 5)
+ FEAT_DEF(SHA2, REG_HWCAP, 6)
+ FEAT_DEF(CRC32, REG_HWCAP, 7)
+ FEAT_DEF(ATOMICS, REG_HWCAP, 8)
+ FEAT_DEF(AARCH64, REG_PLATFORM, 1)
+};
+#endif /* RTE_ARCH */
+
+/*
+ * Read AUXV software register and get cpu features for ARM
+ */
+static void
+rte_cpu_get_features(hwcap_registers_t out)
+{
+ out[REG_HWCAP] = rte_cpu_getauxval(AT_HWCAP);
+ out[REG_HWCAP2] = rte_cpu_getauxval(AT_HWCAP2);
+ if (!rte_cpu_strcmp_auxval(AT_PLATFORM, PLATFORM_STR))
+ out[REG_PLATFORM] = 0x0001;
+}
+
+/*
+ * Checks if a particular flag is available on current machine.
+ */
+int
+rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
+{
+ const struct feature_entry *feat;
+ hwcap_registers_t regs = {0};
+
+ if (feature >= RTE_CPUFLAG_NUMFLAGS)
+ return -ENOENT;
+
+ feat = &rte_cpu_feature_table[feature];
+ if (feat->reg == REG_NONE)
+ return -EFAULT;
+
+ rte_cpu_get_features(regs);
+ return (regs[feat->reg] >> feat->bit) & 1;
+}
+
+const char *
+rte_cpu_get_flag_name(enum rte_cpu_flag_t feature)
+{
+ if (feature >= RTE_CPUFLAG_NUMFLAGS)
+ return NULL;
+ return rte_cpu_feature_table[feature].name;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/rte_cycles.c b/src/spdk/dpdk/lib/librte_eal/arm/rte_cycles.c
new file mode 100644
index 000000000..3500d523e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/rte_cycles.c
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 Cavium, Inc
+ */
+
+#include "eal_private.h"
+
+uint64_t
+get_tsc_freq_arch(void)
+{
+#if defined RTE_ARCH_ARM64 && !defined RTE_ARM_EAL_RDTSC_USE_PMU
+ uint64_t freq;
+ asm volatile("mrs %0, cntfrq_el0" : "=r" (freq));
+ return freq;
+#else
+ return 0;
+#endif
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/arm/rte_hypervisor.c b/src/spdk/dpdk/lib/librte_eal/arm/rte_hypervisor.c
new file mode 100644
index 000000000..08a1c97d1
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/arm/rte_hypervisor.c
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 Mellanox Technologies, Ltd
+ */
+
+#include "rte_hypervisor.h"
+
+enum rte_hypervisor
+rte_hypervisor_get(void)
+{
+ return RTE_HYPERVISOR_UNKNOWN;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_bus.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_bus.c
new file mode 100644
index 000000000..baa5b532a
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_bus.c
@@ -0,0 +1,279 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2016 NXP
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_bus.h>
+#include <rte_debug.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
+
+#include "eal_private.h"
+
+static struct rte_bus_list rte_bus_list =
+ TAILQ_HEAD_INITIALIZER(rte_bus_list);
+
+void
+rte_bus_register(struct rte_bus *bus)
+{
+ RTE_VERIFY(bus);
+ RTE_VERIFY(bus->name && strlen(bus->name));
+ /* A bus should mandatorily have the scan implemented */
+ RTE_VERIFY(bus->scan);
+ RTE_VERIFY(bus->probe);
+ RTE_VERIFY(bus->find_device);
+ /* Buses supporting driver plug also require unplug. */
+ RTE_VERIFY(!bus->plug || bus->unplug);
+
+ TAILQ_INSERT_TAIL(&rte_bus_list, bus, next);
+ RTE_LOG(DEBUG, EAL, "Registered [%s] bus.\n", bus->name);
+}
+
+void
+rte_bus_unregister(struct rte_bus *bus)
+{
+ TAILQ_REMOVE(&rte_bus_list, bus, next);
+ RTE_LOG(DEBUG, EAL, "Unregistered [%s] bus.\n", bus->name);
+}
+
+/* Scan all the buses for registered devices */
+int
+rte_bus_scan(void)
+{
+ int ret;
+ struct rte_bus *bus = NULL;
+
+ TAILQ_FOREACH(bus, &rte_bus_list, next) {
+ ret = bus->scan();
+ if (ret)
+ RTE_LOG(ERR, EAL, "Scan for (%s) bus failed.\n",
+ bus->name);
+ }
+
+ return 0;
+}
+
+/* Probe all devices of all buses */
+int
+rte_bus_probe(void)
+{
+ int ret;
+ struct rte_bus *bus, *vbus = NULL;
+
+ TAILQ_FOREACH(bus, &rte_bus_list, next) {
+ if (!strcmp(bus->name, "vdev")) {
+ vbus = bus;
+ continue;
+ }
+
+ ret = bus->probe();
+ if (ret)
+ RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n",
+ bus->name);
+ }
+
+ if (vbus) {
+ ret = vbus->probe();
+ if (ret)
+ RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n",
+ vbus->name);
+ }
+
+ return 0;
+}
+
+/* Dump information of a single bus */
+static int
+bus_dump_one(FILE *f, struct rte_bus *bus)
+{
+ int ret;
+
+ /* For now, dump only the bus name */
+ ret = fprintf(f, " %s\n", bus->name);
+
+ /* Error in case of inability in writing to stream */
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+void
+rte_bus_dump(FILE *f)
+{
+ int ret;
+ struct rte_bus *bus;
+
+ TAILQ_FOREACH(bus, &rte_bus_list, next) {
+ ret = bus_dump_one(f, bus);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Unable to write to stream (%d)\n",
+ ret);
+ break;
+ }
+ }
+}
+
+struct rte_bus *
+rte_bus_find(const struct rte_bus *start, rte_bus_cmp_t cmp,
+ const void *data)
+{
+ struct rte_bus *bus;
+
+ if (start != NULL)
+ bus = TAILQ_NEXT(start, next);
+ else
+ bus = TAILQ_FIRST(&rte_bus_list);
+ while (bus != NULL) {
+ if (cmp(bus, data) == 0)
+ break;
+ bus = TAILQ_NEXT(bus, next);
+ }
+ return bus;
+}
+
+static int
+cmp_rte_device(const struct rte_device *dev1, const void *_dev2)
+{
+ const struct rte_device *dev2 = _dev2;
+
+ return dev1 != dev2;
+}
+
+static int
+bus_find_device(const struct rte_bus *bus, const void *_dev)
+{
+ struct rte_device *dev;
+
+ dev = bus->find_device(NULL, cmp_rte_device, _dev);
+ return dev == NULL;
+}
+
+struct rte_bus *
+rte_bus_find_by_device(const struct rte_device *dev)
+{
+ return rte_bus_find(NULL, bus_find_device, (const void *)dev);
+}
+
+static int
+cmp_bus_name(const struct rte_bus *bus, const void *_name)
+{
+ const char *name = _name;
+
+ return strcmp(bus->name, name);
+}
+
+struct rte_bus *
+rte_bus_find_by_name(const char *busname)
+{
+ return rte_bus_find(NULL, cmp_bus_name, (const void *)busname);
+}
+
+static int
+bus_can_parse(const struct rte_bus *bus, const void *_name)
+{
+ const char *name = _name;
+
+ return !(bus->parse && bus->parse(name, NULL) == 0);
+}
+
+struct rte_bus *
+rte_bus_find_by_device_name(const char *str)
+{
+ char name[RTE_DEV_NAME_MAX_LEN];
+ char *c;
+
+ strlcpy(name, str, sizeof(name));
+ c = strchr(name, ',');
+ if (c != NULL)
+ c[0] = '\0';
+ return rte_bus_find(NULL, bus_can_parse, name);
+}
+
+
+/*
+ * Get iommu class of devices on the bus.
+ */
+enum rte_iova_mode
+rte_bus_get_iommu_class(void)
+{
+ enum rte_iova_mode mode = RTE_IOVA_DC;
+ bool buses_want_va = false;
+ bool buses_want_pa = false;
+ struct rte_bus *bus;
+
+ TAILQ_FOREACH(bus, &rte_bus_list, next) {
+ enum rte_iova_mode bus_iova_mode;
+
+ if (bus->get_iommu_class == NULL)
+ continue;
+
+ bus_iova_mode = bus->get_iommu_class();
+ RTE_LOG(DEBUG, EAL, "Bus %s wants IOVA as '%s'\n",
+ bus->name,
+ bus_iova_mode == RTE_IOVA_DC ? "DC" :
+ (bus_iova_mode == RTE_IOVA_PA ? "PA" : "VA"));
+ if (bus_iova_mode == RTE_IOVA_PA)
+ buses_want_pa = true;
+ else if (bus_iova_mode == RTE_IOVA_VA)
+ buses_want_va = true;
+ }
+ if (buses_want_va && !buses_want_pa) {
+ mode = RTE_IOVA_VA;
+ } else if (buses_want_pa && !buses_want_va) {
+ mode = RTE_IOVA_PA;
+ } else {
+ mode = RTE_IOVA_DC;
+ if (buses_want_va) {
+ RTE_LOG(WARNING, EAL, "Some buses want 'VA' but forcing 'DC' because other buses want 'PA'.\n");
+ RTE_LOG(WARNING, EAL, "Depending on the final decision by the EAL, not all buses may be able to initialize.\n");
+ }
+ }
+
+ return mode;
+}
+
+static int
+bus_handle_sigbus(const struct rte_bus *bus,
+ const void *failure_addr)
+{
+ int ret;
+
+ if (!bus->sigbus_handler)
+ return -1;
+
+ ret = bus->sigbus_handler(failure_addr);
+
+ /* find bus but handle failed, keep the errno be set. */
+ if (ret < 0 && rte_errno == 0)
+ rte_errno = ENOTSUP;
+
+ return ret > 0;
+}
+
+int
+rte_bus_sigbus_handler(const void *failure_addr)
+{
+ struct rte_bus *bus;
+
+ int ret = 0;
+ int old_errno = rte_errno;
+
+ rte_errno = 0;
+
+ bus = rte_bus_find(NULL, bus_handle_sigbus, failure_addr);
+ /* can not find bus. */
+ if (!bus)
+ return 1;
+ /* find bus but handle failed, pass on the new errno. */
+ else if (rte_errno != 0)
+ return -1;
+
+ /* restore the old errno. */
+ rte_errno = old_errno;
+
+ return ret;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_class.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_class.c
new file mode 100644
index 000000000..0187076af
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_class.c
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaëtan Rivet
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_class.h>
+#include <rte_debug.h>
+
+static struct rte_class_list rte_class_list =
+ TAILQ_HEAD_INITIALIZER(rte_class_list);
+
+void
+rte_class_register(struct rte_class *class)
+{
+ RTE_VERIFY(class);
+ RTE_VERIFY(class->name && strlen(class->name));
+
+ TAILQ_INSERT_TAIL(&rte_class_list, class, next);
+ RTE_LOG(DEBUG, EAL, "Registered [%s] device class.\n", class->name);
+}
+
+void
+rte_class_unregister(struct rte_class *class)
+{
+ TAILQ_REMOVE(&rte_class_list, class, next);
+ RTE_LOG(DEBUG, EAL, "Unregistered [%s] device class.\n", class->name);
+}
+
+struct rte_class *
+rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp,
+ const void *data)
+{
+ struct rte_class *cls;
+
+ if (start != NULL)
+ cls = TAILQ_NEXT(start, next);
+ else
+ cls = TAILQ_FIRST(&rte_class_list);
+ while (cls != NULL) {
+ if (cmp(cls, data) == 0)
+ break;
+ cls = TAILQ_NEXT(cls, next);
+ }
+ return cls;
+}
+
+static int
+cmp_class_name(const struct rte_class *class, const void *_name)
+{
+ const char *name = _name;
+
+ return strcmp(class->name, name);
+}
+
+struct rte_class *
+rte_class_find_by_name(const char *name)
+{
+ return rte_class_find(NULL, cmp_class_name, (const void *)name);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_cpuflags.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_cpuflags.c
new file mode 100644
index 000000000..dc5f75d05
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_cpuflags.c
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_cpuflags.h>
+
+int
+rte_cpu_is_supported(void)
+{
+ /* This is generated at compile-time by the build system */
+ static const enum rte_cpu_flag_t compile_time_flags[] = {
+ RTE_COMPILE_TIME_CPUFLAGS
+ };
+ unsigned count = RTE_DIM(compile_time_flags), i;
+ int ret;
+
+ for (i = 0; i < count; i++) {
+ ret = rte_cpu_get_flag_enabled(compile_time_flags[i]);
+
+ if (ret < 0) {
+ fprintf(stderr,
+ "ERROR: CPU feature flag lookup failed with error %d\n",
+ ret);
+ return 0;
+ }
+ if (!ret) {
+ fprintf(stderr,
+ "ERROR: This system does not support \"%s\".\n"
+ "Please check that RTE_MACHINE is set correctly.\n",
+ rte_cpu_get_flag_name(compile_time_flags[i]));
+ return 0;
+ }
+ }
+
+ return 1;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_dev.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_dev.c
new file mode 100644
index 000000000..9e4f09d83
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_dev.c
@@ -0,0 +1,793 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation.
+ * Copyright(c) 2014 6WIND S.A.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/queue.h>
+
+#include <rte_compat.h>
+#include <rte_bus.h>
+#include <rte_class.h>
+#include <rte_dev.h>
+#include <rte_devargs.h>
+#include <rte_debug.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
+#include <rte_spinlock.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+
+#include "eal_private.h"
+#include "hotplug_mp.h"
+
+/**
+ * The device event callback description.
+ *
+ * It contains callback address to be registered by user application,
+ * the pointer to the parameters for callback, and the device name.
+ */
+struct dev_event_callback {
+ TAILQ_ENTRY(dev_event_callback) next; /**< Callbacks list */
+ rte_dev_event_cb_fn cb_fn; /**< Callback address */
+ void *cb_arg; /**< Callback parameter */
+ char *dev_name; /**< Callback device name, NULL is for all device */
+ uint32_t active; /**< Callback is executing */
+};
+
+/** @internal Structure to keep track of registered callbacks */
+TAILQ_HEAD(dev_event_cb_list, dev_event_callback);
+
+/* The device event callback list for all registered callbacks. */
+static struct dev_event_cb_list dev_event_cbs;
+
+/* spinlock for device callbacks */
+static rte_spinlock_t dev_event_lock = RTE_SPINLOCK_INITIALIZER;
+
+struct dev_next_ctx {
+ struct rte_dev_iterator *it;
+ const char *bus_str;
+ const char *cls_str;
+};
+
+#define CTX(it, bus_str, cls_str) \
+ (&(const struct dev_next_ctx){ \
+ .it = it, \
+ .bus_str = bus_str, \
+ .cls_str = cls_str, \
+ })
+
+#define ITCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->it)
+
+#define BUSCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->bus_str)
+
+#define CLSCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->cls_str)
+
+static int cmp_dev_name(const struct rte_device *dev, const void *_name)
+{
+ const char *name = _name;
+
+ return strcmp(dev->name, name);
+}
+
+int
+rte_dev_is_probed(const struct rte_device *dev)
+{
+ /* The field driver should be set only when the probe is successful. */
+ return dev->driver != NULL;
+}
+
+/* helper function to build devargs, caller should free the memory */
+static int
+build_devargs(const char *busname, const char *devname,
+ const char *drvargs, char **devargs)
+{
+ int length;
+
+ length = snprintf(NULL, 0, "%s:%s,%s", busname, devname, drvargs);
+ if (length < 0)
+ return -EINVAL;
+
+ *devargs = malloc(length + 1);
+ if (*devargs == NULL)
+ return -ENOMEM;
+
+ length = snprintf(*devargs, length + 1, "%s:%s,%s",
+ busname, devname, drvargs);
+ if (length < 0) {
+ free(*devargs);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int
+rte_eal_hotplug_add(const char *busname, const char *devname,
+ const char *drvargs)
+{
+
+ char *devargs;
+ int ret;
+
+ ret = build_devargs(busname, devname, drvargs, &devargs);
+ if (ret != 0)
+ return ret;
+
+ ret = rte_dev_probe(devargs);
+ free(devargs);
+
+ return ret;
+}
+
+/* probe device at local process. */
+int
+local_dev_probe(const char *devargs, struct rte_device **new_dev)
+{
+ struct rte_device *dev;
+ struct rte_devargs *da;
+ int ret;
+
+ *new_dev = NULL;
+ da = calloc(1, sizeof(*da));
+ if (da == NULL)
+ return -ENOMEM;
+
+ ret = rte_devargs_parse(da, devargs);
+ if (ret)
+ goto err_devarg;
+
+ if (da->bus->plug == NULL) {
+ RTE_LOG(ERR, EAL, "Function plug not supported by bus (%s)\n",
+ da->bus->name);
+ ret = -ENOTSUP;
+ goto err_devarg;
+ }
+
+ ret = rte_devargs_insert(&da);
+ if (ret)
+ goto err_devarg;
+
+ /* the rte_devargs will be referenced in the matching rte_device */
+ ret = da->bus->scan();
+ if (ret)
+ goto err_devarg;
+
+ dev = da->bus->find_device(NULL, cmp_dev_name, da->name);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find device (%s)\n",
+ da->name);
+ ret = -ENODEV;
+ goto err_devarg;
+ }
+ /* Since there is a matching device, it is now its responsibility
+ * to manage the devargs we've just inserted. From this point
+ * those devargs shouldn't be removed manually anymore.
+ */
+
+ ret = dev->bus->plug(dev);
+ if (ret > 0)
+ ret = -ENOTSUP;
+
+ if (ret && !rte_dev_is_probed(dev)) { /* if hasn't ever succeeded */
+ RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
+ dev->name);
+ return ret;
+ }
+
+ *new_dev = dev;
+ return ret;
+
+err_devarg:
+ if (rte_devargs_remove(da) != 0) {
+ free(da->args);
+ free(da);
+ }
+ return ret;
+}
+
+int
+rte_dev_probe(const char *devargs)
+{
+ struct eal_dev_mp_req req;
+ struct rte_device *dev;
+ int ret;
+
+ memset(&req, 0, sizeof(req));
+ req.t = EAL_DEV_REQ_TYPE_ATTACH;
+ strlcpy(req.devargs, devargs, EAL_DEV_MP_DEV_ARGS_MAX_LEN);
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /**
+ * If in secondary process, just send IPC request to
+ * primary process.
+ */
+ ret = eal_dev_hotplug_request_to_primary(&req);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to send hotplug request to primary\n");
+ return -ENOMSG;
+ }
+ if (req.result != 0)
+ RTE_LOG(ERR, EAL,
+ "Failed to hotplug add device\n");
+ return req.result;
+ }
+
+ /* attach a shared device from primary start from here: */
+
+ /* primary attach the new device itself. */
+ ret = local_dev_probe(devargs, &dev);
+
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to attach device on primary process\n");
+
+ /**
+ * it is possible that secondary process failed to attached a
+ * device that primary process have during initialization,
+ * so for -EEXIST case, we still need to sync with secondary
+ * process.
+ */
+ if (ret != -EEXIST)
+ return ret;
+ }
+
+ /* primary send attach sync request to secondary. */
+ ret = eal_dev_hotplug_request_to_secondary(&req);
+
+ /* if any communication error, we need to rollback. */
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to send hotplug add request to secondary\n");
+ ret = -ENOMSG;
+ goto rollback;
+ }
+
+ /**
+ * if any secondary failed to attach, we need to consider if rollback
+ * is necessary.
+ */
+ if (req.result != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to attach device on secondary process\n");
+ ret = req.result;
+
+ /* for -EEXIST, we don't need to rollback. */
+ if (ret == -EEXIST)
+ return ret;
+ goto rollback;
+ }
+
+ return 0;
+
+rollback:
+ req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK;
+
+ /* primary send rollback request to secondary. */
+ if (eal_dev_hotplug_request_to_secondary(&req) != 0)
+ RTE_LOG(WARNING, EAL,
+ "Failed to rollback device attach on secondary."
+ "Devices in secondary may not sync with primary\n");
+
+ /* primary rollback itself. */
+ if (local_dev_remove(dev) != 0)
+ RTE_LOG(WARNING, EAL,
+ "Failed to rollback device attach on primary."
+ "Devices in secondary may not sync with primary\n");
+
+ return ret;
+}
+
+int
+rte_eal_hotplug_remove(const char *busname, const char *devname)
+{
+ struct rte_device *dev;
+ struct rte_bus *bus;
+
+ bus = rte_bus_find_by_name(busname);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", busname);
+ return -ENOENT;
+ }
+
+ dev = bus->find_device(NULL, cmp_dev_name, devname);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", devname);
+ return -EINVAL;
+ }
+
+ return rte_dev_remove(dev);
+}
+
+/* remove device at local process. */
+int
+local_dev_remove(struct rte_device *dev)
+{
+ int ret;
+
+ if (dev->bus->unplug == NULL) {
+ RTE_LOG(ERR, EAL, "Function unplug not supported by bus (%s)\n",
+ dev->bus->name);
+ return -ENOTSUP;
+ }
+
+ ret = dev->bus->unplug(dev);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
+ dev->name);
+ return (ret < 0) ? ret : -ENOENT;
+ }
+
+ return 0;
+}
+
+int
+rte_dev_remove(struct rte_device *dev)
+{
+ struct eal_dev_mp_req req;
+ char *devargs;
+ int ret;
+
+ if (!rte_dev_is_probed(dev)) {
+ RTE_LOG(ERR, EAL, "Device is not probed\n");
+ return -ENOENT;
+ }
+
+ ret = build_devargs(dev->bus->name, dev->name, "", &devargs);
+ if (ret != 0)
+ return ret;
+
+ memset(&req, 0, sizeof(req));
+ req.t = EAL_DEV_REQ_TYPE_DETACH;
+ strlcpy(req.devargs, devargs, EAL_DEV_MP_DEV_ARGS_MAX_LEN);
+ free(devargs);
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /**
+ * If in secondary process, just send IPC request to
+ * primary process.
+ */
+ ret = eal_dev_hotplug_request_to_primary(&req);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to send hotplug request to primary\n");
+ return -ENOMSG;
+ }
+ if (req.result != 0)
+ RTE_LOG(ERR, EAL,
+ "Failed to hotplug remove device\n");
+ return req.result;
+ }
+
+ /* detach a device from primary start from here: */
+
+ /* primary send detach sync request to secondary */
+ ret = eal_dev_hotplug_request_to_secondary(&req);
+
+ /**
+ * if communication error, we need to rollback, because it is possible
+ * part of the secondary processes still detached it successfully.
+ */
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to send device detach request to secondary\n");
+ ret = -ENOMSG;
+ goto rollback;
+ }
+
+ /**
+ * if any secondary failed to detach, we need to consider if rollback
+ * is necessary.
+ */
+ if (req.result != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to detach device on secondary process\n");
+ ret = req.result;
+ /**
+ * if -ENOENT, we don't need to rollback, since devices is
+ * already detached on secondary process.
+ */
+ if (ret != -ENOENT)
+ goto rollback;
+ }
+
+ /* primary detach the device itself. */
+ ret = local_dev_remove(dev);
+
+ /* if primary failed, still need to consider if rollback is necessary */
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to detach device on primary process\n");
+ /* if -ENOENT, we don't need to rollback */
+ if (ret == -ENOENT)
+ return ret;
+ goto rollback;
+ }
+
+ return 0;
+
+rollback:
+ req.t = EAL_DEV_REQ_TYPE_DETACH_ROLLBACK;
+
+ /* primary send rollback request to secondary. */
+ if (eal_dev_hotplug_request_to_secondary(&req) != 0)
+ RTE_LOG(WARNING, EAL,
+ "Failed to rollback device detach on secondary."
+ "Devices in secondary may not sync with primary\n");
+
+ return ret;
+}
+
+int
+rte_dev_event_callback_register(const char *device_name,
+ rte_dev_event_cb_fn cb_fn,
+ void *cb_arg)
+{
+ struct dev_event_callback *event_cb;
+ int ret;
+
+ if (!cb_fn)
+ return -EINVAL;
+
+ rte_spinlock_lock(&dev_event_lock);
+
+ if (TAILQ_EMPTY(&dev_event_cbs))
+ TAILQ_INIT(&dev_event_cbs);
+
+ TAILQ_FOREACH(event_cb, &dev_event_cbs, next) {
+ if (event_cb->cb_fn == cb_fn && event_cb->cb_arg == cb_arg) {
+ if (device_name == NULL && event_cb->dev_name == NULL)
+ break;
+ if (device_name == NULL || event_cb->dev_name == NULL)
+ continue;
+ if (!strcmp(event_cb->dev_name, device_name))
+ break;
+ }
+ }
+
+ /* create a new callback. */
+ if (event_cb == NULL) {
+ event_cb = malloc(sizeof(struct dev_event_callback));
+ if (event_cb != NULL) {
+ event_cb->cb_fn = cb_fn;
+ event_cb->cb_arg = cb_arg;
+ event_cb->active = 0;
+ if (!device_name) {
+ event_cb->dev_name = NULL;
+ } else {
+ event_cb->dev_name = strdup(device_name);
+ if (event_cb->dev_name == NULL) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ }
+ TAILQ_INSERT_TAIL(&dev_event_cbs, event_cb, next);
+ } else {
+ RTE_LOG(ERR, EAL,
+ "Failed to allocate memory for device "
+ "event callback.");
+ ret = -ENOMEM;
+ goto error;
+ }
+ } else {
+ RTE_LOG(ERR, EAL,
+ "The callback is already exist, no need "
+ "to register again.\n");
+ ret = -EEXIST;
+ }
+
+ rte_spinlock_unlock(&dev_event_lock);
+ return 0;
+error:
+ free(event_cb);
+ rte_spinlock_unlock(&dev_event_lock);
+ return ret;
+}
+
+int
+rte_dev_event_callback_unregister(const char *device_name,
+ rte_dev_event_cb_fn cb_fn,
+ void *cb_arg)
+{
+ int ret = 0;
+ struct dev_event_callback *event_cb, *next;
+
+ if (!cb_fn)
+ return -EINVAL;
+
+ rte_spinlock_lock(&dev_event_lock);
+ /*walk through the callbacks and remove all that match. */
+ for (event_cb = TAILQ_FIRST(&dev_event_cbs); event_cb != NULL;
+ event_cb = next) {
+
+ next = TAILQ_NEXT(event_cb, next);
+
+ if (device_name != NULL && event_cb->dev_name != NULL) {
+ if (!strcmp(event_cb->dev_name, device_name)) {
+ if (event_cb->cb_fn != cb_fn ||
+ (cb_arg != (void *)-1 &&
+ event_cb->cb_arg != cb_arg))
+ continue;
+ }
+ } else if (device_name != NULL) {
+ continue;
+ }
+
+ /*
+ * if this callback is not executing right now,
+ * then remove it.
+ */
+ if (event_cb->active == 0) {
+ TAILQ_REMOVE(&dev_event_cbs, event_cb, next);
+ free(event_cb);
+ ret++;
+ } else {
+ continue;
+ }
+ }
+ rte_spinlock_unlock(&dev_event_lock);
+ return ret;
+}
+
+void
+rte_dev_event_callback_process(const char *device_name,
+ enum rte_dev_event_type event)
+{
+ struct dev_event_callback *cb_lst;
+
+ if (device_name == NULL)
+ return;
+
+ rte_spinlock_lock(&dev_event_lock);
+
+ TAILQ_FOREACH(cb_lst, &dev_event_cbs, next) {
+ if (cb_lst->dev_name) {
+ if (strcmp(cb_lst->dev_name, device_name))
+ continue;
+ }
+ cb_lst->active = 1;
+ rte_spinlock_unlock(&dev_event_lock);
+ cb_lst->cb_fn(device_name, event,
+ cb_lst->cb_arg);
+ rte_spinlock_lock(&dev_event_lock);
+ cb_lst->active = 0;
+ }
+ rte_spinlock_unlock(&dev_event_lock);
+}
+
+int
+rte_dev_iterator_init(struct rte_dev_iterator *it,
+ const char *dev_str)
+{
+ struct rte_devargs devargs;
+ struct rte_class *cls = NULL;
+ struct rte_bus *bus = NULL;
+
+ /* Having both bus_str and cls_str NULL is illegal,
+ * marking this iterator as invalid unless
+ * everything goes well.
+ */
+ it->bus_str = NULL;
+ it->cls_str = NULL;
+
+ devargs.data = dev_str;
+ if (rte_devargs_layers_parse(&devargs, dev_str))
+ goto get_out;
+
+ bus = devargs.bus;
+ cls = devargs.cls;
+ /* The string should have at least
+ * one layer specified.
+ */
+ if (bus == NULL && cls == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Either bus or class must be specified.\n");
+ rte_errno = EINVAL;
+ goto get_out;
+ }
+ if (bus != NULL && bus->dev_iterate == NULL) {
+ RTE_LOG(ERR, EAL, "Bus %s not supported\n", bus->name);
+ rte_errno = ENOTSUP;
+ goto get_out;
+ }
+ if (cls != NULL && cls->dev_iterate == NULL) {
+ RTE_LOG(ERR, EAL, "Class %s not supported\n", cls->name);
+ rte_errno = ENOTSUP;
+ goto get_out;
+ }
+ it->bus_str = devargs.bus_str;
+ it->cls_str = devargs.cls_str;
+ it->dev_str = dev_str;
+ it->bus = bus;
+ it->cls = cls;
+ it->device = NULL;
+ it->class_device = NULL;
+get_out:
+ return -rte_errno;
+}
+
+static char *
+dev_str_sane_copy(const char *str)
+{
+ size_t end;
+ char *copy;
+
+ end = strcspn(str, ",/");
+ if (str[end] == ',') {
+ copy = strdup(&str[end + 1]);
+ } else {
+ /* '/' or '\0' */
+ copy = strdup("");
+ }
+ if (copy == NULL) {
+ rte_errno = ENOMEM;
+ } else {
+ char *slash;
+
+ slash = strchr(copy, '/');
+ if (slash != NULL)
+ slash[0] = '\0';
+ }
+ return copy;
+}
+
+static int
+class_next_dev_cmp(const struct rte_class *cls,
+ const void *ctx)
+{
+ struct rte_dev_iterator *it;
+ const char *cls_str = NULL;
+ void *dev;
+
+ if (cls->dev_iterate == NULL)
+ return 1;
+ it = ITCTX(ctx);
+ cls_str = CLSCTX(ctx);
+ dev = it->class_device;
+ /* it->cls_str != NULL means a class
+ * was specified in the devstr.
+ */
+ if (it->cls_str != NULL && cls != it->cls)
+ return 1;
+ /* If an error occurred previously,
+ * no need to test further.
+ */
+ if (rte_errno != 0)
+ return -1;
+ dev = cls->dev_iterate(dev, cls_str, it);
+ it->class_device = dev;
+ return dev == NULL;
+}
+
+static int
+bus_next_dev_cmp(const struct rte_bus *bus,
+ const void *ctx)
+{
+ struct rte_device *dev = NULL;
+ struct rte_class *cls = NULL;
+ struct rte_dev_iterator *it;
+ const char *bus_str = NULL;
+
+ if (bus->dev_iterate == NULL)
+ return 1;
+ it = ITCTX(ctx);
+ bus_str = BUSCTX(ctx);
+ dev = it->device;
+ /* it->bus_str != NULL means a bus
+ * was specified in the devstr.
+ */
+ if (it->bus_str != NULL && bus != it->bus)
+ return 1;
+ /* If an error occurred previously,
+ * no need to test further.
+ */
+ if (rte_errno != 0)
+ return -1;
+ if (it->cls_str == NULL) {
+ dev = bus->dev_iterate(dev, bus_str, it);
+ goto end;
+ }
+ /* cls_str != NULL */
+ if (dev == NULL) {
+next_dev_on_bus:
+ dev = bus->dev_iterate(dev, bus_str, it);
+ it->device = dev;
+ }
+ if (dev == NULL)
+ return 1;
+ if (it->cls != NULL)
+ cls = TAILQ_PREV(it->cls, rte_class_list, next);
+ cls = rte_class_find(cls, class_next_dev_cmp, ctx);
+ if (cls != NULL) {
+ it->cls = cls;
+ goto end;
+ }
+ goto next_dev_on_bus;
+end:
+ it->device = dev;
+ return dev == NULL;
+}
+struct rte_device *
+rte_dev_iterator_next(struct rte_dev_iterator *it)
+{
+ struct rte_bus *bus = NULL;
+ int old_errno = rte_errno;
+ char *bus_str = NULL;
+ char *cls_str = NULL;
+
+ rte_errno = 0;
+ if (it->bus_str == NULL && it->cls_str == NULL) {
+ /* Invalid iterator. */
+ rte_errno = EINVAL;
+ return NULL;
+ }
+ if (it->bus != NULL)
+ bus = TAILQ_PREV(it->bus, rte_bus_list, next);
+ if (it->bus_str != NULL) {
+ bus_str = dev_str_sane_copy(it->bus_str);
+ if (bus_str == NULL)
+ goto out;
+ }
+ if (it->cls_str != NULL) {
+ cls_str = dev_str_sane_copy(it->cls_str);
+ if (cls_str == NULL)
+ goto out;
+ }
+ while ((bus = rte_bus_find(bus, bus_next_dev_cmp,
+ CTX(it, bus_str, cls_str)))) {
+ if (it->device != NULL) {
+ it->bus = bus;
+ goto out;
+ }
+ if (it->bus_str != NULL ||
+ rte_errno != 0)
+ break;
+ }
+ if (rte_errno == 0)
+ rte_errno = old_errno;
+out:
+ free(bus_str);
+ free(cls_str);
+ return it->device;
+}
+
+int
+rte_dev_dma_map(struct rte_device *dev, void *addr, uint64_t iova,
+ size_t len)
+{
+ if (dev->bus->dma_map == NULL || len == 0) {
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ /* Memory must be registered through rte_extmem_* APIs */
+ if (rte_mem_virt2memseg_list(addr) == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ return dev->bus->dma_map(dev, addr, iova, len);
+}
+
+int
+rte_dev_dma_unmap(struct rte_device *dev, void *addr, uint64_t iova,
+ size_t len)
+{
+ if (dev->bus->dma_unmap == NULL || len == 0) {
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ /* Memory must be registered through rte_extmem_* APIs */
+ if (rte_mem_virt2memseg_list(addr) == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ return dev->bus->dma_unmap(dev, addr, iova, len);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_devargs.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_devargs.c
new file mode 100644
index 000000000..2123773ef
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_devargs.c
@@ -0,0 +1,403 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2014 6WIND S.A.
+ */
+
+/* This file manages the list of devices and their arguments, as given
+ * by the user at startup
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+
+#include <rte_bus.h>
+#include <rte_class.h>
+#include <rte_compat.h>
+#include <rte_dev.h>
+#include <rte_devargs.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
+#include <rte_tailq.h>
+#include "eal_private.h"
+
+/** user device double-linked queue type definition */
+TAILQ_HEAD(rte_devargs_list, rte_devargs);
+
+/** Global list of user devices */
+static struct rte_devargs_list devargs_list =
+ TAILQ_HEAD_INITIALIZER(devargs_list);
+
+static size_t
+devargs_layer_count(const char *s)
+{
+ size_t i = s ? 1 : 0;
+
+ while (s != NULL && s[0] != '\0') {
+ i += s[0] == '/';
+ s++;
+ }
+ return i;
+}
+
+int
+rte_devargs_layers_parse(struct rte_devargs *devargs,
+ const char *devstr)
+{
+ struct {
+ const char *key;
+ const char *str;
+ struct rte_kvargs *kvlist;
+ } layers[] = {
+ { "bus=", NULL, NULL, },
+ { "class=", NULL, NULL, },
+ { "driver=", NULL, NULL, },
+ };
+ struct rte_kvargs_pair *kv = NULL;
+ struct rte_class *cls = NULL;
+ struct rte_bus *bus = NULL;
+ const char *s = devstr;
+ size_t nblayer;
+ size_t i = 0;
+ int ret = 0;
+
+ /* Split each sub-lists. */
+ nblayer = devargs_layer_count(devstr);
+ if (nblayer > RTE_DIM(layers)) {
+ RTE_LOG(ERR, EAL, "Invalid format: too many layers (%zu)\n",
+ nblayer);
+ ret = -E2BIG;
+ goto get_out;
+ }
+
+ /* If the devargs points the devstr
+ * as source data, then it should not allocate
+ * anything and keep referring only to it.
+ */
+ if (devargs->data != devstr) {
+ devargs->data = strdup(devstr);
+ if (devargs->data == NULL) {
+ RTE_LOG(ERR, EAL, "OOM\n");
+ ret = -ENOMEM;
+ goto get_out;
+ }
+ s = devargs->data;
+ }
+
+ while (s != NULL) {
+ if (i >= RTE_DIM(layers)) {
+ RTE_LOG(ERR, EAL, "Unrecognized layer %s\n", s);
+ ret = -EINVAL;
+ goto get_out;
+ }
+ /*
+ * The last layer is free-form.
+ * The "driver" key is not required (but accepted).
+ */
+ if (strncmp(layers[i].key, s, strlen(layers[i].key)) &&
+ i != RTE_DIM(layers) - 1)
+ goto next_layer;
+ layers[i].str = s;
+ layers[i].kvlist = rte_kvargs_parse_delim(s, NULL, "/");
+ if (layers[i].kvlist == NULL) {
+ RTE_LOG(ERR, EAL, "Could not parse %s\n", s);
+ ret = -EINVAL;
+ goto get_out;
+ }
+ s = strchr(s, '/');
+ if (s != NULL)
+ s++;
+next_layer:
+ i++;
+ }
+
+ /* Parse each sub-list. */
+ for (i = 0; i < RTE_DIM(layers); i++) {
+ if (layers[i].kvlist == NULL)
+ continue;
+ kv = &layers[i].kvlist->pairs[0];
+ if (strcmp(kv->key, "bus") == 0) {
+ bus = rte_bus_find_by_name(kv->value);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Could not find bus \"%s\"\n",
+ kv->value);
+ ret = -EFAULT;
+ goto get_out;
+ }
+ } else if (strcmp(kv->key, "class") == 0) {
+ cls = rte_class_find_by_name(kv->value);
+ if (cls == NULL) {
+ RTE_LOG(ERR, EAL, "Could not find class \"%s\"\n",
+ kv->value);
+ ret = -EFAULT;
+ goto get_out;
+ }
+ } else if (strcmp(kv->key, "driver") == 0) {
+ /* Ignore */
+ continue;
+ }
+ }
+
+ /* Fill devargs fields. */
+ devargs->bus_str = layers[0].str;
+ devargs->cls_str = layers[1].str;
+ devargs->drv_str = layers[2].str;
+ devargs->bus = bus;
+ devargs->cls = cls;
+
+ /* If we own the data, clean up a bit
+ * the several layers string, to ease
+ * their parsing afterward.
+ */
+ if (devargs->data != devstr) {
+ char *s = (void *)(intptr_t)(devargs->data);
+
+ while ((s = strchr(s, '/'))) {
+ *s = '\0';
+ s++;
+ }
+ }
+
+get_out:
+ for (i = 0; i < RTE_DIM(layers); i++) {
+ if (layers[i].kvlist)
+ rte_kvargs_free(layers[i].kvlist);
+ }
+ if (ret != 0)
+ rte_errno = -ret;
+ return ret;
+}
+
+static int
+bus_name_cmp(const struct rte_bus *bus, const void *name)
+{
+ return strncmp(bus->name, name, strlen(bus->name));
+}
+
+int
+rte_devargs_parse(struct rte_devargs *da, const char *dev)
+{
+ struct rte_bus *bus = NULL;
+ const char *devname;
+ const size_t maxlen = sizeof(da->name);
+ size_t i;
+
+ if (da == NULL)
+ return -EINVAL;
+
+ /* Retrieve eventual bus info */
+ do {
+ devname = dev;
+ bus = rte_bus_find(bus, bus_name_cmp, dev);
+ if (bus == NULL)
+ break;
+ devname = dev + strlen(bus->name) + 1;
+ if (rte_bus_find_by_device_name(devname) == bus)
+ break;
+ } while (1);
+ /* Store device name */
+ i = 0;
+ while (devname[i] != '\0' && devname[i] != ',') {
+ da->name[i] = devname[i];
+ i++;
+ if (i == maxlen) {
+ RTE_LOG(WARNING, EAL, "Parsing \"%s\": device name should be shorter than %zu\n",
+ dev, maxlen);
+ da->name[i - 1] = '\0';
+ return -EINVAL;
+ }
+ }
+ da->name[i] = '\0';
+ if (bus == NULL) {
+ bus = rte_bus_find_by_device_name(da->name);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "failed to parse device \"%s\"\n",
+ da->name);
+ return -EFAULT;
+ }
+ }
+ da->bus = bus;
+ /* Parse eventual device arguments */
+ if (devname[i] == ',')
+ da->args = strdup(&devname[i + 1]);
+ else
+ da->args = strdup("");
+ if (da->args == NULL) {
+ RTE_LOG(ERR, EAL, "not enough memory to parse arguments\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+int
+rte_devargs_parsef(struct rte_devargs *da, const char *format, ...)
+{
+ va_list ap;
+ size_t len;
+ char *dev;
+ int ret;
+
+ if (da == NULL)
+ return -EINVAL;
+
+ va_start(ap, format);
+ len = vsnprintf(NULL, 0, format, ap);
+ va_end(ap);
+
+ dev = calloc(1, len + 1);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "not enough memory to parse device\n");
+ return -ENOMEM;
+ }
+
+ va_start(ap, format);
+ vsnprintf(dev, len + 1, format, ap);
+ va_end(ap);
+
+ ret = rte_devargs_parse(da, dev);
+
+ free(dev);
+ return ret;
+}
+
+int
+rte_devargs_insert(struct rte_devargs **da)
+{
+ struct rte_devargs *listed_da;
+ void *tmp;
+
+ if (*da == NULL || (*da)->bus == NULL)
+ return -1;
+
+ TAILQ_FOREACH_SAFE(listed_da, &devargs_list, next, tmp) {
+ if (listed_da == *da)
+ /* devargs already in the list */
+ return 0;
+ if (strcmp(listed_da->bus->name, (*da)->bus->name) == 0 &&
+ strcmp(listed_da->name, (*da)->name) == 0) {
+ /* device already in devargs list, must be updated */
+ listed_da->type = (*da)->type;
+ listed_da->policy = (*da)->policy;
+ free(listed_da->args);
+ listed_da->args = (*da)->args;
+ listed_da->bus = (*da)->bus;
+ listed_da->cls = (*da)->cls;
+ listed_da->bus_str = (*da)->bus_str;
+ listed_da->cls_str = (*da)->cls_str;
+ listed_da->data = (*da)->data;
+ /* replace provided devargs with found one */
+ free(*da);
+ *da = listed_da;
+ return 0;
+ }
+ }
+ /* new device in the list */
+ TAILQ_INSERT_TAIL(&devargs_list, *da, next);
+ return 0;
+}
+
+/* store a whitelist parameter for later parsing */
+int
+rte_devargs_add(enum rte_devtype devtype, const char *devargs_str)
+{
+ struct rte_devargs *devargs = NULL;
+ struct rte_bus *bus = NULL;
+ const char *dev = devargs_str;
+
+ /* use calloc instead of rte_zmalloc as it's called early at init */
+ devargs = calloc(1, sizeof(*devargs));
+ if (devargs == NULL)
+ goto fail;
+
+ if (rte_devargs_parse(devargs, dev))
+ goto fail;
+ devargs->type = devtype;
+ bus = devargs->bus;
+ if (devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI)
+ devargs->policy = RTE_DEV_BLACKLISTED;
+ if (bus->conf.scan_mode == RTE_BUS_SCAN_UNDEFINED) {
+ if (devargs->policy == RTE_DEV_WHITELISTED)
+ bus->conf.scan_mode = RTE_BUS_SCAN_WHITELIST;
+ else if (devargs->policy == RTE_DEV_BLACKLISTED)
+ bus->conf.scan_mode = RTE_BUS_SCAN_BLACKLIST;
+ }
+ TAILQ_INSERT_TAIL(&devargs_list, devargs, next);
+ return 0;
+
+fail:
+ if (devargs) {
+ free(devargs->args);
+ free(devargs);
+ }
+
+ return -1;
+}
+
+int
+rte_devargs_remove(struct rte_devargs *devargs)
+{
+ struct rte_devargs *d;
+ void *tmp;
+
+ if (devargs == NULL || devargs->bus == NULL)
+ return -1;
+
+ TAILQ_FOREACH_SAFE(d, &devargs_list, next, tmp) {
+ if (strcmp(d->bus->name, devargs->bus->name) == 0 &&
+ strcmp(d->name, devargs->name) == 0) {
+ TAILQ_REMOVE(&devargs_list, d, next);
+ free(d->args);
+ free(d);
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/* count the number of devices of a specified type */
+unsigned int
+rte_devargs_type_count(enum rte_devtype devtype)
+{
+ struct rte_devargs *devargs;
+ unsigned int count = 0;
+
+ TAILQ_FOREACH(devargs, &devargs_list, next) {
+ if (devargs->type != devtype)
+ continue;
+ count++;
+ }
+ return count;
+}
+
+/* dump the user devices on the console */
+void
+rte_devargs_dump(FILE *f)
+{
+ struct rte_devargs *devargs;
+
+ fprintf(f, "User device list:\n");
+ TAILQ_FOREACH(devargs, &devargs_list, next) {
+ fprintf(f, " [%s]: %s %s\n",
+ (devargs->bus ? devargs->bus->name : "??"),
+ devargs->name, devargs->args);
+ }
+}
+
+/* bus-aware rte_devargs iterator. */
+struct rte_devargs *
+rte_devargs_next(const char *busname, const struct rte_devargs *start)
+{
+ struct rte_devargs *da;
+
+ if (start != NULL)
+ da = TAILQ_NEXT(start, next);
+ else
+ da = TAILQ_FIRST(&devargs_list);
+ while (da != NULL) {
+ if (busname == NULL ||
+ (strcmp(busname, da->bus->name) == 0))
+ return da;
+ da = TAILQ_NEXT(da, next);
+ }
+ return NULL;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_errno.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_errno.c
new file mode 100644
index 000000000..2a10fb823
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_errno.c
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+/* Use XSI-compliant portable version of strerror_r() */
+#undef _GNU_SOURCE
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+
+#include <rte_per_lcore.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+
+RTE_DEFINE_PER_LCORE(int, _rte_errno);
+
+const char *
+rte_strerror(int errnum)
+{
+ /* BSD puts a colon in the "unknown error" messages, Linux doesn't */
+#ifdef RTE_EXEC_ENV_FREEBSD
+ static const char *sep = ":";
+#else
+ static const char *sep = "";
+#endif
+#define RETVAL_SZ 256
+ static RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
+ char *ret = RTE_PER_LCORE(retval);
+
+ /* since some implementations of strerror_r throw an error
+ * themselves if errnum is too big, we handle that case here */
+ if (errnum >= RTE_MAX_ERRNO)
+ snprintf(ret, RETVAL_SZ, "Unknown error%s %d", sep, errnum);
+ else
+ switch (errnum){
+ case E_RTE_SECONDARY:
+ return "Invalid call in secondary process";
+ case E_RTE_NO_CONFIG:
+ return "Missing rte_config structure";
+ default:
+ if (strerror_r(errnum, ret, RETVAL_SZ) != 0)
+ snprintf(ret, RETVAL_SZ, "Unknown error%s %d",
+ sep, errnum);
+ }
+
+ return ret;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_fbarray.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_fbarray.c
new file mode 100644
index 000000000..4f8f1af73
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_fbarray.c
@@ -0,0 +1,1510 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <sys/mman.h>
+#include <stdint.h>
+#include <errno.h>
+#include <sys/file.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_errno.h>
+#include <rte_spinlock.h>
+#include <rte_tailq.h>
+
+#include "eal_filesystem.h"
+#include "eal_private.h"
+
+#include "rte_fbarray.h"
+
+#define MASK_SHIFT 6ULL
+#define MASK_ALIGN (1ULL << MASK_SHIFT)
+#define MASK_LEN_TO_IDX(x) ((x) >> MASK_SHIFT)
+#define MASK_LEN_TO_MOD(x) ((x) - RTE_ALIGN_FLOOR(x, MASK_ALIGN))
+#define MASK_GET_IDX(idx, mod) ((idx << MASK_SHIFT) + mod)
+
+/*
+ * We use this to keep track of created/attached memory areas to prevent user
+ * errors in API usage.
+ */
+struct mem_area {
+ TAILQ_ENTRY(mem_area) next;
+ void *addr;
+ size_t len;
+ int fd;
+};
+TAILQ_HEAD(mem_area_head, mem_area);
+/* local per-process tailq */
+static struct mem_area_head mem_area_tailq =
+ TAILQ_HEAD_INITIALIZER(mem_area_tailq);
+static rte_spinlock_t mem_area_lock = RTE_SPINLOCK_INITIALIZER;
+
+/*
+ * This is a mask that is always stored at the end of array, to provide fast
+ * way of finding free/used spots without looping through each element.
+ */
+
+struct used_mask {
+ unsigned int n_masks;
+ uint64_t data[];
+};
+
+static size_t
+calc_mask_size(unsigned int len)
+{
+ /* mask must be multiple of MASK_ALIGN, even though length of array
+ * itself may not be aligned on that boundary.
+ */
+ len = RTE_ALIGN_CEIL(len, MASK_ALIGN);
+ return sizeof(struct used_mask) +
+ sizeof(uint64_t) * MASK_LEN_TO_IDX(len);
+}
+
+static size_t
+calc_data_size(size_t page_sz, unsigned int elt_sz, unsigned int len)
+{
+ size_t data_sz = elt_sz * len;
+ size_t msk_sz = calc_mask_size(len);
+ return RTE_ALIGN_CEIL(data_sz + msk_sz, page_sz);
+}
+
+static struct used_mask *
+get_used_mask(void *data, unsigned int elt_sz, unsigned int len)
+{
+ return (struct used_mask *) RTE_PTR_ADD(data, elt_sz * len);
+}
+
+static int
+resize_and_map(int fd, void *addr, size_t len)
+{
+ char path[PATH_MAX];
+ void *map_addr;
+
+ if (ftruncate(fd, len)) {
+ RTE_LOG(ERR, EAL, "Cannot truncate %s\n", path);
+ /* pass errno up the chain */
+ rte_errno = errno;
+ return -1;
+ }
+
+ map_addr = mmap(addr, len, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, fd, 0);
+ if (map_addr != addr) {
+ RTE_LOG(ERR, EAL, "mmap() failed: %s\n", strerror(errno));
+ /* pass errno up the chain */
+ rte_errno = errno;
+ return -1;
+ }
+ return 0;
+}
+
+static int
+overlap(const struct mem_area *ma, const void *start, size_t len)
+{
+ const void *end = RTE_PTR_ADD(start, len);
+ const void *ma_start = ma->addr;
+ const void *ma_end = RTE_PTR_ADD(ma->addr, ma->len);
+
+ /* start overlap? */
+ if (start >= ma_start && start < ma_end)
+ return 1;
+ /* end overlap? */
+ if (end >= ma_start && end < ma_end)
+ return 1;
+ return 0;
+}
+
+static int
+find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int msk_idx, lookahead_idx, first, first_mod;
+ unsigned int last, last_mod;
+ uint64_t last_msk, ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing ctz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ ignore_msk = ~((1ULL << first_mod) - 1);
+
+ /* array length may not be aligned, so calculate ignore mask for last
+ * mask index.
+ */
+ last = MASK_LEN_TO_IDX(arr->len);
+ last_mod = MASK_LEN_TO_MOD(arr->len);
+ last_msk = ~(-1ULL << last_mod);
+
+ for (msk_idx = first; msk_idx < msk->n_masks; msk_idx++) {
+ uint64_t cur_msk, lookahead_msk;
+ unsigned int run_start, clz, left;
+ bool found = false;
+ /*
+ * The process of getting n consecutive bits for arbitrary n is
+ * a bit involved, but here it is in a nutshell:
+ *
+ * 1. let n be the number of consecutive bits we're looking for
+ * 2. check if n can fit in one mask, and if so, do n-1
+ * rshift-ands to see if there is an appropriate run inside
+ * our current mask
+ * 2a. if we found a run, bail out early
+ * 2b. if we didn't find a run, proceed
+ * 3. invert the mask and count leading zeroes (that is, count
+ * how many consecutive set bits we had starting from the
+ * end of current mask) as k
+ * 3a. if k is 0, continue to next mask
+ * 3b. if k is not 0, we have a potential run
+ * 4. to satisfy our requirements, next mask must have n-k
+ * consecutive set bits right at the start, so we will do
+ * (n-k-1) rshift-ands and check if first bit is set.
+ *
+ * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
+ * we either run out of masks, lose the run, or find what we
+ * were looking for.
+ */
+ cur_msk = msk->data[msk_idx];
+ left = n;
+
+ /* if we're looking for free spaces, invert the mask */
+ if (!used)
+ cur_msk = ~cur_msk;
+
+ /* combine current ignore mask with last index ignore mask */
+ if (msk_idx == last)
+ ignore_msk |= last_msk;
+
+ /* if we have an ignore mask, ignore once */
+ if (ignore_msk) {
+ cur_msk &= ignore_msk;
+ ignore_msk = 0;
+ }
+
+ /* if n can fit in within a single mask, do a search */
+ if (n <= MASK_ALIGN) {
+ uint64_t tmp_msk = cur_msk;
+ unsigned int s_idx;
+ for (s_idx = 0; s_idx < n - 1; s_idx++)
+ tmp_msk &= tmp_msk >> 1ULL;
+ /* we found what we were looking for */
+ if (tmp_msk != 0) {
+ run_start = __builtin_ctzll(tmp_msk);
+ return MASK_GET_IDX(msk_idx, run_start);
+ }
+ }
+
+ /*
+ * we didn't find our run within the mask, or n > MASK_ALIGN,
+ * so we're going for plan B.
+ */
+
+ /* count leading zeroes on inverted mask */
+ if (~cur_msk == 0)
+ clz = sizeof(cur_msk) * 8;
+ else
+ clz = __builtin_clzll(~cur_msk);
+
+ /* if there aren't any runs at the end either, just continue */
+ if (clz == 0)
+ continue;
+
+ /* we have a partial run at the end, so try looking ahead */
+ run_start = MASK_ALIGN - clz;
+ left -= clz;
+
+ for (lookahead_idx = msk_idx + 1; lookahead_idx < msk->n_masks;
+ lookahead_idx++) {
+ unsigned int s_idx, need;
+ lookahead_msk = msk->data[lookahead_idx];
+
+ /* if we're looking for free space, invert the mask */
+ if (!used)
+ lookahead_msk = ~lookahead_msk;
+
+ /* figure out how many consecutive bits we need here */
+ need = RTE_MIN(left, MASK_ALIGN);
+
+ for (s_idx = 0; s_idx < need - 1; s_idx++)
+ lookahead_msk &= lookahead_msk >> 1ULL;
+
+ /* if first bit is not set, we've lost the run */
+ if ((lookahead_msk & 1) == 0) {
+ /*
+ * we've scanned this far, so we know there are
+ * no runs in the space we've lookahead-scanned
+ * as well, so skip that on next iteration.
+ */
+ ignore_msk = ~((1ULL << need) - 1);
+ msk_idx = lookahead_idx;
+ break;
+ }
+
+ left -= need;
+
+ /* check if we've found what we were looking for */
+ if (left == 0) {
+ found = true;
+ break;
+ }
+ }
+
+ /* we didn't find anything, so continue */
+ if (!found)
+ continue;
+
+ return MASK_GET_IDX(msk_idx, run_start);
+ }
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
+
+static int
+find_next(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ unsigned int last, last_mod;
+ uint64_t last_msk, ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing ctz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ ignore_msk = ~((1ULL << first_mod) - 1ULL);
+
+ /* array length may not be aligned, so calculate ignore mask for last
+ * mask index.
+ */
+ last = MASK_LEN_TO_IDX(arr->len);
+ last_mod = MASK_LEN_TO_MOD(arr->len);
+ last_msk = ~(-(1ULL) << last_mod);
+
+ for (idx = first; idx < msk->n_masks; idx++) {
+ uint64_t cur = msk->data[idx];
+ int found;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ if (idx == last)
+ cur &= last_msk;
+
+ /* ignore everything before start on first iteration */
+ if (idx == first)
+ cur &= ignore_msk;
+
+ /* check if we have any entries */
+ if (cur == 0)
+ continue;
+
+ /*
+ * find first set bit - that will correspond to whatever it is
+ * that we're looking for.
+ */
+ found = __builtin_ctzll(cur);
+ return MASK_GET_IDX(idx, found);
+ }
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
+
+static int
+find_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ unsigned int last, last_mod;
+ uint64_t last_msk;
+ unsigned int need_len, result = 0;
+
+ /* array length may not be aligned, so calculate ignore mask for last
+ * mask index.
+ */
+ last = MASK_LEN_TO_IDX(arr->len);
+ last_mod = MASK_LEN_TO_MOD(arr->len);
+ last_msk = ~(-(1ULL) << last_mod);
+
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ for (idx = first; idx < msk->n_masks; idx++, result += need_len) {
+ uint64_t cur = msk->data[idx];
+ unsigned int run_len;
+
+ need_len = MASK_ALIGN;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* if this is last mask, ignore everything after last bit */
+ if (idx == last)
+ cur &= last_msk;
+
+ /* ignore everything before start on first iteration */
+ if (idx == first) {
+ cur >>= first_mod;
+ /* at the start, we don't need the full mask len */
+ need_len -= first_mod;
+ }
+
+ /* we will be looking for zeroes, so invert the mask */
+ cur = ~cur;
+
+ /* if mask is zero, we have a complete run */
+ if (cur == 0)
+ continue;
+
+ /*
+ * see if current run ends before mask end.
+ */
+ run_len = __builtin_ctzll(cur);
+
+ /* add however many zeroes we've had in the last run and quit */
+ if (run_len < need_len) {
+ result += run_len;
+ break;
+ }
+ }
+ return result;
+}
+
+static int
+find_prev_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int msk_idx, lookbehind_idx, first, first_mod;
+ uint64_t ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing ctz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ /* we're going backwards, so mask must start from the top */
+ ignore_msk = first_mod == MASK_ALIGN - 1 ?
+ -1ULL : /* prevent overflow */
+ ~(-1ULL << (first_mod + 1));
+
+ /* go backwards, include zero */
+ msk_idx = first;
+ do {
+ uint64_t cur_msk, lookbehind_msk;
+ unsigned int run_start, run_end, ctz, left;
+ bool found = false;
+ /*
+ * The process of getting n consecutive bits from the top for
+ * arbitrary n is a bit involved, but here it is in a nutshell:
+ *
+ * 1. let n be the number of consecutive bits we're looking for
+ * 2. check if n can fit in one mask, and if so, do n-1
+ * lshift-ands to see if there is an appropriate run inside
+ * our current mask
+ * 2a. if we found a run, bail out early
+ * 2b. if we didn't find a run, proceed
+ * 3. invert the mask and count trailing zeroes (that is, count
+ * how many consecutive set bits we had starting from the
+ * start of current mask) as k
+ * 3a. if k is 0, continue to next mask
+ * 3b. if k is not 0, we have a potential run
+ * 4. to satisfy our requirements, next mask must have n-k
+ * consecutive set bits at the end, so we will do (n-k-1)
+ * lshift-ands and check if last bit is set.
+ *
+ * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
+ * we either run out of masks, lose the run, or find what we
+ * were looking for.
+ */
+ cur_msk = msk->data[msk_idx];
+ left = n;
+
+ /* if we're looking for free spaces, invert the mask */
+ if (!used)
+ cur_msk = ~cur_msk;
+
+ /* if we have an ignore mask, ignore once */
+ if (ignore_msk) {
+ cur_msk &= ignore_msk;
+ ignore_msk = 0;
+ }
+
+ /* if n can fit in within a single mask, do a search */
+ if (n <= MASK_ALIGN) {
+ uint64_t tmp_msk = cur_msk;
+ unsigned int s_idx;
+ for (s_idx = 0; s_idx < n - 1; s_idx++)
+ tmp_msk &= tmp_msk << 1ULL;
+ /* we found what we were looking for */
+ if (tmp_msk != 0) {
+ /* clz will give us offset from end of mask, and
+ * we only get the end of our run, not start,
+ * so adjust result to point to where start
+ * would have been.
+ */
+ run_start = MASK_ALIGN -
+ __builtin_clzll(tmp_msk) - n;
+ return MASK_GET_IDX(msk_idx, run_start);
+ }
+ }
+
+ /*
+ * we didn't find our run within the mask, or n > MASK_ALIGN,
+ * so we're going for plan B.
+ */
+
+ /* count trailing zeroes on inverted mask */
+ if (~cur_msk == 0)
+ ctz = sizeof(cur_msk) * 8;
+ else
+ ctz = __builtin_ctzll(~cur_msk);
+
+ /* if there aren't any runs at the start either, just
+ * continue
+ */
+ if (ctz == 0)
+ continue;
+
+ /* we have a partial run at the start, so try looking behind */
+ run_end = MASK_GET_IDX(msk_idx, ctz);
+ left -= ctz;
+
+ /* go backwards, include zero */
+ lookbehind_idx = msk_idx - 1;
+
+ /* we can't lookbehind as we've run out of masks, so stop */
+ if (msk_idx == 0)
+ break;
+
+ do {
+ const uint64_t last_bit = 1ULL << (MASK_ALIGN - 1);
+ unsigned int s_idx, need;
+
+ lookbehind_msk = msk->data[lookbehind_idx];
+
+ /* if we're looking for free space, invert the mask */
+ if (!used)
+ lookbehind_msk = ~lookbehind_msk;
+
+ /* figure out how many consecutive bits we need here */
+ need = RTE_MIN(left, MASK_ALIGN);
+
+ for (s_idx = 0; s_idx < need - 1; s_idx++)
+ lookbehind_msk &= lookbehind_msk << 1ULL;
+
+ /* if last bit is not set, we've lost the run */
+ if ((lookbehind_msk & last_bit) == 0) {
+ /*
+ * we've scanned this far, so we know there are
+ * no runs in the space we've lookbehind-scanned
+ * as well, so skip that on next iteration.
+ */
+ ignore_msk = -1ULL << need;
+ msk_idx = lookbehind_idx;
+ break;
+ }
+
+ left -= need;
+
+ /* check if we've found what we were looking for */
+ if (left == 0) {
+ found = true;
+ break;
+ }
+ } while ((lookbehind_idx--) != 0); /* decrement after check to
+ * include zero
+ */
+
+ /* we didn't find anything, so continue */
+ if (!found)
+ continue;
+
+ /* we've found what we were looking for, but we only know where
+ * the run ended, so calculate start position.
+ */
+ return run_end - n;
+ } while (msk_idx-- != 0); /* decrement after check to include zero */
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
+
+static int
+find_prev(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ uint64_t ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing clz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ /* we're going backwards, so mask must start from the top */
+ ignore_msk = first_mod == MASK_ALIGN - 1 ?
+ -1ULL : /* prevent overflow */
+ ~(-1ULL << (first_mod + 1));
+
+ /* go backwards, include zero */
+ idx = first;
+ do {
+ uint64_t cur = msk->data[idx];
+ int found;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* ignore everything before start on first iteration */
+ if (idx == first)
+ cur &= ignore_msk;
+
+ /* check if we have any entries */
+ if (cur == 0)
+ continue;
+
+ /*
+ * find last set bit - that will correspond to whatever it is
+ * that we're looking for. we're counting trailing zeroes, thus
+ * the value we get is counted from end of mask, so calculate
+ * position from start of mask.
+ */
+ found = MASK_ALIGN - __builtin_clzll(cur) - 1;
+
+ return MASK_GET_IDX(idx, found);
+ } while (idx-- != 0); /* decrement after check to include zero*/
+
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
+
+static int
+find_rev_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ unsigned int need_len, result = 0;
+
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+
+ /* go backwards, include zero */
+ idx = first;
+ do {
+ uint64_t cur = msk->data[idx];
+ unsigned int run_len;
+
+ need_len = MASK_ALIGN;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* ignore everything after start on first iteration */
+ if (idx == first) {
+ unsigned int end_len = MASK_ALIGN - first_mod - 1;
+ cur <<= end_len;
+ /* at the start, we don't need the full mask len */
+ need_len -= end_len;
+ }
+
+ /* we will be looking for zeroes, so invert the mask */
+ cur = ~cur;
+
+ /* if mask is zero, we have a complete run */
+ if (cur == 0)
+ goto endloop;
+
+ /*
+ * see where run ends, starting from the end.
+ */
+ run_len = __builtin_clzll(cur);
+
+ /* add however many zeroes we've had in the last run and quit */
+ if (run_len < need_len) {
+ result += run_len;
+ break;
+ }
+endloop:
+ result += need_len;
+ } while (idx-- != 0); /* decrement after check to include zero */
+ return result;
+}
+
+static int
+set_used(struct rte_fbarray *arr, unsigned int idx, bool used)
+{
+ struct used_mask *msk;
+ uint64_t msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);
+ unsigned int msk_idx = MASK_LEN_TO_IDX(idx);
+ bool already_used;
+ int ret = -1;
+
+ if (arr == NULL || idx >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
+ ret = 0;
+
+ /* prevent array from changing under us */
+ rte_rwlock_write_lock(&arr->rwlock);
+
+ already_used = (msk->data[msk_idx] & msk_bit) != 0;
+
+ /* nothing to be done */
+ if (used == already_used)
+ goto out;
+
+ if (used) {
+ msk->data[msk_idx] |= msk_bit;
+ arr->count++;
+ } else {
+ msk->data[msk_idx] &= ~msk_bit;
+ arr->count--;
+ }
+out:
+ rte_rwlock_write_unlock(&arr->rwlock);
+
+ return ret;
+}
+
+static int
+fully_validate(const char *name, unsigned int elt_sz, unsigned int len)
+{
+ if (name == NULL || elt_sz == 0 || len == 0 || len > INT_MAX) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ if (strnlen(name, RTE_FBARRAY_NAME_LEN) == RTE_FBARRAY_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ return 0;
+}
+
+int
+rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
+ unsigned int elt_sz)
+{
+ size_t page_sz, mmap_len;
+ char path[PATH_MAX];
+ struct used_mask *msk;
+ struct mem_area *ma = NULL;
+ void *data = NULL;
+ int fd = -1;
+
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ if (fully_validate(name, elt_sz, len))
+ return -1;
+
+ /* allocate mem area before doing anything */
+ ma = malloc(sizeof(*ma));
+ if (ma == NULL) {
+ rte_errno = ENOMEM;
+ return -1;
+ }
+
+ page_sz = sysconf(_SC_PAGESIZE);
+ if (page_sz == (size_t)-1) {
+ free(ma);
+ return -1;
+ }
+
+ /* calculate our memory limits */
+ mmap_len = calc_data_size(page_sz, elt_sz, len);
+
+ data = eal_get_virtual_area(NULL, &mmap_len, page_sz, 0, 0);
+ if (data == NULL) {
+ free(ma);
+ return -1;
+ }
+
+ rte_spinlock_lock(&mem_area_lock);
+
+ fd = -1;
+
+ if (internal_config.no_shconf) {
+ /* remap virtual area as writable */
+ void *new_data = mmap(data, mmap_len, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, fd, 0);
+ if (new_data == MAP_FAILED) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n",
+ __func__, strerror(errno));
+ goto fail;
+ }
+ } else {
+ eal_get_fbarray_path(path, sizeof(path), name);
+
+ /*
+ * Each fbarray is unique to process namespace, i.e. the
+ * filename depends on process prefix. Try to take out a lock
+ * and see if we succeed. If we don't, someone else is using it
+ * already.
+ */
+ fd = open(path, O_CREAT | O_RDWR, 0600);
+ if (fd < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n",
+ __func__, path, strerror(errno));
+ rte_errno = errno;
+ goto fail;
+ } else if (flock(fd, LOCK_EX | LOCK_NB)) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n",
+ __func__, path, strerror(errno));
+ rte_errno = EBUSY;
+ goto fail;
+ }
+
+ /* take out a non-exclusive lock, so that other processes could
+ * still attach to it, but no other process could reinitialize
+ * it.
+ */
+ if (flock(fd, LOCK_SH | LOCK_NB)) {
+ rte_errno = errno;
+ goto fail;
+ }
+
+ if (resize_and_map(fd, data, mmap_len))
+ goto fail;
+ }
+ ma->addr = data;
+ ma->len = mmap_len;
+ ma->fd = fd;
+
+ /* do not close fd - keep it until detach/destroy */
+ TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next);
+
+ /* initialize the data */
+ memset(data, 0, mmap_len);
+
+ /* populate data structure */
+ strlcpy(arr->name, name, sizeof(arr->name));
+ arr->data = data;
+ arr->len = len;
+ arr->elt_sz = elt_sz;
+ arr->count = 0;
+
+ msk = get_used_mask(data, elt_sz, len);
+ msk->n_masks = MASK_LEN_TO_IDX(RTE_ALIGN_CEIL(len, MASK_ALIGN));
+
+ rte_rwlock_init(&arr->rwlock);
+
+ rte_spinlock_unlock(&mem_area_lock);
+
+ return 0;
+fail:
+ if (data)
+ munmap(data, mmap_len);
+ if (fd >= 0)
+ close(fd);
+ free(ma);
+
+ rte_spinlock_unlock(&mem_area_lock);
+ return -1;
+}
+
+int
+rte_fbarray_attach(struct rte_fbarray *arr)
+{
+ struct mem_area *ma = NULL, *tmp = NULL;
+ size_t page_sz, mmap_len;
+ char path[PATH_MAX];
+ void *data = NULL;
+ int fd = -1;
+
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /*
+ * we don't need to synchronize attach as two values we need (element
+ * size and array length) are constant for the duration of life of
+ * the array, so the parts we care about will not race.
+ */
+
+ if (fully_validate(arr->name, arr->elt_sz, arr->len))
+ return -1;
+
+ ma = malloc(sizeof(*ma));
+ if (ma == NULL) {
+ rte_errno = ENOMEM;
+ return -1;
+ }
+
+ page_sz = sysconf(_SC_PAGESIZE);
+ if (page_sz == (size_t)-1) {
+ free(ma);
+ return -1;
+ }
+
+ mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);
+
+ /* check the tailq - maybe user has already mapped this address space */
+ rte_spinlock_lock(&mem_area_lock);
+
+ TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
+ if (overlap(tmp, arr->data, mmap_len)) {
+ rte_errno = EEXIST;
+ goto fail;
+ }
+ }
+
+ /* we know this memory area is unique, so proceed */
+
+ data = eal_get_virtual_area(arr->data, &mmap_len, page_sz, 0, 0);
+ if (data == NULL)
+ goto fail;
+
+ eal_get_fbarray_path(path, sizeof(path), arr->name);
+
+ fd = open(path, O_RDWR);
+ if (fd < 0) {
+ rte_errno = errno;
+ goto fail;
+ }
+
+ /* lock the file, to let others know we're using it */
+ if (flock(fd, LOCK_SH | LOCK_NB)) {
+ rte_errno = errno;
+ goto fail;
+ }
+
+ if (resize_and_map(fd, data, mmap_len))
+ goto fail;
+
+ /* store our new memory area */
+ ma->addr = data;
+ ma->fd = fd; /* keep fd until detach/destroy */
+ ma->len = mmap_len;
+
+ TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next);
+
+ /* we're done */
+
+ rte_spinlock_unlock(&mem_area_lock);
+ return 0;
+fail:
+ if (data)
+ munmap(data, mmap_len);
+ if (fd >= 0)
+ close(fd);
+ free(ma);
+ rte_spinlock_unlock(&mem_area_lock);
+ return -1;
+}
+
+int
+rte_fbarray_detach(struct rte_fbarray *arr)
+{
+ struct mem_area *tmp = NULL;
+ size_t mmap_len;
+ int ret = -1;
+
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /*
+ * we don't need to synchronize detach as two values we need (element
+ * size and total capacity) are constant for the duration of life of
+ * the array, so the parts we care about will not race. if the user is
+ * detaching while doing something else in the same process, we can't
+ * really do anything about it, things will blow up either way.
+ */
+
+ size_t page_sz = sysconf(_SC_PAGESIZE);
+
+ if (page_sz == (size_t)-1)
+ return -1;
+
+ mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);
+
+ /* does this area exist? */
+ rte_spinlock_lock(&mem_area_lock);
+
+ TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
+ if (tmp->addr == arr->data && tmp->len == mmap_len)
+ break;
+ }
+ if (tmp == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto out;
+ }
+
+ munmap(arr->data, mmap_len);
+
+ /* area is unmapped, close fd and remove the tailq entry */
+ if (tmp->fd >= 0)
+ close(tmp->fd);
+ TAILQ_REMOVE(&mem_area_tailq, tmp, next);
+ free(tmp);
+
+ ret = 0;
+out:
+ rte_spinlock_unlock(&mem_area_lock);
+ return ret;
+}
+
+int
+rte_fbarray_destroy(struct rte_fbarray *arr)
+{
+ struct mem_area *tmp = NULL;
+ size_t mmap_len;
+ int fd, ret;
+ char path[PATH_MAX];
+
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /*
+ * we don't need to synchronize detach as two values we need (element
+ * size and total capacity) are constant for the duration of life of
+ * the array, so the parts we care about will not race. if the user is
+ * detaching while doing something else in the same process, we can't
+ * really do anything about it, things will blow up either way.
+ */
+
+ size_t page_sz = sysconf(_SC_PAGESIZE);
+
+ if (page_sz == (size_t)-1)
+ return -1;
+
+ mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);
+
+ /* does this area exist? */
+ rte_spinlock_lock(&mem_area_lock);
+
+ TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
+ if (tmp->addr == arr->data && tmp->len == mmap_len)
+ break;
+ }
+ if (tmp == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto out;
+ }
+ /* with no shconf, there were never any files to begin with */
+ if (!internal_config.no_shconf) {
+ /*
+ * attempt to get an exclusive lock on the file, to ensure it
+ * has been detached by all other processes
+ */
+ fd = tmp->fd;
+ if (flock(fd, LOCK_EX | LOCK_NB)) {
+ RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n");
+ rte_errno = EBUSY;
+ ret = -1;
+ goto out;
+ }
+
+ /* we're OK to destroy the file */
+ eal_get_fbarray_path(path, sizeof(path), arr->name);
+ if (unlink(path)) {
+ RTE_LOG(DEBUG, EAL, "Cannot unlink fbarray: %s\n",
+ strerror(errno));
+ rte_errno = errno;
+ /*
+ * we're still holding an exclusive lock, so drop it to
+ * shared.
+ */
+ flock(fd, LOCK_SH | LOCK_NB);
+
+ ret = -1;
+ goto out;
+ }
+ close(fd);
+ }
+ munmap(arr->data, mmap_len);
+
+ /* area is unmapped, remove the tailq entry */
+ TAILQ_REMOVE(&mem_area_tailq, tmp, next);
+ free(tmp);
+ ret = 0;
+
+ /* reset the fbarray structure */
+ memset(arr, 0, sizeof(*arr));
+out:
+ rte_spinlock_unlock(&mem_area_lock);
+ return ret;
+}
+
+void *
+rte_fbarray_get(const struct rte_fbarray *arr, unsigned int idx)
+{
+ void *ret = NULL;
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ if (idx >= arr->len) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ ret = RTE_PTR_ADD(arr->data, idx * arr->elt_sz);
+
+ return ret;
+}
+
+int
+rte_fbarray_set_used(struct rte_fbarray *arr, unsigned int idx)
+{
+ return set_used(arr, idx, true);
+}
+
+int
+rte_fbarray_set_free(struct rte_fbarray *arr, unsigned int idx)
+{
+ return set_used(arr, idx, false);
+}
+
+int
+rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx)
+{
+ struct used_mask *msk;
+ int msk_idx;
+ uint64_t msk_bit;
+ int ret = -1;
+
+ if (arr == NULL || idx >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
+ msk_idx = MASK_LEN_TO_IDX(idx);
+ msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);
+
+ ret = (msk->data[msk_idx] & msk_bit) != 0;
+
+ rte_rwlock_read_unlock(&arr->rwlock);
+
+ return ret;
+}
+
+static int
+fbarray_find(struct rte_fbarray *arr, unsigned int start, bool next, bool used)
+{
+ int ret = -1;
+
+ if (arr == NULL || start >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ /* cheap checks to prevent doing useless work */
+ if (!used) {
+ if (arr->len == arr->count) {
+ rte_errno = ENOSPC;
+ goto out;
+ }
+ if (arr->count == 0) {
+ ret = start;
+ goto out;
+ }
+ } else {
+ if (arr->count == 0) {
+ rte_errno = ENOENT;
+ goto out;
+ }
+ if (arr->len == arr->count) {
+ ret = start;
+ goto out;
+ }
+ }
+ if (next)
+ ret = find_next(arr, start, used);
+ else
+ ret = find_prev(arr, start, used);
+out:
+ rte_rwlock_read_unlock(&arr->rwlock);
+ return ret;
+}
+
+int
+rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, true, false);
+}
+
+int
+rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, true, true);
+}
+
+int
+rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, false, false);
+}
+
+int
+rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, false, true);
+}
+
+static int
+fbarray_find_n(struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool next, bool used)
+{
+ int ret = -1;
+
+ if (arr == NULL || start >= arr->len || n > arr->len || n == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ if (next && (arr->len - start) < n) {
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+ }
+ if (!next && start < (n - 1)) {
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ /* cheap checks to prevent doing useless work */
+ if (!used) {
+ if (arr->len == arr->count || arr->len - arr->count < n) {
+ rte_errno = ENOSPC;
+ goto out;
+ }
+ if (arr->count == 0) {
+ ret = next ? start : start - n + 1;
+ goto out;
+ }
+ } else {
+ if (arr->count < n) {
+ rte_errno = ENOENT;
+ goto out;
+ }
+ if (arr->count == arr->len) {
+ ret = next ? start : start - n + 1;
+ goto out;
+ }
+ }
+
+ if (next)
+ ret = find_next_n(arr, start, n, used);
+ else
+ ret = find_prev_n(arr, start, n, used);
+out:
+ rte_rwlock_read_unlock(&arr->rwlock);
+ return ret;
+}
+
+int
+rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, true, false);
+}
+
+int
+rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, true, true);
+}
+
+int
+rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, false, false);
+}
+
+int
+rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, false, true);
+}
+
+static int
+fbarray_find_contig(struct rte_fbarray *arr, unsigned int start, bool next,
+ bool used)
+{
+ int ret = -1;
+
+ if (arr == NULL || start >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ /* cheap checks to prevent doing useless work */
+ if (used) {
+ if (arr->count == 0) {
+ ret = 0;
+ goto out;
+ }
+ if (next && arr->count == arr->len) {
+ ret = arr->len - start;
+ goto out;
+ }
+ if (!next && arr->count == arr->len) {
+ ret = start + 1;
+ goto out;
+ }
+ } else {
+ if (arr->len == arr->count) {
+ ret = 0;
+ goto out;
+ }
+ if (next && arr->count == 0) {
+ ret = arr->len - start;
+ goto out;
+ }
+ if (!next && arr->count == 0) {
+ ret = start + 1;
+ goto out;
+ }
+ }
+
+ if (next)
+ ret = find_contig(arr, start, used);
+ else
+ ret = find_rev_contig(arr, start, used);
+out:
+ rte_rwlock_read_unlock(&arr->rwlock);
+ return ret;
+}
+
+static int
+fbarray_find_biggest(struct rte_fbarray *arr, unsigned int start, bool used,
+ bool rev)
+{
+ int cur_idx, next_idx, cur_len, biggest_idx, biggest_len;
+ /* don't stack if conditions, use function pointers instead */
+ int (*find_func)(struct rte_fbarray *, unsigned int);
+ int (*find_contig_func)(struct rte_fbarray *, unsigned int);
+
+ if (arr == NULL || start >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ /* the other API calls already do their fair share of cheap checks, so
+ * no need to do them here.
+ */
+
+ /* the API's called are thread-safe, but something may still happen
+ * between the API calls, so lock the fbarray. all other API's are
+ * read-locking the fbarray, so read lock here is OK.
+ */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ /* pick out appropriate functions */
+ if (used) {
+ if (rev) {
+ find_func = rte_fbarray_find_prev_used;
+ find_contig_func = rte_fbarray_find_rev_contig_used;
+ } else {
+ find_func = rte_fbarray_find_next_used;
+ find_contig_func = rte_fbarray_find_contig_used;
+ }
+ } else {
+ if (rev) {
+ find_func = rte_fbarray_find_prev_free;
+ find_contig_func = rte_fbarray_find_rev_contig_free;
+ } else {
+ find_func = rte_fbarray_find_next_free;
+ find_contig_func = rte_fbarray_find_contig_free;
+ }
+ }
+
+ cur_idx = start;
+ biggest_idx = -1; /* default is error */
+ biggest_len = 0;
+ for (;;) {
+ cur_idx = find_func(arr, cur_idx);
+
+ /* block found, check its length */
+ if (cur_idx >= 0) {
+ cur_len = find_contig_func(arr, cur_idx);
+ /* decide where we go next */
+ next_idx = rev ? cur_idx - cur_len : cur_idx + cur_len;
+ /* move current index to start of chunk */
+ cur_idx = rev ? next_idx + 1 : cur_idx;
+
+ if (cur_len > biggest_len) {
+ biggest_idx = cur_idx;
+ biggest_len = cur_len;
+ }
+ cur_idx = next_idx;
+ /* in reverse mode, next_idx may be -1 if chunk started
+ * at array beginning. this means there's no more work
+ * to do.
+ */
+ if (cur_idx < 0)
+ break;
+ } else {
+ /* nothing more to find, stop. however, a failed API
+ * call has set rte_errno, which we want to ignore, as
+ * reaching the end of fbarray is not an error.
+ */
+ rte_errno = 0;
+ break;
+ }
+ }
+ /* if we didn't find anything at all, set rte_errno */
+ if (biggest_idx < 0)
+ rte_errno = used ? ENOENT : ENOSPC;
+
+ rte_rwlock_read_unlock(&arr->rwlock);
+ return biggest_idx;
+}
+
+int
+rte_fbarray_find_biggest_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_biggest(arr, start, false, false);
+}
+
+int
+rte_fbarray_find_biggest_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_biggest(arr, start, true, false);
+}
+
+int
+rte_fbarray_find_rev_biggest_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_biggest(arr, start, false, true);
+}
+
+int
+rte_fbarray_find_rev_biggest_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_biggest(arr, start, true, true);
+}
+
+
+int
+rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, true, false);
+}
+
+int
+rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, true, true);
+}
+
+int
+rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, false, false);
+}
+
+int
+rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, false, true);
+}
+
+int
+rte_fbarray_find_idx(const struct rte_fbarray *arr, const void *elt)
+{
+ void *end;
+ int ret = -1;
+
+ /*
+ * no need to synchronize as it doesn't matter if underlying data
+ * changes - we're doing pointer arithmetic here.
+ */
+
+ if (arr == NULL || elt == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ end = RTE_PTR_ADD(arr->data, arr->elt_sz * arr->len);
+ if (elt < arr->data || elt >= end) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ ret = RTE_PTR_DIFF(elt, arr->data) / arr->elt_sz;
+
+ return ret;
+}
+
+void
+rte_fbarray_dump_metadata(struct rte_fbarray *arr, FILE *f)
+{
+ struct used_mask *msk;
+ unsigned int i;
+
+ if (arr == NULL || f == NULL) {
+ rte_errno = EINVAL;
+ return;
+ }
+
+ if (fully_validate(arr->name, arr->elt_sz, arr->len)) {
+ fprintf(f, "Invalid file-backed array\n");
+ goto out;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ fprintf(f, "File-backed array: %s\n", arr->name);
+ fprintf(f, "size: %i occupied: %i elt_sz: %i\n",
+ arr->len, arr->count, arr->elt_sz);
+
+ msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
+
+ for (i = 0; i < msk->n_masks; i++)
+ fprintf(f, "msk idx %i: 0x%016" PRIx64 "\n", i, msk->data[i]);
+out:
+ rte_rwlock_read_unlock(&arr->rwlock);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_hexdump.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_hexdump.c
new file mode 100644
index 000000000..2d2179d41
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_hexdump.c
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdint.h>
+#include <rte_hexdump.h>
+#include <rte_string_fns.h>
+
+#define LINE_LEN 128
+
+void
+rte_hexdump(FILE *f, const char *title, const void *buf, unsigned int len)
+{
+ unsigned int i, out, ofs;
+ const unsigned char *data = buf;
+ char line[LINE_LEN]; /* space needed 8+16*3+3+16 == 75 */
+
+ fprintf(f, "%s at [%p], len=%u\n",
+ title ? : " Dump data", data, len);
+ ofs = 0;
+ while (ofs < len) {
+ /* format the line in the buffer */
+ out = snprintf(line, LINE_LEN, "%08X:", ofs);
+ for (i = 0; i < 16; i++) {
+ if (ofs + i < len)
+ snprintf(line + out, LINE_LEN - out,
+ " %02X", (data[ofs + i] & 0xff));
+ else
+ strcpy(line + out, " ");
+ out += 3;
+ }
+
+
+ for (; i <= 16; i++)
+ out += snprintf(line + out, LINE_LEN - out, " | ");
+
+ for (i = 0; ofs < len && i < 16; i++, ofs++) {
+ unsigned char c = data[ofs];
+
+ if (c < ' ' || c > '~')
+ c = '.';
+ out += snprintf(line + out, LINE_LEN - out, "%c", c);
+ }
+ fprintf(f, "%s\n", line);
+ }
+ fflush(f);
+}
+
+void
+rte_memdump(FILE *f, const char *title, const void *buf, unsigned int len)
+{
+ unsigned int i, out;
+ const unsigned char *data = buf;
+ char line[LINE_LEN];
+
+ if (title)
+ fprintf(f, "%s: ", title);
+
+ line[0] = '\0';
+ for (i = 0, out = 0; i < len; i++) {
+ /* Make sure we do not overrun the line buffer length. */
+ if (out >= LINE_LEN - 4) {
+ fprintf(f, "%s", line);
+ out = 0;
+ line[out] = '\0';
+ }
+ out += snprintf(line + out, LINE_LEN - out, "%02x%s",
+ (data[i] & 0xff), ((i + 1) < len) ? ":" : "");
+ }
+ if (out > 0)
+ fprintf(f, "%s", line);
+ fprintf(f, "\n");
+
+ fflush(f);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_hypervisor.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_hypervisor.c
new file mode 100644
index 000000000..5388b81a5
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_hypervisor.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 Mellanox Technologies, Ltd
+ */
+
+#include "rte_hypervisor.h"
+
+const char *
+rte_hypervisor_get_name(enum rte_hypervisor id)
+{
+ switch (id) {
+ case RTE_HYPERVISOR_NONE:
+ return "none";
+ case RTE_HYPERVISOR_KVM:
+ return "KVM";
+ case RTE_HYPERVISOR_HYPERV:
+ return "Hyper-V";
+ case RTE_HYPERVISOR_VMWARE:
+ return "VMware";
+ default:
+ return "unknown";
+ }
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_launch.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_launch.c
new file mode 100644
index 000000000..cf52d717f
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_launch.c
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/queue.h>
+
+#include <rte_launch.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_atomic.h>
+#include <rte_pause.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+
+#include "eal_private.h"
+
+/*
+ * Wait until a lcore finished its job.
+ */
+int
+rte_eal_wait_lcore(unsigned slave_id)
+{
+ if (lcore_config[slave_id].state == WAIT)
+ return 0;
+
+ while (lcore_config[slave_id].state != WAIT &&
+ lcore_config[slave_id].state != FINISHED)
+ rte_pause();
+
+ rte_rmb();
+
+ /* we are in finished state, go to wait state */
+ lcore_config[slave_id].state = WAIT;
+ return lcore_config[slave_id].ret;
+}
+
+/*
+ * Check that every SLAVE lcores are in WAIT state, then call
+ * rte_eal_remote_launch() for all of them. If call_master is true
+ * (set to CALL_MASTER), also call the function on the master lcore.
+ */
+int
+rte_eal_mp_remote_launch(int (*f)(void *), void *arg,
+ enum rte_rmt_call_master_t call_master)
+{
+ int lcore_id;
+ int master = rte_get_master_lcore();
+
+ /* check state of lcores */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (lcore_config[lcore_id].state != WAIT)
+ return -EBUSY;
+ }
+
+ /* send messages to cores */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ rte_eal_remote_launch(f, arg, lcore_id);
+ }
+
+ if (call_master == CALL_MASTER) {
+ lcore_config[master].ret = f(arg);
+ lcore_config[master].state = FINISHED;
+ }
+
+ return 0;
+}
+
+/*
+ * Return the state of the lcore identified by slave_id.
+ */
+enum rte_lcore_state_t
+rte_eal_get_lcore_state(unsigned lcore_id)
+{
+ return lcore_config[lcore_id].state;
+}
+
+/*
+ * Do a rte_eal_wait_lcore() for every lcore. The return values are
+ * ignored.
+ */
+void
+rte_eal_mp_wait_lcore(void)
+{
+ unsigned lcore_id;
+
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ rte_eal_wait_lcore(lcore_id);
+ }
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_lcore.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_lcore.c
new file mode 100644
index 000000000..5404922a8
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_lcore.c
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <unistd.h>
+#include <limits.h>
+#include <string.h>
+
+#include <rte_errno.h>
+#include <rte_log.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+
+unsigned int rte_get_master_lcore(void)
+{
+ return rte_eal_get_configuration()->master_lcore;
+}
+
+unsigned int rte_lcore_count(void)
+{
+ return rte_eal_get_configuration()->lcore_count;
+}
+
+int rte_lcore_index(int lcore_id)
+{
+ if (unlikely(lcore_id >= RTE_MAX_LCORE))
+ return -1;
+
+ if (lcore_id < 0)
+ lcore_id = (int)rte_lcore_id();
+
+ return lcore_config[lcore_id].core_index;
+}
+
+int rte_lcore_to_cpu_id(int lcore_id)
+{
+ if (unlikely(lcore_id >= RTE_MAX_LCORE))
+ return -1;
+
+ if (lcore_id < 0)
+ lcore_id = (int)rte_lcore_id();
+
+ return lcore_config[lcore_id].core_id;
+}
+
+rte_cpuset_t rte_lcore_cpuset(unsigned int lcore_id)
+{
+ return lcore_config[lcore_id].cpuset;
+}
+
+enum rte_lcore_role_t
+rte_eal_lcore_role(unsigned int lcore_id)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ if (lcore_id >= RTE_MAX_LCORE)
+ return ROLE_OFF;
+ return cfg->lcore_role[lcore_id];
+}
+
+int rte_lcore_is_enabled(unsigned int lcore_id)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ if (lcore_id >= RTE_MAX_LCORE)
+ return 0;
+ return cfg->lcore_role[lcore_id] == ROLE_RTE;
+}
+
+unsigned int rte_get_next_lcore(unsigned int i, int skip_master, int wrap)
+{
+ i++;
+ if (wrap)
+ i %= RTE_MAX_LCORE;
+
+ while (i < RTE_MAX_LCORE) {
+ if (!rte_lcore_is_enabled(i) ||
+ (skip_master && (i == rte_get_master_lcore()))) {
+ i++;
+ if (wrap)
+ i %= RTE_MAX_LCORE;
+ continue;
+ }
+ break;
+ }
+ return i;
+}
+
+unsigned int
+rte_lcore_to_socket_id(unsigned int lcore_id)
+{
+ return lcore_config[lcore_id].socket_id;
+}
+
+static int
+socket_id_cmp(const void *a, const void *b)
+{
+ const int *lcore_id_a = a;
+ const int *lcore_id_b = b;
+
+ if (*lcore_id_a < *lcore_id_b)
+ return -1;
+ if (*lcore_id_a > *lcore_id_b)
+ return 1;
+ return 0;
+}
+
+/*
+ * Parse /sys/devices/system/cpu to get the number of physical and logical
+ * processors on the machine. The function will fill the cpu_info
+ * structure.
+ */
+int
+rte_eal_cpu_init(void)
+{
+ /* pointer to global configuration */
+ struct rte_config *config = rte_eal_get_configuration();
+ unsigned lcore_id;
+ unsigned count = 0;
+ unsigned int socket_id, prev_socket_id;
+ int lcore_to_socket_id[RTE_MAX_LCORE];
+
+ /*
+ * Parse the maximum set of logical cores, detect the subset of running
+ * ones and enable them by default.
+ */
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ lcore_config[lcore_id].core_index = count;
+
+ /* init cpuset for per lcore config */
+ CPU_ZERO(&lcore_config[lcore_id].cpuset);
+
+ /* find socket first */
+ socket_id = eal_cpu_socket_id(lcore_id);
+ lcore_to_socket_id[lcore_id] = socket_id;
+
+ if (eal_cpu_detected(lcore_id) == 0) {
+ config->lcore_role[lcore_id] = ROLE_OFF;
+ lcore_config[lcore_id].core_index = -1;
+ continue;
+ }
+
+ /* By default, lcore 1:1 map to cpu id */
+ CPU_SET(lcore_id, &lcore_config[lcore_id].cpuset);
+
+ /* By default, each detected core is enabled */
+ config->lcore_role[lcore_id] = ROLE_RTE;
+ lcore_config[lcore_id].core_role = ROLE_RTE;
+ lcore_config[lcore_id].core_id = eal_cpu_core_id(lcore_id);
+ lcore_config[lcore_id].socket_id = socket_id;
+ RTE_LOG(DEBUG, EAL, "Detected lcore %u as "
+ "core %u on socket %u\n",
+ lcore_id, lcore_config[lcore_id].core_id,
+ lcore_config[lcore_id].socket_id);
+ count++;
+ }
+ for (; lcore_id < CPU_SETSIZE; lcore_id++) {
+ if (eal_cpu_detected(lcore_id) == 0)
+ continue;
+ RTE_LOG(DEBUG, EAL, "Skipped lcore %u as core %u on socket %u\n",
+ lcore_id, eal_cpu_core_id(lcore_id),
+ eal_cpu_socket_id(lcore_id));
+ }
+
+ /* Set the count of enabled logical cores of the EAL configuration */
+ config->lcore_count = count;
+ RTE_LOG(DEBUG, EAL,
+ "Support maximum %u logical core(s) by configuration.\n",
+ RTE_MAX_LCORE);
+ RTE_LOG(INFO, EAL, "Detected %u lcore(s)\n", config->lcore_count);
+
+ /* sort all socket id's in ascending order */
+ qsort(lcore_to_socket_id, RTE_DIM(lcore_to_socket_id),
+ sizeof(lcore_to_socket_id[0]), socket_id_cmp);
+
+ prev_socket_id = -1;
+ config->numa_node_count = 0;
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ socket_id = lcore_to_socket_id[lcore_id];
+ if (socket_id != prev_socket_id)
+ config->numa_nodes[config->numa_node_count++] =
+ socket_id;
+ prev_socket_id = socket_id;
+ }
+ RTE_LOG(INFO, EAL, "Detected %u NUMA nodes\n", config->numa_node_count);
+
+ return 0;
+}
+
+unsigned int
+rte_socket_count(void)
+{
+ const struct rte_config *config = rte_eal_get_configuration();
+ return config->numa_node_count;
+}
+
+int
+rte_socket_id_by_idx(unsigned int idx)
+{
+ const struct rte_config *config = rte_eal_get_configuration();
+ if (idx >= config->numa_node_count) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ return config->numa_nodes[idx];
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_log.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_log.c
new file mode 100644
index 000000000..8835c8fff
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_log.c
@@ -0,0 +1,481 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <regex.h>
+#include <fnmatch.h>
+
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_per_lcore.h>
+
+#include "eal_private.h"
+
+/* global log structure */
+struct rte_logs rte_logs = {
+ .type = ~0,
+ .level = RTE_LOG_DEBUG,
+ .file = NULL,
+};
+
+struct rte_eal_opt_loglevel {
+ /** Next list entry */
+ TAILQ_ENTRY(rte_eal_opt_loglevel) next;
+ /** Compiled regular expression obtained from the option */
+ regex_t re_match;
+ /** Globbing pattern option */
+ char *pattern;
+ /** Log level value obtained from the option */
+ uint32_t level;
+};
+
+TAILQ_HEAD(rte_eal_opt_loglevel_list, rte_eal_opt_loglevel);
+
+/** List of valid EAL log level options */
+static struct rte_eal_opt_loglevel_list opt_loglevel_list =
+ TAILQ_HEAD_INITIALIZER(opt_loglevel_list);
+
+/* Stream to use for logging if rte_logs.file is NULL */
+static FILE *default_log_stream;
+
+/**
+ * This global structure stores some information about the message
+ * that is currently being processed by one lcore
+ */
+struct log_cur_msg {
+ uint32_t loglevel; /**< log level - see rte_log.h */
+ uint32_t logtype; /**< log type - see rte_log.h */
+};
+
+struct rte_log_dynamic_type {
+ const char *name;
+ uint32_t loglevel;
+};
+
+ /* per core log */
+static RTE_DEFINE_PER_LCORE(struct log_cur_msg, log_cur_msg);
+
+/* default logs */
+
+/* Change the stream that will be used by logging system */
+int
+rte_openlog_stream(FILE *f)
+{
+ rte_logs.file = f;
+ return 0;
+}
+
+FILE *
+rte_log_get_stream(void)
+{
+ FILE *f = rte_logs.file;
+
+ if (f == NULL) {
+ /*
+ * Grab the current value of stderr here, rather than
+ * just initializing default_log_stream to stderr. This
+ * ensures that we will always use the current value
+ * of stderr, even if the application closes and
+ * reopens it.
+ */
+ return default_log_stream ? : stderr;
+ }
+ return f;
+}
+
+/* Set global log level */
+void
+rte_log_set_global_level(uint32_t level)
+{
+ rte_logs.level = (uint32_t)level;
+}
+
+/* Get global log level */
+uint32_t
+rte_log_get_global_level(void)
+{
+ return rte_logs.level;
+}
+
+int
+rte_log_get_level(uint32_t type)
+{
+ if (type >= rte_logs.dynamic_types_len)
+ return -1;
+
+ return rte_logs.dynamic_types[type].loglevel;
+}
+
+bool
+rte_log_can_log(uint32_t logtype, uint32_t level)
+{
+ int log_level;
+
+ if (level > rte_log_get_global_level())
+ return false;
+
+ log_level = rte_log_get_level(logtype);
+ if (log_level < 0)
+ return false;
+
+ if (level > (uint32_t)log_level)
+ return false;
+
+ return true;
+}
+
+int
+rte_log_set_level(uint32_t type, uint32_t level)
+{
+ if (type >= rte_logs.dynamic_types_len)
+ return -1;
+ if (level > RTE_LOG_DEBUG)
+ return -1;
+
+ rte_logs.dynamic_types[type].loglevel = level;
+
+ return 0;
+}
+
+/* set log level by regular expression */
+int
+rte_log_set_level_regexp(const char *regex, uint32_t level)
+{
+ regex_t r;
+ size_t i;
+
+ if (level > RTE_LOG_DEBUG)
+ return -1;
+
+ if (regcomp(&r, regex, 0) != 0)
+ return -1;
+
+ for (i = 0; i < rte_logs.dynamic_types_len; i++) {
+ if (rte_logs.dynamic_types[i].name == NULL)
+ continue;
+ if (regexec(&r, rte_logs.dynamic_types[i].name, 0,
+ NULL, 0) == 0)
+ rte_logs.dynamic_types[i].loglevel = level;
+ }
+
+ regfree(&r);
+
+ return 0;
+}
+
+/*
+ * Save the type string and the loglevel for later dynamic
+ * logtypes which may register later.
+ */
+static int rte_log_save_level(int priority,
+ const char *regex, const char *pattern)
+{
+ struct rte_eal_opt_loglevel *opt_ll = NULL;
+
+ opt_ll = malloc(sizeof(*opt_ll));
+ if (opt_ll == NULL)
+ goto fail;
+
+ opt_ll->level = priority;
+
+ if (regex) {
+ opt_ll->pattern = NULL;
+ if (regcomp(&opt_ll->re_match, regex, 0) != 0)
+ goto fail;
+ } else if (pattern) {
+ opt_ll->pattern = strdup(pattern);
+ if (opt_ll->pattern == NULL)
+ goto fail;
+ } else
+ goto fail;
+
+ TAILQ_INSERT_HEAD(&opt_loglevel_list, opt_ll, next);
+ return 0;
+fail:
+ free(opt_ll);
+ return -1;
+}
+
+int rte_log_save_regexp(const char *regex, int tmp)
+{
+ return rte_log_save_level(tmp, regex, NULL);
+}
+
+/* set log level based on globbing pattern */
+int
+rte_log_set_level_pattern(const char *pattern, uint32_t level)
+{
+ size_t i;
+
+ if (level > RTE_LOG_DEBUG)
+ return -1;
+
+ for (i = 0; i < rte_logs.dynamic_types_len; i++) {
+ if (rte_logs.dynamic_types[i].name == NULL)
+ continue;
+
+ if (fnmatch(pattern, rte_logs.dynamic_types[i].name, 0) == 0)
+ rte_logs.dynamic_types[i].loglevel = level;
+ }
+
+ return 0;
+}
+
+int rte_log_save_pattern(const char *pattern, int priority)
+{
+ return rte_log_save_level(priority, NULL, pattern);
+}
+
+/* get the current loglevel for the message being processed */
+int rte_log_cur_msg_loglevel(void)
+{
+ return RTE_PER_LCORE(log_cur_msg).loglevel;
+}
+
+/* get the current logtype for the message being processed */
+int rte_log_cur_msg_logtype(void)
+{
+ return RTE_PER_LCORE(log_cur_msg).logtype;
+}
+
+static int
+rte_log_lookup(const char *name)
+{
+ size_t i;
+
+ for (i = 0; i < rte_logs.dynamic_types_len; i++) {
+ if (rte_logs.dynamic_types[i].name == NULL)
+ continue;
+ if (strcmp(name, rte_logs.dynamic_types[i].name) == 0)
+ return i;
+ }
+
+ return -1;
+}
+
+/* register an extended log type, assuming table is large enough, and id
+ * is not yet registered.
+ */
+static int
+__rte_log_register(const char *name, int id)
+{
+ char *dup_name = strdup(name);
+
+ if (dup_name == NULL)
+ return -ENOMEM;
+
+ rte_logs.dynamic_types[id].name = dup_name;
+ rte_logs.dynamic_types[id].loglevel = RTE_LOG_INFO;
+
+ return id;
+}
+
+/* register an extended log type */
+int
+rte_log_register(const char *name)
+{
+ struct rte_log_dynamic_type *new_dynamic_types;
+ int id, ret;
+
+ id = rte_log_lookup(name);
+ if (id >= 0)
+ return id;
+
+ new_dynamic_types = realloc(rte_logs.dynamic_types,
+ sizeof(struct rte_log_dynamic_type) *
+ (rte_logs.dynamic_types_len + 1));
+ if (new_dynamic_types == NULL)
+ return -ENOMEM;
+ rte_logs.dynamic_types = new_dynamic_types;
+
+ ret = __rte_log_register(name, rte_logs.dynamic_types_len);
+ if (ret < 0)
+ return ret;
+
+ rte_logs.dynamic_types_len++;
+
+ return ret;
+}
+
+/* Register an extended log type and try to pick its level from EAL options */
+int
+rte_log_register_type_and_pick_level(const char *name, uint32_t level_def)
+{
+ struct rte_eal_opt_loglevel *opt_ll;
+ uint32_t level = level_def;
+ int type;
+
+ type = rte_log_register(name);
+ if (type < 0)
+ return type;
+
+ TAILQ_FOREACH(opt_ll, &opt_loglevel_list, next) {
+ if (opt_ll->level > RTE_LOG_DEBUG)
+ continue;
+
+ if (opt_ll->pattern) {
+ if (fnmatch(opt_ll->pattern, name, 0) == 0)
+ level = opt_ll->level;
+ } else {
+ if (regexec(&opt_ll->re_match, name, 0, NULL, 0) == 0)
+ level = opt_ll->level;
+ }
+ }
+
+ rte_logs.dynamic_types[type].loglevel = level;
+
+ return type;
+}
+
+struct logtype {
+ uint32_t log_id;
+ const char *logtype;
+};
+
+static const struct logtype logtype_strings[] = {
+ {RTE_LOGTYPE_EAL, "lib.eal"},
+ {RTE_LOGTYPE_MALLOC, "lib.malloc"},
+ {RTE_LOGTYPE_RING, "lib.ring"},
+ {RTE_LOGTYPE_MEMPOOL, "lib.mempool"},
+ {RTE_LOGTYPE_TIMER, "lib.timer"},
+ {RTE_LOGTYPE_PMD, "pmd"},
+ {RTE_LOGTYPE_HASH, "lib.hash"},
+ {RTE_LOGTYPE_LPM, "lib.lpm"},
+ {RTE_LOGTYPE_KNI, "lib.kni"},
+ {RTE_LOGTYPE_ACL, "lib.acl"},
+ {RTE_LOGTYPE_POWER, "lib.power"},
+ {RTE_LOGTYPE_METER, "lib.meter"},
+ {RTE_LOGTYPE_SCHED, "lib.sched"},
+ {RTE_LOGTYPE_PORT, "lib.port"},
+ {RTE_LOGTYPE_TABLE, "lib.table"},
+ {RTE_LOGTYPE_PIPELINE, "lib.pipeline"},
+ {RTE_LOGTYPE_MBUF, "lib.mbuf"},
+ {RTE_LOGTYPE_CRYPTODEV, "lib.cryptodev"},
+ {RTE_LOGTYPE_EFD, "lib.efd"},
+ {RTE_LOGTYPE_EVENTDEV, "lib.eventdev"},
+ {RTE_LOGTYPE_GSO, "lib.gso"},
+ {RTE_LOGTYPE_USER1, "user1"},
+ {RTE_LOGTYPE_USER2, "user2"},
+ {RTE_LOGTYPE_USER3, "user3"},
+ {RTE_LOGTYPE_USER4, "user4"},
+ {RTE_LOGTYPE_USER5, "user5"},
+ {RTE_LOGTYPE_USER6, "user6"},
+ {RTE_LOGTYPE_USER7, "user7"},
+ {RTE_LOGTYPE_USER8, "user8"}
+};
+
+/* Logging should be first initializer (before drivers and bus) */
+RTE_INIT_PRIO(rte_log_init, LOG)
+{
+ uint32_t i;
+
+ rte_log_set_global_level(RTE_LOG_DEBUG);
+
+ rte_logs.dynamic_types = calloc(RTE_LOGTYPE_FIRST_EXT_ID,
+ sizeof(struct rte_log_dynamic_type));
+ if (rte_logs.dynamic_types == NULL)
+ return;
+
+ /* register legacy log types */
+ for (i = 0; i < RTE_DIM(logtype_strings); i++)
+ __rte_log_register(logtype_strings[i].logtype,
+ logtype_strings[i].log_id);
+
+ rte_logs.dynamic_types_len = RTE_LOGTYPE_FIRST_EXT_ID;
+}
+
+static const char *
+loglevel_to_string(uint32_t level)
+{
+ switch (level) {
+ case 0: return "disabled";
+ case RTE_LOG_EMERG: return "emerg";
+ case RTE_LOG_ALERT: return "alert";
+ case RTE_LOG_CRIT: return "critical";
+ case RTE_LOG_ERR: return "error";
+ case RTE_LOG_WARNING: return "warning";
+ case RTE_LOG_NOTICE: return "notice";
+ case RTE_LOG_INFO: return "info";
+ case RTE_LOG_DEBUG: return "debug";
+ default: return "unknown";
+ }
+}
+
+/* dump global level and registered log types */
+void
+rte_log_dump(FILE *f)
+{
+ size_t i;
+
+ fprintf(f, "global log level is %s\n",
+ loglevel_to_string(rte_log_get_global_level()));
+
+ for (i = 0; i < rte_logs.dynamic_types_len; i++) {
+ if (rte_logs.dynamic_types[i].name == NULL)
+ continue;
+ fprintf(f, "id %zu: %s, level is %s\n",
+ i, rte_logs.dynamic_types[i].name,
+ loglevel_to_string(rte_logs.dynamic_types[i].loglevel));
+ }
+}
+
+/*
+ * Generates a log message The message will be sent in the stream
+ * defined by the previous call to rte_openlog_stream().
+ */
+int
+rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap)
+{
+ FILE *f = rte_log_get_stream();
+ int ret;
+
+ if (logtype >= rte_logs.dynamic_types_len)
+ return -1;
+ if (!rte_log_can_log(logtype, level))
+ return 0;
+
+ /* save loglevel and logtype in a global per-lcore variable */
+ RTE_PER_LCORE(log_cur_msg).loglevel = level;
+ RTE_PER_LCORE(log_cur_msg).logtype = logtype;
+
+ ret = vfprintf(f, format, ap);
+ fflush(f);
+ return ret;
+}
+
+/*
+ * Generates a log message The message will be sent in the stream
+ * defined by the previous call to rte_openlog_stream().
+ * No need to check level here, done by rte_vlog().
+ */
+int
+rte_log(uint32_t level, uint32_t logtype, const char *format, ...)
+{
+ va_list ap;
+ int ret;
+
+ va_start(ap, format);
+ ret = rte_vlog(level, logtype, format, ap);
+ va_end(ap);
+ return ret;
+}
+
+/*
+ * Called by environment-specific initialization functions.
+ */
+void
+eal_log_set_default(FILE *default_log)
+{
+ default_log_stream = default_log;
+
+#if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG
+ RTE_LOG(NOTICE, EAL,
+ "Debug dataplane logs available - lower performance\n");
+#endif
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_mcfg.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_mcfg.c
new file mode 100644
index 000000000..49d3ed0ce
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_mcfg.c
@@ -0,0 +1,170 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#include <rte_eal_memconfig.h>
+#include <rte_version.h>
+
+#include "eal_internal_cfg.h"
+#include "eal_memcfg.h"
+#include "eal_private.h"
+
+void
+eal_mcfg_complete(void)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+ struct rte_mem_config *mcfg = cfg->mem_config;
+
+ /* ALL shared mem_config related INIT DONE */
+ if (cfg->process_type == RTE_PROC_PRIMARY)
+ mcfg->magic = RTE_MAGIC;
+
+ internal_config.init_complete = 1;
+}
+
+void
+eal_mcfg_wait_complete(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* wait until shared mem_config finish initialising */
+ while (mcfg->magic != RTE_MAGIC)
+ rte_pause();
+}
+
+int
+eal_mcfg_check_version(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* check if version from memconfig matches compiled in macro */
+ if (mcfg->version != RTE_VERSION)
+ return -1;
+
+ return 0;
+}
+
+void
+eal_mcfg_update_internal(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+ internal_config.legacy_mem = mcfg->legacy_mem;
+ internal_config.single_file_segments = mcfg->single_file_segments;
+}
+
+void
+eal_mcfg_update_from_internal(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+ mcfg->legacy_mem = internal_config.legacy_mem;
+ mcfg->single_file_segments = internal_config.single_file_segments;
+ /* record current DPDK version */
+ mcfg->version = RTE_VERSION;
+}
+
+void
+rte_mcfg_mem_read_lock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+}
+
+void
+rte_mcfg_mem_read_unlock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+}
+
+void
+rte_mcfg_mem_write_lock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+}
+
+void
+rte_mcfg_mem_write_unlock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+}
+
+void
+rte_mcfg_tailq_read_lock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_read_lock(&mcfg->qlock);
+}
+
+void
+rte_mcfg_tailq_read_unlock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_read_unlock(&mcfg->qlock);
+}
+
+void
+rte_mcfg_tailq_write_lock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_write_lock(&mcfg->qlock);
+}
+
+void
+rte_mcfg_tailq_write_unlock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_write_unlock(&mcfg->qlock);
+}
+
+void
+rte_mcfg_mempool_read_lock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_read_lock(&mcfg->mplock);
+}
+
+void
+rte_mcfg_mempool_read_unlock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_read_unlock(&mcfg->mplock);
+}
+
+void
+rte_mcfg_mempool_write_lock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_write_lock(&mcfg->mplock);
+}
+
+void
+rte_mcfg_mempool_write_unlock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_write_unlock(&mcfg->mplock);
+}
+
+void
+rte_mcfg_timer_lock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_spinlock_lock(&mcfg->tlock);
+}
+
+void
+rte_mcfg_timer_unlock(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ rte_spinlock_unlock(&mcfg->tlock);
+}
+
+bool
+rte_mcfg_get_single_file_segments(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ return (bool)mcfg->single_file_segments;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_memalloc.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memalloc.c
new file mode 100644
index 000000000..55189d072
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memalloc.c
@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include <string.h>
+
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_fbarray.h>
+#include <rte_memzone.h>
+#include <rte_memory.h>
+#include <rte_string_fns.h>
+#include <rte_rwlock.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+#include "eal_memalloc.h"
+
+struct mem_event_callback_entry {
+ TAILQ_ENTRY(mem_event_callback_entry) next;
+ char name[RTE_MEM_EVENT_CALLBACK_NAME_LEN];
+ rte_mem_event_callback_t clb;
+ void *arg;
+};
+
+struct mem_alloc_validator_entry {
+ TAILQ_ENTRY(mem_alloc_validator_entry) next;
+ char name[RTE_MEM_ALLOC_VALIDATOR_NAME_LEN];
+ rte_mem_alloc_validator_t clb;
+ int socket_id;
+ size_t limit;
+};
+
+/** Double linked list of actions. */
+TAILQ_HEAD(mem_event_callback_entry_list, mem_event_callback_entry);
+TAILQ_HEAD(mem_alloc_validator_entry_list, mem_alloc_validator_entry);
+
+static struct mem_event_callback_entry_list mem_event_callback_list =
+ TAILQ_HEAD_INITIALIZER(mem_event_callback_list);
+static rte_rwlock_t mem_event_rwlock = RTE_RWLOCK_INITIALIZER;
+
+static struct mem_alloc_validator_entry_list mem_alloc_validator_list =
+ TAILQ_HEAD_INITIALIZER(mem_alloc_validator_list);
+static rte_rwlock_t mem_alloc_validator_rwlock = RTE_RWLOCK_INITIALIZER;
+
+static struct mem_event_callback_entry *
+find_mem_event_callback(const char *name, void *arg)
+{
+ struct mem_event_callback_entry *r;
+
+ TAILQ_FOREACH(r, &mem_event_callback_list, next) {
+ if (!strcmp(r->name, name) && r->arg == arg)
+ break;
+ }
+ return r;
+}
+
+static struct mem_alloc_validator_entry *
+find_mem_alloc_validator(const char *name, int socket_id)
+{
+ struct mem_alloc_validator_entry *r;
+
+ TAILQ_FOREACH(r, &mem_alloc_validator_list, next) {
+ if (!strcmp(r->name, name) && r->socket_id == socket_id)
+ break;
+ }
+ return r;
+}
+
+bool
+eal_memalloc_is_contig(const struct rte_memseg_list *msl, void *start,
+ size_t len)
+{
+ void *end, *aligned_start, *aligned_end;
+ size_t pgsz = (size_t)msl->page_sz;
+ const struct rte_memseg *ms;
+
+ /* for IOVA_VA, it's always contiguous */
+ if (rte_eal_iova_mode() == RTE_IOVA_VA && !msl->external)
+ return true;
+
+ /* for legacy memory, it's always contiguous */
+ if (internal_config.legacy_mem)
+ return true;
+
+ end = RTE_PTR_ADD(start, len);
+
+ /* for nohuge, we check pagemap, otherwise check memseg */
+ if (!rte_eal_has_hugepages()) {
+ rte_iova_t cur, expected;
+
+ aligned_start = RTE_PTR_ALIGN_FLOOR(start, pgsz);
+ aligned_end = RTE_PTR_ALIGN_CEIL(end, pgsz);
+
+ /* if start and end are on the same page, bail out early */
+ if (RTE_PTR_DIFF(aligned_end, aligned_start) == pgsz)
+ return true;
+
+ /* skip first iteration */
+ cur = rte_mem_virt2iova(aligned_start);
+ expected = cur + pgsz;
+ aligned_start = RTE_PTR_ADD(aligned_start, pgsz);
+
+ while (aligned_start < aligned_end) {
+ cur = rte_mem_virt2iova(aligned_start);
+ if (cur != expected)
+ return false;
+ aligned_start = RTE_PTR_ADD(aligned_start, pgsz);
+ expected += pgsz;
+ }
+ } else {
+ int start_seg, end_seg, cur_seg;
+ rte_iova_t cur, expected;
+
+ aligned_start = RTE_PTR_ALIGN_FLOOR(start, pgsz);
+ aligned_end = RTE_PTR_ALIGN_CEIL(end, pgsz);
+
+ start_seg = RTE_PTR_DIFF(aligned_start, msl->base_va) /
+ pgsz;
+ end_seg = RTE_PTR_DIFF(aligned_end, msl->base_va) /
+ pgsz;
+
+ /* if start and end are on the same page, bail out early */
+ if (RTE_PTR_DIFF(aligned_end, aligned_start) == pgsz)
+ return true;
+
+ /* skip first iteration */
+ ms = rte_fbarray_get(&msl->memseg_arr, start_seg);
+ cur = ms->iova;
+ expected = cur + pgsz;
+
+ /* if we can't access IOVA addresses, assume non-contiguous */
+ if (cur == RTE_BAD_IOVA)
+ return false;
+
+ for (cur_seg = start_seg + 1; cur_seg < end_seg;
+ cur_seg++, expected += pgsz) {
+ ms = rte_fbarray_get(&msl->memseg_arr, cur_seg);
+
+ if (ms->iova != expected)
+ return false;
+ }
+ }
+ return true;
+}
+
+int
+eal_memalloc_mem_event_callback_register(const char *name,
+ rte_mem_event_callback_t clb, void *arg)
+{
+ struct mem_event_callback_entry *entry;
+ int ret, len;
+ if (name == NULL || clb == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ len = strnlen(name, RTE_MEM_EVENT_CALLBACK_NAME_LEN);
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ } else if (len == RTE_MEM_EVENT_CALLBACK_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mem_event_rwlock);
+
+ entry = find_mem_event_callback(name, arg);
+ if (entry != NULL) {
+ rte_errno = EEXIST;
+ ret = -1;
+ goto unlock;
+ }
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* callback successfully created and is valid, add it to the list */
+ entry->clb = clb;
+ entry->arg = arg;
+ strlcpy(entry->name, name, RTE_MEM_EVENT_CALLBACK_NAME_LEN);
+ TAILQ_INSERT_TAIL(&mem_event_callback_list, entry, next);
+
+ ret = 0;
+
+ RTE_LOG(DEBUG, EAL, "Mem event callback '%s:%p' registered\n",
+ name, arg);
+
+unlock:
+ rte_rwlock_write_unlock(&mem_event_rwlock);
+ return ret;
+}
+
+int
+eal_memalloc_mem_event_callback_unregister(const char *name, void *arg)
+{
+ struct mem_event_callback_entry *entry;
+ int ret, len;
+
+ if (name == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ len = strnlen(name, RTE_MEM_EVENT_CALLBACK_NAME_LEN);
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ } else if (len == RTE_MEM_EVENT_CALLBACK_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mem_event_rwlock);
+
+ entry = find_mem_event_callback(name, arg);
+ if (entry == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ TAILQ_REMOVE(&mem_event_callback_list, entry, next);
+ free(entry);
+
+ ret = 0;
+
+ RTE_LOG(DEBUG, EAL, "Mem event callback '%s:%p' unregistered\n",
+ name, arg);
+
+unlock:
+ rte_rwlock_write_unlock(&mem_event_rwlock);
+ return ret;
+}
+
+void
+eal_memalloc_mem_event_notify(enum rte_mem_event event, const void *start,
+ size_t len)
+{
+ struct mem_event_callback_entry *entry;
+
+ rte_rwlock_read_lock(&mem_event_rwlock);
+
+ TAILQ_FOREACH(entry, &mem_event_callback_list, next) {
+ RTE_LOG(DEBUG, EAL, "Calling mem event callback '%s:%p'\n",
+ entry->name, entry->arg);
+ entry->clb(event, start, len, entry->arg);
+ }
+
+ rte_rwlock_read_unlock(&mem_event_rwlock);
+}
+
+int
+eal_memalloc_mem_alloc_validator_register(const char *name,
+ rte_mem_alloc_validator_t clb, int socket_id, size_t limit)
+{
+ struct mem_alloc_validator_entry *entry;
+ int ret, len;
+ if (name == NULL || clb == NULL || socket_id < 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ len = strnlen(name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN);
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ } else if (len == RTE_MEM_ALLOC_VALIDATOR_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mem_alloc_validator_rwlock);
+
+ entry = find_mem_alloc_validator(name, socket_id);
+ if (entry != NULL) {
+ rte_errno = EEXIST;
+ ret = -1;
+ goto unlock;
+ }
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* callback successfully created and is valid, add it to the list */
+ entry->clb = clb;
+ entry->socket_id = socket_id;
+ entry->limit = limit;
+ strlcpy(entry->name, name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN);
+ TAILQ_INSERT_TAIL(&mem_alloc_validator_list, entry, next);
+
+ ret = 0;
+
+ RTE_LOG(DEBUG, EAL, "Mem alloc validator '%s' on socket %i with limit %zu registered\n",
+ name, socket_id, limit);
+
+unlock:
+ rte_rwlock_write_unlock(&mem_alloc_validator_rwlock);
+ return ret;
+}
+
+int
+eal_memalloc_mem_alloc_validator_unregister(const char *name, int socket_id)
+{
+ struct mem_alloc_validator_entry *entry;
+ int ret, len;
+
+ if (name == NULL || socket_id < 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ len = strnlen(name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN);
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ } else if (len == RTE_MEM_ALLOC_VALIDATOR_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mem_alloc_validator_rwlock);
+
+ entry = find_mem_alloc_validator(name, socket_id);
+ if (entry == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ TAILQ_REMOVE(&mem_alloc_validator_list, entry, next);
+ free(entry);
+
+ ret = 0;
+
+ RTE_LOG(DEBUG, EAL, "Mem alloc validator '%s' on socket %i unregistered\n",
+ name, socket_id);
+
+unlock:
+ rte_rwlock_write_unlock(&mem_alloc_validator_rwlock);
+ return ret;
+}
+
+int
+eal_memalloc_mem_alloc_validate(int socket_id, size_t new_len)
+{
+ struct mem_alloc_validator_entry *entry;
+ int ret = 0;
+
+ rte_rwlock_read_lock(&mem_alloc_validator_rwlock);
+
+ TAILQ_FOREACH(entry, &mem_alloc_validator_list, next) {
+ if (entry->socket_id != socket_id || entry->limit > new_len)
+ continue;
+ RTE_LOG(DEBUG, EAL, "Calling mem alloc validator '%s' on socket %i\n",
+ entry->name, entry->socket_id);
+ if (entry->clb(socket_id, entry->limit, new_len) < 0)
+ ret = -1;
+ }
+
+ rte_rwlock_read_unlock(&mem_alloc_validator_rwlock);
+
+ return ret;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_memory.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memory.c
new file mode 100644
index 000000000..4c897a13f
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memory.c
@@ -0,0 +1,939 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+
+#include <rte_fbarray.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_errno.h>
+#include <rte_log.h>
+
+#include "eal_memalloc.h"
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+#include "eal_memcfg.h"
+#include "malloc_heap.h"
+
+/*
+ * Try to mmap *size bytes in /dev/zero. If it is successful, return the
+ * pointer to the mmap'd area and keep *size unmodified. Else, retry
+ * with a smaller zone: decrease *size by hugepage_sz until it reaches
+ * 0. In this case, return NULL. Note: this function returns an address
+ * which is a multiple of hugepage size.
+ */
+
+#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
+
+static void *next_baseaddr;
+static uint64_t system_page_sz;
+
+#ifdef RTE_EXEC_ENV_LINUX
+#define RTE_DONTDUMP MADV_DONTDUMP
+#elif defined RTE_EXEC_ENV_FREEBSD
+#define RTE_DONTDUMP MADV_NOCORE
+#else
+#error "madvise doesn't support this OS"
+#endif
+
+#define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5
+void *
+eal_get_virtual_area(void *requested_addr, size_t *size,
+ size_t page_sz, int flags, int mmap_flags)
+{
+ bool addr_is_hint, allow_shrink, unmap, no_align;
+ uint64_t map_sz;
+ void *mapped_addr, *aligned_addr;
+ uint8_t try = 0;
+
+ if (system_page_sz == 0)
+ system_page_sz = sysconf(_SC_PAGESIZE);
+
+ mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
+
+ RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
+
+ addr_is_hint = (flags & EAL_VIRTUAL_AREA_ADDR_IS_HINT) > 0;
+ allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0;
+ unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0;
+
+ if (next_baseaddr == NULL && internal_config.base_virtaddr != 0 &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY)
+ next_baseaddr = (void *) internal_config.base_virtaddr;
+
+#ifdef RTE_ARCH_64
+ if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY)
+ next_baseaddr = (void *) eal_get_baseaddr();
+#endif
+ if (requested_addr == NULL && next_baseaddr != NULL) {
+ requested_addr = next_baseaddr;
+ requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
+ addr_is_hint = true;
+ }
+
+ /* we don't need alignment of resulting pointer in the following cases:
+ *
+ * 1. page size is equal to system size
+ * 2. we have a requested address, and it is page-aligned, and we will
+ * be discarding the address if we get a different one.
+ *
+ * for all other cases, alignment is potentially necessary.
+ */
+ no_align = (requested_addr != NULL &&
+ requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) &&
+ !addr_is_hint) ||
+ page_sz == system_page_sz;
+
+ do {
+ map_sz = no_align ? *size : *size + page_sz;
+ if (map_sz > SIZE_MAX) {
+ RTE_LOG(ERR, EAL, "Map size too big\n");
+ rte_errno = E2BIG;
+ return NULL;
+ }
+
+ mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_NONE,
+ mmap_flags, -1, 0);
+ if (mapped_addr == MAP_FAILED && allow_shrink)
+ *size -= page_sz;
+
+ if (mapped_addr != MAP_FAILED && addr_is_hint &&
+ mapped_addr != requested_addr) {
+ try++;
+ next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz);
+ if (try <= MAX_MMAP_WITH_DEFINED_ADDR_TRIES) {
+ /* hint was not used. Try with another offset */
+ munmap(mapped_addr, map_sz);
+ mapped_addr = MAP_FAILED;
+ requested_addr = next_baseaddr;
+ }
+ }
+ } while ((allow_shrink || addr_is_hint) &&
+ mapped_addr == MAP_FAILED && *size > 0);
+
+ /* align resulting address - if map failed, we will ignore the value
+ * anyway, so no need to add additional checks.
+ */
+ aligned_addr = no_align ? mapped_addr :
+ RTE_PTR_ALIGN(mapped_addr, page_sz);
+
+ if (*size == 0) {
+ RTE_LOG(ERR, EAL, "Cannot get a virtual area of any size: %s\n",
+ strerror(errno));
+ rte_errno = errno;
+ return NULL;
+ } else if (mapped_addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
+ strerror(errno));
+ /* pass errno up the call chain */
+ rte_errno = errno;
+ return NULL;
+ } else if (requested_addr != NULL && !addr_is_hint &&
+ aligned_addr != requested_addr) {
+ RTE_LOG(ERR, EAL, "Cannot get a virtual area at requested address: %p (got %p)\n",
+ requested_addr, aligned_addr);
+ munmap(mapped_addr, map_sz);
+ rte_errno = EADDRNOTAVAIL;
+ return NULL;
+ } else if (requested_addr != NULL && addr_is_hint &&
+ aligned_addr != requested_addr) {
+ RTE_LOG(WARNING, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n",
+ requested_addr, aligned_addr);
+ RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory into secondary processes\n");
+ } else if (next_baseaddr != NULL) {
+ next_baseaddr = RTE_PTR_ADD(aligned_addr, *size);
+ }
+
+ RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
+ aligned_addr, *size);
+
+ if (unmap) {
+ munmap(mapped_addr, map_sz);
+ } else if (!no_align) {
+ void *map_end, *aligned_end;
+ size_t before_len, after_len;
+
+ /* when we reserve space with alignment, we add alignment to
+ * mapping size. On 32-bit, if 1GB alignment was requested, this
+ * would waste 1GB of address space, which is a luxury we cannot
+ * afford. so, if alignment was performed, check if any unneeded
+ * address space can be unmapped back.
+ */
+
+ map_end = RTE_PTR_ADD(mapped_addr, (size_t)map_sz);
+ aligned_end = RTE_PTR_ADD(aligned_addr, *size);
+
+ /* unmap space before aligned mmap address */
+ before_len = RTE_PTR_DIFF(aligned_addr, mapped_addr);
+ if (before_len > 0)
+ munmap(mapped_addr, before_len);
+
+ /* unmap space after aligned end mmap address */
+ after_len = RTE_PTR_DIFF(map_end, aligned_end);
+ if (after_len > 0)
+ munmap(aligned_end, after_len);
+ }
+
+ if (!unmap) {
+ /* Exclude these pages from a core dump. */
+ if (madvise(aligned_addr, *size, RTE_DONTDUMP) != 0)
+ RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
+ strerror(errno));
+ }
+
+ return aligned_addr;
+}
+
+static struct rte_memseg *
+virt2memseg(const void *addr, const struct rte_memseg_list *msl)
+{
+ const struct rte_fbarray *arr;
+ void *start, *end;
+ int ms_idx;
+
+ if (msl == NULL)
+ return NULL;
+
+ /* a memseg list was specified, check if it's the right one */
+ start = msl->base_va;
+ end = RTE_PTR_ADD(start, msl->len);
+
+ if (addr < start || addr >= end)
+ return NULL;
+
+ /* now, calculate index */
+ arr = &msl->memseg_arr;
+ ms_idx = RTE_PTR_DIFF(addr, msl->base_va) / msl->page_sz;
+ return rte_fbarray_get(arr, ms_idx);
+}
+
+static struct rte_memseg_list *
+virt2memseg_list(const void *addr)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl;
+ int msl_idx;
+
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+ void *start, *end;
+ msl = &mcfg->memsegs[msl_idx];
+
+ start = msl->base_va;
+ end = RTE_PTR_ADD(start, msl->len);
+ if (addr >= start && addr < end)
+ break;
+ }
+ /* if we didn't find our memseg list */
+ if (msl_idx == RTE_MAX_MEMSEG_LISTS)
+ return NULL;
+ return msl;
+}
+
+struct rte_memseg_list *
+rte_mem_virt2memseg_list(const void *addr)
+{
+ return virt2memseg_list(addr);
+}
+
+struct virtiova {
+ rte_iova_t iova;
+ void *virt;
+};
+static int
+find_virt(const struct rte_memseg_list *msl __rte_unused,
+ const struct rte_memseg *ms, void *arg)
+{
+ struct virtiova *vi = arg;
+ if (vi->iova >= ms->iova && vi->iova < (ms->iova + ms->len)) {
+ size_t offset = vi->iova - ms->iova;
+ vi->virt = RTE_PTR_ADD(ms->addr, offset);
+ /* stop the walk */
+ return 1;
+ }
+ return 0;
+}
+static int
+find_virt_legacy(const struct rte_memseg_list *msl __rte_unused,
+ const struct rte_memseg *ms, size_t len, void *arg)
+{
+ struct virtiova *vi = arg;
+ if (vi->iova >= ms->iova && vi->iova < (ms->iova + len)) {
+ size_t offset = vi->iova - ms->iova;
+ vi->virt = RTE_PTR_ADD(ms->addr, offset);
+ /* stop the walk */
+ return 1;
+ }
+ return 0;
+}
+
+void *
+rte_mem_iova2virt(rte_iova_t iova)
+{
+ struct virtiova vi;
+
+ memset(&vi, 0, sizeof(vi));
+
+ vi.iova = iova;
+ /* for legacy mem, we can get away with scanning VA-contiguous segments,
+ * as we know they are PA-contiguous as well
+ */
+ if (internal_config.legacy_mem)
+ rte_memseg_contig_walk(find_virt_legacy, &vi);
+ else
+ rte_memseg_walk(find_virt, &vi);
+
+ return vi.virt;
+}
+
+struct rte_memseg *
+rte_mem_virt2memseg(const void *addr, const struct rte_memseg_list *msl)
+{
+ return virt2memseg(addr, msl != NULL ? msl :
+ rte_mem_virt2memseg_list(addr));
+}
+
+static int
+physmem_size(const struct rte_memseg_list *msl, void *arg)
+{
+ uint64_t *total_len = arg;
+
+ if (msl->external)
+ return 0;
+
+ *total_len += msl->memseg_arr.count * msl->page_sz;
+
+ return 0;
+}
+
+/* get the total size of memory */
+uint64_t
+rte_eal_get_physmem_size(void)
+{
+ uint64_t total_len = 0;
+
+ rte_memseg_list_walk(physmem_size, &total_len);
+
+ return total_len;
+}
+
+static int
+dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int msl_idx, ms_idx, fd;
+ FILE *f = arg;
+
+ msl_idx = msl - mcfg->memsegs;
+ if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
+ return -1;
+
+ ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
+ if (ms_idx < 0)
+ return -1;
+
+ fd = eal_memalloc_get_seg_fd(msl_idx, ms_idx);
+ fprintf(f, "Segment %i-%i: IOVA:0x%"PRIx64", len:%zu, "
+ "virt:%p, socket_id:%"PRId32", "
+ "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
+ "nrank:%"PRIx32" fd:%i\n",
+ msl_idx, ms_idx,
+ ms->iova,
+ ms->len,
+ ms->addr,
+ ms->socket_id,
+ ms->hugepage_sz,
+ ms->nchannel,
+ ms->nrank,
+ fd);
+
+ return 0;
+}
+
+/*
+ * Defining here because declared in rte_memory.h, but the actual implementation
+ * is in eal_common_memalloc.c, like all other memalloc internals.
+ */
+int
+rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
+ void *arg)
+{
+ /* FreeBSD boots with legacy mem enabled by default */
+ if (internal_config.legacy_mem) {
+ RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ return eal_memalloc_mem_event_callback_register(name, clb, arg);
+}
+
+int
+rte_mem_event_callback_unregister(const char *name, void *arg)
+{
+ /* FreeBSD boots with legacy mem enabled by default */
+ if (internal_config.legacy_mem) {
+ RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ return eal_memalloc_mem_event_callback_unregister(name, arg);
+}
+
+int
+rte_mem_alloc_validator_register(const char *name,
+ rte_mem_alloc_validator_t clb, int socket_id, size_t limit)
+{
+ /* FreeBSD boots with legacy mem enabled by default */
+ if (internal_config.legacy_mem) {
+ RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ return eal_memalloc_mem_alloc_validator_register(name, clb, socket_id,
+ limit);
+}
+
+int
+rte_mem_alloc_validator_unregister(const char *name, int socket_id)
+{
+ /* FreeBSD boots with legacy mem enabled by default */
+ if (internal_config.legacy_mem) {
+ RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ return eal_memalloc_mem_alloc_validator_unregister(name, socket_id);
+}
+
+/* Dump the physical memory layout on console */
+void
+rte_dump_physmem_layout(FILE *f)
+{
+ rte_memseg_walk(dump_memseg, f);
+}
+
+static int
+check_iova(const struct rte_memseg_list *msl __rte_unused,
+ const struct rte_memseg *ms, void *arg)
+{
+ uint64_t *mask = arg;
+ rte_iova_t iova;
+
+ /* higher address within segment */
+ iova = (ms->iova + ms->len) - 1;
+ if (!(iova & *mask))
+ return 0;
+
+ RTE_LOG(DEBUG, EAL, "memseg iova %"PRIx64", len %zx, out of range\n",
+ ms->iova, ms->len);
+
+ RTE_LOG(DEBUG, EAL, "\tusing dma mask %"PRIx64"\n", *mask);
+ return 1;
+}
+
+#define MAX_DMA_MASK_BITS 63
+
+/* check memseg iovas are within the required range based on dma mask */
+static int
+check_dma_mask(uint8_t maskbits, bool thread_unsafe)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ uint64_t mask;
+ int ret;
+
+ /* Sanity check. We only check width can be managed with 64 bits
+ * variables. Indeed any higher value is likely wrong. */
+ if (maskbits > MAX_DMA_MASK_BITS) {
+ RTE_LOG(ERR, EAL, "wrong dma mask size %u (Max: %u)\n",
+ maskbits, MAX_DMA_MASK_BITS);
+ return -1;
+ }
+
+ /* create dma mask */
+ mask = ~((1ULL << maskbits) - 1);
+
+ if (thread_unsafe)
+ ret = rte_memseg_walk_thread_unsafe(check_iova, &mask);
+ else
+ ret = rte_memseg_walk(check_iova, &mask);
+
+ if (ret)
+ /*
+ * Dma mask precludes hugepage usage.
+ * This device can not be used and we do not need to keep
+ * the dma mask.
+ */
+ return 1;
+
+ /*
+ * we need to keep the more restricted maskbit for checking
+ * potential dynamic memory allocation in the future.
+ */
+ mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits :
+ RTE_MIN(mcfg->dma_maskbits, maskbits);
+
+ return 0;
+}
+
+int
+rte_mem_check_dma_mask(uint8_t maskbits)
+{
+ return check_dma_mask(maskbits, false);
+}
+
+int
+rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits)
+{
+ return check_dma_mask(maskbits, true);
+}
+
+/*
+ * Set dma mask to use when memory initialization is done.
+ *
+ * This function should ONLY be used by code executed before the memory
+ * initialization. PMDs should use rte_mem_check_dma_mask if addressing
+ * limitations by the device.
+ */
+void
+rte_mem_set_dma_mask(uint8_t maskbits)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+ mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits :
+ RTE_MIN(mcfg->dma_maskbits, maskbits);
+}
+
+/* return the number of memory channels */
+unsigned rte_memory_get_nchannel(void)
+{
+ return rte_eal_get_configuration()->mem_config->nchannel;
+}
+
+/* return the number of memory rank */
+unsigned rte_memory_get_nrank(void)
+{
+ return rte_eal_get_configuration()->mem_config->nrank;
+}
+
+static int
+rte_eal_memdevice_init(void)
+{
+ struct rte_config *config;
+
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+ return 0;
+
+ config = rte_eal_get_configuration();
+ config->mem_config->nchannel = internal_config.force_nchannel;
+ config->mem_config->nrank = internal_config.force_nrank;
+
+ return 0;
+}
+
+/* Lock page in physical memory and prevent from swapping. */
+int
+rte_mem_lock_page(const void *virt)
+{
+ unsigned long virtual = (unsigned long)virt;
+ int page_size = getpagesize();
+ unsigned long aligned = (virtual & ~(page_size - 1));
+ return mlock((void *)aligned, page_size);
+}
+
+int
+rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ms_idx, ret = 0;
+
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+ const struct rte_memseg *ms;
+ struct rte_fbarray *arr;
+
+ if (msl->memseg_arr.count == 0)
+ continue;
+
+ arr = &msl->memseg_arr;
+
+ ms_idx = rte_fbarray_find_next_used(arr, 0);
+ while (ms_idx >= 0) {
+ int n_segs;
+ size_t len;
+
+ ms = rte_fbarray_get(arr, ms_idx);
+
+ /* find how many more segments there are, starting with
+ * this one.
+ */
+ n_segs = rte_fbarray_find_contig_used(arr, ms_idx);
+ len = n_segs * msl->page_sz;
+
+ ret = func(msl, ms, len, arg);
+ if (ret)
+ return ret;
+ ms_idx = rte_fbarray_find_next_used(arr,
+ ms_idx + n_segs);
+ }
+ }
+ return 0;
+}
+
+int
+rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
+{
+ int ret = 0;
+
+ /* do not allow allocations/frees/init while we iterate */
+ rte_mcfg_mem_read_lock();
+ ret = rte_memseg_contig_walk_thread_unsafe(func, arg);
+ rte_mcfg_mem_read_unlock();
+
+ return ret;
+}
+
+int
+rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ms_idx, ret = 0;
+
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+ const struct rte_memseg *ms;
+ struct rte_fbarray *arr;
+
+ if (msl->memseg_arr.count == 0)
+ continue;
+
+ arr = &msl->memseg_arr;
+
+ ms_idx = rte_fbarray_find_next_used(arr, 0);
+ while (ms_idx >= 0) {
+ ms = rte_fbarray_get(arr, ms_idx);
+ ret = func(msl, ms, arg);
+ if (ret)
+ return ret;
+ ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
+ }
+ }
+ return 0;
+}
+
+int
+rte_memseg_walk(rte_memseg_walk_t func, void *arg)
+{
+ int ret = 0;
+
+ /* do not allow allocations/frees/init while we iterate */
+ rte_mcfg_mem_read_lock();
+ ret = rte_memseg_walk_thread_unsafe(func, arg);
+ rte_mcfg_mem_read_unlock();
+
+ return ret;
+}
+
+int
+rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ret = 0;
+
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+
+ if (msl->base_va == NULL)
+ continue;
+
+ ret = func(msl, arg);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+int
+rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
+{
+ int ret = 0;
+
+ /* do not allow allocations/frees/init while we iterate */
+ rte_mcfg_mem_read_lock();
+ ret = rte_memseg_list_walk_thread_unsafe(func, arg);
+ rte_mcfg_mem_read_unlock();
+
+ return ret;
+}
+
+int
+rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl;
+ struct rte_fbarray *arr;
+ int msl_idx, seg_idx, ret;
+
+ if (ms == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ msl = rte_mem_virt2memseg_list(ms->addr);
+ if (msl == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ arr = &msl->memseg_arr;
+
+ msl_idx = msl - mcfg->memsegs;
+ seg_idx = rte_fbarray_find_idx(arr, ms);
+
+ if (!rte_fbarray_is_used(arr, seg_idx)) {
+ rte_errno = ENOENT;
+ return -1;
+ }
+
+ /* segment fd API is not supported for external segments */
+ if (msl->external) {
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ ret = eal_memalloc_get_seg_fd(msl_idx, seg_idx);
+ if (ret < 0) {
+ rte_errno = -ret;
+ ret = -1;
+ }
+ return ret;
+}
+
+int
+rte_memseg_get_fd(const struct rte_memseg *ms)
+{
+ int ret;
+
+ rte_mcfg_mem_read_lock();
+ ret = rte_memseg_get_fd_thread_unsafe(ms);
+ rte_mcfg_mem_read_unlock();
+
+ return ret;
+}
+
+int
+rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
+ size_t *offset)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl;
+ struct rte_fbarray *arr;
+ int msl_idx, seg_idx, ret;
+
+ if (ms == NULL || offset == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ msl = rte_mem_virt2memseg_list(ms->addr);
+ if (msl == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ arr = &msl->memseg_arr;
+
+ msl_idx = msl - mcfg->memsegs;
+ seg_idx = rte_fbarray_find_idx(arr, ms);
+
+ if (!rte_fbarray_is_used(arr, seg_idx)) {
+ rte_errno = ENOENT;
+ return -1;
+ }
+
+ /* segment fd API is not supported for external segments */
+ if (msl->external) {
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ ret = eal_memalloc_get_seg_fd_offset(msl_idx, seg_idx, offset);
+ if (ret < 0) {
+ rte_errno = -ret;
+ ret = -1;
+ }
+ return ret;
+}
+
+int
+rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset)
+{
+ int ret;
+
+ rte_mcfg_mem_read_lock();
+ ret = rte_memseg_get_fd_offset_thread_unsafe(ms, offset);
+ rte_mcfg_mem_read_unlock();
+
+ return ret;
+}
+
+int
+rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[],
+ unsigned int n_pages, size_t page_sz)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int socket_id, n;
+ int ret = 0;
+
+ if (va_addr == NULL || page_sz == 0 || len == 0 ||
+ !rte_is_power_of_2(page_sz) ||
+ RTE_ALIGN(len, page_sz) != len ||
+ ((len / page_sz) != n_pages && iova_addrs != NULL) ||
+ !rte_is_aligned(va_addr, page_sz)) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_mcfg_mem_write_lock();
+
+ /* make sure the segment doesn't already exist */
+ if (malloc_heap_find_external_seg(va_addr, len) != NULL) {
+ rte_errno = EEXIST;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* get next available socket ID */
+ socket_id = mcfg->next_socket_id;
+ if (socket_id > INT32_MAX) {
+ RTE_LOG(ERR, EAL, "Cannot assign new socket ID's\n");
+ rte_errno = ENOSPC;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* we can create a new memseg */
+ n = len / page_sz;
+ if (malloc_heap_create_external_seg(va_addr, iova_addrs, n,
+ page_sz, "extmem", socket_id) == NULL) {
+ ret = -1;
+ goto unlock;
+ }
+
+ /* memseg list successfully created - increment next socket ID */
+ mcfg->next_socket_id++;
+unlock:
+ rte_mcfg_mem_write_unlock();
+ return ret;
+}
+
+int
+rte_extmem_unregister(void *va_addr, size_t len)
+{
+ struct rte_memseg_list *msl;
+ int ret = 0;
+
+ if (va_addr == NULL || len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_mcfg_mem_write_lock();
+
+ /* find our segment */
+ msl = malloc_heap_find_external_seg(va_addr, len);
+ if (msl == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+
+ ret = malloc_heap_destroy_external_seg(msl);
+unlock:
+ rte_mcfg_mem_write_unlock();
+ return ret;
+}
+
+static int
+sync_memory(void *va_addr, size_t len, bool attach)
+{
+ struct rte_memseg_list *msl;
+ int ret = 0;
+
+ if (va_addr == NULL || len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_mcfg_mem_write_lock();
+
+ /* find our segment */
+ msl = malloc_heap_find_external_seg(va_addr, len);
+ if (msl == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ if (attach)
+ ret = rte_fbarray_attach(&msl->memseg_arr);
+ else
+ ret = rte_fbarray_detach(&msl->memseg_arr);
+
+unlock:
+ rte_mcfg_mem_write_unlock();
+ return ret;
+}
+
+int
+rte_extmem_attach(void *va_addr, size_t len)
+{
+ return sync_memory(va_addr, len, true);
+}
+
+int
+rte_extmem_detach(void *va_addr, size_t len)
+{
+ return sync_memory(va_addr, len, false);
+}
+
+/* init memory subsystem */
+int
+rte_eal_memory_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int retval;
+ RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n");
+
+ if (!mcfg)
+ return -1;
+
+ /* lock mem hotplug here, to prevent races while we init */
+ rte_mcfg_mem_read_lock();
+
+ if (rte_eal_memseg_init() < 0)
+ goto fail;
+
+ if (eal_memalloc_init() < 0)
+ goto fail;
+
+ retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
+ rte_eal_hugepage_init() :
+ rte_eal_hugepage_attach();
+ if (retval < 0)
+ goto fail;
+
+ if (internal_config.no_shconf == 0 && rte_eal_memdevice_init() < 0)
+ goto fail;
+
+ return 0;
+fail:
+ rte_mcfg_mem_read_unlock();
+ return -1;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_memzone.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memzone.c
new file mode 100644
index 000000000..7c21aa921
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_memzone.c
@@ -0,0 +1,420 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+#include <rte_common.h>
+#include <rte_eal_trace.h>
+
+#include "malloc_heap.h"
+#include "malloc_elem.h"
+#include "eal_private.h"
+#include "eal_memcfg.h"
+
+static inline const struct rte_memzone *
+memzone_lookup_thread_unsafe(const char *name)
+{
+ struct rte_mem_config *mcfg;
+ struct rte_fbarray *arr;
+ const struct rte_memzone *mz;
+ int i = 0;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+ arr = &mcfg->memzones;
+
+ /*
+ * the algorithm is not optimal (linear), but there are few
+ * zones and this function should be called at init only
+ */
+ i = rte_fbarray_find_next_used(arr, 0);
+ while (i >= 0) {
+ mz = rte_fbarray_get(arr, i);
+ if (mz->addr != NULL &&
+ !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE))
+ return mz;
+ i = rte_fbarray_find_next_used(arr, i + 1);
+ }
+ return NULL;
+}
+
+static const struct rte_memzone *
+memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
+ int socket_id, unsigned int flags, unsigned int align,
+ unsigned int bound)
+{
+ struct rte_memzone *mz;
+ struct rte_mem_config *mcfg;
+ struct rte_fbarray *arr;
+ void *mz_addr;
+ size_t requested_len;
+ int mz_idx;
+ bool contig;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+ arr = &mcfg->memzones;
+
+ /* no more room in config */
+ if (arr->count >= arr->len) {
+ RTE_LOG(ERR, EAL,
+ "%s(): Number of requested memzone segments exceeds RTE_MAX_MEMZONE\n",
+ __func__);
+ rte_errno = ENOSPC;
+ return NULL;
+ }
+
+ if (strlen(name) > sizeof(mz->name) - 1) {
+ RTE_LOG(DEBUG, EAL, "%s(): memzone <%s>: name too long\n",
+ __func__, name);
+ rte_errno = ENAMETOOLONG;
+ return NULL;
+ }
+
+ /* zone already exist */
+ if ((memzone_lookup_thread_unsafe(name)) != NULL) {
+ RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
+ __func__, name);
+ rte_errno = EEXIST;
+ return NULL;
+ }
+
+ /* if alignment is not a power of two */
+ if (align && !rte_is_power_of_2(align)) {
+ RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
+ align);
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* alignment less than cache size is not allowed */
+ if (align < RTE_CACHE_LINE_SIZE)
+ align = RTE_CACHE_LINE_SIZE;
+
+ /* align length on cache boundary. Check for overflow before doing so */
+ if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
+ rte_errno = EINVAL; /* requested size too big */
+ return NULL;
+ }
+
+ len = RTE_ALIGN_CEIL(len, RTE_CACHE_LINE_SIZE);
+
+ /* save minimal requested length */
+ requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);
+
+ /* check that boundary condition is valid */
+ if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ if ((socket_id != SOCKET_ID_ANY) && socket_id < 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* only set socket to SOCKET_ID_ANY if we aren't allocating for an
+ * external heap.
+ */
+ if (!rte_eal_has_hugepages() && socket_id < RTE_MAX_NUMA_NODES)
+ socket_id = SOCKET_ID_ANY;
+
+ contig = (flags & RTE_MEMZONE_IOVA_CONTIG) != 0;
+ /* malloc only cares about size flags, remove contig flag from flags */
+ flags &= ~RTE_MEMZONE_IOVA_CONTIG;
+
+ if (len == 0 && bound == 0) {
+ /* no size constraints were placed, so use malloc elem len */
+ requested_len = 0;
+ mz_addr = malloc_heap_alloc_biggest(NULL, socket_id, flags,
+ align, contig);
+ } else {
+ if (len == 0)
+ requested_len = bound;
+ /* allocate memory on heap */
+ mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id,
+ flags, align, bound, contig);
+ }
+ if (mz_addr == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
+
+ /* fill the zone in config */
+ mz_idx = rte_fbarray_find_next_free(arr, 0);
+
+ if (mz_idx < 0) {
+ mz = NULL;
+ } else {
+ rte_fbarray_set_used(arr, mz_idx);
+ mz = rte_fbarray_get(arr, mz_idx);
+ }
+
+ if (mz == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone\n", __func__);
+ malloc_heap_free(elem);
+ rte_errno = ENOSPC;
+ return NULL;
+ }
+
+ strlcpy(mz->name, name, sizeof(mz->name));
+ mz->iova = rte_malloc_virt2iova(mz_addr);
+ mz->addr = mz_addr;
+ mz->len = requested_len == 0 ?
+ elem->size - elem->pad - MALLOC_ELEM_OVERHEAD :
+ requested_len;
+ mz->hugepage_sz = elem->msl->page_sz;
+ mz->socket_id = elem->msl->socket_id;
+ mz->flags = 0;
+
+ return mz;
+}
+
+static const struct rte_memzone *
+rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id,
+ unsigned int flags, unsigned int align, unsigned int bound)
+{
+ struct rte_mem_config *mcfg;
+ const struct rte_memzone *mz = NULL;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_write_lock(&mcfg->mlock);
+
+ mz = memzone_reserve_aligned_thread_unsafe(
+ name, len, socket_id, flags, align, bound);
+
+ rte_eal_trace_memzone_reserve(name, len, socket_id, flags, align,
+ bound, mz);
+
+ rte_rwlock_write_unlock(&mcfg->mlock);
+
+ return mz;
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor (with a
+ * specified alignment and boundary). If the allocation cannot be done,
+ * return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve_bounded(const char *name, size_t len, int socket_id,
+ unsigned flags, unsigned align, unsigned bound)
+{
+ return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
+ align, bound);
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor (with a
+ * specified alignment). If the allocation cannot be done, return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve_aligned(const char *name, size_t len, int socket_id,
+ unsigned flags, unsigned align)
+{
+ return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
+ align, 0);
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor. If the
+ * allocation cannot be done, return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve(const char *name, size_t len, int socket_id,
+ unsigned flags)
+{
+ return rte_memzone_reserve_thread_safe(name, len, socket_id,
+ flags, RTE_CACHE_LINE_SIZE, 0);
+}
+
+int
+rte_memzone_free(const struct rte_memzone *mz)
+{
+ char name[RTE_MEMZONE_NAMESIZE];
+ struct rte_mem_config *mcfg;
+ struct rte_fbarray *arr;
+ struct rte_memzone *found_mz;
+ int ret = 0;
+ void *addr = NULL;
+ unsigned idx;
+
+ if (mz == NULL)
+ return -EINVAL;
+
+ rte_strlcpy(name, mz->name, RTE_MEMZONE_NAMESIZE);
+ mcfg = rte_eal_get_configuration()->mem_config;
+ arr = &mcfg->memzones;
+
+ rte_rwlock_write_lock(&mcfg->mlock);
+
+ idx = rte_fbarray_find_idx(arr, mz);
+ found_mz = rte_fbarray_get(arr, idx);
+
+ if (found_mz == NULL) {
+ ret = -EINVAL;
+ } else if (found_mz->addr == NULL) {
+ RTE_LOG(ERR, EAL, "Memzone is not allocated\n");
+ ret = -EINVAL;
+ } else {
+ addr = found_mz->addr;
+ memset(found_mz, 0, sizeof(*found_mz));
+ rte_fbarray_set_free(arr, idx);
+ }
+
+ rte_rwlock_write_unlock(&mcfg->mlock);
+
+ if (addr != NULL)
+ rte_free(addr);
+
+ rte_eal_trace_memzone_free(name, addr, ret);
+ return ret;
+}
+
+/*
+ * Lookup for the memzone identified by the given name
+ */
+const struct rte_memzone *
+rte_memzone_lookup(const char *name)
+{
+ struct rte_mem_config *mcfg;
+ const struct rte_memzone *memzone = NULL;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_read_lock(&mcfg->mlock);
+
+ memzone = memzone_lookup_thread_unsafe(name);
+
+ rte_rwlock_read_unlock(&mcfg->mlock);
+
+ rte_eal_trace_memzone_lookup(name, memzone);
+ return memzone;
+}
+
+static void
+dump_memzone(const struct rte_memzone *mz, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl = NULL;
+ void *cur_addr, *mz_end;
+ struct rte_memseg *ms;
+ int mz_idx, ms_idx;
+ size_t page_sz;
+ FILE *f = arg;
+
+ mz_idx = rte_fbarray_find_idx(&mcfg->memzones, mz);
+
+ fprintf(f, "Zone %u: name:<%s>, len:0x%zx, virt:%p, "
+ "socket_id:%"PRId32", flags:%"PRIx32"\n",
+ mz_idx,
+ mz->name,
+ mz->len,
+ mz->addr,
+ mz->socket_id,
+ mz->flags);
+
+ /* go through each page occupied by this memzone */
+ msl = rte_mem_virt2memseg_list(mz->addr);
+ if (!msl) {
+ RTE_LOG(DEBUG, EAL, "Skipping bad memzone\n");
+ return;
+ }
+ page_sz = (size_t)mz->hugepage_sz;
+ cur_addr = RTE_PTR_ALIGN_FLOOR(mz->addr, page_sz);
+ mz_end = RTE_PTR_ADD(cur_addr, mz->len);
+
+ fprintf(f, "physical segments used:\n");
+ ms_idx = RTE_PTR_DIFF(mz->addr, msl->base_va) / page_sz;
+ ms = rte_fbarray_get(&msl->memseg_arr, ms_idx);
+
+ do {
+ fprintf(f, " addr: %p iova: 0x%" PRIx64 " "
+ "len: 0x%zx "
+ "pagesz: 0x%zx\n",
+ cur_addr, ms->iova, ms->len, page_sz);
+
+ /* advance VA to next page */
+ cur_addr = RTE_PTR_ADD(cur_addr, page_sz);
+
+ /* memzones occupy contiguous segments */
+ ++ms;
+ } while (cur_addr < mz_end);
+}
+
+/* Dump all reserved memory zones on console */
+void
+rte_memzone_dump(FILE *f)
+{
+ rte_memzone_walk(dump_memzone, f);
+}
+
+/*
+ * Init the memzone subsystem
+ */
+int
+rte_eal_memzone_init(void)
+{
+ struct rte_mem_config *mcfg;
+ int ret = 0;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_write_lock(&mcfg->mlock);
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
+ rte_fbarray_init(&mcfg->memzones, "memzone",
+ RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
+ ret = -1;
+ } else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
+ rte_fbarray_attach(&mcfg->memzones)) {
+ RTE_LOG(ERR, EAL, "Cannot attach to memzone list\n");
+ ret = -1;
+ }
+
+ rte_rwlock_write_unlock(&mcfg->mlock);
+
+ return ret;
+}
+
+/* Walk all reserved memory zones */
+void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
+ void *arg)
+{
+ struct rte_mem_config *mcfg;
+ struct rte_fbarray *arr;
+ int i;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+ arr = &mcfg->memzones;
+
+ rte_rwlock_read_lock(&mcfg->mlock);
+ i = rte_fbarray_find_next_used(arr, 0);
+ while (i >= 0) {
+ struct rte_memzone *mz = rte_fbarray_get(arr, i);
+ (*func)(mz, arg);
+ i = rte_fbarray_find_next_used(arr, i + 1);
+ }
+ rte_rwlock_read_unlock(&mcfg->mlock);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_options.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_options.c
new file mode 100644
index 000000000..8f2cbd1c6
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_options.c
@@ -0,0 +1,1861 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation.
+ * Copyright(c) 2014 6WIND S.A.
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#ifndef RTE_EXEC_ENV_WINDOWS
+#include <syslog.h>
+#endif
+#include <ctype.h>
+#include <limits.h>
+#include <errno.h>
+#include <getopt.h>
+#ifndef RTE_EXEC_ENV_WINDOWS
+#include <dlfcn.h>
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dirent.h>
+
+#include <rte_string_fns.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_tailq.h>
+#include <rte_version.h>
+#include <rte_devargs.h>
+#include <rte_memcpy.h>
+#ifndef RTE_EXEC_ENV_WINDOWS
+#include <rte_telemetry.h>
+#endif
+
+#include "eal_internal_cfg.h"
+#include "eal_options.h"
+#include "eal_filesystem.h"
+#include "eal_private.h"
+#ifndef RTE_EXEC_ENV_WINDOWS
+#include "eal_trace.h"
+#endif
+
+#define BITS_PER_HEX 4
+#define LCORE_OPT_LST 1
+#define LCORE_OPT_MSK 2
+#define LCORE_OPT_MAP 3
+
+const char
+eal_short_options[] =
+ "b:" /* pci-blacklist */
+ "c:" /* coremask */
+ "s:" /* service coremask */
+ "d:" /* driver */
+ "h" /* help */
+ "l:" /* corelist */
+ "S:" /* service corelist */
+ "m:" /* memory size */
+ "n:" /* memory channels */
+ "r:" /* memory ranks */
+ "v" /* version */
+ "w:" /* pci-whitelist */
+ ;
+
+const struct option
+eal_long_options[] = {
+ {OPT_BASE_VIRTADDR, 1, NULL, OPT_BASE_VIRTADDR_NUM },
+ {OPT_CREATE_UIO_DEV, 0, NULL, OPT_CREATE_UIO_DEV_NUM },
+ {OPT_FILE_PREFIX, 1, NULL, OPT_FILE_PREFIX_NUM },
+ {OPT_HELP, 0, NULL, OPT_HELP_NUM },
+ {OPT_HUGE_DIR, 1, NULL, OPT_HUGE_DIR_NUM },
+ {OPT_HUGE_UNLINK, 0, NULL, OPT_HUGE_UNLINK_NUM },
+ {OPT_IOVA_MODE, 1, NULL, OPT_IOVA_MODE_NUM },
+ {OPT_LCORES, 1, NULL, OPT_LCORES_NUM },
+ {OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM },
+ {OPT_TRACE, 1, NULL, OPT_TRACE_NUM },
+ {OPT_TRACE_DIR, 1, NULL, OPT_TRACE_DIR_NUM },
+ {OPT_TRACE_BUF_SIZE, 1, NULL, OPT_TRACE_BUF_SIZE_NUM },
+ {OPT_TRACE_MODE, 1, NULL, OPT_TRACE_MODE_NUM },
+ {OPT_MASTER_LCORE, 1, NULL, OPT_MASTER_LCORE_NUM },
+ {OPT_MBUF_POOL_OPS_NAME, 1, NULL, OPT_MBUF_POOL_OPS_NAME_NUM},
+ {OPT_NO_HPET, 0, NULL, OPT_NO_HPET_NUM },
+ {OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM },
+ {OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM },
+ {OPT_NO_SHCONF, 0, NULL, OPT_NO_SHCONF_NUM },
+ {OPT_IN_MEMORY, 0, NULL, OPT_IN_MEMORY_NUM },
+ {OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM },
+ {OPT_PCI_WHITELIST, 1, NULL, OPT_PCI_WHITELIST_NUM },
+ {OPT_PROC_TYPE, 1, NULL, OPT_PROC_TYPE_NUM },
+ {OPT_SOCKET_MEM, 1, NULL, OPT_SOCKET_MEM_NUM },
+ {OPT_SOCKET_LIMIT, 1, NULL, OPT_SOCKET_LIMIT_NUM },
+ {OPT_SYSLOG, 1, NULL, OPT_SYSLOG_NUM },
+ {OPT_VDEV, 1, NULL, OPT_VDEV_NUM },
+ {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM },
+ {OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM },
+ {OPT_LEGACY_MEM, 0, NULL, OPT_LEGACY_MEM_NUM },
+ {OPT_SINGLE_FILE_SEGMENTS, 0, NULL, OPT_SINGLE_FILE_SEGMENTS_NUM},
+ {OPT_MATCH_ALLOCATIONS, 0, NULL, OPT_MATCH_ALLOCATIONS_NUM},
+ {OPT_TELEMETRY, 0, NULL, OPT_TELEMETRY_NUM },
+ {OPT_NO_TELEMETRY, 0, NULL, OPT_NO_TELEMETRY_NUM },
+ {0, 0, NULL, 0 }
+};
+
+TAILQ_HEAD(shared_driver_list, shared_driver);
+
+/* Definition for shared object drivers. */
+struct shared_driver {
+ TAILQ_ENTRY(shared_driver) next;
+
+ char name[PATH_MAX];
+ void* lib_handle;
+};
+
+/* List of external loadable drivers */
+static struct shared_driver_list solib_list =
+TAILQ_HEAD_INITIALIZER(solib_list);
+
+/* Default path of external loadable drivers */
+static const char *default_solib_dir = RTE_EAL_PMD_PATH;
+
+/*
+ * Stringified version of solib path used by dpdk-pmdinfo.py
+ * Note: PLEASE DO NOT ALTER THIS without making a corresponding
+ * change to usertools/dpdk-pmdinfo.py
+ */
+static const char dpdk_solib_path[] __rte_used =
+"DPDK_PLUGIN_PATH=" RTE_EAL_PMD_PATH;
+
+TAILQ_HEAD(device_option_list, device_option);
+
+struct device_option {
+ TAILQ_ENTRY(device_option) next;
+
+ enum rte_devtype type;
+ char arg[];
+};
+
+static struct device_option_list devopt_list =
+TAILQ_HEAD_INITIALIZER(devopt_list);
+
+static int master_lcore_parsed;
+static int mem_parsed;
+static int core_parsed;
+
+#ifndef RTE_EXEC_ENV_WINDOWS
+static char **eal_args;
+static char **eal_app_args;
+
+#define EAL_PARAM_REQ "/eal/params"
+#define EAL_APP_PARAM_REQ "/eal/app_params"
+
+/* callback handler for telemetry library to report out EAL flags */
+int
+handle_eal_info_request(const char *cmd, const char *params __rte_unused,
+ struct rte_tel_data *d)
+{
+ char **args;
+ int used = 0;
+ int i = 0;
+
+ if (strcmp(cmd, EAL_PARAM_REQ) == 0)
+ args = eal_args;
+ else
+ args = eal_app_args;
+
+ rte_tel_data_start_array(d, RTE_TEL_STRING_VAL);
+ if (args == NULL || args[0] == NULL)
+ return 0;
+
+ for ( ; args[i] != NULL; i++)
+ used = rte_tel_data_add_array_string(d, args[i]);
+ return used;
+}
+
+int
+eal_save_args(int argc, char **argv)
+{
+ int i, j;
+
+ rte_telemetry_register_cmd(EAL_PARAM_REQ, handle_eal_info_request,
+ "Returns EAL commandline parameters used. Takes no parameters");
+ rte_telemetry_register_cmd(EAL_APP_PARAM_REQ, handle_eal_info_request,
+ "Returns app commandline parameters used. Takes no parameters");
+
+ /* clone argv to report out later. We overprovision, but
+ * this does not waste huge amounts of memory
+ */
+ eal_args = calloc(argc + 1, sizeof(*eal_args));
+ if (eal_args == NULL)
+ return -1;
+
+ for (i = 0; i < argc; i++) {
+ eal_args[i] = strdup(argv[i]);
+ if (strcmp(argv[i], "--") == 0)
+ break;
+ }
+ eal_args[i++] = NULL; /* always finish with NULL */
+
+ /* allow reporting of any app args we know about too */
+ if (i >= argc)
+ return 0;
+
+ eal_app_args = calloc(argc - i + 1, sizeof(*eal_args));
+ if (eal_app_args == NULL)
+ return -1;
+
+ for (j = 0; i < argc; j++, i++)
+ eal_app_args[j] = strdup(argv[i]);
+ eal_app_args[j] = NULL;
+
+ return 0;
+}
+#endif
+
+static int
+eal_option_device_add(enum rte_devtype type, const char *optarg)
+{
+ struct device_option *devopt;
+ size_t optlen;
+ int ret;
+
+ optlen = strlen(optarg) + 1;
+ devopt = calloc(1, sizeof(*devopt) + optlen);
+ if (devopt == NULL) {
+ RTE_LOG(ERR, EAL, "Unable to allocate device option\n");
+ return -ENOMEM;
+ }
+
+ devopt->type = type;
+ ret = strlcpy(devopt->arg, optarg, optlen);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Unable to copy device option\n");
+ free(devopt);
+ return -EINVAL;
+ }
+ TAILQ_INSERT_TAIL(&devopt_list, devopt, next);
+ return 0;
+}
+
+int
+eal_option_device_parse(void)
+{
+ struct device_option *devopt;
+ void *tmp;
+ int ret = 0;
+
+ TAILQ_FOREACH_SAFE(devopt, &devopt_list, next, tmp) {
+ if (ret == 0) {
+ ret = rte_devargs_add(devopt->type, devopt->arg);
+ if (ret)
+ RTE_LOG(ERR, EAL, "Unable to parse device '%s'\n",
+ devopt->arg);
+ }
+ TAILQ_REMOVE(&devopt_list, devopt, next);
+ free(devopt);
+ }
+ return ret;
+}
+
+const char *
+eal_get_hugefile_prefix(void)
+{
+ if (internal_config.hugefile_prefix != NULL)
+ return internal_config.hugefile_prefix;
+ return HUGEFILE_PREFIX_DEFAULT;
+}
+
+void
+eal_reset_internal_config(struct internal_config *internal_cfg)
+{
+ int i;
+
+ internal_cfg->memory = 0;
+ internal_cfg->force_nrank = 0;
+ internal_cfg->force_nchannel = 0;
+ internal_cfg->hugefile_prefix = NULL;
+ internal_cfg->hugepage_dir = NULL;
+ internal_cfg->force_sockets = 0;
+ /* zero out the NUMA config */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ internal_cfg->socket_mem[i] = 0;
+ internal_cfg->force_socket_limits = 0;
+ /* zero out the NUMA limits config */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ internal_cfg->socket_limit[i] = 0;
+ /* zero out hugedir descriptors */
+ for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) {
+ memset(&internal_cfg->hugepage_info[i], 0,
+ sizeof(internal_cfg->hugepage_info[0]));
+ internal_cfg->hugepage_info[i].lock_descriptor = -1;
+ }
+ internal_cfg->base_virtaddr = 0;
+
+#ifdef LOG_DAEMON
+ internal_cfg->syslog_facility = LOG_DAEMON;
+#endif
+
+ /* if set to NONE, interrupt mode is determined automatically */
+ internal_cfg->vfio_intr_mode = RTE_INTR_MODE_NONE;
+
+#ifdef RTE_LIBEAL_USE_HPET
+ internal_cfg->no_hpet = 0;
+#else
+ internal_cfg->no_hpet = 1;
+#endif
+ internal_cfg->vmware_tsc_map = 0;
+ internal_cfg->create_uio_dev = 0;
+ internal_cfg->iova_mode = RTE_IOVA_DC;
+ internal_cfg->user_mbuf_pool_ops_name = NULL;
+ CPU_ZERO(&internal_cfg->ctrl_cpuset);
+ internal_cfg->init_complete = 0;
+}
+
+static int
+eal_plugin_add(const char *path)
+{
+ struct shared_driver *solib;
+
+ solib = malloc(sizeof(*solib));
+ if (solib == NULL) {
+ RTE_LOG(ERR, EAL, "malloc(solib) failed\n");
+ return -1;
+ }
+ memset(solib, 0, sizeof(*solib));
+ strlcpy(solib->name, path, PATH_MAX-1);
+ solib->name[PATH_MAX-1] = 0;
+ TAILQ_INSERT_TAIL(&solib_list, solib, next);
+
+ return 0;
+}
+
+static int
+eal_plugindir_init(const char *path)
+{
+ DIR *d = NULL;
+ struct dirent *dent = NULL;
+ char sopath[PATH_MAX];
+
+ if (path == NULL || *path == '\0')
+ return 0;
+
+ d = opendir(path);
+ if (d == NULL) {
+ RTE_LOG(ERR, EAL, "failed to open directory %s: %s\n",
+ path, strerror(errno));
+ return -1;
+ }
+
+ while ((dent = readdir(d)) != NULL) {
+ struct stat sb;
+
+ snprintf(sopath, sizeof(sopath), "%s/%s", path, dent->d_name);
+
+ if (!(stat(sopath, &sb) == 0 && S_ISREG(sb.st_mode)))
+ continue;
+
+ if (eal_plugin_add(sopath) == -1)
+ break;
+ }
+
+ closedir(d);
+ /* XXX this ignores failures from readdir() itself */
+ return (dent == NULL) ? 0 : -1;
+}
+
+int
+eal_plugins_init(void)
+{
+#ifndef RTE_EXEC_ENV_WINDOWS
+ struct shared_driver *solib = NULL;
+ struct stat sb;
+
+ if (*default_solib_dir != '\0' && stat(default_solib_dir, &sb) == 0 &&
+ S_ISDIR(sb.st_mode))
+ eal_plugin_add(default_solib_dir);
+
+ TAILQ_FOREACH(solib, &solib_list, next) {
+
+ if (stat(solib->name, &sb) == 0 && S_ISDIR(sb.st_mode)) {
+ if (eal_plugindir_init(solib->name) == -1) {
+ RTE_LOG(ERR, EAL,
+ "Cannot init plugin directory %s\n",
+ solib->name);
+ return -1;
+ }
+ } else {
+ RTE_LOG(DEBUG, EAL, "open shared lib %s\n",
+ solib->name);
+ solib->lib_handle = dlopen(solib->name, RTLD_NOW);
+ if (solib->lib_handle == NULL) {
+ RTE_LOG(ERR, EAL, "%s\n", dlerror());
+ return -1;
+ }
+ }
+
+ }
+ return 0;
+#endif
+}
+
+/*
+ * Parse the coremask given as argument (hexadecimal string) and fill
+ * the global configuration (core role and core count) with the parsed
+ * value.
+ */
+static int xdigit2val(unsigned char c)
+{
+ int val;
+
+ if (isdigit(c))
+ val = c - '0';
+ else if (isupper(c))
+ val = c - 'A' + 10;
+ else
+ val = c - 'a' + 10;
+ return val;
+}
+
+static int
+eal_parse_service_coremask(const char *coremask)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+ int i, j, idx = 0;
+ unsigned int count = 0;
+ char c;
+ int val;
+ uint32_t taken_lcore_count = 0;
+
+ if (coremask == NULL)
+ return -1;
+ /* Remove all blank characters ahead and after .
+ * Remove 0x/0X if exists.
+ */
+ while (isblank(*coremask))
+ coremask++;
+ if (coremask[0] == '0' && ((coremask[1] == 'x')
+ || (coremask[1] == 'X')))
+ coremask += 2;
+ i = strlen(coremask);
+ while ((i > 0) && isblank(coremask[i - 1]))
+ i--;
+
+ if (i == 0)
+ return -1;
+
+ for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) {
+ c = coremask[i];
+ if (isxdigit(c) == 0) {
+ /* invalid characters */
+ return -1;
+ }
+ val = xdigit2val(c);
+ for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE;
+ j++, idx++) {
+ if ((1 << j) & val) {
+ /* handle master lcore already parsed */
+ uint32_t lcore = idx;
+ if (master_lcore_parsed &&
+ cfg->master_lcore == lcore) {
+ RTE_LOG(ERR, EAL,
+ "lcore %u is master lcore, cannot use as service core\n",
+ idx);
+ return -1;
+ }
+
+ if (eal_cpu_detected(idx) == 0) {
+ RTE_LOG(ERR, EAL,
+ "lcore %u unavailable\n", idx);
+ return -1;
+ }
+
+ if (cfg->lcore_role[idx] == ROLE_RTE)
+ taken_lcore_count++;
+
+ lcore_config[idx].core_role = ROLE_SERVICE;
+ count++;
+ }
+ }
+ }
+
+ for (; i >= 0; i--)
+ if (coremask[i] != '0')
+ return -1;
+
+ for (; idx < RTE_MAX_LCORE; idx++)
+ lcore_config[idx].core_index = -1;
+
+ if (count == 0)
+ return -1;
+
+ if (core_parsed && taken_lcore_count != count) {
+ RTE_LOG(WARNING, EAL,
+ "Not all service cores are in the coremask. "
+ "Please ensure -c or -l includes service cores\n");
+ }
+
+ cfg->service_lcore_count = count;
+ return 0;
+}
+
+static int
+eal_service_cores_parsed(void)
+{
+ int idx;
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+ if (lcore_config[idx].core_role == ROLE_SERVICE)
+ return 1;
+ }
+ return 0;
+}
+
+static int
+update_lcore_config(int *cores)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+ unsigned int count = 0;
+ unsigned int i;
+ int ret = 0;
+
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (cores[i] != -1) {
+ if (eal_cpu_detected(i) == 0) {
+ RTE_LOG(ERR, EAL, "lcore %u unavailable\n", i);
+ ret = -1;
+ continue;
+ }
+ cfg->lcore_role[i] = ROLE_RTE;
+ count++;
+ } else {
+ cfg->lcore_role[i] = ROLE_OFF;
+ }
+ lcore_config[i].core_index = cores[i];
+ }
+ if (!ret)
+ cfg->lcore_count = count;
+ return ret;
+}
+
+static int
+eal_parse_coremask(const char *coremask, int *cores)
+{
+ unsigned count = 0;
+ int i, j, idx;
+ int val;
+ char c;
+
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++)
+ cores[idx] = -1;
+ idx = 0;
+
+ /* Remove all blank characters ahead and after .
+ * Remove 0x/0X if exists.
+ */
+ while (isblank(*coremask))
+ coremask++;
+ if (coremask[0] == '0' && ((coremask[1] == 'x')
+ || (coremask[1] == 'X')))
+ coremask += 2;
+ i = strlen(coremask);
+ while ((i > 0) && isblank(coremask[i - 1]))
+ i--;
+ if (i == 0)
+ return -1;
+
+ for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) {
+ c = coremask[i];
+ if (isxdigit(c) == 0) {
+ /* invalid characters */
+ return -1;
+ }
+ val = xdigit2val(c);
+ for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; j++, idx++)
+ {
+ if ((1 << j) & val) {
+ cores[idx] = count;
+ count++;
+ }
+ }
+ }
+ for (; i >= 0; i--)
+ if (coremask[i] != '0')
+ return -1;
+ if (count == 0)
+ return -1;
+ return 0;
+}
+
+static int
+eal_parse_service_corelist(const char *corelist)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+ int i, idx = 0;
+ unsigned count = 0;
+ char *end = NULL;
+ int min, max;
+ uint32_t taken_lcore_count = 0;
+
+ if (corelist == NULL)
+ return -1;
+
+ /* Remove all blank characters ahead and after */
+ while (isblank(*corelist))
+ corelist++;
+ i = strlen(corelist);
+ while ((i > 0) && isblank(corelist[i - 1]))
+ i--;
+
+ /* Get list of cores */
+ min = RTE_MAX_LCORE;
+ do {
+ while (isblank(*corelist))
+ corelist++;
+ if (*corelist == '\0')
+ return -1;
+ errno = 0;
+ idx = strtoul(corelist, &end, 10);
+ if (errno || end == NULL)
+ return -1;
+ while (isblank(*end))
+ end++;
+ if (*end == '-') {
+ min = idx;
+ } else if ((*end == ',') || (*end == '\0')) {
+ max = idx;
+ if (min == RTE_MAX_LCORE)
+ min = idx;
+ for (idx = min; idx <= max; idx++) {
+ if (cfg->lcore_role[idx] != ROLE_SERVICE) {
+ /* handle master lcore already parsed */
+ uint32_t lcore = idx;
+ if (cfg->master_lcore == lcore &&
+ master_lcore_parsed) {
+ RTE_LOG(ERR, EAL,
+ "Error: lcore %u is master lcore, cannot use as service core\n",
+ idx);
+ return -1;
+ }
+ if (cfg->lcore_role[idx] == ROLE_RTE)
+ taken_lcore_count++;
+
+ lcore_config[idx].core_role =
+ ROLE_SERVICE;
+ count++;
+ }
+ }
+ min = RTE_MAX_LCORE;
+ } else
+ return -1;
+ corelist = end + 1;
+ } while (*end != '\0');
+
+ if (count == 0)
+ return -1;
+
+ if (core_parsed && taken_lcore_count != count) {
+ RTE_LOG(WARNING, EAL,
+ "Not all service cores were in the coremask. "
+ "Please ensure -c or -l includes service cores\n");
+ }
+
+ return 0;
+}
+
+static int
+eal_parse_corelist(const char *corelist, int *cores)
+{
+ unsigned count = 0;
+ char *end = NULL;
+ int min, max;
+ int idx;
+
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++)
+ cores[idx] = -1;
+
+ /* Remove all blank characters ahead */
+ while (isblank(*corelist))
+ corelist++;
+
+ /* Get list of cores */
+ min = RTE_MAX_LCORE;
+ do {
+ while (isblank(*corelist))
+ corelist++;
+ if (*corelist == '\0')
+ return -1;
+ errno = 0;
+ idx = strtol(corelist, &end, 10);
+ if (errno || end == NULL)
+ return -1;
+ if (idx < 0 || idx >= RTE_MAX_LCORE)
+ return -1;
+ while (isblank(*end))
+ end++;
+ if (*end == '-') {
+ min = idx;
+ } else if ((*end == ',') || (*end == '\0')) {
+ max = idx;
+ if (min == RTE_MAX_LCORE)
+ min = idx;
+ for (idx = min; idx <= max; idx++) {
+ if (cores[idx] == -1) {
+ cores[idx] = count;
+ count++;
+ }
+ }
+ min = RTE_MAX_LCORE;
+ } else
+ return -1;
+ corelist = end + 1;
+ } while (*end != '\0');
+
+ if (count == 0)
+ return -1;
+ return 0;
+}
+
+/* Changes the lcore id of the master thread */
+static int
+eal_parse_master_lcore(const char *arg)
+{
+ char *parsing_end;
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ errno = 0;
+ cfg->master_lcore = (uint32_t) strtol(arg, &parsing_end, 0);
+ if (errno || parsing_end[0] != 0)
+ return -1;
+ if (cfg->master_lcore >= RTE_MAX_LCORE)
+ return -1;
+ master_lcore_parsed = 1;
+
+ /* ensure master core is not used as service core */
+ if (lcore_config[cfg->master_lcore].core_role == ROLE_SERVICE) {
+ RTE_LOG(ERR, EAL,
+ "Error: Master lcore is used as a service core\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Parse elem, the elem could be single number/range or '(' ')' group
+ * 1) A single number elem, it's just a simple digit. e.g. 9
+ * 2) A single range elem, two digits with a '-' between. e.g. 2-6
+ * 3) A group elem, combines multiple 1) or 2) with '( )'. e.g (0,2-4,6)
+ * Within group elem, '-' used for a range separator;
+ * ',' used for a single number.
+ */
+static int
+eal_parse_set(const char *input, rte_cpuset_t *set)
+{
+ unsigned idx;
+ const char *str = input;
+ char *end = NULL;
+ unsigned min, max;
+
+ CPU_ZERO(set);
+
+ while (isblank(*str))
+ str++;
+
+ /* only digit or left bracket is qualify for start point */
+ if ((!isdigit(*str) && *str != '(') || *str == '\0')
+ return -1;
+
+ /* process single number or single range of number */
+ if (*str != '(') {
+ errno = 0;
+ idx = strtoul(str, &end, 10);
+ if (errno || end == NULL || idx >= CPU_SETSIZE)
+ return -1;
+ else {
+ while (isblank(*end))
+ end++;
+
+ min = idx;
+ max = idx;
+ if (*end == '-') {
+ /* process single <number>-<number> */
+ end++;
+ while (isblank(*end))
+ end++;
+ if (!isdigit(*end))
+ return -1;
+
+ errno = 0;
+ idx = strtoul(end, &end, 10);
+ if (errno || end == NULL || idx >= CPU_SETSIZE)
+ return -1;
+ max = idx;
+ while (isblank(*end))
+ end++;
+ if (*end != ',' && *end != '\0')
+ return -1;
+ }
+
+ if (*end != ',' && *end != '\0' &&
+ *end != '@')
+ return -1;
+
+ for (idx = RTE_MIN(min, max);
+ idx <= RTE_MAX(min, max); idx++)
+ CPU_SET(idx, set);
+
+ return end - input;
+ }
+ }
+
+ /* process set within bracket */
+ str++;
+ while (isblank(*str))
+ str++;
+ if (*str == '\0')
+ return -1;
+
+ min = RTE_MAX_LCORE;
+ do {
+
+ /* go ahead to the first digit */
+ while (isblank(*str))
+ str++;
+ if (!isdigit(*str))
+ return -1;
+
+ /* get the digit value */
+ errno = 0;
+ idx = strtoul(str, &end, 10);
+ if (errno || end == NULL || idx >= CPU_SETSIZE)
+ return -1;
+
+ /* go ahead to separator '-',',' and ')' */
+ while (isblank(*end))
+ end++;
+ if (*end == '-') {
+ if (min == RTE_MAX_LCORE)
+ min = idx;
+ else /* avoid continuous '-' */
+ return -1;
+ } else if ((*end == ',') || (*end == ')')) {
+ max = idx;
+ if (min == RTE_MAX_LCORE)
+ min = idx;
+ for (idx = RTE_MIN(min, max);
+ idx <= RTE_MAX(min, max); idx++)
+ CPU_SET(idx, set);
+
+ min = RTE_MAX_LCORE;
+ } else
+ return -1;
+
+ str = end + 1;
+ } while (*end != '\0' && *end != ')');
+
+ /*
+ * to avoid failure that tail blank makes end character check fail
+ * in eal_parse_lcores( )
+ */
+ while (isblank(*str))
+ str++;
+
+ return str - input;
+}
+
+static int
+check_cpuset(rte_cpuset_t *set)
+{
+ unsigned int idx;
+
+ for (idx = 0; idx < CPU_SETSIZE; idx++) {
+ if (!CPU_ISSET(idx, set))
+ continue;
+
+ if (eal_cpu_detected(idx) == 0) {
+ RTE_LOG(ERR, EAL, "core %u "
+ "unavailable\n", idx);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * The format pattern: --lcores='<lcores[@cpus]>[<,lcores[@cpus]>...]'
+ * lcores, cpus could be a single digit/range or a group.
+ * '(' and ')' are necessary if it's a group.
+ * If not supply '@cpus', the value of cpus uses the same as lcores.
+ * e.g. '1,2@(5-7),(3-5)@(0,2),(0,6),7-8' means start 9 EAL thread as below
+ * lcore 0 runs on cpuset 0x41 (cpu 0,6)
+ * lcore 1 runs on cpuset 0x2 (cpu 1)
+ * lcore 2 runs on cpuset 0xe0 (cpu 5,6,7)
+ * lcore 3,4,5 runs on cpuset 0x5 (cpu 0,2)
+ * lcore 6 runs on cpuset 0x41 (cpu 0,6)
+ * lcore 7 runs on cpuset 0x80 (cpu 7)
+ * lcore 8 runs on cpuset 0x100 (cpu 8)
+ */
+static int
+eal_parse_lcores(const char *lcores)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+ rte_cpuset_t lcore_set;
+ unsigned int set_count;
+ unsigned idx = 0;
+ unsigned count = 0;
+ const char *lcore_start = NULL;
+ const char *end = NULL;
+ int offset;
+ rte_cpuset_t cpuset;
+ int lflags;
+ int ret = -1;
+
+ if (lcores == NULL)
+ return -1;
+
+ /* Remove all blank characters ahead and after */
+ while (isblank(*lcores))
+ lcores++;
+
+ CPU_ZERO(&cpuset);
+
+ /* Reset lcore config */
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+ cfg->lcore_role[idx] = ROLE_OFF;
+ lcore_config[idx].core_index = -1;
+ CPU_ZERO(&lcore_config[idx].cpuset);
+ }
+
+ /* Get list of cores */
+ do {
+ while (isblank(*lcores))
+ lcores++;
+ if (*lcores == '\0')
+ goto err;
+
+ lflags = 0;
+
+ /* record lcore_set start point */
+ lcore_start = lcores;
+
+ /* go across a complete bracket */
+ if (*lcore_start == '(') {
+ lcores += strcspn(lcores, ")");
+ if (*lcores++ == '\0')
+ goto err;
+ }
+
+ /* scan the separator '@', ','(next) or '\0'(finish) */
+ lcores += strcspn(lcores, "@,");
+
+ if (*lcores == '@') {
+ /* explicit assign cpuset and update the end cursor */
+ offset = eal_parse_set(lcores + 1, &cpuset);
+ if (offset < 0)
+ goto err;
+ end = lcores + 1 + offset;
+ } else { /* ',' or '\0' */
+ /* haven't given cpuset, current loop done */
+ end = lcores;
+
+ /* go back to check <number>-<number> */
+ offset = strcspn(lcore_start, "(-");
+ if (offset < (end - lcore_start) &&
+ *(lcore_start + offset) != '(')
+ lflags = 1;
+ }
+
+ if (*end != ',' && *end != '\0')
+ goto err;
+
+ /* parse lcore_set from start point */
+ if (eal_parse_set(lcore_start, &lcore_set) < 0)
+ goto err;
+
+ /* without '@', by default using lcore_set as cpuset */
+ if (*lcores != '@')
+ rte_memcpy(&cpuset, &lcore_set, sizeof(cpuset));
+
+ set_count = CPU_COUNT(&lcore_set);
+ /* start to update lcore_set */
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+ if (!CPU_ISSET(idx, &lcore_set))
+ continue;
+ set_count--;
+
+ if (cfg->lcore_role[idx] != ROLE_RTE) {
+ lcore_config[idx].core_index = count;
+ cfg->lcore_role[idx] = ROLE_RTE;
+ count++;
+ }
+
+ if (lflags) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(idx, &cpuset);
+ }
+
+ if (check_cpuset(&cpuset) < 0)
+ goto err;
+ rte_memcpy(&lcore_config[idx].cpuset, &cpuset,
+ sizeof(rte_cpuset_t));
+ }
+
+ /* some cores from the lcore_set can't be handled by EAL */
+ if (set_count != 0)
+ goto err;
+
+ lcores = end + 1;
+ } while (*end != '\0');
+
+ if (count == 0)
+ goto err;
+
+ cfg->lcore_count = count;
+ ret = 0;
+
+err:
+
+ return ret;
+}
+
+#ifndef RTE_EXEC_ENV_WINDOWS
+static int
+eal_parse_syslog(const char *facility, struct internal_config *conf)
+{
+ int i;
+ static const struct {
+ const char *name;
+ int value;
+ } map[] = {
+ { "auth", LOG_AUTH },
+ { "cron", LOG_CRON },
+ { "daemon", LOG_DAEMON },
+ { "ftp", LOG_FTP },
+ { "kern", LOG_KERN },
+ { "lpr", LOG_LPR },
+ { "mail", LOG_MAIL },
+ { "news", LOG_NEWS },
+ { "syslog", LOG_SYSLOG },
+ { "user", LOG_USER },
+ { "uucp", LOG_UUCP },
+ { "local0", LOG_LOCAL0 },
+ { "local1", LOG_LOCAL1 },
+ { "local2", LOG_LOCAL2 },
+ { "local3", LOG_LOCAL3 },
+ { "local4", LOG_LOCAL4 },
+ { "local5", LOG_LOCAL5 },
+ { "local6", LOG_LOCAL6 },
+ { "local7", LOG_LOCAL7 },
+ { NULL, 0 }
+ };
+
+ for (i = 0; map[i].name; i++) {
+ if (!strcmp(facility, map[i].name)) {
+ conf->syslog_facility = map[i].value;
+ return 0;
+ }
+ }
+ return -1;
+}
+#endif
+
+static int
+eal_parse_log_priority(const char *level)
+{
+ static const char * const levels[] = {
+ [RTE_LOG_EMERG] = "emergency",
+ [RTE_LOG_ALERT] = "alert",
+ [RTE_LOG_CRIT] = "critical",
+ [RTE_LOG_ERR] = "error",
+ [RTE_LOG_WARNING] = "warning",
+ [RTE_LOG_NOTICE] = "notice",
+ [RTE_LOG_INFO] = "info",
+ [RTE_LOG_DEBUG] = "debug",
+ };
+ size_t len = strlen(level);
+ unsigned long tmp;
+ char *end;
+ unsigned int i;
+
+ if (len == 0)
+ return -1;
+
+ /* look for named values, skip 0 which is not a valid level */
+ for (i = 1; i < RTE_DIM(levels); i++) {
+ if (strncmp(levels[i], level, len) == 0)
+ return i;
+ }
+
+ /* not a string, maybe it is numeric */
+ errno = 0;
+ tmp = strtoul(level, &end, 0);
+
+ /* check for errors */
+ if (errno != 0 || end == NULL || *end != '\0' ||
+ tmp >= UINT32_MAX)
+ return -1;
+
+ return tmp;
+}
+
+static int
+eal_parse_log_level(const char *arg)
+{
+ const char *pattern = NULL;
+ const char *regex = NULL;
+ char *str, *level;
+ int priority;
+
+ str = strdup(arg);
+ if (str == NULL)
+ return -1;
+
+ if ((level = strchr(str, ','))) {
+ regex = str;
+ *level++ = '\0';
+ } else if ((level = strchr(str, ':'))) {
+ pattern = str;
+ *level++ = '\0';
+ } else {
+ level = str;
+ }
+
+ priority = eal_parse_log_priority(level);
+ if (priority < 0) {
+ fprintf(stderr, "invalid log priority: %s\n", level);
+ goto fail;
+ }
+
+ if (regex) {
+ if (rte_log_set_level_regexp(regex, priority) < 0) {
+ fprintf(stderr, "cannot set log level %s,%d\n",
+ regex, priority);
+ goto fail;
+ }
+ if (rte_log_save_regexp(regex, priority) < 0)
+ goto fail;
+ } else if (pattern) {
+ if (rte_log_set_level_pattern(pattern, priority) < 0) {
+ fprintf(stderr, "cannot set log level %s:%d\n",
+ pattern, priority);
+ goto fail;
+ }
+ if (rte_log_save_pattern(pattern, priority) < 0)
+ goto fail;
+ } else {
+ rte_log_set_global_level(priority);
+ }
+
+ free(str);
+ return 0;
+
+fail:
+ free(str);
+ return -1;
+}
+
+static enum rte_proc_type_t
+eal_parse_proc_type(const char *arg)
+{
+ if (strncasecmp(arg, "primary", sizeof("primary")) == 0)
+ return RTE_PROC_PRIMARY;
+ if (strncasecmp(arg, "secondary", sizeof("secondary")) == 0)
+ return RTE_PROC_SECONDARY;
+ if (strncasecmp(arg, "auto", sizeof("auto")) == 0)
+ return RTE_PROC_AUTO;
+
+ return RTE_PROC_INVALID;
+}
+
+static int
+eal_parse_iova_mode(const char *name)
+{
+ int mode;
+
+ if (name == NULL)
+ return -1;
+
+ if (!strcmp("pa", name))
+ mode = RTE_IOVA_PA;
+ else if (!strcmp("va", name))
+ mode = RTE_IOVA_VA;
+ else
+ return -1;
+
+ internal_config.iova_mode = mode;
+ return 0;
+}
+
+static int
+eal_parse_base_virtaddr(const char *arg)
+{
+ char *end;
+ uint64_t addr;
+
+ errno = 0;
+ addr = strtoull(arg, &end, 16);
+
+ /* check for errors */
+ if ((errno != 0) || (arg[0] == '\0') || end == NULL || (*end != '\0'))
+ return -1;
+
+ /* make sure we don't exceed 32-bit boundary on 32-bit target */
+#ifndef RTE_ARCH_64
+ if (addr >= UINTPTR_MAX)
+ return -1;
+#endif
+
+ /* align the addr on 16M boundary, 16MB is the minimum huge page
+ * size on IBM Power architecture. If the addr is aligned to 16MB,
+ * it can align to 2MB for x86. So this alignment can also be used
+ * on x86 and other architectures.
+ */
+ internal_config.base_virtaddr =
+ RTE_PTR_ALIGN_CEIL((uintptr_t)addr, (size_t)RTE_PGSIZE_16M);
+
+ return 0;
+}
+
+/* caller is responsible for freeing the returned string */
+static char *
+available_cores(void)
+{
+ char *str = NULL;
+ int previous;
+ int sequence;
+ char *tmp;
+ int idx;
+
+ /* find the first available cpu */
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+ if (eal_cpu_detected(idx) == 0)
+ continue;
+ break;
+ }
+ if (idx >= RTE_MAX_LCORE)
+ return NULL;
+
+ /* first sequence */
+ if (asprintf(&str, "%d", idx) < 0)
+ return NULL;
+ previous = idx;
+ sequence = 0;
+
+ for (idx++ ; idx < RTE_MAX_LCORE; idx++) {
+ if (eal_cpu_detected(idx) == 0)
+ continue;
+
+ if (idx == previous + 1) {
+ previous = idx;
+ sequence = 1;
+ continue;
+ }
+
+ /* finish current sequence */
+ if (sequence) {
+ if (asprintf(&tmp, "%s-%d", str, previous) < 0) {
+ free(str);
+ return NULL;
+ }
+ free(str);
+ str = tmp;
+ }
+
+ /* new sequence */
+ if (asprintf(&tmp, "%s,%d", str, idx) < 0) {
+ free(str);
+ return NULL;
+ }
+ free(str);
+ str = tmp;
+ previous = idx;
+ sequence = 0;
+ }
+
+ /* finish last sequence */
+ if (sequence) {
+ if (asprintf(&tmp, "%s-%d", str, previous) < 0) {
+ free(str);
+ return NULL;
+ }
+ free(str);
+ str = tmp;
+ }
+
+ return str;
+}
+
+int
+eal_parse_common_option(int opt, const char *optarg,
+ struct internal_config *conf)
+{
+ static int b_used;
+ static int w_used;
+
+ switch (opt) {
+ /* blacklist */
+ case 'b':
+ if (w_used)
+ goto bw_used;
+ if (eal_option_device_add(RTE_DEVTYPE_BLACKLISTED_PCI,
+ optarg) < 0) {
+ return -1;
+ }
+ b_used = 1;
+ break;
+ /* whitelist */
+ case 'w':
+ if (b_used)
+ goto bw_used;
+ if (eal_option_device_add(RTE_DEVTYPE_WHITELISTED_PCI,
+ optarg) < 0) {
+ return -1;
+ }
+ w_used = 1;
+ break;
+ /* coremask */
+ case 'c': {
+ int lcore_indexes[RTE_MAX_LCORE];
+
+ if (eal_service_cores_parsed())
+ RTE_LOG(WARNING, EAL,
+ "Service cores parsed before dataplane cores. Please ensure -c is before -s or -S\n");
+ if (eal_parse_coremask(optarg, lcore_indexes) < 0) {
+ RTE_LOG(ERR, EAL, "invalid coremask syntax\n");
+ return -1;
+ }
+ if (update_lcore_config(lcore_indexes) < 0) {
+ char *available = available_cores();
+
+ RTE_LOG(ERR, EAL,
+ "invalid coremask, please check specified cores are part of %s\n",
+ available);
+ free(available);
+ return -1;
+ }
+
+ if (core_parsed) {
+ RTE_LOG(ERR, EAL, "Option -c is ignored, because (%s) is set!\n",
+ (core_parsed == LCORE_OPT_LST) ? "-l" :
+ (core_parsed == LCORE_OPT_MAP) ? "--lcore" :
+ "-c");
+ return -1;
+ }
+
+ core_parsed = LCORE_OPT_MSK;
+ break;
+ }
+ /* corelist */
+ case 'l': {
+ int lcore_indexes[RTE_MAX_LCORE];
+
+ if (eal_service_cores_parsed())
+ RTE_LOG(WARNING, EAL,
+ "Service cores parsed before dataplane cores. Please ensure -l is before -s or -S\n");
+
+ if (eal_parse_corelist(optarg, lcore_indexes) < 0) {
+ RTE_LOG(ERR, EAL, "invalid core list syntax\n");
+ return -1;
+ }
+ if (update_lcore_config(lcore_indexes) < 0) {
+ char *available = available_cores();
+
+ RTE_LOG(ERR, EAL,
+ "invalid core list, please check specified cores are part of %s\n",
+ available);
+ free(available);
+ return -1;
+ }
+
+ if (core_parsed) {
+ RTE_LOG(ERR, EAL, "Option -l is ignored, because (%s) is set!\n",
+ (core_parsed == LCORE_OPT_MSK) ? "-c" :
+ (core_parsed == LCORE_OPT_MAP) ? "--lcore" :
+ "-l");
+ return -1;
+ }
+
+ core_parsed = LCORE_OPT_LST;
+ break;
+ }
+ /* service coremask */
+ case 's':
+ if (eal_parse_service_coremask(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid service coremask\n");
+ return -1;
+ }
+ break;
+ /* service corelist */
+ case 'S':
+ if (eal_parse_service_corelist(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid service core list\n");
+ return -1;
+ }
+ break;
+ /* size of memory */
+ case 'm':
+ conf->memory = atoi(optarg);
+ conf->memory *= 1024ULL;
+ conf->memory *= 1024ULL;
+ mem_parsed = 1;
+ break;
+ /* force number of channels */
+ case 'n':
+ conf->force_nchannel = atoi(optarg);
+ if (conf->force_nchannel == 0) {
+ RTE_LOG(ERR, EAL, "invalid channel number\n");
+ return -1;
+ }
+ break;
+ /* force number of ranks */
+ case 'r':
+ conf->force_nrank = atoi(optarg);
+ if (conf->force_nrank == 0 ||
+ conf->force_nrank > 16) {
+ RTE_LOG(ERR, EAL, "invalid rank number\n");
+ return -1;
+ }
+ break;
+ /* force loading of external driver */
+ case 'd':
+ if (eal_plugin_add(optarg) == -1)
+ return -1;
+ break;
+ case 'v':
+ /* since message is explicitly requested by user, we
+ * write message at highest log level so it can always
+ * be seen
+ * even if info or warning messages are disabled */
+ RTE_LOG(CRIT, EAL, "RTE Version: '%s'\n", rte_version());
+ break;
+
+ /* long options */
+ case OPT_HUGE_UNLINK_NUM:
+ conf->hugepage_unlink = 1;
+ break;
+
+ case OPT_NO_HUGE_NUM:
+ conf->no_hugetlbfs = 1;
+ /* no-huge is legacy mem */
+ conf->legacy_mem = 1;
+ break;
+
+ case OPT_NO_PCI_NUM:
+ conf->no_pci = 1;
+ break;
+
+ case OPT_NO_HPET_NUM:
+ conf->no_hpet = 1;
+ break;
+
+ case OPT_VMWARE_TSC_MAP_NUM:
+ conf->vmware_tsc_map = 1;
+ break;
+
+ case OPT_NO_SHCONF_NUM:
+ conf->no_shconf = 1;
+ break;
+
+ case OPT_IN_MEMORY_NUM:
+ conf->in_memory = 1;
+ /* in-memory is a superset of noshconf and huge-unlink */
+ conf->no_shconf = 1;
+ conf->hugepage_unlink = 1;
+ break;
+
+ case OPT_PROC_TYPE_NUM:
+ conf->process_type = eal_parse_proc_type(optarg);
+ break;
+
+ case OPT_MASTER_LCORE_NUM:
+ if (eal_parse_master_lcore(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameter for --"
+ OPT_MASTER_LCORE "\n");
+ return -1;
+ }
+ break;
+
+ case OPT_VDEV_NUM:
+ if (eal_option_device_add(RTE_DEVTYPE_VIRTUAL,
+ optarg) < 0) {
+ return -1;
+ }
+ break;
+
+#ifndef RTE_EXEC_ENV_WINDOWS
+ case OPT_SYSLOG_NUM:
+ if (eal_parse_syslog(optarg, conf) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_SYSLOG "\n");
+ return -1;
+ }
+ break;
+#endif
+
+ case OPT_LOG_LEVEL_NUM: {
+ if (eal_parse_log_level(optarg) < 0) {
+ RTE_LOG(ERR, EAL,
+ "invalid parameters for --"
+ OPT_LOG_LEVEL "\n");
+ return -1;
+ }
+ break;
+ }
+
+#ifndef RTE_EXEC_ENV_WINDOWS
+ case OPT_TRACE_NUM: {
+ if (eal_trace_args_save(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_TRACE "\n");
+ return -1;
+ }
+ break;
+ }
+
+ case OPT_TRACE_DIR_NUM: {
+ if (eal_trace_dir_args_save(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_TRACE_DIR "\n");
+ return -1;
+ }
+ break;
+ }
+
+ case OPT_TRACE_BUF_SIZE_NUM: {
+ if (eal_trace_bufsz_args_save(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_TRACE_BUF_SIZE "\n");
+ return -1;
+ }
+ break;
+ }
+
+ case OPT_TRACE_MODE_NUM: {
+ if (eal_trace_mode_args_save(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_TRACE_MODE "\n");
+ return -1;
+ }
+ break;
+ }
+#endif /* !RTE_EXEC_ENV_WINDOWS */
+
+ case OPT_LCORES_NUM:
+ if (eal_parse_lcores(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameter for --"
+ OPT_LCORES "\n");
+ return -1;
+ }
+
+ if (core_parsed) {
+ RTE_LOG(ERR, EAL, "Option --lcore is ignored, because (%s) is set!\n",
+ (core_parsed == LCORE_OPT_LST) ? "-l" :
+ (core_parsed == LCORE_OPT_MSK) ? "-c" :
+ "--lcore");
+ return -1;
+ }
+
+ core_parsed = LCORE_OPT_MAP;
+ break;
+ case OPT_LEGACY_MEM_NUM:
+ conf->legacy_mem = 1;
+ break;
+ case OPT_SINGLE_FILE_SEGMENTS_NUM:
+ conf->single_file_segments = 1;
+ break;
+ case OPT_IOVA_MODE_NUM:
+ if (eal_parse_iova_mode(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_IOVA_MODE "\n");
+ return -1;
+ }
+ break;
+ case OPT_BASE_VIRTADDR_NUM:
+ if (eal_parse_base_virtaddr(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameter for --"
+ OPT_BASE_VIRTADDR "\n");
+ return -1;
+ }
+ break;
+ case OPT_TELEMETRY_NUM:
+ break;
+ case OPT_NO_TELEMETRY_NUM:
+ conf->no_telemetry = 1;
+ break;
+
+ /* don't know what to do, leave this to caller */
+ default:
+ return 1;
+
+ }
+
+ return 0;
+bw_used:
+ RTE_LOG(ERR, EAL, "Options blacklist (-b) and whitelist (-w) "
+ "cannot be used at the same time\n");
+ return -1;
+}
+
+static void
+eal_auto_detect_cores(struct rte_config *cfg)
+{
+ unsigned int lcore_id;
+ unsigned int removed = 0;
+ rte_cpuset_t affinity_set;
+
+ if (pthread_getaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+ &affinity_set))
+ CPU_ZERO(&affinity_set);
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (cfg->lcore_role[lcore_id] == ROLE_RTE &&
+ !CPU_ISSET(lcore_id, &affinity_set)) {
+ cfg->lcore_role[lcore_id] = ROLE_OFF;
+ removed++;
+ }
+ }
+
+ cfg->lcore_count -= removed;
+}
+
+static void
+compute_ctrl_threads_cpuset(struct internal_config *internal_cfg)
+{
+ rte_cpuset_t *cpuset = &internal_cfg->ctrl_cpuset;
+ rte_cpuset_t default_set;
+ unsigned int lcore_id;
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_has_role(lcore_id, ROLE_OFF))
+ continue;
+ RTE_CPU_OR(cpuset, cpuset, &lcore_config[lcore_id].cpuset);
+ }
+ RTE_CPU_NOT(cpuset, cpuset);
+
+ if (pthread_getaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
+ &default_set))
+ CPU_ZERO(&default_set);
+
+ RTE_CPU_AND(cpuset, cpuset, &default_set);
+
+ /* if no remaining cpu, use master lcore cpu affinity */
+ if (!CPU_COUNT(cpuset)) {
+ memcpy(cpuset, &lcore_config[rte_get_master_lcore()].cpuset,
+ sizeof(*cpuset));
+ }
+}
+
+int
+eal_cleanup_config(struct internal_config *internal_cfg)
+{
+ if (internal_cfg->hugefile_prefix != NULL)
+ free(internal_cfg->hugefile_prefix);
+ if (internal_cfg->hugepage_dir != NULL)
+ free(internal_cfg->hugepage_dir);
+ if (internal_cfg->user_mbuf_pool_ops_name != NULL)
+ free(internal_cfg->user_mbuf_pool_ops_name);
+
+ return 0;
+}
+
+int
+eal_adjust_config(struct internal_config *internal_cfg)
+{
+ int i;
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ if (!core_parsed)
+ eal_auto_detect_cores(cfg);
+
+ if (internal_config.process_type == RTE_PROC_AUTO)
+ internal_config.process_type = eal_proc_type_detect();
+
+ /* default master lcore is the first one */
+ if (!master_lcore_parsed) {
+ cfg->master_lcore = rte_get_next_lcore(-1, 0, 0);
+ if (cfg->master_lcore >= RTE_MAX_LCORE)
+ return -1;
+ lcore_config[cfg->master_lcore].core_role = ROLE_RTE;
+ }
+
+ compute_ctrl_threads_cpuset(internal_cfg);
+
+ /* if no memory amounts were requested, this will result in 0 and
+ * will be overridden later, right after eal_hugepage_info_init() */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ internal_cfg->memory += internal_cfg->socket_mem[i];
+
+ return 0;
+}
+
+int
+eal_check_common_options(struct internal_config *internal_cfg)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ if (cfg->lcore_role[cfg->master_lcore] != ROLE_RTE) {
+ RTE_LOG(ERR, EAL, "Master lcore is not enabled for DPDK\n");
+ return -1;
+ }
+
+ if (internal_cfg->process_type == RTE_PROC_INVALID) {
+ RTE_LOG(ERR, EAL, "Invalid process type specified\n");
+ return -1;
+ }
+ if (internal_cfg->hugefile_prefix != NULL &&
+ strlen(internal_cfg->hugefile_prefix) < 1) {
+ RTE_LOG(ERR, EAL, "Invalid length of --" OPT_FILE_PREFIX " option\n");
+ return -1;
+ }
+ if (internal_cfg->hugepage_dir != NULL &&
+ strlen(internal_cfg->hugepage_dir) < 1) {
+ RTE_LOG(ERR, EAL, "Invalid length of --" OPT_HUGE_DIR" option\n");
+ return -1;
+ }
+ if (internal_cfg->user_mbuf_pool_ops_name != NULL &&
+ strlen(internal_cfg->user_mbuf_pool_ops_name) < 1) {
+ RTE_LOG(ERR, EAL, "Invalid length of --" OPT_MBUF_POOL_OPS_NAME" option\n");
+ return -1;
+ }
+ if (index(eal_get_hugefile_prefix(), '%') != NULL) {
+ RTE_LOG(ERR, EAL, "Invalid char, '%%', in --"OPT_FILE_PREFIX" "
+ "option\n");
+ return -1;
+ }
+ if (mem_parsed && internal_cfg->force_sockets == 1) {
+ RTE_LOG(ERR, EAL, "Options -m and --"OPT_SOCKET_MEM" cannot "
+ "be specified at the same time\n");
+ return -1;
+ }
+ if (internal_cfg->no_hugetlbfs && internal_cfg->force_sockets == 1) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_MEM" cannot "
+ "be specified together with --"OPT_NO_HUGE"\n");
+ return -1;
+ }
+ if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink &&
+ !internal_cfg->in_memory) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot "
+ "be specified together with --"OPT_NO_HUGE"\n");
+ return -1;
+ }
+ if (internal_config.force_socket_limits && internal_config.legacy_mem) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_LIMIT
+ " is only supported in non-legacy memory mode\n");
+ }
+ if (internal_cfg->single_file_segments &&
+ internal_cfg->hugepage_unlink &&
+ !internal_cfg->in_memory) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_SINGLE_FILE_SEGMENTS" is "
+ "not compatible with --"OPT_HUGE_UNLINK"\n");
+ return -1;
+ }
+ if (internal_cfg->legacy_mem &&
+ internal_cfg->in_memory) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_LEGACY_MEM" is not compatible "
+ "with --"OPT_IN_MEMORY"\n");
+ return -1;
+ }
+ if (internal_cfg->legacy_mem && internal_cfg->match_allocations) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_LEGACY_MEM" is not compatible "
+ "with --"OPT_MATCH_ALLOCATIONS"\n");
+ return -1;
+ }
+ if (internal_cfg->no_hugetlbfs && internal_cfg->match_allocations) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_NO_HUGE" is not compatible "
+ "with --"OPT_MATCH_ALLOCATIONS"\n");
+ return -1;
+ }
+ if (internal_cfg->legacy_mem && internal_cfg->memory == 0) {
+ RTE_LOG(NOTICE, EAL, "Static memory layout is selected, "
+ "amount of reserved memory can be adjusted with "
+ "-m or --"OPT_SOCKET_MEM"\n");
+ }
+
+ return 0;
+}
+
+void
+eal_common_usage(void)
+{
+ printf("[options]\n\n"
+ "EAL common options:\n"
+ " -c COREMASK Hexadecimal bitmask of cores to run on\n"
+ " -l CORELIST List of cores to run on\n"
+ " The argument format is <c1>[-c2][,c3[-c4],...]\n"
+ " where c1, c2, etc are core indexes between 0 and %d\n"
+ " --"OPT_LCORES" COREMAP Map lcore set to physical cpu set\n"
+ " The argument format is\n"
+ " '<lcores[@cpus]>[<,lcores[@cpus]>...]'\n"
+ " lcores and cpus list are grouped by '(' and ')'\n"
+ " Within the group, '-' is used for range separator,\n"
+ " ',' is used for single number separator.\n"
+ " '( )' can be omitted for single element group,\n"
+ " '@' can be omitted if cpus and lcores have the same value\n"
+ " -s SERVICE COREMASK Hexadecimal bitmask of cores to be used as service cores\n"
+ " --"OPT_MASTER_LCORE" ID Core ID that is used as master\n"
+ " --"OPT_MBUF_POOL_OPS_NAME" Pool ops name for mbuf to use\n"
+ " -n CHANNELS Number of memory channels\n"
+ " -m MB Memory to allocate (see also --"OPT_SOCKET_MEM")\n"
+ " -r RANKS Force number of memory ranks (don't detect)\n"
+ " -b, --"OPT_PCI_BLACKLIST" Add a PCI device in black list.\n"
+ " Prevent EAL from using this PCI device. The argument\n"
+ " format is <domain:bus:devid.func>.\n"
+ " -w, --"OPT_PCI_WHITELIST" Add a PCI device in white list.\n"
+ " Only use the specified PCI devices. The argument format\n"
+ " is <[domain:]bus:devid.func>. This option can be present\n"
+ " several times (once per device).\n"
+ " [NOTE: PCI whitelist cannot be used with -b option]\n"
+ " --"OPT_VDEV" Add a virtual device.\n"
+ " The argument format is <driver><id>[,key=val,...]\n"
+ " (ex: --vdev=net_pcap0,iface=eth2).\n"
+ " --"OPT_IOVA_MODE" Set IOVA mode. 'pa' for IOVA_PA\n"
+ " 'va' for IOVA_VA\n"
+ " -d LIB.so|DIR Add a driver or driver directory\n"
+ " (can be used multiple times)\n"
+ " --"OPT_VMWARE_TSC_MAP" Use VMware TSC map instead of native RDTSC\n"
+ " --"OPT_PROC_TYPE" Type of this process (primary|secondary|auto)\n"
+#ifndef RTE_EXEC_ENV_WINDOWS
+ " --"OPT_SYSLOG" Set syslog facility\n"
+#endif
+ " --"OPT_LOG_LEVEL"=<int> Set global log level\n"
+ " --"OPT_LOG_LEVEL"=<type-match>:<int>\n"
+ " Set specific log level\n"
+#ifndef RTE_EXEC_ENV_WINDOWS
+ " --"OPT_TRACE"=<regex-match>\n"
+ " Enable trace based on regular expression trace name.\n"
+ " By default, the trace is disabled.\n"
+ " User must specify this option to enable trace.\n"
+ " --"OPT_TRACE_DIR"=<directory path>\n"
+ " Specify trace directory for trace output.\n"
+ " By default, trace output will created at\n"
+ " $HOME directory and parameter must be\n"
+ " specified once only.\n"
+ " --"OPT_TRACE_BUF_SIZE"=<int>\n"
+ " Specify maximum size of allocated memory\n"
+ " for trace output for each thread. Valid\n"
+ " unit can be either 'B|K|M' for 'Bytes',\n"
+ " 'KBytes' and 'MBytes' respectively.\n"
+ " Default is 1MB and parameter must be\n"
+ " specified once only.\n"
+ " --"OPT_TRACE_MODE"=<o[verwrite] | d[iscard]>\n"
+ " Specify the mode of update of trace\n"
+ " output file. Either update on a file can\n"
+ " be wrapped or discarded when file size\n"
+ " reaches its maximum limit.\n"
+ " Default mode is 'overwrite' and parameter\n"
+ " must be specified once only.\n"
+#endif /* !RTE_EXEC_ENV_WINDOWS */
+ " -v Display version information on startup\n"
+ " -h, --help This help\n"
+ " --"OPT_IN_MEMORY" Operate entirely in memory. This will\n"
+ " disable secondary process support\n"
+ " --"OPT_BASE_VIRTADDR" Base virtual address\n"
+ " --"OPT_TELEMETRY" Enable telemetry support (on by default)\n"
+ " --"OPT_NO_TELEMETRY" Disable telemetry support\n"
+ "\nEAL options for DEBUG use only:\n"
+ " --"OPT_HUGE_UNLINK" Unlink hugepage files after init\n"
+ " --"OPT_NO_HUGE" Use malloc instead of hugetlbfs\n"
+ " --"OPT_NO_PCI" Disable PCI\n"
+ " --"OPT_NO_HPET" Disable HPET\n"
+ " --"OPT_NO_SHCONF" No shared config (mmap'd files)\n"
+ "\n", RTE_MAX_LCORE);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_proc.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_proc.c
new file mode 100644
index 000000000..935e8fefe
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_proc.c
@@ -0,0 +1,1217 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016-2018 Intel Corporation
+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <inttypes.h>
+#include <libgen.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include <rte_alarm.h>
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_eal.h>
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_tailq.h>
+
+#include "eal_private.h"
+#include "eal_filesystem.h"
+#include "eal_internal_cfg.h"
+
+static int mp_fd = -1;
+static char mp_filter[PATH_MAX]; /* Filter for secondary process sockets */
+static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */
+static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
+static char peer_name[PATH_MAX];
+
+struct action_entry {
+ TAILQ_ENTRY(action_entry) next;
+ char action_name[RTE_MP_MAX_NAME_LEN];
+ rte_mp_t action;
+};
+
+/** Double linked list of actions. */
+TAILQ_HEAD(action_entry_list, action_entry);
+
+static struct action_entry_list action_entry_list =
+ TAILQ_HEAD_INITIALIZER(action_entry_list);
+
+enum mp_type {
+ MP_MSG, /* Share message with peers, will not block */
+ MP_REQ, /* Request for information, Will block for a reply */
+ MP_REP, /* Response to previously-received request */
+ MP_IGN, /* Response telling requester to ignore this response */
+};
+
+struct mp_msg_internal {
+ int type;
+ struct rte_mp_msg msg;
+};
+
+struct async_request_param {
+ rte_mp_async_reply_t clb;
+ struct rte_mp_reply user_reply;
+ struct timespec end;
+ int n_responses_processed;
+};
+
+struct pending_request {
+ TAILQ_ENTRY(pending_request) next;
+ enum {
+ REQUEST_TYPE_SYNC,
+ REQUEST_TYPE_ASYNC
+ } type;
+ char dst[PATH_MAX];
+ struct rte_mp_msg *request;
+ struct rte_mp_msg *reply;
+ int reply_received;
+ RTE_STD_C11
+ union {
+ struct {
+ struct async_request_param *param;
+ } async;
+ struct {
+ pthread_cond_t cond;
+ } sync;
+ };
+};
+
+TAILQ_HEAD(pending_request_list, pending_request);
+
+static struct {
+ struct pending_request_list requests;
+ pthread_mutex_t lock;
+} pending_requests = {
+ .requests = TAILQ_HEAD_INITIALIZER(pending_requests.requests),
+ .lock = PTHREAD_MUTEX_INITIALIZER,
+ /**< used in async requests only */
+};
+
+/* forward declarations */
+static int
+mp_send(struct rte_mp_msg *msg, const char *peer, int type);
+
+/* for use with alarm callback */
+static void
+async_reply_handle(void *arg);
+
+/* for use with process_msg */
+static struct pending_request *
+async_reply_handle_thread_unsafe(void *arg);
+
+static void
+trigger_async_action(struct pending_request *req);
+
+static struct pending_request *
+find_pending_request(const char *dst, const char *act_name)
+{
+ struct pending_request *r;
+
+ TAILQ_FOREACH(r, &pending_requests.requests, next) {
+ if (!strcmp(r->dst, dst) &&
+ !strcmp(r->request->name, act_name))
+ break;
+ }
+
+ return r;
+}
+
+static void
+create_socket_path(const char *name, char *buf, int len)
+{
+ const char *prefix = eal_mp_socket_path();
+
+ if (strlen(name) > 0)
+ snprintf(buf, len, "%s_%s", prefix, name);
+ else
+ strlcpy(buf, prefix, len);
+}
+
+int
+rte_eal_primary_proc_alive(const char *config_file_path)
+{
+ int config_fd;
+
+ if (config_file_path)
+ config_fd = open(config_file_path, O_RDONLY);
+ else {
+ const char *path;
+
+ path = eal_runtime_config_path();
+ config_fd = open(path, O_RDONLY);
+ }
+ if (config_fd < 0)
+ return 0;
+
+ int ret = lockf(config_fd, F_TEST, 0);
+ close(config_fd);
+
+ return !!ret;
+}
+
+static struct action_entry *
+find_action_entry_by_name(const char *name)
+{
+ struct action_entry *entry;
+
+ TAILQ_FOREACH(entry, &action_entry_list, next) {
+ if (strncmp(entry->action_name, name, RTE_MP_MAX_NAME_LEN) == 0)
+ break;
+ }
+
+ return entry;
+}
+
+static int
+validate_action_name(const char *name)
+{
+ if (name == NULL) {
+ RTE_LOG(ERR, EAL, "Action name cannot be NULL\n");
+ rte_errno = EINVAL;
+ return -1;
+ }
+ if (strnlen(name, RTE_MP_MAX_NAME_LEN) == 0) {
+ RTE_LOG(ERR, EAL, "Length of action name is zero\n");
+ rte_errno = EINVAL;
+ return -1;
+ }
+ if (strnlen(name, RTE_MP_MAX_NAME_LEN) == RTE_MP_MAX_NAME_LEN) {
+ rte_errno = E2BIG;
+ return -1;
+ }
+ return 0;
+}
+
+int
+rte_mp_action_register(const char *name, rte_mp_t action)
+{
+ struct action_entry *entry;
+
+ if (validate_action_name(name) != 0)
+ return -1;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ entry = malloc(sizeof(struct action_entry));
+ if (entry == NULL) {
+ rte_errno = ENOMEM;
+ return -1;
+ }
+ strlcpy(entry->action_name, name, sizeof(entry->action_name));
+ entry->action = action;
+
+ pthread_mutex_lock(&mp_mutex_action);
+ if (find_action_entry_by_name(name) != NULL) {
+ pthread_mutex_unlock(&mp_mutex_action);
+ rte_errno = EEXIST;
+ free(entry);
+ return -1;
+ }
+ TAILQ_INSERT_TAIL(&action_entry_list, entry, next);
+ pthread_mutex_unlock(&mp_mutex_action);
+ return 0;
+}
+
+void
+rte_mp_action_unregister(const char *name)
+{
+ struct action_entry *entry;
+
+ if (validate_action_name(name) != 0)
+ return;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return;
+ }
+
+ pthread_mutex_lock(&mp_mutex_action);
+ entry = find_action_entry_by_name(name);
+ if (entry == NULL) {
+ pthread_mutex_unlock(&mp_mutex_action);
+ return;
+ }
+ TAILQ_REMOVE(&action_entry_list, entry, next);
+ pthread_mutex_unlock(&mp_mutex_action);
+ free(entry);
+}
+
+static int
+read_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
+{
+ int msglen;
+ struct iovec iov;
+ struct msghdr msgh;
+ char control[CMSG_SPACE(sizeof(m->msg.fds))];
+ struct cmsghdr *cmsg;
+ int buflen = sizeof(*m) - sizeof(m->msg.fds);
+
+ memset(&msgh, 0, sizeof(msgh));
+ iov.iov_base = m;
+ iov.iov_len = buflen;
+
+ msgh.msg_name = s;
+ msgh.msg_namelen = sizeof(*s);
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+ msgh.msg_control = control;
+ msgh.msg_controllen = sizeof(control);
+
+ msglen = recvmsg(mp_fd, &msgh, 0);
+ if (msglen < 0) {
+ RTE_LOG(ERR, EAL, "recvmsg failed, %s\n", strerror(errno));
+ return -1;
+ }
+
+ if (msglen != buflen || (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
+ RTE_LOG(ERR, EAL, "truncated msg\n");
+ return -1;
+ }
+
+ /* read auxiliary FDs if any */
+ for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
+ if ((cmsg->cmsg_level == SOL_SOCKET) &&
+ (cmsg->cmsg_type == SCM_RIGHTS)) {
+ memcpy(m->msg.fds, CMSG_DATA(cmsg), sizeof(m->msg.fds));
+ break;
+ }
+ }
+ /* sanity-check the response */
+ if (m->msg.num_fds < 0 || m->msg.num_fds > RTE_MP_MAX_FD_NUM) {
+ RTE_LOG(ERR, EAL, "invalid number of fd's received\n");
+ return -1;
+ }
+ if (m->msg.len_param < 0 || m->msg.len_param > RTE_MP_MAX_PARAM_LEN) {
+ RTE_LOG(ERR, EAL, "invalid received data length\n");
+ return -1;
+ }
+ return 0;
+}
+
+static void
+process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
+{
+ struct pending_request *pending_req;
+ struct action_entry *entry;
+ struct rte_mp_msg *msg = &m->msg;
+ rte_mp_t action = NULL;
+
+ RTE_LOG(DEBUG, EAL, "msg: %s\n", msg->name);
+
+ if (m->type == MP_REP || m->type == MP_IGN) {
+ struct pending_request *req = NULL;
+
+ pthread_mutex_lock(&pending_requests.lock);
+ pending_req = find_pending_request(s->sun_path, msg->name);
+ if (pending_req) {
+ memcpy(pending_req->reply, msg, sizeof(*msg));
+ /* -1 indicates that we've been asked to ignore */
+ pending_req->reply_received =
+ m->type == MP_REP ? 1 : -1;
+
+ if (pending_req->type == REQUEST_TYPE_SYNC)
+ pthread_cond_signal(&pending_req->sync.cond);
+ else if (pending_req->type == REQUEST_TYPE_ASYNC)
+ req = async_reply_handle_thread_unsafe(
+ pending_req);
+ } else
+ RTE_LOG(ERR, EAL, "Drop mp reply: %s\n", msg->name);
+ pthread_mutex_unlock(&pending_requests.lock);
+
+ if (req != NULL)
+ trigger_async_action(req);
+ return;
+ }
+
+ pthread_mutex_lock(&mp_mutex_action);
+ entry = find_action_entry_by_name(msg->name);
+ if (entry != NULL)
+ action = entry->action;
+ pthread_mutex_unlock(&mp_mutex_action);
+
+ if (!action) {
+ if (m->type == MP_REQ && !internal_config.init_complete) {
+ /* if this is a request, and init is not yet complete,
+ * and callback wasn't registered, we should tell the
+ * requester to ignore our existence because we're not
+ * yet ready to process this request.
+ */
+ struct rte_mp_msg dummy;
+
+ memset(&dummy, 0, sizeof(dummy));
+ strlcpy(dummy.name, msg->name, sizeof(dummy.name));
+ mp_send(&dummy, s->sun_path, MP_IGN);
+ } else {
+ RTE_LOG(ERR, EAL, "Cannot find action: %s\n",
+ msg->name);
+ }
+ } else if (action(msg, s->sun_path) < 0) {
+ RTE_LOG(ERR, EAL, "Fail to handle message: %s\n", msg->name);
+ }
+}
+
+static void *
+mp_handle(void *arg __rte_unused)
+{
+ struct mp_msg_internal msg;
+ struct sockaddr_un sa;
+
+ while (1) {
+ if (read_msg(&msg, &sa) == 0)
+ process_msg(&msg, &sa);
+ }
+
+ return NULL;
+}
+
+static int
+timespec_cmp(const struct timespec *a, const struct timespec *b)
+{
+ if (a->tv_sec < b->tv_sec)
+ return -1;
+ if (a->tv_sec > b->tv_sec)
+ return 1;
+ if (a->tv_nsec < b->tv_nsec)
+ return -1;
+ if (a->tv_nsec > b->tv_nsec)
+ return 1;
+ return 0;
+}
+
+enum async_action {
+ ACTION_FREE, /**< free the action entry, but don't trigger callback */
+ ACTION_TRIGGER /**< trigger callback, then free action entry */
+};
+
+static enum async_action
+process_async_request(struct pending_request *sr, const struct timespec *now)
+{
+ struct async_request_param *param;
+ struct rte_mp_reply *reply;
+ bool timeout, last_msg;
+
+ param = sr->async.param;
+ reply = &param->user_reply;
+
+ /* did we timeout? */
+ timeout = timespec_cmp(&param->end, now) <= 0;
+
+ /* if we received a response, adjust relevant data and copy mesasge. */
+ if (sr->reply_received == 1 && sr->reply) {
+ struct rte_mp_msg *msg, *user_msgs, *tmp;
+
+ msg = sr->reply;
+ user_msgs = reply->msgs;
+
+ tmp = realloc(user_msgs, sizeof(*msg) *
+ (reply->nb_received + 1));
+ if (!tmp) {
+ RTE_LOG(ERR, EAL, "Fail to alloc reply for request %s:%s\n",
+ sr->dst, sr->request->name);
+ /* this entry is going to be removed and its message
+ * dropped, but we don't want to leak memory, so
+ * continue.
+ */
+ } else {
+ user_msgs = tmp;
+ reply->msgs = user_msgs;
+ memcpy(&user_msgs[reply->nb_received],
+ msg, sizeof(*msg));
+ reply->nb_received++;
+ }
+
+ /* mark this request as processed */
+ param->n_responses_processed++;
+ } else if (sr->reply_received == -1) {
+ /* we were asked to ignore this process */
+ reply->nb_sent--;
+ } else if (timeout) {
+ /* count it as processed response, but don't increment
+ * nb_received.
+ */
+ param->n_responses_processed++;
+ }
+
+ free(sr->reply);
+
+ last_msg = param->n_responses_processed == reply->nb_sent;
+
+ return last_msg ? ACTION_TRIGGER : ACTION_FREE;
+}
+
+static void
+trigger_async_action(struct pending_request *sr)
+{
+ struct async_request_param *param;
+ struct rte_mp_reply *reply;
+
+ param = sr->async.param;
+ reply = &param->user_reply;
+
+ param->clb(sr->request, reply);
+
+ /* clean up */
+ free(sr->async.param->user_reply.msgs);
+ free(sr->async.param);
+ free(sr->request);
+ free(sr);
+}
+
+static struct pending_request *
+async_reply_handle_thread_unsafe(void *arg)
+{
+ struct pending_request *req = (struct pending_request *)arg;
+ enum async_action action;
+ struct timespec ts_now;
+ struct timeval now;
+
+ if (gettimeofday(&now, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot get current time\n");
+ goto no_trigger;
+ }
+ ts_now.tv_nsec = now.tv_usec * 1000;
+ ts_now.tv_sec = now.tv_sec;
+
+ action = process_async_request(req, &ts_now);
+
+ TAILQ_REMOVE(&pending_requests.requests, req, next);
+
+ if (rte_eal_alarm_cancel(async_reply_handle, req) < 0) {
+ /* if we failed to cancel the alarm because it's already in
+ * progress, don't proceed because otherwise we will end up
+ * handling the same message twice.
+ */
+ if (rte_errno == EINPROGRESS) {
+ RTE_LOG(DEBUG, EAL, "Request handling is already in progress\n");
+ goto no_trigger;
+ }
+ RTE_LOG(ERR, EAL, "Failed to cancel alarm\n");
+ }
+
+ if (action == ACTION_TRIGGER)
+ return req;
+no_trigger:
+ free(req);
+ return NULL;
+}
+
+static void
+async_reply_handle(void *arg)
+{
+ struct pending_request *req;
+
+ pthread_mutex_lock(&pending_requests.lock);
+ req = async_reply_handle_thread_unsafe(arg);
+ pthread_mutex_unlock(&pending_requests.lock);
+
+ if (req != NULL)
+ trigger_async_action(req);
+}
+
+static int
+open_socket_fd(void)
+{
+ struct sockaddr_un un;
+
+ peer_name[0] = '\0';
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+ snprintf(peer_name, sizeof(peer_name),
+ "%d_%"PRIx64, getpid(), rte_rdtsc());
+
+ mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+ if (mp_fd < 0) {
+ RTE_LOG(ERR, EAL, "failed to create unix socket\n");
+ return -1;
+ }
+
+ memset(&un, 0, sizeof(un));
+ un.sun_family = AF_UNIX;
+
+ create_socket_path(peer_name, un.sun_path, sizeof(un.sun_path));
+
+ unlink(un.sun_path); /* May still exist since last run */
+
+ if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
+ RTE_LOG(ERR, EAL, "failed to bind %s: %s\n",
+ un.sun_path, strerror(errno));
+ close(mp_fd);
+ return -1;
+ }
+
+ RTE_LOG(INFO, EAL, "Multi-process socket %s\n", un.sun_path);
+ return mp_fd;
+}
+
+static void
+close_socket_fd(void)
+{
+ char path[PATH_MAX];
+
+ if (mp_fd < 0)
+ return;
+
+ close(mp_fd);
+ create_socket_path(peer_name, path, sizeof(path));
+ unlink(path);
+}
+
+int
+rte_mp_channel_init(void)
+{
+ char path[PATH_MAX];
+ int dir_fd;
+ pthread_t mp_handle_tid;
+
+ /* in no shared files mode, we do not have secondary processes support,
+ * so no need to initialize IPC.
+ */
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC will be disabled\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ /* create filter path */
+ create_socket_path("*", path, sizeof(path));
+ strlcpy(mp_filter, basename(path), sizeof(mp_filter));
+
+ /* path may have been modified, so recreate it */
+ create_socket_path("*", path, sizeof(path));
+ strlcpy(mp_dir_path, dirname(path), sizeof(mp_dir_path));
+
+ /* lock the directory */
+ dir_fd = open(mp_dir_path, O_RDONLY);
+ if (dir_fd < 0) {
+ RTE_LOG(ERR, EAL, "failed to open %s: %s\n",
+ mp_dir_path, strerror(errno));
+ return -1;
+ }
+
+ if (flock(dir_fd, LOCK_EX)) {
+ RTE_LOG(ERR, EAL, "failed to lock %s: %s\n",
+ mp_dir_path, strerror(errno));
+ close(dir_fd);
+ return -1;
+ }
+
+ if (open_socket_fd() < 0) {
+ close(dir_fd);
+ return -1;
+ }
+
+ if (rte_ctrl_thread_create(&mp_handle_tid, "rte_mp_handle",
+ NULL, mp_handle, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
+ strerror(errno));
+ close(mp_fd);
+ close(dir_fd);
+ mp_fd = -1;
+ return -1;
+ }
+
+ /* unlock the directory */
+ flock(dir_fd, LOCK_UN);
+ close(dir_fd);
+
+ return 0;
+}
+
+void
+rte_mp_channel_cleanup(void)
+{
+ close_socket_fd();
+}
+
+/**
+ * Return -1, as fail to send message and it's caused by the local side.
+ * Return 0, as fail to send message and it's caused by the remote side.
+ * Return 1, as succeed to send message.
+ *
+ */
+static int
+send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
+{
+ int snd;
+ struct iovec iov;
+ struct msghdr msgh;
+ struct cmsghdr *cmsg;
+ struct sockaddr_un dst;
+ struct mp_msg_internal m;
+ int fd_size = msg->num_fds * sizeof(int);
+ char control[CMSG_SPACE(fd_size)];
+
+ m.type = type;
+ memcpy(&m.msg, msg, sizeof(*msg));
+
+ memset(&dst, 0, sizeof(dst));
+ dst.sun_family = AF_UNIX;
+ strlcpy(dst.sun_path, dst_path, sizeof(dst.sun_path));
+
+ memset(&msgh, 0, sizeof(msgh));
+ memset(control, 0, sizeof(control));
+
+ iov.iov_base = &m;
+ iov.iov_len = sizeof(m) - sizeof(msg->fds);
+
+ msgh.msg_name = &dst;
+ msgh.msg_namelen = sizeof(dst);
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+ msgh.msg_control = control;
+ msgh.msg_controllen = sizeof(control);
+
+ cmsg = CMSG_FIRSTHDR(&msgh);
+ cmsg->cmsg_len = CMSG_LEN(fd_size);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ memcpy(CMSG_DATA(cmsg), msg->fds, fd_size);
+
+ do {
+ snd = sendmsg(mp_fd, &msgh, 0);
+ } while (snd < 0 && errno == EINTR);
+
+ if (snd < 0) {
+ rte_errno = errno;
+ /* Check if it caused by peer process exits */
+ if (errno == ECONNREFUSED &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ unlink(dst_path);
+ return 0;
+ }
+ RTE_LOG(ERR, EAL, "failed to send to (%s) due to %s\n",
+ dst_path, strerror(errno));
+ return -1;
+ }
+
+ return 1;
+}
+
+static int
+mp_send(struct rte_mp_msg *msg, const char *peer, int type)
+{
+ int dir_fd, ret = 0;
+ DIR *mp_dir;
+ struct dirent *ent;
+
+ if (!peer && (rte_eal_process_type() == RTE_PROC_SECONDARY))
+ peer = eal_mp_socket_path();
+
+ if (peer) {
+ if (send_msg(peer, msg, type) < 0)
+ return -1;
+ else
+ return 0;
+ }
+
+ /* broadcast to all secondary processes */
+ mp_dir = opendir(mp_dir_path);
+ if (!mp_dir) {
+ RTE_LOG(ERR, EAL, "Unable to open directory %s\n",
+ mp_dir_path);
+ rte_errno = errno;
+ return -1;
+ }
+
+ dir_fd = dirfd(mp_dir);
+ /* lock the directory to prevent processes spinning up while we send */
+ if (flock(dir_fd, LOCK_SH)) {
+ RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+ mp_dir_path);
+ rte_errno = errno;
+ closedir(mp_dir);
+ return -1;
+ }
+
+ while ((ent = readdir(mp_dir))) {
+ char path[PATH_MAX];
+
+ if (fnmatch(mp_filter, ent->d_name, 0) != 0)
+ continue;
+
+ snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
+ ent->d_name);
+ if (send_msg(path, msg, type) < 0)
+ ret = -1;
+ }
+ /* unlock the dir */
+ flock(dir_fd, LOCK_UN);
+
+ /* dir_fd automatically closed on closedir */
+ closedir(mp_dir);
+ return ret;
+}
+
+static int
+check_input(const struct rte_mp_msg *msg)
+{
+ if (msg == NULL) {
+ RTE_LOG(ERR, EAL, "Msg cannot be NULL\n");
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ if (validate_action_name(msg->name) != 0)
+ return -1;
+
+ if (msg->len_param < 0) {
+ RTE_LOG(ERR, EAL, "Message data length is negative\n");
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ if (msg->num_fds < 0) {
+ RTE_LOG(ERR, EAL, "Number of fd's is negative\n");
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ if (msg->len_param > RTE_MP_MAX_PARAM_LEN) {
+ RTE_LOG(ERR, EAL, "Message data is too long\n");
+ rte_errno = E2BIG;
+ return -1;
+ }
+
+ if (msg->num_fds > RTE_MP_MAX_FD_NUM) {
+ RTE_LOG(ERR, EAL, "Cannot send more than %d FDs\n",
+ RTE_MP_MAX_FD_NUM);
+ rte_errno = E2BIG;
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+rte_mp_sendmsg(struct rte_mp_msg *msg)
+{
+ if (check_input(msg) != 0)
+ return -1;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ RTE_LOG(DEBUG, EAL, "sendmsg: %s\n", msg->name);
+ return mp_send(msg, NULL, MP_MSG);
+}
+
+static int
+mp_request_async(const char *dst, struct rte_mp_msg *req,
+ struct async_request_param *param, const struct timespec *ts)
+{
+ struct rte_mp_msg *reply_msg;
+ struct pending_request *pending_req, *exist;
+ int ret = -1;
+
+ pending_req = calloc(1, sizeof(*pending_req));
+ reply_msg = calloc(1, sizeof(*reply_msg));
+ if (pending_req == NULL || reply_msg == NULL) {
+ RTE_LOG(ERR, EAL, "Could not allocate space for sync request\n");
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto fail;
+ }
+
+ pending_req->type = REQUEST_TYPE_ASYNC;
+ strlcpy(pending_req->dst, dst, sizeof(pending_req->dst));
+ pending_req->request = req;
+ pending_req->reply = reply_msg;
+ pending_req->async.param = param;
+
+ /* queue already locked by caller */
+
+ exist = find_pending_request(dst, req->name);
+ if (exist) {
+ RTE_LOG(ERR, EAL, "A pending request %s:%s\n", dst, req->name);
+ rte_errno = EEXIST;
+ ret = -1;
+ goto fail;
+ }
+
+ ret = send_msg(dst, req, MP_REQ);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Fail to send request %s:%s\n",
+ dst, req->name);
+ ret = -1;
+ goto fail;
+ } else if (ret == 0) {
+ ret = 0;
+ goto fail;
+ }
+ param->user_reply.nb_sent++;
+
+ /* if alarm set fails, we simply ignore the reply */
+ if (rte_eal_alarm_set(ts->tv_sec * 1000000 + ts->tv_nsec / 1000,
+ async_reply_handle, pending_req) < 0) {
+ RTE_LOG(ERR, EAL, "Fail to set alarm for request %s:%s\n",
+ dst, req->name);
+ ret = -1;
+ goto fail;
+ }
+ TAILQ_INSERT_TAIL(&pending_requests.requests, pending_req, next);
+
+ return 0;
+fail:
+ free(pending_req);
+ free(reply_msg);
+ return ret;
+}
+
+static int
+mp_request_sync(const char *dst, struct rte_mp_msg *req,
+ struct rte_mp_reply *reply, const struct timespec *ts)
+{
+ int ret;
+ struct rte_mp_msg msg, *tmp;
+ struct pending_request pending_req, *exist;
+
+ pending_req.type = REQUEST_TYPE_SYNC;
+ pending_req.reply_received = 0;
+ strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
+ pending_req.request = req;
+ pending_req.reply = &msg;
+ pthread_cond_init(&pending_req.sync.cond, NULL);
+
+ exist = find_pending_request(dst, req->name);
+ if (exist) {
+ RTE_LOG(ERR, EAL, "A pending request %s:%s\n", dst, req->name);
+ rte_errno = EEXIST;
+ return -1;
+ }
+
+ ret = send_msg(dst, req, MP_REQ);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Fail to send request %s:%s\n",
+ dst, req->name);
+ return -1;
+ } else if (ret == 0)
+ return 0;
+
+ TAILQ_INSERT_TAIL(&pending_requests.requests, &pending_req, next);
+
+ reply->nb_sent++;
+
+ do {
+ ret = pthread_cond_timedwait(&pending_req.sync.cond,
+ &pending_requests.lock, ts);
+ } while (ret != 0 && ret != ETIMEDOUT);
+
+ TAILQ_REMOVE(&pending_requests.requests, &pending_req, next);
+
+ if (pending_req.reply_received == 0) {
+ RTE_LOG(ERR, EAL, "Fail to recv reply for request %s:%s\n",
+ dst, req->name);
+ rte_errno = ETIMEDOUT;
+ return -1;
+ }
+ if (pending_req.reply_received == -1) {
+ RTE_LOG(DEBUG, EAL, "Asked to ignore response\n");
+ /* not receiving this message is not an error, so decrement
+ * number of sent messages
+ */
+ reply->nb_sent--;
+ return 0;
+ }
+
+ tmp = realloc(reply->msgs, sizeof(msg) * (reply->nb_received + 1));
+ if (!tmp) {
+ RTE_LOG(ERR, EAL, "Fail to alloc reply for request %s:%s\n",
+ dst, req->name);
+ rte_errno = ENOMEM;
+ return -1;
+ }
+ memcpy(&tmp[reply->nb_received], &msg, sizeof(msg));
+ reply->msgs = tmp;
+ reply->nb_received++;
+ return 0;
+}
+
+int
+rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
+ const struct timespec *ts)
+{
+ int dir_fd, ret = -1;
+ DIR *mp_dir;
+ struct dirent *ent;
+ struct timeval now;
+ struct timespec end;
+
+ RTE_LOG(DEBUG, EAL, "request: %s\n", req->name);
+
+ reply->nb_sent = 0;
+ reply->nb_received = 0;
+ reply->msgs = NULL;
+
+ if (check_input(req) != 0)
+ goto end;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ if (gettimeofday(&now, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to get current time\n");
+ rte_errno = errno;
+ goto end;
+ }
+
+ end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
+ end.tv_sec = now.tv_sec + ts->tv_sec +
+ (now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
+
+ /* for secondary process, send request to the primary process only */
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ pthread_mutex_lock(&pending_requests.lock);
+ ret = mp_request_sync(eal_mp_socket_path(), req, reply, &end);
+ pthread_mutex_unlock(&pending_requests.lock);
+ goto end;
+ }
+
+ /* for primary process, broadcast request, and collect reply 1 by 1 */
+ mp_dir = opendir(mp_dir_path);
+ if (!mp_dir) {
+ RTE_LOG(ERR, EAL, "Unable to open directory %s\n", mp_dir_path);
+ rte_errno = errno;
+ goto end;
+ }
+
+ dir_fd = dirfd(mp_dir);
+ /* lock the directory to prevent processes spinning up while we send */
+ if (flock(dir_fd, LOCK_SH)) {
+ RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+ mp_dir_path);
+ rte_errno = errno;
+ goto close_end;
+ }
+
+ pthread_mutex_lock(&pending_requests.lock);
+ while ((ent = readdir(mp_dir))) {
+ char path[PATH_MAX];
+
+ if (fnmatch(mp_filter, ent->d_name, 0) != 0)
+ continue;
+
+ snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
+ ent->d_name);
+
+ /* unlocks the mutex while waiting for response,
+ * locks on receive
+ */
+ if (mp_request_sync(path, req, reply, &end))
+ goto unlock_end;
+ }
+ ret = 0;
+
+unlock_end:
+ pthread_mutex_unlock(&pending_requests.lock);
+ /* unlock the directory */
+ flock(dir_fd, LOCK_UN);
+
+close_end:
+ /* dir_fd automatically closed on closedir */
+ closedir(mp_dir);
+
+end:
+ if (ret) {
+ free(reply->msgs);
+ reply->nb_received = 0;
+ reply->msgs = NULL;
+ }
+ return ret;
+}
+
+int
+rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
+ rte_mp_async_reply_t clb)
+{
+ struct rte_mp_msg *copy;
+ struct pending_request *dummy;
+ struct async_request_param *param;
+ struct rte_mp_reply *reply;
+ int dir_fd, ret = 0;
+ DIR *mp_dir;
+ struct dirent *ent;
+ struct timeval now;
+ struct timespec *end;
+ bool dummy_used = false;
+
+ RTE_LOG(DEBUG, EAL, "request: %s\n", req->name);
+
+ if (check_input(req) != 0)
+ return -1;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ if (gettimeofday(&now, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to get current time\n");
+ rte_errno = errno;
+ return -1;
+ }
+ copy = calloc(1, sizeof(*copy));
+ dummy = calloc(1, sizeof(*dummy));
+ param = calloc(1, sizeof(*param));
+ if (copy == NULL || dummy == NULL || param == NULL) {
+ RTE_LOG(ERR, EAL, "Failed to allocate memory for async reply\n");
+ rte_errno = ENOMEM;
+ goto fail;
+ }
+
+ /* copy message */
+ memcpy(copy, req, sizeof(*copy));
+
+ param->n_responses_processed = 0;
+ param->clb = clb;
+ end = &param->end;
+ reply = &param->user_reply;
+
+ end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
+ end->tv_sec = now.tv_sec + ts->tv_sec +
+ (now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
+ reply->nb_sent = 0;
+ reply->nb_received = 0;
+ reply->msgs = NULL;
+
+ /* we have to lock the request queue here, as we will be adding a bunch
+ * of requests to the queue at once, and some of the replies may arrive
+ * before we add all of the requests to the queue.
+ */
+ pthread_mutex_lock(&pending_requests.lock);
+
+ /* we have to ensure that callback gets triggered even if we don't send
+ * anything, therefore earlier we have allocated a dummy request. fill
+ * it, and put it on the queue if we don't send any requests.
+ */
+ dummy->type = REQUEST_TYPE_ASYNC;
+ dummy->request = copy;
+ dummy->reply = NULL;
+ dummy->async.param = param;
+ dummy->reply_received = 1; /* short-circuit the timeout */
+
+ /* for secondary process, send request to the primary process only */
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ ret = mp_request_async(eal_mp_socket_path(), copy, param, ts);
+
+ /* if we didn't send anything, put dummy request on the queue */
+ if (ret == 0 && reply->nb_sent == 0) {
+ TAILQ_INSERT_TAIL(&pending_requests.requests, dummy,
+ next);
+ dummy_used = true;
+ }
+
+ pthread_mutex_unlock(&pending_requests.lock);
+
+ /* if we couldn't send anything, clean up */
+ if (ret != 0)
+ goto fail;
+ return 0;
+ }
+
+ /* for primary process, broadcast request */
+ mp_dir = opendir(mp_dir_path);
+ if (!mp_dir) {
+ RTE_LOG(ERR, EAL, "Unable to open directory %s\n", mp_dir_path);
+ rte_errno = errno;
+ goto unlock_fail;
+ }
+ dir_fd = dirfd(mp_dir);
+
+ /* lock the directory to prevent processes spinning up while we send */
+ if (flock(dir_fd, LOCK_SH)) {
+ RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+ mp_dir_path);
+ rte_errno = errno;
+ goto closedir_fail;
+ }
+
+ while ((ent = readdir(mp_dir))) {
+ char path[PATH_MAX];
+
+ if (fnmatch(mp_filter, ent->d_name, 0) != 0)
+ continue;
+
+ snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
+ ent->d_name);
+
+ if (mp_request_async(path, copy, param, ts))
+ ret = -1;
+ }
+ /* if we didn't send anything, put dummy request on the queue */
+ if (ret == 0 && reply->nb_sent == 0) {
+ TAILQ_INSERT_HEAD(&pending_requests.requests, dummy, next);
+ dummy_used = true;
+ }
+
+ /* finally, unlock the queue */
+ pthread_mutex_unlock(&pending_requests.lock);
+
+ /* unlock the directory */
+ flock(dir_fd, LOCK_UN);
+
+ /* dir_fd automatically closed on closedir */
+ closedir(mp_dir);
+
+ /* if dummy was unused, free it */
+ if (!dummy_used)
+ free(dummy);
+
+ return ret;
+closedir_fail:
+ closedir(mp_dir);
+unlock_fail:
+ pthread_mutex_unlock(&pending_requests.lock);
+fail:
+ free(dummy);
+ free(param);
+ free(copy);
+ return -1;
+}
+
+int
+rte_mp_reply(struct rte_mp_msg *msg, const char *peer)
+{
+ RTE_LOG(DEBUG, EAL, "reply: %s\n", msg->name);
+
+ if (check_input(msg) != 0)
+ return -1;
+
+ if (peer == NULL) {
+ RTE_LOG(ERR, EAL, "peer is not specified\n");
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return 0;
+ }
+
+ return mp_send(msg, peer, MP_REP);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_string_fns.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_string_fns.c
new file mode 100644
index 000000000..60c5dd66f
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_string_fns.c
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
+
+#include <rte_string_fns.h>
+
+/* split string into tokens */
+int
+rte_strsplit(char *string, int stringlen,
+ char **tokens, int maxtokens, char delim)
+{
+ int i, tok = 0;
+ int tokstart = 1; /* first token is right at start of string */
+
+ if (string == NULL || tokens == NULL)
+ goto einval_error;
+
+ for (i = 0; i < stringlen; i++) {
+ if (string[i] == '\0' || tok >= maxtokens)
+ break;
+ if (tokstart) {
+ tokstart = 0;
+ tokens[tok++] = &string[i];
+ }
+ if (string[i] == delim) {
+ string[i] = '\0';
+ tokstart = 1;
+ }
+ }
+ return tok;
+
+einval_error:
+ errno = EINVAL;
+ return -1;
+}
+
+/* Copy src string into dst.
+ *
+ * Return negative value and NUL-terminate if dst is too short,
+ * Otherwise return number of bytes copied.
+ */
+ssize_t
+rte_strscpy(char *dst, const char *src, size_t dsize)
+{
+ size_t nleft = dsize;
+ size_t res = 0;
+
+ /* Copy as many bytes as will fit. */
+ while (nleft != 0) {
+ dst[res] = src[res];
+ if (src[res] == '\0')
+ return res;
+ res++;
+ nleft--;
+ }
+
+ /* Not enough room in dst, set NUL and return error. */
+ if (res != 0)
+ dst[res - 1] = '\0';
+ return -E2BIG;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_tailqs.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_tailqs.c
new file mode 100644
index 000000000..ead06897b
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_tailqs.c
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <sys/queue.h>
+#include <stdint.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <rte_memory.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_memcfg.h"
+
+TAILQ_HEAD(rte_tailq_elem_head, rte_tailq_elem);
+/* local tailq list */
+static struct rte_tailq_elem_head rte_tailq_elem_head =
+ TAILQ_HEAD_INITIALIZER(rte_tailq_elem_head);
+
+/* number of tailqs registered, -1 before call to rte_eal_tailqs_init */
+static int rte_tailqs_count = -1;
+
+struct rte_tailq_head *
+rte_eal_tailq_lookup(const char *name)
+{
+ unsigned i;
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+ if (name == NULL)
+ return NULL;
+
+ for (i = 0; i < RTE_MAX_TAILQ; i++) {
+ if (!strncmp(name, mcfg->tailq_head[i].name,
+ RTE_TAILQ_NAMESIZE-1))
+ return &mcfg->tailq_head[i];
+ }
+
+ return NULL;
+}
+
+void
+rte_dump_tailq(FILE *f)
+{
+ struct rte_mem_config *mcfg;
+ unsigned i = 0;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_mcfg_tailq_read_lock();
+ for (i = 0; i < RTE_MAX_TAILQ; i++) {
+ const struct rte_tailq_head *tailq = &mcfg->tailq_head[i];
+ const struct rte_tailq_entry_head *head = &tailq->tailq_head;
+
+ fprintf(f, "Tailq %u: qname:<%s>, tqh_first:%p, tqh_last:%p\n",
+ i, tailq->name, head->tqh_first, head->tqh_last);
+ }
+ rte_mcfg_tailq_read_unlock();
+}
+
+static struct rte_tailq_head *
+rte_eal_tailq_create(const char *name)
+{
+ struct rte_tailq_head *head = NULL;
+
+ if (!rte_eal_tailq_lookup(name) &&
+ (rte_tailqs_count + 1 < RTE_MAX_TAILQ)) {
+ struct rte_mem_config *mcfg;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+ head = &mcfg->tailq_head[rte_tailqs_count];
+ strlcpy(head->name, name, sizeof(head->name) - 1);
+ TAILQ_INIT(&head->tailq_head);
+ rte_tailqs_count++;
+ }
+
+ return head;
+}
+
+/* local register, used to store "early" tailqs before rte_eal_init() and to
+ * ensure secondary process only registers tailqs once. */
+static int
+rte_eal_tailq_local_register(struct rte_tailq_elem *t)
+{
+ struct rte_tailq_elem *temp;
+
+ TAILQ_FOREACH(temp, &rte_tailq_elem_head, next) {
+ if (!strncmp(t->name, temp->name, sizeof(temp->name)))
+ return -1;
+ }
+
+ TAILQ_INSERT_TAIL(&rte_tailq_elem_head, t, next);
+ return 0;
+}
+
+static void
+rte_eal_tailq_update(struct rte_tailq_elem *t)
+{
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* primary process is the only one that creates */
+ t->head = rte_eal_tailq_create(t->name);
+ } else {
+ t->head = rte_eal_tailq_lookup(t->name);
+ }
+}
+
+int
+rte_eal_tailq_register(struct rte_tailq_elem *t)
+{
+ if (rte_eal_tailq_local_register(t) < 0) {
+ RTE_LOG(ERR, EAL,
+ "%s tailq is already registered\n", t->name);
+ goto error;
+ }
+
+ /* if a register happens after rte_eal_tailqs_init(), then we can update
+ * tailq head */
+ if (rte_tailqs_count >= 0) {
+ rte_eal_tailq_update(t);
+ if (t->head == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Cannot initialize tailq: %s\n", t->name);
+ TAILQ_REMOVE(&rte_tailq_elem_head, t, next);
+ goto error;
+ }
+ }
+
+ return 0;
+
+error:
+ t->head = NULL;
+ return -1;
+}
+
+int
+rte_eal_tailqs_init(void)
+{
+ struct rte_tailq_elem *t;
+
+ rte_tailqs_count = 0;
+
+ TAILQ_FOREACH(t, &rte_tailq_elem_head, next) {
+ /* second part of register job for "early" tailqs, see
+ * rte_eal_tailq_register and EAL_REGISTER_TAILQ */
+ rte_eal_tailq_update(t);
+ if (t->head == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Cannot initialize tailq: %s\n", t->name);
+ /* TAILQ_REMOVE not needed, error is already fatal */
+ goto fail;
+ }
+ }
+
+ return 0;
+
+fail:
+ rte_dump_tailq(stderr);
+ return -1;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_thread.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_thread.c
new file mode 100644
index 000000000..f9f588c17
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_thread.c
@@ -0,0 +1,230 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sched.h>
+#include <assert.h>
+#include <string.h>
+
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_log.h>
+#ifndef RTE_EXEC_ENV_WINDOWS
+#include <rte_trace_point.h>
+#endif
+
+#include "eal_internal_cfg.h"
+#include "eal_private.h"
+#include "eal_thread.h"
+
+RTE_DECLARE_PER_LCORE(unsigned , _socket_id);
+
+unsigned rte_socket_id(void)
+{
+ return RTE_PER_LCORE(_socket_id);
+}
+
+int
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ if (lcore_id >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ return cfg->lcore_role[lcore_id] == role;
+}
+
+static int
+eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
+{
+ unsigned cpu = 0;
+ int socket_id = SOCKET_ID_ANY;
+ int sid;
+
+ if (cpusetp == NULL)
+ return SOCKET_ID_ANY;
+
+ do {
+ if (!CPU_ISSET(cpu, cpusetp))
+ continue;
+
+ if (socket_id == SOCKET_ID_ANY)
+ socket_id = eal_cpu_socket_id(cpu);
+
+ sid = eal_cpu_socket_id(cpu);
+ if (socket_id != sid) {
+ socket_id = SOCKET_ID_ANY;
+ break;
+ }
+
+ } while (++cpu < CPU_SETSIZE);
+
+ return socket_id;
+}
+
+int
+rte_thread_set_affinity(rte_cpuset_t *cpusetp)
+{
+ int s;
+ unsigned lcore_id;
+ pthread_t tid;
+
+ tid = pthread_self();
+
+ s = pthread_setaffinity_np(tid, sizeof(rte_cpuset_t), cpusetp);
+ if (s != 0) {
+ RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n");
+ return -1;
+ }
+
+ /* store socket_id in TLS for quick access */
+ RTE_PER_LCORE(_socket_id) =
+ eal_cpuset_socket_id(cpusetp);
+
+ /* store cpuset in TLS for quick access */
+ memmove(&RTE_PER_LCORE(_cpuset), cpusetp,
+ sizeof(rte_cpuset_t));
+
+ lcore_id = rte_lcore_id();
+ if (lcore_id != (unsigned)LCORE_ID_ANY) {
+ /* EAL thread will update lcore_config */
+ lcore_config[lcore_id].socket_id = RTE_PER_LCORE(_socket_id);
+ memmove(&lcore_config[lcore_id].cpuset, cpusetp,
+ sizeof(rte_cpuset_t));
+ }
+
+ return 0;
+}
+
+void
+rte_thread_get_affinity(rte_cpuset_t *cpusetp)
+{
+ assert(cpusetp);
+ memmove(cpusetp, &RTE_PER_LCORE(_cpuset),
+ sizeof(rte_cpuset_t));
+}
+
+int
+eal_thread_dump_affinity(char *str, unsigned size)
+{
+ rte_cpuset_t cpuset;
+ unsigned cpu;
+ int ret;
+ unsigned int out = 0;
+
+ rte_thread_get_affinity(&cpuset);
+
+ for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+ if (!CPU_ISSET(cpu, &cpuset))
+ continue;
+
+ ret = snprintf(str + out,
+ size - out, "%u,", cpu);
+ if (ret < 0 || (unsigned)ret >= size - out) {
+ /* string will be truncated */
+ ret = -1;
+ goto exit;
+ }
+
+ out += ret;
+ }
+
+ ret = 0;
+exit:
+ /* remove the last separator */
+ if (out > 0)
+ str[out - 1] = '\0';
+
+ return ret;
+}
+
+
+struct rte_thread_ctrl_params {
+ void *(*start_routine)(void *);
+ void *arg;
+ pthread_barrier_t configured;
+};
+
+static void *rte_thread_init(void *arg)
+{
+ int ret;
+ rte_cpuset_t *cpuset = &internal_config.ctrl_cpuset;
+ struct rte_thread_ctrl_params *params = arg;
+ void *(*start_routine)(void *) = params->start_routine;
+ void *routine_arg = params->arg;
+
+ /* Store cpuset in TLS for quick access */
+ memmove(&RTE_PER_LCORE(_cpuset), cpuset, sizeof(rte_cpuset_t));
+
+ ret = pthread_barrier_wait(&params->configured);
+ if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
+ pthread_barrier_destroy(&params->configured);
+ free(params);
+ }
+
+#ifndef RTE_EXEC_ENV_WINDOWS
+ __rte_trace_mem_per_thread_alloc();
+#endif
+ return start_routine(routine_arg);
+}
+
+int
+rte_ctrl_thread_create(pthread_t *thread, const char *name,
+ const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg)
+{
+ rte_cpuset_t *cpuset = &internal_config.ctrl_cpuset;
+ struct rte_thread_ctrl_params *params;
+ int ret;
+
+ params = malloc(sizeof(*params));
+ if (!params)
+ return -ENOMEM;
+
+ params->start_routine = start_routine;
+ params->arg = arg;
+
+ pthread_barrier_init(&params->configured, NULL, 2);
+
+ ret = pthread_create(thread, attr, rte_thread_init, (void *)params);
+ if (ret != 0) {
+ free(params);
+ return -ret;
+ }
+
+ if (name != NULL) {
+ ret = rte_thread_setname(*thread, name);
+ if (ret < 0)
+ RTE_LOG(DEBUG, EAL,
+ "Cannot set name for ctrl thread\n");
+ }
+
+ ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
+ if (ret)
+ goto fail;
+
+ ret = pthread_barrier_wait(&params->configured);
+ if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
+ pthread_barrier_destroy(&params->configured);
+ free(params);
+ }
+
+ return 0;
+
+fail:
+ if (PTHREAD_BARRIER_SERIAL_THREAD ==
+ pthread_barrier_wait(&params->configured)) {
+ pthread_barrier_destroy(&params->configured);
+ free(params);
+ }
+ pthread_cancel(*thread);
+ pthread_join(*thread, NULL);
+ return -ret;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_timer.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_timer.c
new file mode 100644
index 000000000..fa9ee1b22
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_timer.c
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <time.h>
+#include <errno.h>
+
+#include <rte_common.h>
+#include <rte_compat.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include <rte_pause.h>
+#include <rte_eal.h>
+
+#include "eal_private.h"
+#include "eal_memcfg.h"
+
+/* The frequency of the RDTSC timer resolution */
+static uint64_t eal_tsc_resolution_hz;
+
+/* Pointer to user delay function */
+void (*rte_delay_us)(unsigned int) = NULL;
+
+void
+rte_delay_us_block(unsigned int us)
+{
+ const uint64_t start = rte_get_timer_cycles();
+ const uint64_t ticks = (uint64_t)us * rte_get_timer_hz() / 1E6;
+ while ((rte_get_timer_cycles() - start) < ticks)
+ rte_pause();
+}
+
+void
+rte_delay_us_sleep(unsigned int us)
+{
+ struct timespec wait[2];
+ int ind = 0;
+
+ wait[0].tv_sec = 0;
+ if (us >= US_PER_S) {
+ wait[0].tv_sec = us / US_PER_S;
+ us -= wait[0].tv_sec * US_PER_S;
+ }
+ wait[0].tv_nsec = 1000 * us;
+
+ while (nanosleep(&wait[ind], &wait[1 - ind]) && errno == EINTR) {
+ /*
+ * Sleep was interrupted. Flip the index, so the 'remainder'
+ * will become the 'request' for a next call.
+ */
+ ind = 1 - ind;
+ }
+}
+
+uint64_t
+rte_get_tsc_hz(void)
+{
+ return eal_tsc_resolution_hz;
+}
+
+static uint64_t
+estimate_tsc_freq(void)
+{
+#define CYC_PER_10MHZ 1E7
+ RTE_LOG(WARNING, EAL, "WARNING: TSC frequency estimated roughly"
+ " - clock timings may be less accurate.\n");
+ /* assume that the sleep(1) will sleep for 1 second */
+ uint64_t start = rte_rdtsc();
+ sleep(1);
+ /* Round up to 10Mhz. 1E7 ~ 10Mhz */
+ return RTE_ALIGN_MUL_NEAR(rte_rdtsc() - start, CYC_PER_10MHZ);
+}
+
+void
+set_tsc_freq(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ uint64_t freq;
+
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ /*
+ * Just use the primary process calculated TSC rate in any
+ * secondary process. It avoids any unnecessary overhead on
+ * systems where arch-specific frequency detection is not
+ * available.
+ */
+ eal_tsc_resolution_hz = mcfg->tsc_hz;
+ return;
+ }
+
+ freq = get_tsc_freq_arch();
+ if (!freq)
+ freq = get_tsc_freq();
+ if (!freq)
+ freq = estimate_tsc_freq();
+
+ RTE_LOG(DEBUG, EAL, "TSC frequency is ~%" PRIu64 " KHz\n", freq / 1000);
+ eal_tsc_resolution_hz = freq;
+ mcfg->tsc_hz = freq;
+}
+
+void rte_delay_us_callback_register(void (*userfunc)(unsigned int))
+{
+ rte_delay_us = userfunc;
+}
+
+RTE_INIT(rte_timer_init)
+{
+ /* set rte_delay_us_block as a delay function */
+ rte_delay_us_callback_register(rte_delay_us_block);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace.c
new file mode 100644
index 000000000..875553d7e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace.c
@@ -0,0 +1,498 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#include <fnmatch.h>
+#include <inttypes.h>
+#include <sys/queue.h>
+#include <regex.h>
+
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_string_fns.h>
+
+#include "eal_trace.h"
+
+RTE_DEFINE_PER_LCORE(volatile int, trace_point_sz);
+RTE_DEFINE_PER_LCORE(void *, trace_mem);
+static RTE_DEFINE_PER_LCORE(char, ctf_field[TRACE_CTF_FIELD_SIZE]);
+static RTE_DEFINE_PER_LCORE(int, ctf_count);
+
+static struct trace_point_head tp_list = STAILQ_HEAD_INITIALIZER(tp_list);
+static struct trace trace = { .args = STAILQ_HEAD_INITIALIZER(trace.args), };
+
+struct trace *
+trace_obj_get(void)
+{
+ return &trace;
+}
+
+struct trace_point_head *
+trace_list_head_get(void)
+{
+ return &tp_list;
+}
+
+int
+eal_trace_init(void)
+{
+ struct trace_arg *arg;
+
+ /* Trace memory should start with 8B aligned for natural alignment */
+ RTE_BUILD_BUG_ON((offsetof(struct __rte_trace_header, mem) % 8) != 0);
+
+ /* One of the trace point registration failed */
+ if (trace.register_errno) {
+ rte_errno = trace.register_errno;
+ goto fail;
+ }
+
+ if (!STAILQ_EMPTY(&trace.args))
+ trace.status = true;
+
+ if (!rte_trace_is_enabled())
+ return 0;
+
+ rte_spinlock_init(&trace.lock);
+
+ /* Is duplicate trace name registered */
+ if (trace_has_duplicate_entry())
+ goto fail;
+
+ /* Generate UUID ver 4 with total size of events and number of
+ * events
+ */
+ trace_uuid_generate();
+
+ /* Apply buffer size configuration for trace output */
+ trace_bufsz_args_apply();
+
+ /* Generate CTF TDSL metadata */
+ if (trace_metadata_create() < 0)
+ goto fail;
+
+ /* Create trace directory */
+ if (trace_mkdir())
+ goto free_meta;
+
+ /* Save current epoch timestamp for future use */
+ if (trace_epoch_time_save() < 0)
+ goto fail;
+
+ /* Apply global configurations */
+ STAILQ_FOREACH(arg, &trace.args, next)
+ trace_args_apply(arg->val);
+
+ rte_trace_mode_set(trace.mode);
+
+ return 0;
+
+free_meta:
+ trace_metadata_destroy();
+fail:
+ trace_err("failed to initialize trace [%s]", rte_strerror(rte_errno));
+ return -rte_errno;
+}
+
+void
+eal_trace_fini(void)
+{
+ if (!rte_trace_is_enabled())
+ return;
+ trace_mem_per_thread_free();
+ trace_metadata_destroy();
+ eal_trace_args_free();
+}
+
+bool
+rte_trace_is_enabled(void)
+{
+ return trace.status;
+}
+
+static void
+trace_mode_set(rte_trace_point_t *trace, enum rte_trace_mode mode)
+{
+ if (mode == RTE_TRACE_MODE_OVERWRITE)
+ __atomic_and_fetch(trace, ~__RTE_TRACE_FIELD_ENABLE_DISCARD,
+ __ATOMIC_RELEASE);
+ else
+ __atomic_or_fetch(trace, __RTE_TRACE_FIELD_ENABLE_DISCARD,
+ __ATOMIC_RELEASE);
+}
+
+void
+rte_trace_mode_set(enum rte_trace_mode mode)
+{
+ struct trace_point *tp;
+
+ if (!rte_trace_is_enabled())
+ return;
+
+ STAILQ_FOREACH(tp, &tp_list, next)
+ trace_mode_set(tp->handle, mode);
+
+ trace.mode = mode;
+}
+
+enum
+rte_trace_mode rte_trace_mode_get(void)
+{
+ return trace.mode;
+}
+
+static bool
+trace_point_is_invalid(rte_trace_point_t *t)
+{
+ return (t == NULL) || (trace_id_get(t) >= trace.nb_trace_points);
+}
+
+bool
+rte_trace_point_is_enabled(rte_trace_point_t *trace)
+{
+ uint64_t val;
+
+ if (trace_point_is_invalid(trace))
+ return false;
+
+ val = __atomic_load_n(trace, __ATOMIC_ACQUIRE);
+ return (val & __RTE_TRACE_FIELD_ENABLE_MASK) != 0;
+}
+
+int
+rte_trace_point_enable(rte_trace_point_t *trace)
+{
+ if (trace_point_is_invalid(trace))
+ return -ERANGE;
+
+ __atomic_or_fetch(trace, __RTE_TRACE_FIELD_ENABLE_MASK,
+ __ATOMIC_RELEASE);
+ return 0;
+}
+
+int
+rte_trace_point_disable(rte_trace_point_t *trace)
+{
+ if (trace_point_is_invalid(trace))
+ return -ERANGE;
+
+ __atomic_and_fetch(trace, ~__RTE_TRACE_FIELD_ENABLE_MASK,
+ __ATOMIC_RELEASE);
+ return 0;
+}
+
+int
+rte_trace_pattern(const char *pattern, bool enable)
+{
+ struct trace_point *tp;
+ int rc = 0, found = 0;
+
+ STAILQ_FOREACH(tp, &tp_list, next) {
+ if (fnmatch(pattern, tp->name, 0) == 0) {
+ if (enable)
+ rc = rte_trace_point_enable(tp->handle);
+ else
+ rc = rte_trace_point_disable(tp->handle);
+ found = 1;
+ }
+ if (rc < 0)
+ return rc;
+ }
+
+ return rc | found;
+}
+
+int
+rte_trace_regexp(const char *regex, bool enable)
+{
+ struct trace_point *tp;
+ int rc = 0, found = 0;
+ regex_t r;
+
+ if (regcomp(&r, regex, 0) != 0)
+ return -EINVAL;
+
+ STAILQ_FOREACH(tp, &tp_list, next) {
+ if (regexec(&r, tp->name, 0, NULL, 0) == 0) {
+ if (enable)
+ rc = rte_trace_point_enable(tp->handle);
+ else
+ rc = rte_trace_point_disable(tp->handle);
+ found = 1;
+ }
+ if (rc < 0)
+ return rc;
+ }
+ regfree(&r);
+
+ return rc | found;
+}
+
+rte_trace_point_t *
+rte_trace_point_lookup(const char *name)
+{
+ struct trace_point *tp;
+
+ if (name == NULL)
+ return NULL;
+
+ STAILQ_FOREACH(tp, &tp_list, next)
+ if (strncmp(tp->name, name, TRACE_POINT_NAME_SIZE) == 0)
+ return tp->handle;
+
+ return NULL;
+}
+
+static void
+trace_point_dump(FILE *f, struct trace_point *tp)
+{
+ rte_trace_point_t *handle = tp->handle;
+
+ fprintf(f, "\tid %d, %s, size is %d, %s\n",
+ trace_id_get(handle), tp->name,
+ (uint16_t)(*handle & __RTE_TRACE_FIELD_SIZE_MASK),
+ rte_trace_point_is_enabled(handle) ? "enabled" : "disabled");
+}
+
+static void
+trace_lcore_mem_dump(FILE *f)
+{
+ struct trace *trace = trace_obj_get();
+ struct __rte_trace_header *header;
+ uint32_t count;
+
+ if (trace->nb_trace_mem_list == 0)
+ return;
+
+ rte_spinlock_lock(&trace->lock);
+ fprintf(f, "nb_trace_mem_list = %d\n", trace->nb_trace_mem_list);
+ fprintf(f, "\nTrace mem info\n--------------\n");
+ for (count = 0; count < trace->nb_trace_mem_list; count++) {
+ header = trace->lcore_meta[count].mem;
+ fprintf(f, "\tid %d, mem=%p, area=%s, lcore_id=%d, name=%s\n",
+ count, header,
+ trace_area_to_string(trace->lcore_meta[count].area),
+ header->stream_header.lcore_id,
+ header->stream_header.thread_name);
+ }
+ rte_spinlock_unlock(&trace->lock);
+}
+
+void
+rte_trace_dump(FILE *f)
+{
+ struct trace_point_head *tp_list = trace_list_head_get();
+ struct trace *trace = trace_obj_get();
+ struct trace_point *tp;
+
+ fprintf(f, "\nGlobal info\n-----------\n");
+ fprintf(f, "status = %s\n",
+ rte_trace_is_enabled() ? "enabled" : "disabled");
+ fprintf(f, "mode = %s\n",
+ trace_mode_to_string(rte_trace_mode_get()));
+ fprintf(f, "dir = %s\n", trace->dir);
+ fprintf(f, "buffer len = %d\n", trace->buff_len);
+ fprintf(f, "number of trace points = %d\n", trace->nb_trace_points);
+
+ trace_lcore_mem_dump(f);
+ fprintf(f, "\nTrace point info\n----------------\n");
+ STAILQ_FOREACH(tp, tp_list, next)
+ trace_point_dump(f, tp);
+}
+
+void
+__rte_trace_mem_per_thread_alloc(void)
+{
+ struct trace *trace = trace_obj_get();
+ struct __rte_trace_header *header;
+ uint32_t count;
+
+ if (!rte_trace_is_enabled())
+ return;
+
+ if (RTE_PER_LCORE(trace_mem))
+ return;
+
+ rte_spinlock_lock(&trace->lock);
+
+ count = trace->nb_trace_mem_list;
+
+ /* Allocate room for storing the thread trace mem meta */
+ trace->lcore_meta = realloc(trace->lcore_meta,
+ sizeof(trace->lcore_meta[0]) * (count + 1));
+
+ /* Provide dummy space for fast path to consume */
+ if (trace->lcore_meta == NULL) {
+ trace_crit("trace mem meta memory realloc failed");
+ header = NULL;
+ goto fail;
+ }
+
+ /* First attempt from huge page */
+ header = eal_malloc_no_trace(NULL, trace_mem_sz(trace->buff_len), 8);
+ if (header) {
+ trace->lcore_meta[count].area = TRACE_AREA_HUGEPAGE;
+ goto found;
+ }
+
+ /* Second attempt from heap */
+ header = malloc(trace_mem_sz(trace->buff_len));
+ if (header == NULL) {
+ trace_crit("trace mem malloc attempt failed");
+ header = NULL;
+ goto fail;
+
+ }
+
+ /* Second attempt from heap is success */
+ trace->lcore_meta[count].area = TRACE_AREA_HEAP;
+
+ /* Initialize the trace header */
+found:
+ header->offset = 0;
+ header->len = trace->buff_len;
+ header->stream_header.magic = TRACE_CTF_MAGIC;
+ rte_uuid_copy(header->stream_header.uuid, trace->uuid);
+ header->stream_header.lcore_id = rte_lcore_id();
+
+ /* Store the thread name */
+ char *name = header->stream_header.thread_name;
+ memset(name, 0, __RTE_TRACE_EMIT_STRING_LEN_MAX);
+ rte_thread_getname(pthread_self(), name,
+ __RTE_TRACE_EMIT_STRING_LEN_MAX);
+
+ trace->lcore_meta[count].mem = header;
+ trace->nb_trace_mem_list++;
+fail:
+ RTE_PER_LCORE(trace_mem) = header;
+ rte_spinlock_unlock(&trace->lock);
+}
+
+void
+trace_mem_per_thread_free(void)
+{
+ struct trace *trace = trace_obj_get();
+ uint32_t count;
+ void *mem;
+
+ if (!rte_trace_is_enabled())
+ return;
+
+ rte_spinlock_lock(&trace->lock);
+ for (count = 0; count < trace->nb_trace_mem_list; count++) {
+ mem = trace->lcore_meta[count].mem;
+ if (trace->lcore_meta[count].area == TRACE_AREA_HUGEPAGE)
+ eal_free_no_trace(mem);
+ else if (trace->lcore_meta[count].area == TRACE_AREA_HEAP)
+ free(mem);
+ }
+ rte_spinlock_unlock(&trace->lock);
+}
+
+void
+__rte_trace_point_emit_field(size_t sz, const char *in, const char *datatype)
+{
+ char *field = RTE_PER_LCORE(ctf_field);
+ int count = RTE_PER_LCORE(ctf_count);
+ size_t size;
+ int rc;
+
+ size = RTE_MAX(0, TRACE_CTF_FIELD_SIZE - 1 - count);
+ RTE_PER_LCORE(trace_point_sz) += sz;
+ rc = snprintf(RTE_PTR_ADD(field, count), size, "%s %s;", datatype, in);
+ if (rc <= 0 || (size_t)rc >= size) {
+ RTE_PER_LCORE(trace_point_sz) = 0;
+ trace_crit("CTF field is too long");
+ return;
+ }
+ RTE_PER_LCORE(ctf_count) += rc;
+}
+
+int
+__rte_trace_point_register(rte_trace_point_t *handle, const char *name,
+ void (*register_fn)(void))
+{
+ char *field = RTE_PER_LCORE(ctf_field);
+ struct trace_point *tp;
+ uint16_t sz;
+
+ /* Sanity checks of arguments */
+ if (name == NULL || register_fn == NULL || handle == NULL) {
+ trace_err("invalid arguments");
+ rte_errno = EINVAL;
+ goto fail;
+ }
+
+ /* Check the size of the trace point object */
+ RTE_PER_LCORE(trace_point_sz) = 0;
+ RTE_PER_LCORE(ctf_count) = 0;
+ register_fn();
+ if (RTE_PER_LCORE(trace_point_sz) == 0) {
+ trace_err("missing rte_trace_emit_header() in register fn");
+ rte_errno = EBADF;
+ goto fail;
+ }
+
+ /* Is size overflowed */
+ if (RTE_PER_LCORE(trace_point_sz) > UINT16_MAX) {
+ trace_err("trace point size overflowed");
+ rte_errno = ENOSPC;
+ goto fail;
+ }
+
+ /* Are we running out of space to store trace points? */
+ if (trace.nb_trace_points > UINT16_MAX) {
+ trace_err("trace point exceeds the max count");
+ rte_errno = ENOSPC;
+ goto fail;
+ }
+
+ /* Get the size of the trace point */
+ sz = RTE_PER_LCORE(trace_point_sz);
+ tp = calloc(1, sizeof(struct trace_point));
+ if (tp == NULL) {
+ trace_err("fail to allocate trace point memory");
+ rte_errno = ENOMEM;
+ goto fail;
+ }
+
+ /* Initialize the trace point */
+ if (rte_strscpy(tp->name, name, TRACE_POINT_NAME_SIZE) < 0) {
+ trace_err("name is too long");
+ rte_errno = E2BIG;
+ goto free;
+ }
+
+ /* Copy the field data for future use */
+ if (rte_strscpy(tp->ctf_field, field, TRACE_CTF_FIELD_SIZE) < 0) {
+ trace_err("CTF field size is too long");
+ rte_errno = E2BIG;
+ goto free;
+ }
+
+ /* Clear field memory for the next event */
+ memset(field, 0, TRACE_CTF_FIELD_SIZE);
+
+ /* Form the trace handle */
+ *handle = sz;
+ *handle |= trace.nb_trace_points << __RTE_TRACE_FIELD_ID_SHIFT;
+
+ trace.nb_trace_points++;
+ tp->handle = handle;
+
+ /* Add the trace point at tail */
+ STAILQ_INSERT_TAIL(&tp_list, tp, next);
+ __atomic_thread_fence(__ATOMIC_RELEASE);
+
+ /* All Good !!! */
+ return 0;
+free:
+ free(tp);
+fail:
+ if (trace.register_errno == 0)
+ trace.register_errno = rte_errno;
+
+ return -rte_errno;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_ctf.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_ctf.c
new file mode 100644
index 000000000..302e2bb74
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_ctf.c
@@ -0,0 +1,488 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#include <inttypes.h>
+#include <time.h>
+
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_time.h>
+#include <rte_trace.h>
+#include <rte_version.h>
+
+#include "eal_trace.h"
+
+__rte_format_printf(2, 0)
+static int
+metadata_printf(char **str, const char *fmt, ...)
+{
+ va_list ap;
+ int rc;
+
+ *str = NULL;
+ va_start(ap, fmt);
+ rc = vasprintf(str, fmt, ap);
+ va_end(ap);
+
+ return rc;
+}
+
+static int
+meta_copy(char **meta, int *offset, char *str, int rc)
+{
+ int count = *offset;
+ char *ptr = *meta;
+
+ if (rc < 0)
+ return rc;
+
+ ptr = realloc(ptr, count + rc);
+ if (ptr == NULL)
+ goto free_str;
+
+ memcpy(RTE_PTR_ADD(ptr, count), str, rc);
+ count += rc;
+ free(str);
+
+ *meta = ptr;
+ *offset = count;
+
+ return rc;
+
+free_str:
+ if (str)
+ free(str);
+ return -ENOMEM;
+}
+
+static int
+meta_data_type_emit(char **meta, int *offset)
+{
+ char *str = NULL;
+ int rc;
+
+ rc = metadata_printf(&str,
+ "/* CTF 1.8 */\n"
+ "typealias integer {size = 8; base = x;}:= uint8_t;\n"
+ "typealias integer {size = 16; base = x;} := uint16_t;\n"
+ "typealias integer {size = 32; base = x;} := uint32_t;\n"
+ "typealias integer {size = 64; base = x;} := uint64_t;\n"
+ "typealias integer {size = 8; signed = true;} := int8_t;\n"
+ "typealias integer {size = 16; signed = true;} := int16_t;\n"
+ "typealias integer {size = 32; signed = true;} := int32_t;\n"
+ "typealias integer {size = 64; signed = true;} := int64_t;\n"
+#ifdef RTE_ARCH_64
+ "typealias integer {size = 64; base = x;} := uintptr_t;\n"
+#else
+ "typealias integer {size = 32; base = x;} := uintptr_t;\n"
+#endif
+#ifdef RTE_ARCH_64
+ "typealias integer {size = 64; base = x;} := long;\n"
+#else
+ "typealias integer {size = 32; base = x;} := long;\n"
+#endif
+ "typealias integer {size = 8; signed = false; encoding = ASCII; } := string_bounded_t;\n\n"
+ "typealias floating_point {\n"
+ " exp_dig = 8;\n"
+ " mant_dig = 24;\n"
+ "} := float;\n\n"
+ "typealias floating_point {\n"
+ " exp_dig = 11;\n"
+ " mant_dig = 53;\n"
+ "} := double;\n\n");
+
+ return meta_copy(meta, offset, str, rc);
+}
+
+static int
+is_be(void)
+{
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+static int
+meta_header_emit(char **meta, int *offset)
+{
+ struct trace *trace = trace_obj_get();
+ char uustr[RTE_UUID_STRLEN];
+ char *str = NULL;
+ int rc;
+
+ rte_uuid_unparse(trace->uuid, uustr, RTE_UUID_STRLEN);
+ rc = metadata_printf(&str,
+ "trace {\n"
+ " major = 1;\n"
+ " minor = 8;\n"
+ " uuid = \"%s\";\n"
+ " byte_order = %s;\n"
+ " packet.header := struct {\n"
+ " uint32_t magic;\n"
+ " uint8_t uuid[16];\n"
+ " };\n"
+ "};\n\n", uustr, is_be() ? "be" : "le");
+ return meta_copy(meta, offset, str, rc);
+}
+
+static int
+meta_env_emit(char **meta, int *offset)
+{
+ char *str = NULL;
+ int rc;
+
+ rc = metadata_printf(&str,
+ "env {\n"
+ " dpdk_version = \"%s\";\n"
+ " tracer_name = \"dpdk\";\n"
+ "};\n\n", rte_version());
+ return meta_copy(meta, offset, str, rc);
+}
+
+static int
+meta_clock_pass1_emit(char **meta, int *offset)
+{
+ char *str = NULL;
+ int rc;
+
+ rc = metadata_printf(&str,
+ "clock {\n"
+ " name = \"dpdk\";\n"
+ " freq = ");
+ return meta_copy(meta, offset, str, rc);
+}
+
+static int
+meta_clock_pass2_emit(char **meta, int *offset)
+{
+ char *str = NULL;
+ int rc;
+
+ rc = metadata_printf(&str,
+ "%20"PRIu64";\n"
+ " offset_s =", 0);
+ return meta_copy(meta, offset, str, rc);
+}
+
+static int
+meta_clock_pass3_emit(char **meta, int *offset)
+{
+ char *str = NULL;
+ int rc;
+
+ rc = metadata_printf(&str,
+ "%20"PRIu64";\n"
+ " offset =", 0);
+ return meta_copy(meta, offset, str, rc);
+}
+
+static int
+meta_clock_pass4_emit(char **meta, int *offset)
+{
+ char *str = NULL;
+ int rc;
+
+ rc = metadata_printf(&str,
+ "%20"PRIu64";\n};\n\n"
+ "typealias integer {\n"
+ " size = 48; align = 1; signed = false;\n"
+ " map = clock.dpdk.value;\n"
+ "} := uint48_clock_dpdk_t;\n\n", 0);
+
+ return meta_copy(meta, offset, str, rc);
+}
+
+static int
+meta_stream_emit(char **meta, int *offset)
+{
+ char *str = NULL;
+ int rc;
+
+ rc = metadata_printf(&str,
+ "stream {\n"
+ " packet.context := struct {\n"
+ " uint32_t cpu_id;\n"
+ " string_bounded_t name[32];\n"
+ " };\n"
+ " event.header := struct {\n"
+ " uint48_clock_dpdk_t timestamp;\n"
+ " uint16_t id;\n"
+ " } align(64);\n"
+ "};\n\n");
+ return meta_copy(meta, offset, str, rc);
+}
+
+static void
+string_fixed_replace(char *input, const char *search, const char *replace)
+{
+ char *found;
+ size_t len;
+
+ found = strstr(input, search);
+ if (found == NULL)
+ return;
+
+ if (strlen(found) != strlen(search))
+ return;
+
+ len = strlen(replace);
+ memcpy(found, replace, len);
+ found[len] = '\0';
+}
+
+static void
+ctf_fixup_align(char *str)
+{
+ string_fixed_replace(str, "align", "_align");
+}
+
+static void
+ctf_fixup_arrow_deref(char *str)
+{
+ const char *replace = "_";
+ const char *search = "->";
+ char *found;
+ size_t len;
+
+ found = strstr(str, search);
+ if (found == NULL)
+ return;
+
+ do {
+ memcpy(found, replace, strlen(replace));
+ len = strlen(found + 2);
+ memcpy(found + 1, found + 2, len);
+ found[len + 1] = '\0';
+ found = strstr(str, search);
+ } while (found != NULL);
+}
+
+static void
+ctf_fixup_dot_deref(char *str)
+{
+ const char *replace = "_";
+ const char *search = ".";
+ char *found;
+ size_t len;
+
+ found = strstr(str, search);
+ if (found == NULL)
+ return;
+
+ len = strlen(replace);
+ do {
+ memcpy(found, replace, len);
+ found = strstr(str, search);
+ } while (found != NULL);
+}
+
+static void
+ctf_fixup_event(char *str)
+{
+ string_fixed_replace(str, "event", "_event");
+}
+
+static int
+ctf_fixup_keyword(char *str)
+{
+ char dup_str[TRACE_CTF_FIELD_SIZE];
+ char input[TRACE_CTF_FIELD_SIZE];
+ const char *delim = ";";
+ char *from;
+ int len;
+
+ if (str == NULL)
+ return 0;
+
+ len = strlen(str);
+ if (len >= TRACE_CTF_FIELD_SIZE) {
+ trace_err("ctf_field reached its maximum limit");
+ return -EMSGSIZE;
+ }
+
+ /* Create duplicate string */
+ strcpy(dup_str, str);
+
+ len = 0;
+ from = strtok(dup_str, delim);
+ while (from != NULL) {
+ strcpy(input, from);
+ ctf_fixup_align(input);
+ ctf_fixup_dot_deref(input);
+ ctf_fixup_arrow_deref(input);
+ ctf_fixup_event(input);
+
+ strcpy(&input[strlen(input)], delim);
+ if ((len + strlen(input)) >= TRACE_CTF_FIELD_SIZE) {
+ trace_err("ctf_field reached its maximum limit");
+ return -EMSGSIZE;
+ }
+
+ strcpy(str + len, input);
+ len += strlen(input);
+ from = strtok(NULL, delim);
+ }
+
+ return 0;
+}
+
+static int
+meta_event_emit(char **meta, int *offset, struct trace_point *tp)
+{
+ char *str = NULL;
+ int rc;
+
+ /* Fixup ctf field string in case it using reserved ctf keywords */
+ rc = ctf_fixup_keyword(tp->ctf_field);
+ if (rc)
+ return rc;
+
+ rc = metadata_printf(&str,
+ "event {\n"
+ " id = %d;\n"
+ " name = \"%s\";\n"
+ " fields := struct {\n"
+ " %s\n"
+ " };\n"
+ "};\n\n", trace_id_get(tp->handle), tp->name, tp->ctf_field);
+ return meta_copy(meta, offset, str, rc);
+}
+
+int
+trace_metadata_create(void)
+{
+ struct trace_point_head *tp_list = trace_list_head_get();
+ struct trace *trace = trace_obj_get();
+ struct trace_point *tp;
+ int rc, offset = 0;
+ char *meta = NULL;
+
+ rc = meta_data_type_emit(&meta, &offset);
+ if (rc < 0)
+ goto fail;
+
+ rc = meta_header_emit(&meta, &offset);
+ if (rc < 0)
+ goto fail;
+
+ rc = meta_env_emit(&meta, &offset);
+ if (rc < 0)
+ goto fail;
+
+ rc = meta_clock_pass1_emit(&meta, &offset);
+ if (rc < 0)
+ goto fail;
+ trace->ctf_meta_offset_freq = offset;
+
+ rc = meta_clock_pass2_emit(&meta, &offset);
+ if (rc < 0)
+ goto fail;
+ trace->ctf_meta_offset_freq_off_s = offset;
+
+ rc = meta_clock_pass3_emit(&meta, &offset);
+ if (rc < 0)
+ goto fail;
+ trace->ctf_meta_offset_freq_off = offset;
+
+ rc = meta_clock_pass4_emit(&meta, &offset);
+ if (rc < 0)
+ goto fail;
+
+ rc = meta_stream_emit(&meta, &offset);
+ if (rc < 0)
+ goto fail;
+
+ STAILQ_FOREACH(tp, tp_list, next)
+ if (meta_event_emit(&meta, &offset, tp) < 0)
+ goto fail;
+
+ trace->ctf_meta = meta;
+ return 0;
+
+fail:
+ if (meta)
+ free(meta);
+ return -EBADF;
+}
+
+void
+trace_metadata_destroy(void)
+{
+ struct trace *trace = trace_obj_get();
+
+ if (trace->ctf_meta) {
+ free(trace->ctf_meta);
+ trace->ctf_meta = NULL;
+ }
+}
+
+static void
+meta_fix_freq(struct trace *trace, char *meta)
+{
+ char *str;
+ int rc;
+
+ str = RTE_PTR_ADD(meta, trace->ctf_meta_offset_freq);
+ rc = sprintf(str, "%20"PRIu64"", rte_get_timer_hz());
+ str[rc] = ';';
+}
+
+static void
+meta_fix_freq_offset(struct trace *trace, char *meta)
+{
+ uint64_t uptime_tickes_floor, uptime_ticks, freq, uptime_sec;
+ uint64_t offset, offset_s;
+ char *str;
+ int rc;
+
+ uptime_ticks = trace->uptime_ticks &
+ ((1ULL << __RTE_TRACE_EVENT_HEADER_ID_SHIFT) - 1);
+ freq = rte_get_tsc_hz();
+ uptime_tickes_floor = RTE_ALIGN_MUL_FLOOR(uptime_ticks, freq);
+
+ uptime_sec = uptime_tickes_floor / freq;
+ offset_s = trace->epoch_sec - uptime_sec;
+
+ offset = uptime_ticks - uptime_tickes_floor;
+ offset += trace->epoch_nsec * (freq / NSEC_PER_SEC);
+
+ str = RTE_PTR_ADD(meta, trace->ctf_meta_offset_freq_off_s);
+ rc = sprintf(str, "%20"PRIu64"", offset_s);
+ str[rc] = ';';
+ str = RTE_PTR_ADD(meta, trace->ctf_meta_offset_freq_off);
+ rc = sprintf(str, "%20"PRIu64"", offset);
+ str[rc] = ';';
+}
+
+static void
+meta_fixup(struct trace *trace, char *meta)
+{
+ meta_fix_freq(trace, meta);
+ meta_fix_freq_offset(trace, meta);
+}
+
+int
+rte_trace_metadata_dump(FILE *f)
+{
+ struct trace *trace = trace_obj_get();
+ char *ctf_meta = trace->ctf_meta;
+ int rc;
+
+ if (!rte_trace_is_enabled())
+ return 0;
+
+ if (ctf_meta == NULL)
+ return -EINVAL;
+
+ if (!__atomic_load_n(&trace->ctf_fixup_done, __ATOMIC_SEQ_CST) &&
+ rte_get_timer_hz()) {
+ meta_fixup(trace, ctf_meta);
+ __atomic_store_n(&trace->ctf_fixup_done, 1, __ATOMIC_SEQ_CST);
+ }
+
+ rc = fprintf(f, "%s", ctf_meta);
+ return rc < 0 ? rc : 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_points.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_points.c
new file mode 100644
index 000000000..4a8ce9088
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_points.c
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#include <rte_trace_point_register.h>
+
+#include <rte_eal_trace.h>
+
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_void);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_u64);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_u32);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_u16);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_u8);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_i64);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_i32);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_i16);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_i8);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_int);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_long);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_float);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_double);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_ptr);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_str);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_generic_func);
+
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_alarm_set);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_alarm_cancel);
+
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_mem_zmalloc);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_mem_malloc);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_mem_realloc);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_mem_free);
+
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_memzone_reserve);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_memzone_lookup);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_memzone_free);
+
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_thread_remote_launch);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_thread_lcore_ready);
+
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_intr_callback_register);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_intr_callback_unregister);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_intr_enable);
+RTE_TRACE_POINT_DEFINE(rte_eal_trace_intr_disable);
+
+RTE_INIT(eal_trace_init)
+{
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_void,
+ lib.eal.generic.void);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_u64,
+ lib.eal.generic.u64);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_u32,
+ lib.eal.generic.u32);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_u16,
+ lib.eal.generic.u16);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_u8,
+ lib.eal.generic.u8);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_i64,
+ lib.eal.generic.i64);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_i32,
+ lib.eal.generic.i32);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_i16,
+ lib.eal.generic.i16);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_i8,
+ lib.eal.generic.i8);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_int,
+ lib.eal.generic.int);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_long,
+ lib.eal.generic.long);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_float,
+ lib.eal.generic.float);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_double,
+ lib.eal.generic.double);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_ptr,
+ lib.eal.generic.ptr);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_str,
+ lib.eal.generic.string);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_generic_func,
+ lib.eal.generic.func);
+
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_alarm_set,
+ lib.eal.alarm.set);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_alarm_cancel,
+ lib.eal.alarm.cancel);
+
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_mem_zmalloc,
+ lib.eal.mem.zmalloc);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_mem_malloc,
+ lib.eal.mem.malloc);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_mem_realloc,
+ lib.eal.mem.realloc);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_mem_free,
+ lib.eal.mem.free);
+
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_memzone_reserve,
+ lib.eal.memzone.reserve);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_memzone_lookup,
+ lib.eal.memzone.lookup);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_memzone_free,
+ lib.eal.memzone.free);
+
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_thread_remote_launch,
+ lib.eal.thread.remote.launch);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_thread_lcore_ready,
+ lib.eal.thread.lcore.ready);
+
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_callback_register,
+ lib.eal.intr.register);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_callback_unregister,
+ lib.eal.intr.unregister);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_enable,
+ lib.eal.intr.enable);
+ RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_disable,
+ lib.eal.intr.disable);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_utils.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_utils.c
new file mode 100644
index 000000000..64f58fb66
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_trace_utils.c
@@ -0,0 +1,448 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#include <fnmatch.h>
+#include <pwd.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+
+#include "eal_filesystem.h"
+#include "eal_trace.h"
+
+const char *
+trace_mode_to_string(enum rte_trace_mode mode)
+{
+ switch (mode) {
+ case RTE_TRACE_MODE_OVERWRITE: return "overwrite";
+ case RTE_TRACE_MODE_DISCARD: return "discard";
+ default: return "unknown";
+ }
+}
+
+const char *
+trace_area_to_string(enum trace_area_e area)
+{
+ switch (area) {
+ case TRACE_AREA_HEAP: return "heap";
+ case TRACE_AREA_HUGEPAGE: return "hugepage";
+ default: return "unknown";
+ }
+}
+
+static bool
+trace_entry_compare(const char *name)
+{
+ struct trace_point_head *tp_list = trace_list_head_get();
+ struct trace_point *tp;
+ int count = 0;
+
+ STAILQ_FOREACH(tp, tp_list, next) {
+ if (strncmp(tp->name, name, TRACE_POINT_NAME_SIZE) == 0)
+ count++;
+ if (count > 1) {
+ trace_err("found duplicate entry %s", name);
+ rte_errno = EEXIST;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool
+trace_has_duplicate_entry(void)
+{
+ struct trace_point_head *tp_list = trace_list_head_get();
+ struct trace_point *tp;
+
+ /* Is duplicate trace name registered */
+ STAILQ_FOREACH(tp, tp_list, next)
+ if (trace_entry_compare(tp->name))
+ return true;
+
+ return false;
+}
+
+void
+trace_uuid_generate(void)
+{
+ struct trace_point_head *tp_list = trace_list_head_get();
+ struct trace *trace = trace_obj_get();
+ struct trace_point *tp;
+ uint64_t sz_total = 0;
+
+ /* Go over the registered trace points to get total size of events */
+ STAILQ_FOREACH(tp, tp_list, next) {
+ const uint16_t sz = *tp->handle & __RTE_TRACE_FIELD_SIZE_MASK;
+ sz_total += sz;
+ }
+
+ rte_uuid_t uuid = RTE_UUID_INIT(sz_total, trace->nb_trace_points,
+ 0x4370, 0x8f50, 0x222ddd514176ULL);
+ rte_uuid_copy(trace->uuid, uuid);
+}
+
+static int
+trace_session_name_generate(char *trace_dir)
+{
+ struct tm *tm_result;
+ time_t tm;
+ int rc;
+
+ tm = time(NULL);
+ if ((int)tm == -1)
+ goto fail;
+
+ tm_result = localtime(&tm);
+ if (tm_result == NULL)
+ goto fail;
+
+ rc = rte_strscpy(trace_dir, eal_get_hugefile_prefix(),
+ TRACE_PREFIX_LEN);
+ if (rc == -E2BIG)
+ rc = TRACE_PREFIX_LEN;
+ trace_dir[rc++] = '-';
+
+ rc = strftime(trace_dir + rc, TRACE_DIR_STR_LEN - rc,
+ "%Y-%m-%d-%p-%I-%M-%S", tm_result);
+ if (rc == 0)
+ goto fail;
+
+ return rc;
+fail:
+ rte_errno = errno;
+ return -rte_errno;
+}
+
+static int
+trace_dir_update(const char *str)
+{
+ struct trace *trace = trace_obj_get();
+ int rc, remaining;
+
+ remaining = sizeof(trace->dir) - trace->dir_offset;
+ rc = rte_strscpy(&trace->dir[0] + trace->dir_offset, str, remaining);
+ if (rc < 0)
+ goto fail;
+
+ trace->dir_offset += rc;
+fail:
+ return rc;
+}
+
+int
+eal_trace_args_save(const char *val)
+{
+ struct trace *trace = trace_obj_get();
+ struct trace_arg *arg = malloc(sizeof(*arg));
+
+ if (arg == NULL) {
+ trace_err("failed to allocate memory for %s", val);
+ return -ENOMEM;
+ }
+
+ arg->val = strdup(val);
+ if (arg->val == NULL) {
+ trace_err("failed to allocate memory for %s", val);
+ free(arg);
+ return -ENOMEM;
+ }
+
+ STAILQ_INSERT_TAIL(&trace->args, arg, next);
+ return 0;
+}
+
+void
+eal_trace_args_free(void)
+{
+ struct trace *trace = trace_obj_get();
+ struct trace_arg *arg;
+
+ while (!STAILQ_EMPTY(&trace->args)) {
+ arg = STAILQ_FIRST(&trace->args);
+ STAILQ_REMOVE_HEAD(&trace->args, next);
+ free(arg->val);
+ free(arg);
+ }
+}
+
+int
+trace_args_apply(const char *arg)
+{
+ if (rte_trace_regexp(arg, true) < 0) {
+ trace_err("cannot enable trace for %s", arg);
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+eal_trace_bufsz_args_save(char const *val)
+{
+ struct trace *trace = trace_obj_get();
+ uint64_t bufsz;
+
+ bufsz = rte_str_to_size(val);
+ if (bufsz == 0) {
+ trace_err("buffer size cannot be zero");
+ return -EINVAL;
+ }
+
+ trace->buff_len = bufsz;
+ return 0;
+}
+
+void
+trace_bufsz_args_apply(void)
+{
+ struct trace *trace = trace_obj_get();
+
+ if (trace->buff_len == 0)
+ trace->buff_len = 1024 * 1024; /* 1MB */
+}
+
+int
+eal_trace_mode_args_save(const char *val)
+{
+ struct trace *trace = trace_obj_get();
+ size_t len = strlen(val);
+ unsigned long tmp;
+ char *pattern;
+
+ if (len == 0) {
+ trace_err("value is not provided with option");
+ return -EINVAL;
+ }
+
+ pattern = (char *)calloc(1, len + 2);
+ if (pattern == NULL) {
+ trace_err("fail to allocate memory");
+ return -ENOMEM;
+ }
+
+ sprintf(pattern, "%s*", val);
+
+ if (fnmatch(pattern, "overwrite", 0) == 0)
+ tmp = RTE_TRACE_MODE_OVERWRITE;
+ else if (fnmatch(pattern, "discard", 0) == 0)
+ tmp = RTE_TRACE_MODE_DISCARD;
+ else {
+ free(pattern);
+ return -EINVAL;
+ }
+
+ trace->mode = tmp;
+ free(pattern);
+ return 0;
+}
+
+int
+eal_trace_dir_args_save(char const *val)
+{
+ struct trace *trace = trace_obj_get();
+ char *dir_path;
+ int rc;
+
+ if (strlen(val) >= sizeof(trace->dir) - 1) {
+ trace_err("input string is too big");
+ return -ENAMETOOLONG;
+ }
+
+ if (asprintf(&dir_path, "%s/", val) == -1) {
+ trace_err("failed to copy directory: %s", strerror(errno));
+ return -ENOMEM;
+ }
+
+ rc = trace_dir_update(dir_path);
+
+ free(dir_path);
+ return rc;
+}
+
+int
+trace_epoch_time_save(void)
+{
+ struct trace *trace = trace_obj_get();
+ struct timespec epoch = { 0, 0 };
+ uint64_t avg, start, end;
+
+ start = rte_get_tsc_cycles();
+ if (clock_gettime(CLOCK_REALTIME, &epoch) < 0) {
+ trace_err("failed to get the epoch time");
+ return -1;
+ }
+ end = rte_get_tsc_cycles();
+ avg = (start + end) >> 1;
+
+ trace->epoch_sec = (uint64_t) epoch.tv_sec;
+ trace->epoch_nsec = (uint64_t) epoch.tv_nsec;
+ trace->uptime_ticks = avg;
+
+ return 0;
+}
+
+static int
+trace_dir_default_path_get(char *dir_path)
+{
+ struct trace *trace = trace_obj_get();
+ uint32_t size = sizeof(trace->dir);
+ struct passwd *pwd;
+ char *home_dir;
+
+ /* First check for shell environment variable */
+ home_dir = getenv("HOME");
+ if (home_dir == NULL) {
+ /* Fallback to password file entry */
+ pwd = getpwuid(getuid());
+ if (pwd == NULL)
+ return -EINVAL;
+
+ home_dir = pwd->pw_dir;
+ }
+
+ /* Append dpdk-traces to directory */
+ if (snprintf(dir_path, size, "%s/dpdk-traces/", home_dir) < 0)
+ return -ENAMETOOLONG;
+
+ return 0;
+}
+
+int
+trace_mkdir(void)
+{
+ struct trace *trace = trace_obj_get();
+ char session[TRACE_DIR_STR_LEN];
+ char *dir_path;
+ int rc;
+
+ if (!trace->dir_offset) {
+ dir_path = calloc(1, sizeof(trace->dir));
+ if (dir_path == NULL) {
+ trace_err("fail to allocate memory");
+ return -ENOMEM;
+ }
+
+ rc = trace_dir_default_path_get(dir_path);
+ if (rc < 0) {
+ trace_err("fail to get default path");
+ free(dir_path);
+ return rc;
+ }
+
+ rc = trace_dir_update(dir_path);
+ free(dir_path);
+ if (rc < 0)
+ return rc;
+ }
+
+ /* Create the path if it t exist, no "mkdir -p" available here */
+ rc = mkdir(trace->dir, 0700);
+ if (rc < 0 && errno != EEXIST) {
+ trace_err("mkdir %s failed [%s]", trace->dir, strerror(errno));
+ rte_errno = errno;
+ return -rte_errno;
+ }
+
+ rc = trace_session_name_generate(session);
+ if (rc < 0)
+ return rc;
+ rc = trace_dir_update(session);
+ if (rc < 0)
+ return rc;
+
+ rc = mkdir(trace->dir, 0700);
+ if (rc < 0) {
+ trace_err("mkdir %s failed [%s]", trace->dir, strerror(errno));
+ rte_errno = errno;
+ return -rte_errno;
+ }
+
+ RTE_LOG(INFO, EAL, "Trace dir: %s\n", trace->dir);
+ return 0;
+}
+
+static int
+trace_meta_save(struct trace *trace)
+{
+ char file_name[PATH_MAX];
+ FILE *f;
+ int rc;
+
+ rc = snprintf(file_name, PATH_MAX, "%s/metadata", trace->dir);
+ if (rc < 0)
+ return rc;
+
+ f = fopen(file_name, "w");
+ if (f == NULL)
+ return -errno;
+
+ rc = rte_trace_metadata_dump(f);
+
+ if (fclose(f))
+ rc = -errno;
+
+ return rc;
+}
+
+
+static inline int
+trace_file_sz(struct __rte_trace_header *hdr)
+{
+ return sizeof(struct __rte_trace_stream_header) + hdr->offset;
+}
+
+static int
+trace_mem_save(struct trace *trace, struct __rte_trace_header *hdr,
+ uint32_t cnt)
+{
+ char file_name[PATH_MAX];
+ FILE *f;
+ int rc;
+
+ rc = snprintf(file_name, PATH_MAX, "%s/channel0_%d", trace->dir, cnt);
+ if (rc < 0)
+ return rc;
+
+ f = fopen(file_name, "w");
+ if (f == NULL)
+ return -errno;
+
+ rc = fwrite(&hdr->stream_header, trace_file_sz(hdr), 1, f);
+ rc = (rc == 1) ? 0 : -EACCES;
+
+ if (fclose(f))
+ rc = -errno;
+
+ return rc;
+}
+
+int
+rte_trace_save(void)
+{
+ struct trace *trace = trace_obj_get();
+ struct __rte_trace_header *header;
+ uint32_t count;
+ int rc = 0;
+
+ if (trace->nb_trace_mem_list == 0)
+ return rc;
+
+ rc = trace_meta_save(trace);
+ if (rc)
+ return rc;
+
+ rte_spinlock_lock(&trace->lock);
+ for (count = 0; count < trace->nb_trace_mem_list; count++) {
+ header = trace->lcore_meta[count].mem;
+ rc = trace_mem_save(trace, header, count);
+ if (rc)
+ break;
+ }
+ rte_spinlock_unlock(&trace->lock);
+ return rc;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_common_uuid.c b/src/spdk/dpdk/lib/librte_eal/common/eal_common_uuid.c
new file mode 100644
index 000000000..0a80bfbb3
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_common_uuid.c
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) 1996, 1997 Theodore Ts'o.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include <rte_uuid.h>
+
+/* UUID packed form */
+struct uuid {
+ uint32_t time_low;
+ uint16_t time_mid;
+ uint16_t time_hi_and_version;
+ uint16_t clock_seq;
+ uint8_t node[6];
+};
+
+static void uuid_pack(const struct uuid *uu, rte_uuid_t ptr)
+{
+ uint32_t tmp;
+ uint8_t *out = ptr;
+
+ tmp = uu->time_low;
+ out[3] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[2] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[1] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[0] = (uint8_t) tmp;
+
+ tmp = uu->time_mid;
+ out[5] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[4] = (uint8_t) tmp;
+
+ tmp = uu->time_hi_and_version;
+ out[7] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[6] = (uint8_t) tmp;
+
+ tmp = uu->clock_seq;
+ out[9] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[8] = (uint8_t) tmp;
+
+ memcpy(out+10, uu->node, 6);
+}
+
+static void uuid_unpack(const rte_uuid_t in, struct uuid *uu)
+{
+ const uint8_t *ptr = in;
+ uint32_t tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_low = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_mid = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_hi_and_version = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->clock_seq = tmp;
+
+ memcpy(uu->node, ptr, 6);
+}
+
+bool rte_uuid_is_null(const rte_uuid_t uu)
+{
+ const uint8_t *cp = uu;
+ int i;
+
+ for (i = 0; i < 16; i++)
+ if (*cp++)
+ return false;
+ return true;
+}
+
+/*
+ * rte_uuid_compare() - compare two UUIDs.
+ */
+int rte_uuid_compare(const rte_uuid_t uu1, const rte_uuid_t uu2)
+{
+ struct uuid uuid1, uuid2;
+
+ uuid_unpack(uu1, &uuid1);
+ uuid_unpack(uu2, &uuid2);
+
+#define UUCMP(u1, u2) \
+ do { if (u1 != u2) return (u1 < u2) ? -1 : 1; } while (0)
+
+ UUCMP(uuid1.time_low, uuid2.time_low);
+ UUCMP(uuid1.time_mid, uuid2.time_mid);
+ UUCMP(uuid1.time_hi_and_version, uuid2.time_hi_and_version);
+ UUCMP(uuid1.clock_seq, uuid2.clock_seq);
+#undef UUCMP
+
+ return memcmp(uuid1.node, uuid2.node, 6);
+}
+
+int rte_uuid_parse(const char *in, rte_uuid_t uu)
+{
+ struct uuid uuid;
+ int i;
+ const char *cp;
+ char buf[3];
+
+ if (strlen(in) != 36)
+ return -1;
+
+ for (i = 0, cp = in; i <= 36; i++, cp++) {
+ if ((i == 8) || (i == 13) || (i == 18) ||
+ (i == 23)) {
+ if (*cp == '-')
+ continue;
+ else
+ return -1;
+ }
+ if (i == 36)
+ if (*cp == 0)
+ continue;
+ if (!isxdigit(*cp))
+ return -1;
+ }
+
+ uuid.time_low = strtoul(in, NULL, 16);
+ uuid.time_mid = strtoul(in+9, NULL, 16);
+ uuid.time_hi_and_version = strtoul(in+14, NULL, 16);
+ uuid.clock_seq = strtoul(in+19, NULL, 16);
+ cp = in+24;
+ buf[2] = 0;
+
+ for (i = 0; i < 6; i++) {
+ buf[0] = *cp++;
+ buf[1] = *cp++;
+ uuid.node[i] = strtoul(buf, NULL, 16);
+ }
+
+ uuid_pack(&uuid, uu);
+ return 0;
+}
+
+void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len)
+{
+ struct uuid uuid;
+
+ uuid_unpack(uu, &uuid);
+
+ snprintf(out, len,
+ "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
+ uuid.clock_seq >> 8, uuid.clock_seq & 0xFF,
+ uuid.node[0], uuid.node[1], uuid.node[2],
+ uuid.node[3], uuid.node[4], uuid.node[5]);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_filesystem.h b/src/spdk/dpdk/lib/librte_eal/common/eal_filesystem.h
new file mode 100644
index 000000000..5d21f07c2
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_filesystem.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+/**
+ * @file
+ * Stores functions and path defines for files and directories
+ * on the filesystem for Linux, that are used by the Linux EAL.
+ */
+
+#ifndef EAL_FILESYSTEM_H
+#define EAL_FILESYSTEM_H
+
+/** Path of rte config file. */
+
+#include <stdint.h>
+#include <limits.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#include <rte_string_fns.h>
+#include "eal_internal_cfg.h"
+
+/* sets up platform-specific runtime data dir */
+int
+eal_create_runtime_dir(void);
+
+int
+eal_clean_runtime_dir(void);
+
+/** Function to return hugefile prefix that's currently set up */
+const char *
+eal_get_hugefile_prefix(void);
+
+#define RUNTIME_CONFIG_FNAME "config"
+static inline const char *
+eal_runtime_config_path(void)
+{
+ static char buffer[PATH_MAX]; /* static so auto-zeroed */
+
+ snprintf(buffer, sizeof(buffer), "%s/%s", rte_eal_get_runtime_dir(),
+ RUNTIME_CONFIG_FNAME);
+ return buffer;
+}
+
+/** Path of primary/secondary communication unix socket file. */
+#define MP_SOCKET_FNAME "mp_socket"
+static inline const char *
+eal_mp_socket_path(void)
+{
+ static char buffer[PATH_MAX]; /* static so auto-zeroed */
+
+ snprintf(buffer, sizeof(buffer), "%s/%s", rte_eal_get_runtime_dir(),
+ MP_SOCKET_FNAME);
+ return buffer;
+}
+
+#define FBARRAY_NAME_FMT "%s/fbarray_%s"
+static inline const char *
+eal_get_fbarray_path(char *buffer, size_t buflen, const char *name) {
+ snprintf(buffer, buflen, FBARRAY_NAME_FMT, rte_eal_get_runtime_dir(),
+ name);
+ return buffer;
+}
+
+/** Path of hugepage info file. */
+#define HUGEPAGE_INFO_FNAME "hugepage_info"
+static inline const char *
+eal_hugepage_info_path(void)
+{
+ static char buffer[PATH_MAX]; /* static so auto-zeroed */
+
+ snprintf(buffer, sizeof(buffer), "%s/%s", rte_eal_get_runtime_dir(),
+ HUGEPAGE_INFO_FNAME);
+ return buffer;
+}
+
+/** Path of hugepage data file. */
+#define HUGEPAGE_DATA_FNAME "hugepage_data"
+static inline const char *
+eal_hugepage_data_path(void)
+{
+ static char buffer[PATH_MAX]; /* static so auto-zeroed */
+
+ snprintf(buffer, sizeof(buffer), "%s/%s", rte_eal_get_runtime_dir(),
+ HUGEPAGE_DATA_FNAME);
+ return buffer;
+}
+
+/** String format for hugepage map files. */
+#define HUGEFILE_FMT "%s/%smap_%d"
+static inline const char *
+eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
+{
+ snprintf(buffer, buflen, HUGEFILE_FMT, hugedir,
+ eal_get_hugefile_prefix(), f_id);
+ return buffer;
+}
+
+/** define the default filename prefix for the %s values above */
+#define HUGEFILE_PREFIX_DEFAULT "rte"
+
+/** Function to read a single numeric value from a file on the filesystem.
+ * Used to read information from files on /sys */
+int eal_parse_sysfs_value(const char *filename, unsigned long *val);
+
+#endif /* EAL_FILESYSTEM_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_hugepages.h b/src/spdk/dpdk/lib/librte_eal/common/eal_hugepages.h
new file mode 100644
index 000000000..1b560d337
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_hugepages.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef EAL_HUGEPAGES_H
+#define EAL_HUGEPAGES_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <limits.h>
+
+#define MAX_HUGEPAGE_PATH PATH_MAX
+
+/**
+ * Structure used to store information about hugepages that we mapped
+ * through the files in hugetlbfs.
+ */
+struct hugepage_file {
+ void *orig_va; /**< virtual addr of first mmap() */
+ void *final_va; /**< virtual addr of 2nd mmap() */
+ uint64_t physaddr; /**< physical addr */
+ size_t size; /**< the page size */
+ int socket_id; /**< NUMA socket ID */
+ int file_id; /**< the '%d' in HUGEFILE_FMT */
+ char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */
+};
+
+/**
+ * Read the information on what hugepages are available for the EAL to use,
+ * clearing out any unused ones.
+ */
+int eal_hugepage_info_init(void);
+
+/**
+ * Read whatever information primary process has shared about hugepages into
+ * secondary process.
+ */
+int eal_hugepage_info_read(void);
+
+#endif /* EAL_HUGEPAGES_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_internal_cfg.h b/src/spdk/dpdk/lib/librte_eal/common/eal_internal_cfg.h
new file mode 100644
index 000000000..c650bc081
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_internal_cfg.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+/**
+ * @file
+ * Holds the structures for the eal internal configuration
+ */
+
+#ifndef EAL_INTERNAL_CFG_H
+#define EAL_INTERNAL_CFG_H
+
+#include <rte_eal.h>
+#include <rte_pci_dev_feature_defs.h>
+
+#include "eal_thread.h"
+
+#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+#define MAX_HUGEPAGE_SIZES 4 /**< support up to 4 page sizes */
+#else
+#define MAX_HUGEPAGE_SIZES 3 /**< support up to 3 page sizes */
+#endif
+
+/*
+ * internal configuration structure for the number, size and
+ * mount points of hugepages
+ */
+struct hugepage_info {
+ uint64_t hugepage_sz; /**< size of a huge page */
+ char hugedir[PATH_MAX]; /**< dir where hugetlbfs is mounted */
+ uint32_t num_pages[RTE_MAX_NUMA_NODES];
+ /**< number of hugepages of that size on each socket */
+ int lock_descriptor; /**< file descriptor for hugepage dir */
+};
+
+/**
+ * internal configuration
+ */
+struct internal_config {
+ volatile size_t memory; /**< amount of asked memory */
+ volatile unsigned force_nchannel; /**< force number of channels */
+ volatile unsigned force_nrank; /**< force number of ranks */
+ volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */
+ unsigned hugepage_unlink; /**< true to unlink backing files */
+ volatile unsigned no_pci; /**< true to disable PCI */
+ volatile unsigned no_hpet; /**< true to disable HPET */
+ volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
+ * instead of native TSC */
+ volatile unsigned no_shconf; /**< true if there is no shared config */
+ volatile unsigned in_memory;
+ /**< true if DPDK should operate entirely in-memory and not create any
+ * shared files or runtime data.
+ */
+ volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */
+ volatile enum rte_proc_type_t process_type; /**< multi-process proc type */
+ /** true to try allocating memory on specific sockets */
+ volatile unsigned force_sockets;
+ volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */
+ volatile unsigned force_socket_limits;
+ volatile uint64_t socket_limit[RTE_MAX_NUMA_NODES]; /**< limit amount of memory per socket */
+ uintptr_t base_virtaddr; /**< base address to try and reserve memory from */
+ volatile unsigned legacy_mem;
+ /**< true to enable legacy memory behavior (no dynamic allocation,
+ * IOVA-contiguous segments).
+ */
+ volatile unsigned match_allocations;
+ /**< true to free hugepages exactly as allocated */
+ volatile unsigned single_file_segments;
+ /**< true if storing all pages within single files (per-page-size,
+ * per-node) non-legacy mode only.
+ */
+ volatile int syslog_facility; /**< facility passed to openlog() */
+ /** default interrupt mode for VFIO */
+ volatile enum rte_intr_mode vfio_intr_mode;
+ char *hugefile_prefix; /**< the base filename of hugetlbfs files */
+ char *hugepage_dir; /**< specific hugetlbfs directory to use */
+ char *user_mbuf_pool_ops_name;
+ /**< user defined mbuf pool ops name */
+ unsigned num_hugepage_sizes; /**< how many sizes on this system */
+ struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
+ enum rte_iova_mode iova_mode ; /**< Set IOVA mode on this system */
+ rte_cpuset_t ctrl_cpuset; /**< cpuset for ctrl threads */
+ volatile unsigned int init_complete;
+ /**< indicates whether EAL has completed initialization */
+ unsigned int no_telemetry; /**< true to disable Telemetry */
+};
+extern struct internal_config internal_config; /**< Global EAL configuration. */
+
+void eal_reset_internal_config(struct internal_config *internal_cfg);
+
+#endif /* EAL_INTERNAL_CFG_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_memalloc.h b/src/spdk/dpdk/lib/librte_eal/common/eal_memalloc.h
new file mode 100644
index 000000000..e953cd84e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_memalloc.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#ifndef EAL_MEMALLOC_H
+#define EAL_MEMALLOC_H
+
+#include <stdbool.h>
+
+#include <rte_memory.h>
+
+/*
+ * Allocate segment of specified page size.
+ */
+struct rte_memseg *
+eal_memalloc_alloc_seg(size_t page_sz, int socket);
+
+/*
+ * Allocate `n_segs` segments.
+ *
+ * Note: `ms` can be NULL.
+ *
+ * Note: it is possible to request best-effort allocation by setting `exact` to
+ * `false`, in which case allocator will return however many pages it managed to
+ * allocate successfully.
+ */
+int
+eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs, size_t page_sz,
+ int socket, bool exact);
+
+/*
+ * Deallocate segment
+ */
+int
+eal_memalloc_free_seg(struct rte_memseg *ms);
+
+/*
+ * Deallocate `n_segs` segments. Returns 0 on successful deallocation of all
+ * segments, returns -1 on error. Any segments that could have been deallocated,
+ * will be deallocated even in case of error.
+ */
+int
+eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs);
+
+/*
+ * Check if memory pointed to by `start` and of `length` that resides in
+ * memseg list `msl` is IOVA-contiguous.
+ */
+bool
+eal_memalloc_is_contig(const struct rte_memseg_list *msl, void *start,
+ size_t len);
+
+/* synchronize local memory map to primary process */
+int
+eal_memalloc_sync_with_primary(void);
+
+int
+eal_memalloc_mem_event_callback_register(const char *name,
+ rte_mem_event_callback_t clb, void *arg);
+
+int
+eal_memalloc_mem_event_callback_unregister(const char *name, void *arg);
+
+void
+eal_memalloc_mem_event_notify(enum rte_mem_event event, const void *start,
+ size_t len);
+
+int
+eal_memalloc_mem_alloc_validator_register(const char *name,
+ rte_mem_alloc_validator_t clb, int socket_id, size_t limit);
+
+int
+eal_memalloc_mem_alloc_validator_unregister(const char *name, int socket_id);
+
+int
+eal_memalloc_mem_alloc_validate(int socket_id, size_t new_len);
+
+/* returns fd or -errno */
+int
+eal_memalloc_get_seg_fd(int list_idx, int seg_idx);
+
+/* returns 0 or -errno */
+int
+eal_memalloc_set_seg_fd(int list_idx, int seg_idx, int fd);
+
+/* returns 0 or -errno */
+int
+eal_memalloc_set_seg_list_fd(int list_idx, int fd);
+
+int
+eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset);
+
+int
+eal_memalloc_init(void);
+
+#endif /* EAL_MEMALLOC_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_memcfg.h b/src/spdk/dpdk/lib/librte_eal/common/eal_memcfg.h
new file mode 100644
index 000000000..583fcb595
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_memcfg.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef EAL_MEMCFG_H
+#define EAL_MEMCFG_H
+
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_pause.h>
+#include <rte_spinlock.h>
+#include <rte_rwlock.h>
+#include <rte_tailq.h>
+
+#include "malloc_heap.h"
+
+/**
+ * Memory configuration shared across multiple processes.
+ */
+struct rte_mem_config {
+ volatile uint32_t magic; /**< Magic number - sanity check. */
+ uint32_t version;
+ /**< Prevent secondary processes using different DPDK versions. */
+
+ /* memory topology */
+ uint32_t nchannel; /**< Number of channels (0 if unknown). */
+ uint32_t nrank; /**< Number of ranks (0 if unknown). */
+
+ /**
+ * current lock nest order
+ * - qlock->mlock (ring/hash/lpm)
+ * - mplock->qlock->mlock (mempool)
+ * Notice:
+ * *ALWAYS* obtain qlock first if having to obtain both qlock and mlock
+ */
+ rte_rwlock_t mlock; /**< used by memzones for thread safety. */
+ rte_rwlock_t qlock; /**< used by tailqs for thread safety. */
+ rte_rwlock_t mplock; /**< used by mempool library for thread safety. */
+ rte_spinlock_t tlock; /**< used by timer library for thread safety. */
+
+ rte_rwlock_t memory_hotplug_lock;
+ /**< Indicates whether memory hotplug request is in progress. */
+
+ /* memory segments and zones */
+ struct rte_fbarray memzones; /**< Memzone descriptors. */
+
+ struct rte_memseg_list memsegs[RTE_MAX_MEMSEG_LISTS];
+ /**< List of dynamic arrays holding memsegs */
+
+ struct rte_tailq_head tailq_head[RTE_MAX_TAILQ];
+ /**< Tailqs for objects */
+
+ struct malloc_heap malloc_heaps[RTE_MAX_HEAPS];
+ /**< DPDK malloc heaps */
+
+ int next_socket_id; /**< Next socket ID for external malloc heap */
+
+ /* rte_mem_config has to be mapped at the exact same address in all
+ * processes, so we need to store it.
+ */
+ uint64_t mem_cfg_addr; /**< Address of this structure in memory. */
+
+ /* Primary and secondary processes cannot run with different legacy or
+ * single file segments options, so to avoid having to specify these
+ * options to all processes, store them in shared config and update the
+ * internal config at init time.
+ */
+ uint32_t legacy_mem; /**< stored legacy mem parameter. */
+ uint32_t single_file_segments;
+ /**< stored single file segments parameter. */
+
+ uint64_t tsc_hz;
+ /**< TSC rate */
+
+ uint8_t dma_maskbits; /**< Keeps the more restricted dma mask. */
+};
+
+/* update internal config from shared mem config */
+void
+eal_mcfg_update_internal(void);
+
+/* update shared mem config from internal config */
+void
+eal_mcfg_update_from_internal(void);
+
+/* wait until primary process initialization is complete */
+void
+eal_mcfg_wait_complete(void);
+
+/* check if DPDK version of current process matches one stored in the config */
+int
+eal_mcfg_check_version(void);
+
+/* set mem config as complete */
+void
+eal_mcfg_complete(void);
+
+#endif /* EAL_MEMCFG_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_options.h b/src/spdk/dpdk/lib/librte_eal/common/eal_options.h
new file mode 100644
index 000000000..18e6da9ab
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_options.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2014 6WIND S.A.
+ */
+
+#ifndef EAL_OPTIONS_H
+#define EAL_OPTIONS_H
+
+#include "getopt.h"
+
+struct rte_tel_data;
+
+enum {
+ /* long options mapped to a short option */
+#define OPT_HELP "help"
+ OPT_HELP_NUM = 'h',
+#define OPT_PCI_BLACKLIST "pci-blacklist"
+ OPT_PCI_BLACKLIST_NUM = 'b',
+#define OPT_PCI_WHITELIST "pci-whitelist"
+ OPT_PCI_WHITELIST_NUM = 'w',
+
+ /* first long only option value must be >= 256, so that we won't
+ * conflict with short options */
+ OPT_LONG_MIN_NUM = 256,
+#define OPT_BASE_VIRTADDR "base-virtaddr"
+ OPT_BASE_VIRTADDR_NUM,
+#define OPT_CREATE_UIO_DEV "create-uio-dev"
+ OPT_CREATE_UIO_DEV_NUM,
+#define OPT_FILE_PREFIX "file-prefix"
+ OPT_FILE_PREFIX_NUM,
+#define OPT_HUGE_DIR "huge-dir"
+ OPT_HUGE_DIR_NUM,
+#define OPT_HUGE_UNLINK "huge-unlink"
+ OPT_HUGE_UNLINK_NUM,
+#define OPT_LCORES "lcores"
+ OPT_LCORES_NUM,
+#define OPT_LOG_LEVEL "log-level"
+ OPT_LOG_LEVEL_NUM,
+#define OPT_TRACE "trace"
+ OPT_TRACE_NUM,
+#define OPT_TRACE_DIR "trace-dir"
+ OPT_TRACE_DIR_NUM,
+#define OPT_TRACE_BUF_SIZE "trace-bufsz"
+ OPT_TRACE_BUF_SIZE_NUM,
+#define OPT_TRACE_MODE "trace-mode"
+ OPT_TRACE_MODE_NUM,
+#define OPT_MASTER_LCORE "master-lcore"
+ OPT_MASTER_LCORE_NUM,
+#define OPT_MBUF_POOL_OPS_NAME "mbuf-pool-ops-name"
+ OPT_MBUF_POOL_OPS_NAME_NUM,
+#define OPT_PROC_TYPE "proc-type"
+ OPT_PROC_TYPE_NUM,
+#define OPT_NO_HPET "no-hpet"
+ OPT_NO_HPET_NUM,
+#define OPT_NO_HUGE "no-huge"
+ OPT_NO_HUGE_NUM,
+#define OPT_NO_PCI "no-pci"
+ OPT_NO_PCI_NUM,
+#define OPT_NO_SHCONF "no-shconf"
+ OPT_NO_SHCONF_NUM,
+#define OPT_IN_MEMORY "in-memory"
+ OPT_IN_MEMORY_NUM,
+#define OPT_SOCKET_MEM "socket-mem"
+ OPT_SOCKET_MEM_NUM,
+#define OPT_SOCKET_LIMIT "socket-limit"
+ OPT_SOCKET_LIMIT_NUM,
+#define OPT_SYSLOG "syslog"
+ OPT_SYSLOG_NUM,
+#define OPT_VDEV "vdev"
+ OPT_VDEV_NUM,
+#define OPT_VFIO_INTR "vfio-intr"
+ OPT_VFIO_INTR_NUM,
+#define OPT_VMWARE_TSC_MAP "vmware-tsc-map"
+ OPT_VMWARE_TSC_MAP_NUM,
+#define OPT_LEGACY_MEM "legacy-mem"
+ OPT_LEGACY_MEM_NUM,
+#define OPT_SINGLE_FILE_SEGMENTS "single-file-segments"
+ OPT_SINGLE_FILE_SEGMENTS_NUM,
+#define OPT_IOVA_MODE "iova-mode"
+ OPT_IOVA_MODE_NUM,
+#define OPT_MATCH_ALLOCATIONS "match-allocations"
+ OPT_MATCH_ALLOCATIONS_NUM,
+#define OPT_TELEMETRY "telemetry"
+ OPT_TELEMETRY_NUM,
+#define OPT_NO_TELEMETRY "no-telemetry"
+ OPT_NO_TELEMETRY_NUM,
+ OPT_LONG_MAX_NUM
+};
+
+extern const char eal_short_options[];
+extern const struct option eal_long_options[];
+
+int eal_parse_common_option(int opt, const char *argv,
+ struct internal_config *conf);
+int eal_option_device_parse(void);
+int eal_adjust_config(struct internal_config *internal_cfg);
+int eal_cleanup_config(struct internal_config *internal_cfg);
+int eal_check_common_options(struct internal_config *internal_cfg);
+void eal_common_usage(void);
+enum rte_proc_type_t eal_proc_type_detect(void);
+int eal_plugins_init(void);
+int eal_save_args(int argc, char **argv);
+int handle_eal_info_request(const char *cmd, const char *params __rte_unused,
+ struct rte_tel_data *d);
+
+#endif /* EAL_OPTIONS_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_private.h b/src/spdk/dpdk/lib/librte_eal/common/eal_private.h
new file mode 100644
index 000000000..869ce183a
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_private.h
@@ -0,0 +1,423 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#ifndef _EAL_PRIVATE_H_
+#define _EAL_PRIVATE_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <rte_dev.h>
+#include <rte_lcore.h>
+
+/**
+ * Structure storing internal configuration (per-lcore)
+ */
+struct lcore_config {
+ pthread_t thread_id; /**< pthread identifier */
+ int pipe_master2slave[2]; /**< communication pipe with master */
+ int pipe_slave2master[2]; /**< communication pipe with master */
+
+ lcore_function_t * volatile f; /**< function to call */
+ void * volatile arg; /**< argument of function */
+ volatile int ret; /**< return value of function */
+
+ volatile enum rte_lcore_state_t state; /**< lcore state */
+ unsigned int socket_id; /**< physical socket id for this lcore */
+ unsigned int core_id; /**< core number on socket for this lcore */
+ int core_index; /**< relative index, starting from 0 */
+ uint8_t core_role; /**< role of core eg: OFF, RTE, SERVICE */
+
+ rte_cpuset_t cpuset; /**< cpu set which the lcore affinity to */
+};
+
+extern struct lcore_config lcore_config[RTE_MAX_LCORE];
+
+/**
+ * The global RTE configuration structure.
+ */
+struct rte_config {
+ uint32_t master_lcore; /**< Id of the master lcore */
+ uint32_t lcore_count; /**< Number of available logical cores. */
+ uint32_t numa_node_count; /**< Number of detected NUMA nodes. */
+ uint32_t numa_nodes[RTE_MAX_NUMA_NODES]; /**< List of detected NUMA nodes. */
+ uint32_t service_lcore_count;/**< Number of available service cores. */
+ enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /**< State of cores. */
+
+ /** Primary or secondary configuration */
+ enum rte_proc_type_t process_type;
+
+ /** PA or VA mapping mode */
+ enum rte_iova_mode iova_mode;
+
+ /**
+ * Pointer to memory configuration, which may be shared across multiple
+ * DPDK instances
+ */
+ struct rte_mem_config *mem_config;
+} __rte_packed;
+
+/**
+ * Get the global configuration structure.
+ *
+ * @return
+ * A pointer to the global configuration structure.
+ */
+struct rte_config *rte_eal_get_configuration(void);
+
+/**
+ * Initialize the memzone subsystem (private to eal).
+ *
+ * @return
+ * - 0 on success
+ * - Negative on error
+ */
+int rte_eal_memzone_init(void);
+
+/**
+ * Common log initialization function (private to eal). Determines
+ * where log data is written when no call to rte_openlog_stream is
+ * in effect.
+ *
+ * @param default_log
+ * The default log stream to be used.
+ * @return
+ * - 0 on success
+ * - Negative on error
+ */
+void eal_log_set_default(FILE *default_log);
+
+/**
+ * Fill configuration with number of physical and logical processors
+ *
+ * This function is private to EAL.
+ *
+ * Parse /proc/cpuinfo to get the number of physical and logical
+ * processors on the machine.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_cpu_init(void);
+
+/**
+ * Create memseg lists
+ *
+ * This function is private to EAL.
+ *
+ * Preallocate virtual memory.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_memseg_init(void);
+
+/**
+ * Map memory
+ *
+ * This function is private to EAL.
+ *
+ * Fill configuration structure with these infos, and return 0 on success.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_memory_init(void);
+
+/**
+ * Configure timers
+ *
+ * This function is private to EAL.
+ *
+ * Mmap memory areas used by HPET (high precision event timer) that will
+ * provide our time reference, and configure the TSC frequency also for it
+ * to be used as a reference.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_timer_init(void);
+
+/**
+ * Init the default log stream
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_log_init(const char *id, int facility);
+
+/**
+ * Save the log regexp for later
+ */
+int rte_log_save_regexp(const char *type, int priority);
+int rte_log_save_pattern(const char *pattern, int priority);
+
+/**
+ * Init tail queues for non-EAL library structures. This is to allow
+ * the rings, mempools, etc. lists to be shared among multiple processes
+ *
+ * This function is private to EAL
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_tailqs_init(void);
+
+/**
+ * Init interrupt handling.
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_intr_init(void);
+
+/**
+ * Init alarm mechanism. This is to allow a callback be called after
+ * specific time.
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_alarm_init(void);
+
+/**
+ * Function is to check if the kernel module(like, vfio, vfio_iommu_type1,
+ * etc.) loaded.
+ *
+ * @param module_name
+ * The module's name which need to be checked
+ *
+ * @return
+ * -1 means some error happens(NULL pointer or open failure)
+ * 0 means the module not loaded
+ * 1 means the module loaded
+ */
+int rte_eal_check_module(const char *module_name);
+
+/**
+ * Get virtual area of specified size from the OS.
+ *
+ * This function is private to the EAL.
+ *
+ * @param requested_addr
+ * Address where to request address space.
+ * @param size
+ * Size of requested area.
+ * @param page_sz
+ * Page size on which to align requested virtual area.
+ * @param flags
+ * EAL_VIRTUAL_AREA_* flags.
+ * @param mmap_flags
+ * Extra flags passed directly to mmap().
+ *
+ * @return
+ * Virtual area address if successful.
+ * NULL if unsuccessful.
+ */
+
+#define EAL_VIRTUAL_AREA_ADDR_IS_HINT (1 << 0)
+/**< don't fail if cannot get exact requested address. */
+#define EAL_VIRTUAL_AREA_ALLOW_SHRINK (1 << 1)
+/**< try getting smaller sized (decrement by page size) virtual areas if cannot
+ * get area of requested size.
+ */
+#define EAL_VIRTUAL_AREA_UNMAP (1 << 2)
+/**< immediately unmap reserved virtual area. */
+void *
+eal_get_virtual_area(void *requested_addr, size_t *size,
+ size_t page_sz, int flags, int mmap_flags);
+
+/**
+ * Get cpu core_id.
+ *
+ * This function is private to the EAL.
+ */
+unsigned eal_cpu_core_id(unsigned lcore_id);
+
+/**
+ * Check if cpu is present.
+ *
+ * This function is private to the EAL.
+ */
+int eal_cpu_detected(unsigned lcore_id);
+
+/**
+ * Set TSC frequency from precise value or estimation
+ *
+ * This function is private to the EAL.
+ */
+void set_tsc_freq(void);
+
+/**
+ * Get precise TSC frequency from system
+ *
+ * This function is private to the EAL.
+ */
+uint64_t get_tsc_freq(void);
+
+/**
+ * Get TSC frequency if the architecture supports.
+ *
+ * This function is private to the EAL.
+ *
+ * @return
+ * The number of TSC cycles in one second.
+ * Returns zero if the architecture support is not available.
+ */
+uint64_t get_tsc_freq_arch(void);
+
+/**
+ * Prepare physical memory mapping
+ * i.e. hugepages on Linux and
+ * contigmem on BSD.
+ *
+ * This function is private to the EAL.
+ */
+int rte_eal_hugepage_init(void);
+
+/**
+ * Creates memory mapping in secondary process
+ * i.e. hugepages on Linux and
+ * contigmem on BSD.
+ *
+ * This function is private to the EAL.
+ */
+int rte_eal_hugepage_attach(void);
+
+/**
+ * Find a bus capable of identifying a device.
+ *
+ * @param str
+ * A device identifier (PCI address, virtual PMD name, ...).
+ *
+ * @return
+ * A valid bus handle if found.
+ * NULL if no bus is able to parse this device.
+ */
+struct rte_bus *rte_bus_find_by_device_name(const char *str);
+
+/**
+ * Create the unix channel for primary/secondary communication.
+ *
+ * @return
+ * 0 on success;
+ * (<0) on failure.
+ */
+int rte_mp_channel_init(void);
+
+/**
+ * Primary/secondary communication cleanup.
+ */
+void rte_mp_channel_cleanup(void);
+
+/**
+ * @internal
+ * Parse a device string and store its information in an
+ * rte_devargs structure.
+ *
+ * A device description is split by layers of abstraction of the device:
+ * bus, class and driver. Each layer will offer a set of properties that
+ * can be applied either to configure or recognize a device.
+ *
+ * This function will parse those properties and prepare the rte_devargs
+ * to be given to each layers for processing.
+ *
+ * Note: if the "data" field of the devargs points to devstr,
+ * then no dynamic allocation is performed and the rte_devargs
+ * can be safely discarded.
+ *
+ * Otherwise ``data`` will hold a workable copy of devstr, that will be
+ * used by layers descriptors within rte_devargs. In this case,
+ * any rte_devargs should be cleaned-up before being freed.
+ *
+ * @param da
+ * rte_devargs structure to fill.
+ *
+ * @param devstr
+ * Device string.
+ *
+ * @return
+ * 0 on success.
+ * Negative errno values on error (rte_errno is set).
+ */
+int
+rte_devargs_layers_parse(struct rte_devargs *devargs,
+ const char *devstr);
+
+/*
+ * probe a device at local process.
+ *
+ * @param devargs
+ * Device arguments including bus, class and driver properties.
+ * @param new_dev
+ * new device be probed as output.
+ * @return
+ * 0 on success, negative on error.
+ */
+int local_dev_probe(const char *devargs, struct rte_device **new_dev);
+
+/**
+ * Hotplug remove a given device from a specific bus at local process.
+ *
+ * @param dev
+ * Data structure of the device to remove.
+ * @return
+ * 0 on success, negative on error.
+ */
+int local_dev_remove(struct rte_device *dev);
+
+/**
+ * Iterate over all buses to find the corresponding bus to handle the sigbus
+ * error.
+ * @param failure_addr
+ * Pointer of the fault address of the sigbus error.
+ *
+ * @return
+ * 0 success to handle the sigbus.
+ * -1 failed to handle the sigbus
+ * 1 no bus can handler the sigbus
+ */
+int rte_bus_sigbus_handler(const void *failure_addr);
+
+/**
+ * @internal
+ * Register the sigbus handler.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+dev_sigbus_handler_register(void);
+
+/**
+ * @internal
+ * Unregister the sigbus handler.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+dev_sigbus_handler_unregister(void);
+
+/**
+ * Get OS-specific EAL mapping base address.
+ */
+uint64_t
+eal_get_baseaddr(void);
+
+void *
+eal_malloc_no_trace(const char *type, size_t size, unsigned int align);
+
+void eal_free_no_trace(void *addr);
+
+#endif /* _EAL_PRIVATE_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_thread.h b/src/spdk/dpdk/lib/librte_eal/common/eal_thread.h
new file mode 100644
index 000000000..b40ed249e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_thread.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef EAL_THREAD_H
+#define EAL_THREAD_H
+
+#include <rte_lcore.h>
+
+/**
+ * basic loop of thread, called for each thread by eal_init().
+ *
+ * @param arg
+ * opaque pointer
+ */
+__rte_noreturn void *eal_thread_loop(void *arg);
+
+/**
+ * Init per-lcore info for master thread
+ *
+ * @param lcore_id
+ * identifier of master lcore
+ */
+void eal_thread_init_master(unsigned lcore_id);
+
+/**
+ * Get the NUMA socket id from cpu id.
+ * This function is private to EAL.
+ *
+ * @param cpu_id
+ * The logical process id.
+ * @return
+ * socket_id or SOCKET_ID_ANY
+ */
+unsigned eal_cpu_socket_id(unsigned cpu_id);
+
+/**
+ * Default buffer size to use with eal_thread_dump_affinity()
+ */
+#define RTE_CPU_AFFINITY_STR_LEN 256
+
+/**
+ * Dump the current pthread cpuset.
+ * This function is private to EAL.
+ *
+ * Note:
+ * If the dump size is greater than the size of given buffer,
+ * the string will be truncated and with '\0' at the end.
+ *
+ * @param str
+ * The string buffer the cpuset will dump to.
+ * @param size
+ * The string buffer size.
+ * @return
+ * 0 for success, -1 if truncation happens.
+ */
+int
+eal_thread_dump_affinity(char *str, unsigned size);
+
+#endif /* EAL_THREAD_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/eal_trace.h b/src/spdk/dpdk/lib/librte_eal/common/eal_trace.h
new file mode 100644
index 000000000..8f6061615
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/eal_trace.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#ifndef __EAL_TRACE_H
+#define __EAL_TRACE_H
+
+#include <rte_cycles.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_spinlock.h>
+#include <rte_trace.h>
+#include <rte_trace_point.h>
+#include <rte_uuid.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+
+#define trace_err(fmt, args...) \
+ RTE_LOG(ERR, EAL, "%s():%u " fmt "\n", __func__, __LINE__, ## args)
+
+#define trace_crit(fmt, args...) \
+ RTE_LOG(CRIT, EAL, "%s():%u " fmt "\n", __func__, __LINE__, ## args)
+
+#define TRACE_PREFIX_LEN 12
+#define TRACE_DIR_STR_LEN (sizeof("YYYY-mm-dd-AM-HH-MM-SS") + TRACE_PREFIX_LEN)
+#define TRACE_CTF_FIELD_SIZE 384
+#define TRACE_POINT_NAME_SIZE 64
+#define TRACE_CTF_MAGIC 0xC1FC1FC1
+#define TRACE_MAX_ARGS 32
+
+struct trace_point {
+ STAILQ_ENTRY(trace_point) next;
+ rte_trace_point_t *handle;
+ char name[TRACE_POINT_NAME_SIZE];
+ char ctf_field[TRACE_CTF_FIELD_SIZE];
+};
+
+enum trace_area_e {
+ TRACE_AREA_HEAP,
+ TRACE_AREA_HUGEPAGE,
+};
+
+struct thread_mem_meta {
+ void *mem;
+ enum trace_area_e area;
+};
+
+struct trace_arg {
+ STAILQ_ENTRY(trace_arg) next;
+ char *val;
+};
+
+struct trace {
+ char dir[PATH_MAX];
+ int dir_offset;
+ int register_errno;
+ bool status;
+ enum rte_trace_mode mode;
+ rte_uuid_t uuid;
+ uint32_t buff_len;
+ STAILQ_HEAD(, trace_arg) args;
+ uint32_t nb_trace_points;
+ uint32_t nb_trace_mem_list;
+ struct thread_mem_meta *lcore_meta;
+ uint64_t epoch_sec;
+ uint64_t epoch_nsec;
+ uint64_t uptime_ticks;
+ char *ctf_meta;
+ uint32_t ctf_meta_offset_freq;
+ uint32_t ctf_meta_offset_freq_off_s;
+ uint32_t ctf_meta_offset_freq_off;
+ uint16_t ctf_fixup_done;
+ rte_spinlock_t lock;
+};
+
+/* Helper functions */
+static inline uint16_t
+trace_id_get(rte_trace_point_t *trace)
+{
+ return (*trace & __RTE_TRACE_FIELD_ID_MASK) >>
+ __RTE_TRACE_FIELD_ID_SHIFT;
+}
+
+static inline size_t
+trace_mem_sz(uint32_t len)
+{
+ return len + sizeof(struct __rte_trace_header);
+}
+
+/* Trace object functions */
+struct trace *trace_obj_get(void);
+
+/* Trace point list functions */
+STAILQ_HEAD(trace_point_head, trace_point);
+struct trace_point_head *trace_list_head_get(void);
+
+/* Util functions */
+const char *trace_mode_to_string(enum rte_trace_mode mode);
+const char *trace_area_to_string(enum trace_area_e area);
+int trace_args_apply(const char *arg);
+void trace_bufsz_args_apply(void);
+bool trace_has_duplicate_entry(void);
+void trace_uuid_generate(void);
+int trace_metadata_create(void);
+void trace_metadata_destroy(void);
+int trace_mkdir(void);
+int trace_epoch_time_save(void);
+void trace_mem_per_thread_free(void);
+
+/* EAL interface */
+int eal_trace_init(void);
+void eal_trace_fini(void);
+int eal_trace_args_save(const char *val);
+void eal_trace_args_free(void);
+int eal_trace_dir_args_save(const char *val);
+int eal_trace_mode_args_save(const char *val);
+int eal_trace_bufsz_args_save(const char *val);
+
+#endif /* __EAL_TRACE_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.c b/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.c
new file mode 100644
index 000000000..ee791903b
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.c
@@ -0,0 +1,465 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+#include <string.h>
+
+#include <rte_eal.h>
+#include <rte_errno.h>
+#include <rte_alarm.h>
+#include <rte_string_fns.h>
+#include <rte_devargs.h>
+
+#include "hotplug_mp.h"
+#include "eal_private.h"
+
+#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */
+
+struct mp_reply_bundle {
+ struct rte_mp_msg msg;
+ void *peer;
+};
+
+static int cmp_dev_name(const struct rte_device *dev, const void *_name)
+{
+ const char *name = _name;
+
+ return strcmp(dev->name, name);
+}
+
+/**
+ * Secondary to primary request.
+ * start from function eal_dev_hotplug_request_to_primary.
+ *
+ * device attach on secondary:
+ * a) secondary send sync request to the primary.
+ * b) primary receive the request and attach the new device if
+ * failed goto i).
+ * c) primary forward attach sync request to all secondary.
+ * d) secondary receive the request and attach the device and send a reply.
+ * e) primary check the reply if all success goes to j).
+ * f) primary send attach rollback sync request to all secondary.
+ * g) secondary receive the request and detach the device and send a reply.
+ * h) primary receive the reply and detach device as rollback action.
+ * i) send attach fail to secondary as a reply of step a), goto k).
+ * j) send attach success to secondary as a reply of step a).
+ * k) secondary receive reply and return.
+ *
+ * device detach on secondary:
+ * a) secondary send sync request to the primary.
+ * b) primary send detach sync request to all secondary.
+ * c) secondary detach the device and send a reply.
+ * d) primary check the reply if all success goes to g).
+ * e) primary send detach rollback sync request to all secondary.
+ * f) secondary receive the request and attach back device. goto h).
+ * g) primary detach the device if success goto i), else goto e).
+ * h) primary send detach fail to secondary as a reply of step a), goto j).
+ * i) primary send detach success to secondary as a reply of step a).
+ * j) secondary receive reply and return.
+ */
+
+static int
+send_response_to_secondary(const struct eal_dev_mp_req *req,
+ int result,
+ const void *peer)
+{
+ struct rte_mp_msg mp_resp;
+ struct eal_dev_mp_req *resp =
+ (struct eal_dev_mp_req *)mp_resp.param;
+ int ret;
+
+ memset(&mp_resp, 0, sizeof(mp_resp));
+ mp_resp.len_param = sizeof(*resp);
+ strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name));
+ memcpy(resp, req, sizeof(*req));
+ resp->result = result;
+
+ ret = rte_mp_reply(&mp_resp, peer);
+ if (ret != 0)
+ RTE_LOG(ERR, EAL, "failed to send response to secondary\n");
+
+ return ret;
+}
+
+static void
+__handle_secondary_request(void *param)
+{
+ struct mp_reply_bundle *bundle = param;
+ const struct rte_mp_msg *msg = &bundle->msg;
+ const struct eal_dev_mp_req *req =
+ (const struct eal_dev_mp_req *)msg->param;
+ struct eal_dev_mp_req tmp_req;
+ struct rte_devargs da;
+ struct rte_device *dev;
+ struct rte_bus *bus;
+ int ret = 0;
+
+ tmp_req = *req;
+
+ if (req->t == EAL_DEV_REQ_TYPE_ATTACH) {
+ ret = local_dev_probe(req->devargs, &dev);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Failed to hotplug add device on primary\n");
+ if (ret != -EEXIST)
+ goto finish;
+ }
+ ret = eal_dev_hotplug_request_to_secondary(&tmp_req);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Failed to send hotplug request to secondary\n");
+ ret = -ENOMSG;
+ goto rollback;
+ }
+ if (tmp_req.result != 0) {
+ ret = tmp_req.result;
+ RTE_LOG(ERR, EAL, "Failed to hotplug add device on secondary\n");
+ if (ret != -EEXIST)
+ goto rollback;
+ }
+ } else if (req->t == EAL_DEV_REQ_TYPE_DETACH) {
+ ret = rte_devargs_parse(&da, req->devargs);
+ if (ret != 0)
+ goto finish;
+ free(da.args); /* we don't need those */
+ da.args = NULL;
+
+ ret = eal_dev_hotplug_request_to_secondary(&tmp_req);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Failed to send hotplug request to secondary\n");
+ ret = -ENOMSG;
+ goto rollback;
+ }
+
+ bus = rte_bus_find_by_name(da.bus->name);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", da.bus->name);
+ ret = -ENOENT;
+ goto finish;
+ }
+
+ dev = bus->find_device(NULL, cmp_dev_name, da.name);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", da.name);
+ ret = -ENOENT;
+ goto finish;
+ }
+
+ if (tmp_req.result != 0) {
+ RTE_LOG(ERR, EAL, "Failed to hotplug remove device on secondary\n");
+ ret = tmp_req.result;
+ if (ret != -ENOENT)
+ goto rollback;
+ }
+
+ ret = local_dev_remove(dev);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Failed to hotplug remove device on primary\n");
+ if (ret != -ENOENT)
+ goto rollback;
+ }
+ } else {
+ RTE_LOG(ERR, EAL, "unsupported secondary to primary request\n");
+ ret = -ENOTSUP;
+ }
+ goto finish;
+
+rollback:
+ if (req->t == EAL_DEV_REQ_TYPE_ATTACH) {
+ tmp_req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK;
+ eal_dev_hotplug_request_to_secondary(&tmp_req);
+ local_dev_remove(dev);
+ } else {
+ tmp_req.t = EAL_DEV_REQ_TYPE_DETACH_ROLLBACK;
+ eal_dev_hotplug_request_to_secondary(&tmp_req);
+ }
+
+finish:
+ ret = send_response_to_secondary(&tmp_req, ret, bundle->peer);
+ if (ret)
+ RTE_LOG(ERR, EAL, "failed to send response to secondary\n");
+
+ free(bundle->peer);
+ free(bundle);
+}
+
+static int
+handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+ struct mp_reply_bundle *bundle;
+ const struct eal_dev_mp_req *req =
+ (const struct eal_dev_mp_req *)msg->param;
+ int ret = 0;
+
+ bundle = malloc(sizeof(*bundle));
+ if (bundle == NULL) {
+ RTE_LOG(ERR, EAL, "not enough memory\n");
+ return send_response_to_secondary(req, -ENOMEM, peer);
+ }
+
+ bundle->msg = *msg;
+ /**
+ * We need to send reply on interrupt thread, but peer can't be
+ * parsed directly, so this is a temporal hack, need to be fixed
+ * when it is ready.
+ */
+ bundle->peer = strdup(peer);
+ if (bundle->peer == NULL) {
+ free(bundle);
+ RTE_LOG(ERR, EAL, "not enough memory\n");
+ return send_response_to_secondary(req, -ENOMEM, peer);
+ }
+
+ /**
+ * We are at IPC callback thread, sync IPC is not allowed due to
+ * dead lock, so we delegate the task to interrupt thread.
+ */
+ ret = rte_eal_alarm_set(1, __handle_secondary_request, bundle);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "failed to add mp task\n");
+ free(bundle->peer);
+ free(bundle);
+ return send_response_to_secondary(req, ret, peer);
+ }
+ return 0;
+}
+
+static void __handle_primary_request(void *param)
+{
+ struct mp_reply_bundle *bundle = param;
+ struct rte_mp_msg *msg = &bundle->msg;
+ const struct eal_dev_mp_req *req =
+ (const struct eal_dev_mp_req *)msg->param;
+ struct rte_mp_msg mp_resp;
+ struct eal_dev_mp_req *resp =
+ (struct eal_dev_mp_req *)mp_resp.param;
+ struct rte_devargs *da;
+ struct rte_device *dev;
+ struct rte_bus *bus;
+ int ret = 0;
+
+ memset(&mp_resp, 0, sizeof(mp_resp));
+
+ switch (req->t) {
+ case EAL_DEV_REQ_TYPE_ATTACH:
+ case EAL_DEV_REQ_TYPE_DETACH_ROLLBACK:
+ ret = local_dev_probe(req->devargs, &dev);
+ break;
+ case EAL_DEV_REQ_TYPE_DETACH:
+ case EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK:
+ da = calloc(1, sizeof(*da));
+ if (da == NULL) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ ret = rte_devargs_parse(da, req->devargs);
+ if (ret != 0)
+ goto quit;
+
+ bus = rte_bus_find_by_name(da->bus->name);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", da->bus->name);
+ ret = -ENOENT;
+ goto quit;
+ }
+
+ dev = bus->find_device(NULL, cmp_dev_name, da->name);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", da->name);
+ ret = -ENOENT;
+ goto quit;
+ }
+
+ if (!rte_dev_is_probed(dev)) {
+ if (req->t == EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK) {
+ /**
+ * Don't fail the rollback just because there's
+ * nothing to do.
+ */
+ ret = 0;
+ } else
+ ret = -ENODEV;
+
+ goto quit;
+ }
+
+ ret = local_dev_remove(dev);
+quit:
+ free(da->args);
+ free(da);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name));
+ mp_resp.len_param = sizeof(*req);
+ memcpy(resp, req, sizeof(*resp));
+ resp->result = ret;
+ if (rte_mp_reply(&mp_resp, bundle->peer) < 0)
+ RTE_LOG(ERR, EAL, "failed to send reply to primary request\n");
+
+ free(bundle->peer);
+ free(bundle);
+}
+
+static int
+handle_primary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+ struct rte_mp_msg mp_resp;
+ const struct eal_dev_mp_req *req =
+ (const struct eal_dev_mp_req *)msg->param;
+ struct eal_dev_mp_req *resp =
+ (struct eal_dev_mp_req *)mp_resp.param;
+ struct mp_reply_bundle *bundle;
+ int ret = 0;
+
+ memset(&mp_resp, 0, sizeof(mp_resp));
+ strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name));
+ mp_resp.len_param = sizeof(*req);
+ memcpy(resp, req, sizeof(*resp));
+
+ bundle = calloc(1, sizeof(*bundle));
+ if (bundle == NULL) {
+ RTE_LOG(ERR, EAL, "not enough memory\n");
+ resp->result = -ENOMEM;
+ ret = rte_mp_reply(&mp_resp, peer);
+ if (ret)
+ RTE_LOG(ERR, EAL, "failed to send reply to primary request\n");
+ return ret;
+ }
+
+ bundle->msg = *msg;
+ /**
+ * We need to send reply on interrupt thread, but peer can't be
+ * parsed directly, so this is a temporal hack, need to be fixed
+ * when it is ready.
+ */
+ bundle->peer = (void *)strdup(peer);
+ if (bundle->peer == NULL) {
+ RTE_LOG(ERR, EAL, "not enough memory\n");
+ free(bundle);
+ resp->result = -ENOMEM;
+ ret = rte_mp_reply(&mp_resp, peer);
+ if (ret)
+ RTE_LOG(ERR, EAL, "failed to send reply to primary request\n");
+ return ret;
+ }
+
+ /**
+ * We are at IPC callback thread, sync IPC is not allowed due to
+ * dead lock, so we delegate the task to interrupt thread.
+ */
+ ret = rte_eal_alarm_set(1, __handle_primary_request, bundle);
+ if (ret != 0) {
+ free(bundle->peer);
+ free(bundle);
+ resp->result = ret;
+ ret = rte_mp_reply(&mp_resp, peer);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "failed to send reply to primary request\n");
+ return ret;
+ }
+ }
+ return 0;
+}
+
+int eal_dev_hotplug_request_to_primary(struct eal_dev_mp_req *req)
+{
+ struct rte_mp_msg mp_req;
+ struct rte_mp_reply mp_reply;
+ struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0};
+ struct eal_dev_mp_req *resp;
+ int ret;
+
+ memset(&mp_req, 0, sizeof(mp_req));
+ memcpy(mp_req.param, req, sizeof(*req));
+ mp_req.len_param = sizeof(*req);
+ strlcpy(mp_req.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_req.name));
+
+ ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts);
+ if (ret || mp_reply.nb_received != 1) {
+ RTE_LOG(ERR, EAL, "Cannot send request to primary\n");
+ if (!ret)
+ return -1;
+ return ret;
+ }
+
+ resp = (struct eal_dev_mp_req *)mp_reply.msgs[0].param;
+ req->result = resp->result;
+
+ free(mp_reply.msgs);
+ return ret;
+}
+
+int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req)
+{
+ struct rte_mp_msg mp_req;
+ struct rte_mp_reply mp_reply;
+ struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0};
+ int ret;
+ int i;
+
+ memset(&mp_req, 0, sizeof(mp_req));
+ memcpy(mp_req.param, req, sizeof(*req));
+ mp_req.len_param = sizeof(*req);
+ strlcpy(mp_req.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_req.name));
+
+ ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts);
+ if (ret != 0) {
+ /* if IPC is not supported, behave as if the call succeeded */
+ if (rte_errno != ENOTSUP)
+ RTE_LOG(ERR, EAL, "rte_mp_request_sync failed\n");
+ else
+ ret = 0;
+ return ret;
+ }
+
+ if (mp_reply.nb_sent != mp_reply.nb_received) {
+ RTE_LOG(ERR, EAL, "not all secondary reply\n");
+ free(mp_reply.msgs);
+ return -1;
+ }
+
+ req->result = 0;
+ for (i = 0; i < mp_reply.nb_received; i++) {
+ struct eal_dev_mp_req *resp =
+ (struct eal_dev_mp_req *)mp_reply.msgs[i].param;
+ if (resp->result != 0) {
+ if (req->t == EAL_DEV_REQ_TYPE_ATTACH &&
+ resp->result == -EEXIST)
+ continue;
+ if (req->t == EAL_DEV_REQ_TYPE_DETACH &&
+ resp->result == -ENOENT)
+ continue;
+ req->result = resp->result;
+ }
+ }
+
+ free(mp_reply.msgs);
+ return 0;
+}
+
+int eal_mp_dev_hotplug_init(void)
+{
+ int ret;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ ret = rte_mp_action_register(EAL_DEV_MP_ACTION_REQUEST,
+ handle_secondary_request);
+ /* primary is allowed to not support IPC */
+ if (ret != 0 && rte_errno != ENOTSUP) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ EAL_DEV_MP_ACTION_REQUEST);
+ return ret;
+ }
+ } else {
+ ret = rte_mp_action_register(EAL_DEV_MP_ACTION_REQUEST,
+ handle_primary_request);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ EAL_DEV_MP_ACTION_REQUEST);
+ return ret;
+ }
+ }
+
+ return 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.h b/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.h
new file mode 100644
index 000000000..8fcf9b52e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/hotplug_mp.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _HOTPLUG_MP_H_
+#define _HOTPLUG_MP_H_
+
+#include "rte_dev.h"
+#include "rte_bus.h"
+
+#define EAL_DEV_MP_ACTION_REQUEST "eal_dev_mp_request"
+#define EAL_DEV_MP_ACTION_RESPONSE "eal_dev_mp_response"
+
+#define EAL_DEV_MP_DEV_NAME_MAX_LEN RTE_DEV_NAME_MAX_LEN
+#define EAL_DEV_MP_BUS_NAME_MAX_LEN 32
+#define EAL_DEV_MP_DEV_ARGS_MAX_LEN 128
+
+enum eal_dev_req_type {
+ EAL_DEV_REQ_TYPE_ATTACH,
+ EAL_DEV_REQ_TYPE_DETACH,
+ EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK,
+ EAL_DEV_REQ_TYPE_DETACH_ROLLBACK,
+};
+
+struct eal_dev_mp_req {
+ enum eal_dev_req_type t;
+ char devargs[EAL_DEV_MP_DEV_ARGS_MAX_LEN];
+ int result;
+};
+
+/**
+ * Register all mp action callbacks for hotplug.
+ *
+ * @return
+ * 0 on success, negative on error.
+ */
+int
+eal_mp_dev_hotplug_init(void);
+
+/**
+ * This is a synchronous wrapper for secondary process send
+ * request to primary process, this is invoked when an attach
+ * or detach request is issued from primary process.
+ */
+int eal_dev_hotplug_request_to_primary(struct eal_dev_mp_req *req);
+
+/**
+ * this is a synchronous wrapper for primary process send
+ * request to secondary process, this is invoked when an attach
+ * or detach request issued from secondary process.
+ */
+int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req);
+
+
+#endif /* _HOTPLUG_MP_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.c b/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.c
new file mode 100644
index 000000000..51cdfc5d5
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.c
@@ -0,0 +1,682 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+#include <inttypes.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/queue.h>
+
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_debug.h>
+#include <rte_common.h>
+#include <rte_spinlock.h>
+
+#include "eal_internal_cfg.h"
+#include "eal_memalloc.h"
+#include "malloc_elem.h"
+#include "malloc_heap.h"
+
+/*
+ * If debugging is enabled, freed memory is set to poison value
+ * to catch buggy programs. Otherwise, freed memory is set to zero
+ * to avoid having to zero in zmalloc
+ */
+#ifdef RTE_MALLOC_DEBUG
+#define MALLOC_POISON 0x6b
+#else
+#define MALLOC_POISON 0
+#endif
+
+size_t
+malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
+{
+ void *cur_page, *contig_seg_start, *page_end, *cur_seg_end;
+ void *data_start, *data_end;
+ rte_iova_t expected_iova;
+ struct rte_memseg *ms;
+ size_t page_sz, cur, max;
+
+ page_sz = (size_t)elem->msl->page_sz;
+ data_start = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
+ data_end = RTE_PTR_ADD(elem, elem->size - MALLOC_ELEM_TRAILER_LEN);
+ /* segment must start after header and with specified alignment */
+ contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);
+
+ /* return if aligned address is already out of malloc element */
+ if (contig_seg_start > data_end)
+ return 0;
+
+ /* if we're in IOVA as VA mode, or if we're in legacy mode with
+ * hugepages, all elements are IOVA-contiguous. however, we can only
+ * make these assumptions about internal memory - externally allocated
+ * segments have to be checked.
+ */
+ if (!elem->msl->external &&
+ (rte_eal_iova_mode() == RTE_IOVA_VA ||
+ (internal_config.legacy_mem &&
+ rte_eal_has_hugepages())))
+ return RTE_PTR_DIFF(data_end, contig_seg_start);
+
+ cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz);
+ ms = rte_mem_virt2memseg(cur_page, elem->msl);
+
+ /* do first iteration outside the loop */
+ page_end = RTE_PTR_ADD(cur_page, page_sz);
+ cur_seg_end = RTE_MIN(page_end, data_end);
+ cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start) -
+ MALLOC_ELEM_TRAILER_LEN;
+ max = cur;
+ expected_iova = ms->iova + page_sz;
+ /* memsegs are contiguous in memory */
+ ms++;
+
+ cur_page = RTE_PTR_ADD(cur_page, page_sz);
+
+ while (cur_page < data_end) {
+ page_end = RTE_PTR_ADD(cur_page, page_sz);
+ cur_seg_end = RTE_MIN(page_end, data_end);
+
+ /* reset start of contiguous segment if unexpected iova */
+ if (ms->iova != expected_iova) {
+ /* next contiguous segment must start at specified
+ * alignment.
+ */
+ contig_seg_start = RTE_PTR_ALIGN(cur_page, align);
+ /* new segment start may be on a different page, so find
+ * the page and skip to next iteration to make sure
+ * we're not blowing past data end.
+ */
+ ms = rte_mem_virt2memseg(contig_seg_start, elem->msl);
+ cur_page = ms->addr;
+ /* don't trigger another recalculation */
+ expected_iova = ms->iova;
+ continue;
+ }
+ /* cur_seg_end ends on a page boundary or on data end. if we're
+ * looking at data end, then malloc trailer is already included
+ * in the calculations. if we're looking at page end, then we
+ * know there's more data past this page and thus there's space
+ * for malloc element trailer, so don't count it here.
+ */
+ cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start);
+ /* update max if cur value is bigger */
+ if (cur > max)
+ max = cur;
+
+ /* move to next page */
+ cur_page = page_end;
+ expected_iova = ms->iova + page_sz;
+ /* memsegs are contiguous in memory */
+ ms++;
+ }
+
+ return max;
+}
+
+/*
+ * Initialize a general malloc_elem header structure
+ */
+void
+malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap,
+ struct rte_memseg_list *msl, size_t size,
+ struct malloc_elem *orig_elem, size_t orig_size)
+{
+ elem->heap = heap;
+ elem->msl = msl;
+ elem->prev = NULL;
+ elem->next = NULL;
+ memset(&elem->free_list, 0, sizeof(elem->free_list));
+ elem->state = ELEM_FREE;
+ elem->size = size;
+ elem->pad = 0;
+ elem->orig_elem = orig_elem;
+ elem->orig_size = orig_size;
+ set_header(elem);
+ set_trailer(elem);
+}
+
+void
+malloc_elem_insert(struct malloc_elem *elem)
+{
+ struct malloc_elem *prev_elem, *next_elem;
+ struct malloc_heap *heap = elem->heap;
+
+ /* first and last elements must be both NULL or both non-NULL */
+ if ((heap->first == NULL) != (heap->last == NULL)) {
+ RTE_LOG(ERR, EAL, "Heap is probably corrupt\n");
+ return;
+ }
+
+ if (heap->first == NULL && heap->last == NULL) {
+ /* if empty heap */
+ heap->first = elem;
+ heap->last = elem;
+ prev_elem = NULL;
+ next_elem = NULL;
+ } else if (elem < heap->first) {
+ /* if lower than start */
+ prev_elem = NULL;
+ next_elem = heap->first;
+ heap->first = elem;
+ } else if (elem > heap->last) {
+ /* if higher than end */
+ prev_elem = heap->last;
+ next_elem = NULL;
+ heap->last = elem;
+ } else {
+ /* the new memory is somewhere between start and end */
+ uint64_t dist_from_start, dist_from_end;
+
+ dist_from_end = RTE_PTR_DIFF(heap->last, elem);
+ dist_from_start = RTE_PTR_DIFF(elem, heap->first);
+
+ /* check which is closer, and find closest list entries */
+ if (dist_from_start < dist_from_end) {
+ prev_elem = heap->first;
+ while (prev_elem->next < elem)
+ prev_elem = prev_elem->next;
+ next_elem = prev_elem->next;
+ } else {
+ next_elem = heap->last;
+ while (next_elem->prev > elem)
+ next_elem = next_elem->prev;
+ prev_elem = next_elem->prev;
+ }
+ }
+
+ /* insert new element */
+ elem->prev = prev_elem;
+ elem->next = next_elem;
+ if (prev_elem)
+ prev_elem->next = elem;
+ if (next_elem)
+ next_elem->prev = elem;
+}
+
+/*
+ * Attempt to find enough physically contiguous memory in this block to store
+ * our data. Assume that element has at least enough space to fit in the data,
+ * so we just check the page addresses.
+ */
+static bool
+elem_check_phys_contig(const struct rte_memseg_list *msl,
+ void *start, size_t size)
+{
+ return eal_memalloc_is_contig(msl, start, size);
+}
+
+/*
+ * calculate the starting point of where data of the requested size
+ * and alignment would fit in the current element. If the data doesn't
+ * fit, return NULL.
+ */
+static void *
+elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
+ size_t bound, bool contig)
+{
+ size_t elem_size = elem->size;
+
+ /*
+ * we're allocating from the end, so adjust the size of element by
+ * alignment size.
+ */
+ while (elem_size >= size) {
+ const size_t bmask = ~(bound - 1);
+ uintptr_t end_pt = (uintptr_t)elem +
+ elem_size - MALLOC_ELEM_TRAILER_LEN;
+ uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
+ align);
+ uintptr_t new_elem_start;
+
+ /* check boundary */
+ if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
+ end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
+ new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
+ align);
+ end_pt = new_data_start + size;
+
+ if (((end_pt - 1) & bmask) != (new_data_start & bmask))
+ return NULL;
+ }
+
+ new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;
+
+ /* if the new start point is before the exist start,
+ * it won't fit
+ */
+ if (new_elem_start < (uintptr_t)elem)
+ return NULL;
+
+ if (contig) {
+ size_t new_data_size = end_pt - new_data_start;
+
+ /*
+ * if physical contiguousness was requested and we
+ * couldn't fit all data into one physically contiguous
+ * block, try again with lower addresses.
+ */
+ if (!elem_check_phys_contig(elem->msl,
+ (void *)new_data_start,
+ new_data_size)) {
+ elem_size -= align;
+ continue;
+ }
+ }
+ return (void *)new_elem_start;
+ }
+ return NULL;
+}
+
+/*
+ * use elem_start_pt to determine if we get meet the size and
+ * alignment request from the current element
+ */
+int
+malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align,
+ size_t bound, bool contig)
+{
+ return elem_start_pt(elem, size, align, bound, contig) != NULL;
+}
+
+/*
+ * split an existing element into two smaller elements at the given
+ * split_pt parameter.
+ */
+static void
+split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
+{
+ struct malloc_elem *next_elem = elem->next;
+ const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
+ const size_t new_elem_size = elem->size - old_elem_size;
+
+ malloc_elem_init(split_pt, elem->heap, elem->msl, new_elem_size,
+ elem->orig_elem, elem->orig_size);
+ split_pt->prev = elem;
+ split_pt->next = next_elem;
+ if (next_elem)
+ next_elem->prev = split_pt;
+ else
+ elem->heap->last = split_pt;
+ elem->next = split_pt;
+ elem->size = old_elem_size;
+ set_trailer(elem);
+ if (elem->pad) {
+ /* Update inner padding inner element size. */
+ elem = RTE_PTR_ADD(elem, elem->pad);
+ elem->size = old_elem_size - elem->pad;
+ }
+}
+
+/*
+ * our malloc heap is a doubly linked list, so doubly remove our element.
+ */
+static void __rte_unused
+remove_elem(struct malloc_elem *elem)
+{
+ struct malloc_elem *next, *prev;
+ next = elem->next;
+ prev = elem->prev;
+
+ if (next)
+ next->prev = prev;
+ else
+ elem->heap->last = prev;
+ if (prev)
+ prev->next = next;
+ else
+ elem->heap->first = next;
+
+ elem->prev = NULL;
+ elem->next = NULL;
+}
+
+static int
+next_elem_is_adjacent(struct malloc_elem *elem)
+{
+ return elem->next == RTE_PTR_ADD(elem, elem->size) &&
+ elem->next->msl == elem->msl &&
+ (!internal_config.match_allocations ||
+ elem->orig_elem == elem->next->orig_elem);
+}
+
+static int
+prev_elem_is_adjacent(struct malloc_elem *elem)
+{
+ return elem == RTE_PTR_ADD(elem->prev, elem->prev->size) &&
+ elem->prev->msl == elem->msl &&
+ (!internal_config.match_allocations ||
+ elem->orig_elem == elem->prev->orig_elem);
+}
+
+/*
+ * Given an element size, compute its freelist index.
+ * We free an element into the freelist containing similarly-sized elements.
+ * We try to allocate elements starting with the freelist containing
+ * similarly-sized elements, and if necessary, we search freelists
+ * containing larger elements.
+ *
+ * Example element size ranges for a heap with five free lists:
+ * heap->free_head[0] - (0 , 2^8]
+ * heap->free_head[1] - (2^8 , 2^10]
+ * heap->free_head[2] - (2^10 ,2^12]
+ * heap->free_head[3] - (2^12, 2^14]
+ * heap->free_head[4] - (2^14, MAX_SIZE]
+ */
+size_t
+malloc_elem_free_list_index(size_t size)
+{
+#define MALLOC_MINSIZE_LOG2 8
+#define MALLOC_LOG2_INCREMENT 2
+
+ size_t log2;
+ size_t index;
+
+ if (size <= (1UL << MALLOC_MINSIZE_LOG2))
+ return 0;
+
+ /* Find next power of 2 >= size. */
+ log2 = sizeof(size) * 8 - __builtin_clzl(size-1);
+
+ /* Compute freelist index, based on log2(size). */
+ index = (log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) /
+ MALLOC_LOG2_INCREMENT;
+
+ return index <= RTE_HEAP_NUM_FREELISTS-1?
+ index: RTE_HEAP_NUM_FREELISTS-1;
+}
+
+/*
+ * Add the specified element to its heap's free list.
+ */
+void
+malloc_elem_free_list_insert(struct malloc_elem *elem)
+{
+ size_t idx;
+
+ idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN);
+ elem->state = ELEM_FREE;
+ LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list);
+}
+
+/*
+ * Remove the specified element from its heap's free list.
+ */
+void
+malloc_elem_free_list_remove(struct malloc_elem *elem)
+{
+ LIST_REMOVE(elem, free_list);
+}
+
+/*
+ * reserve a block of data in an existing malloc_elem. If the malloc_elem
+ * is much larger than the data block requested, we split the element in two.
+ * This function is only called from malloc_heap_alloc so parameter checking
+ * is not done here, as it's done there previously.
+ */
+struct malloc_elem *
+malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
+ size_t bound, bool contig)
+{
+ struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound,
+ contig);
+ const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
+ const size_t trailer_size = elem->size - old_elem_size - size -
+ MALLOC_ELEM_OVERHEAD;
+
+ malloc_elem_free_list_remove(elem);
+
+ if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* split it, too much free space after elem */
+ struct malloc_elem *new_free_elem =
+ RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD);
+
+ split_elem(elem, new_free_elem);
+ malloc_elem_free_list_insert(new_free_elem);
+
+ if (elem == elem->heap->last)
+ elem->heap->last = new_free_elem;
+ }
+
+ if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* don't split it, pad the element instead */
+ elem->state = ELEM_BUSY;
+ elem->pad = old_elem_size;
+
+ /* put a dummy header in padding, to point to real element header */
+ if (elem->pad > 0) { /* pad will be at least 64-bytes, as everything
+ * is cache-line aligned */
+ new_elem->pad = elem->pad;
+ new_elem->state = ELEM_PAD;
+ new_elem->size = elem->size - elem->pad;
+ set_header(new_elem);
+ }
+
+ return new_elem;
+ }
+
+ /* we are going to split the element in two. The original element
+ * remains free, and the new element is the one allocated.
+ * Re-insert original element, in case its new size makes it
+ * belong on a different list.
+ */
+ split_elem(elem, new_elem);
+ new_elem->state = ELEM_BUSY;
+ malloc_elem_free_list_insert(elem);
+
+ return new_elem;
+}
+
+/*
+ * join two struct malloc_elem together. elem1 and elem2 must
+ * be contiguous in memory.
+ */
+static inline void
+join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
+{
+ struct malloc_elem *next = elem2->next;
+ elem1->size += elem2->size;
+ if (next)
+ next->prev = elem1;
+ else
+ elem1->heap->last = elem1;
+ elem1->next = next;
+ if (elem1->pad) {
+ struct malloc_elem *inner = RTE_PTR_ADD(elem1, elem1->pad);
+ inner->size = elem1->size - elem1->pad;
+ }
+}
+
+struct malloc_elem *
+malloc_elem_join_adjacent_free(struct malloc_elem *elem)
+{
+ /*
+ * check if next element exists, is adjacent and is free, if so join
+ * with it, need to remove from free list.
+ */
+ if (elem->next != NULL && elem->next->state == ELEM_FREE &&
+ next_elem_is_adjacent(elem)) {
+ void *erase;
+ size_t erase_len;
+
+ /* we will want to erase the trailer and header */
+ erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN);
+ erase_len = MALLOC_ELEM_OVERHEAD + elem->next->pad;
+
+ /* remove from free list, join to this one */
+ malloc_elem_free_list_remove(elem->next);
+ join_elem(elem, elem->next);
+
+ /* erase header, trailer and pad */
+ memset(erase, MALLOC_POISON, erase_len);
+ }
+
+ /*
+ * check if prev element exists, is adjacent and is free, if so join
+ * with it, need to remove from free list.
+ */
+ if (elem->prev != NULL && elem->prev->state == ELEM_FREE &&
+ prev_elem_is_adjacent(elem)) {
+ struct malloc_elem *new_elem;
+ void *erase;
+ size_t erase_len;
+
+ /* we will want to erase trailer and header */
+ erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN);
+ erase_len = MALLOC_ELEM_OVERHEAD + elem->pad;
+
+ /* remove from free list, join to this one */
+ malloc_elem_free_list_remove(elem->prev);
+
+ new_elem = elem->prev;
+ join_elem(new_elem, elem);
+
+ /* erase header, trailer and pad */
+ memset(erase, MALLOC_POISON, erase_len);
+
+ elem = new_elem;
+ }
+
+ return elem;
+}
+
+/*
+ * free a malloc_elem block by adding it to the free list. If the
+ * blocks either immediately before or immediately after newly freed block
+ * are also free, the blocks are merged together.
+ */
+struct malloc_elem *
+malloc_elem_free(struct malloc_elem *elem)
+{
+ void *ptr;
+ size_t data_len;
+
+ ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
+ data_len = elem->size - MALLOC_ELEM_OVERHEAD;
+
+ elem = malloc_elem_join_adjacent_free(elem);
+
+ malloc_elem_free_list_insert(elem);
+
+ elem->pad = 0;
+
+ /* decrease heap's count of allocated elements */
+ elem->heap->alloc_count--;
+
+ /* poison memory */
+ memset(ptr, MALLOC_POISON, data_len);
+
+ return elem;
+}
+
+/* assume all checks were already done */
+void
+malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len)
+{
+ struct malloc_elem *hide_start, *hide_end, *prev, *next;
+ size_t len_before, len_after;
+
+ hide_start = start;
+ hide_end = RTE_PTR_ADD(start, len);
+
+ prev = elem->prev;
+ next = elem->next;
+
+ /* we cannot do anything with non-adjacent elements */
+ if (next && next_elem_is_adjacent(elem)) {
+ len_after = RTE_PTR_DIFF(next, hide_end);
+ if (len_after >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* split after */
+ split_elem(elem, hide_end);
+
+ malloc_elem_free_list_insert(hide_end);
+ } else if (len_after > 0) {
+ RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
+ return;
+ }
+ }
+
+ /* we cannot do anything with non-adjacent elements */
+ if (prev && prev_elem_is_adjacent(elem)) {
+ len_before = RTE_PTR_DIFF(hide_start, elem);
+ if (len_before >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* split before */
+ split_elem(elem, hide_start);
+
+ prev = elem;
+ elem = hide_start;
+
+ malloc_elem_free_list_insert(prev);
+ } else if (len_before > 0) {
+ RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
+ return;
+ }
+ }
+
+ remove_elem(elem);
+}
+
+/*
+ * attempt to resize a malloc_elem by expanding into any free space
+ * immediately after it in memory.
+ */
+int
+malloc_elem_resize(struct malloc_elem *elem, size_t size)
+{
+ const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD;
+
+ /* if we request a smaller size, then always return ok */
+ if (elem->size >= new_size)
+ return 0;
+
+ /* check if there is a next element, it's free and adjacent */
+ if (!elem->next || elem->next->state != ELEM_FREE ||
+ !next_elem_is_adjacent(elem))
+ return -1;
+ if (elem->size + elem->next->size < new_size)
+ return -1;
+
+ /* we now know the element fits, so remove from free list,
+ * join the two
+ */
+ malloc_elem_free_list_remove(elem->next);
+ join_elem(elem, elem->next);
+
+ if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
+ /* now we have a big block together. Lets cut it down a bit, by splitting */
+ struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size);
+ split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE);
+ split_elem(elem, split_pt);
+ malloc_elem_free_list_insert(split_pt);
+ }
+ return 0;
+}
+
+static inline const char *
+elem_state_to_str(enum elem_state state)
+{
+ switch (state) {
+ case ELEM_PAD:
+ return "PAD";
+ case ELEM_BUSY:
+ return "BUSY";
+ case ELEM_FREE:
+ return "FREE";
+ }
+ return "ERROR";
+}
+
+void
+malloc_elem_dump(const struct malloc_elem *elem, FILE *f)
+{
+ fprintf(f, "Malloc element at %p (%s)\n", elem,
+ elem_state_to_str(elem->state));
+ fprintf(f, " len: 0x%zx pad: 0x%" PRIx32 "\n", elem->size, elem->pad);
+ fprintf(f, " prev: %p next: %p\n", elem->prev, elem->next);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.h b/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.h
new file mode 100644
index 000000000..a1e5f7f02
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_elem.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef MALLOC_ELEM_H_
+#define MALLOC_ELEM_H_
+
+#include <stdbool.h>
+
+#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE)
+
+/* dummy definition of struct so we can use pointers to it in malloc_elem struct */
+struct malloc_heap;
+
+enum elem_state {
+ ELEM_FREE = 0,
+ ELEM_BUSY,
+ ELEM_PAD /* element is a padding-only header */
+};
+
+struct malloc_elem {
+ struct malloc_heap *heap;
+ struct malloc_elem *volatile prev;
+ /**< points to prev elem in memseg */
+ struct malloc_elem *volatile next;
+ /**< points to next elem in memseg */
+ LIST_ENTRY(malloc_elem) free_list;
+ /**< list of free elements in heap */
+ struct rte_memseg_list *msl;
+ volatile enum elem_state state;
+ uint32_t pad;
+ size_t size;
+ struct malloc_elem *orig_elem;
+ size_t orig_size;
+#ifdef RTE_MALLOC_DEBUG
+ uint64_t header_cookie; /* Cookie marking start of data */
+ /* trailer cookie at start + size */
+#endif
+} __rte_cache_aligned;
+
+#ifndef RTE_MALLOC_DEBUG
+static const unsigned MALLOC_ELEM_TRAILER_LEN = 0;
+
+/* dummy function - just check if pointer is non-null */
+static inline int
+malloc_elem_cookies_ok(const struct malloc_elem *elem){ return elem != NULL; }
+
+/* dummy function - no header if malloc_debug is not enabled */
+static inline void
+set_header(struct malloc_elem *elem __rte_unused){ }
+
+/* dummy function - no trailer if malloc_debug is not enabled */
+static inline void
+set_trailer(struct malloc_elem *elem __rte_unused){ }
+
+
+#else
+static const unsigned MALLOC_ELEM_TRAILER_LEN = RTE_CACHE_LINE_SIZE;
+
+#define MALLOC_HEADER_COOKIE 0xbadbadbadadd2e55ULL /**< Header cookie. */
+#define MALLOC_TRAILER_COOKIE 0xadd2e55badbadbadULL /**< Trailer cookie.*/
+
+/* define macros to make referencing the header and trailer cookies easier */
+#define MALLOC_ELEM_TRAILER(elem) (*((uint64_t*)RTE_PTR_ADD(elem, \
+ elem->size - MALLOC_ELEM_TRAILER_LEN)))
+#define MALLOC_ELEM_HEADER(elem) (elem->header_cookie)
+
+static inline void
+set_header(struct malloc_elem *elem)
+{
+ if (elem != NULL)
+ MALLOC_ELEM_HEADER(elem) = MALLOC_HEADER_COOKIE;
+}
+
+static inline void
+set_trailer(struct malloc_elem *elem)
+{
+ if (elem != NULL)
+ MALLOC_ELEM_TRAILER(elem) = MALLOC_TRAILER_COOKIE;
+}
+
+/* check that the header and trailer cookies are set correctly */
+static inline int
+malloc_elem_cookies_ok(const struct malloc_elem *elem)
+{
+ return elem != NULL &&
+ MALLOC_ELEM_HEADER(elem) == MALLOC_HEADER_COOKIE &&
+ MALLOC_ELEM_TRAILER(elem) == MALLOC_TRAILER_COOKIE;
+}
+
+#endif
+
+static const unsigned MALLOC_ELEM_HEADER_LEN = sizeof(struct malloc_elem);
+#define MALLOC_ELEM_OVERHEAD (MALLOC_ELEM_HEADER_LEN + MALLOC_ELEM_TRAILER_LEN)
+
+/*
+ * Given a pointer to the start of a memory block returned by malloc, get
+ * the actual malloc_elem header for that block.
+ */
+static inline struct malloc_elem *
+malloc_elem_from_data(const void *data)
+{
+ if (data == NULL)
+ return NULL;
+
+ struct malloc_elem *elem = RTE_PTR_SUB(data, MALLOC_ELEM_HEADER_LEN);
+ if (!malloc_elem_cookies_ok(elem))
+ return NULL;
+ return elem->state != ELEM_PAD ? elem: RTE_PTR_SUB(elem, elem->pad);
+}
+
+/*
+ * initialise a malloc_elem header
+ */
+void
+malloc_elem_init(struct malloc_elem *elem,
+ struct malloc_heap *heap,
+ struct rte_memseg_list *msl,
+ size_t size,
+ struct malloc_elem *orig_elem,
+ size_t orig_size);
+
+void
+malloc_elem_insert(struct malloc_elem *elem);
+
+/*
+ * return true if the current malloc_elem can hold a block of data
+ * of the requested size and with the requested alignment
+ */
+int
+malloc_elem_can_hold(struct malloc_elem *elem, size_t size,
+ unsigned int align, size_t bound, bool contig);
+
+/*
+ * reserve a block of data in an existing malloc_elem. If the malloc_elem
+ * is much larger than the data block requested, we split the element in two.
+ */
+struct malloc_elem *
+malloc_elem_alloc(struct malloc_elem *elem, size_t size,
+ unsigned int align, size_t bound, bool contig);
+
+/*
+ * free a malloc_elem block by adding it to the free list. If the
+ * blocks either immediately before or immediately after newly freed block
+ * are also free, the blocks are merged together.
+ */
+struct malloc_elem *
+malloc_elem_free(struct malloc_elem *elem);
+
+struct malloc_elem *
+malloc_elem_join_adjacent_free(struct malloc_elem *elem);
+
+/*
+ * attempt to resize a malloc_elem by expanding into any free space
+ * immediately after it in memory.
+ */
+int
+malloc_elem_resize(struct malloc_elem *elem, size_t size);
+
+void
+malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len);
+
+void
+malloc_elem_free_list_remove(struct malloc_elem *elem);
+
+/*
+ * dump contents of malloc elem to a file.
+ */
+void
+malloc_elem_dump(const struct malloc_elem *elem, FILE *f);
+
+/*
+ * Given an element size, compute its freelist index.
+ */
+size_t
+malloc_elem_free_list_index(size_t size);
+
+/*
+ * Add element to its heap's free list.
+ */
+void
+malloc_elem_free_list_insert(struct malloc_elem *elem);
+
+/*
+ * Find biggest IOVA-contiguous zone within an element with specified alignment.
+ */
+size_t
+malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align);
+
+#endif /* MALLOC_ELEM_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.c b/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.c
new file mode 100644
index 000000000..bd5065698
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.c
@@ -0,0 +1,1367 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_memory.h>
+#include <rte_errno.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_string_fns.h>
+#include <rte_spinlock.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_atomic.h>
+#include <rte_fbarray.h>
+
+#include "eal_internal_cfg.h"
+#include "eal_memalloc.h"
+#include "eal_memcfg.h"
+#include "eal_private.h"
+#include "malloc_elem.h"
+#include "malloc_heap.h"
+#include "malloc_mp.h"
+
+/* start external socket ID's at a very high number */
+#define CONST_MAX(a, b) (a > b ? a : b) /* RTE_MAX is not a constant */
+#define EXTERNAL_HEAP_MIN_SOCKET_ID (CONST_MAX((1 << 8), RTE_MAX_NUMA_NODES))
+
+static unsigned
+check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
+{
+ unsigned check_flag = 0;
+
+ if (!(flags & ~RTE_MEMZONE_SIZE_HINT_ONLY))
+ return 1;
+
+ switch (hugepage_sz) {
+ case RTE_PGSIZE_256K:
+ check_flag = RTE_MEMZONE_256KB;
+ break;
+ case RTE_PGSIZE_2M:
+ check_flag = RTE_MEMZONE_2MB;
+ break;
+ case RTE_PGSIZE_16M:
+ check_flag = RTE_MEMZONE_16MB;
+ break;
+ case RTE_PGSIZE_256M:
+ check_flag = RTE_MEMZONE_256MB;
+ break;
+ case RTE_PGSIZE_512M:
+ check_flag = RTE_MEMZONE_512MB;
+ break;
+ case RTE_PGSIZE_1G:
+ check_flag = RTE_MEMZONE_1GB;
+ break;
+ case RTE_PGSIZE_4G:
+ check_flag = RTE_MEMZONE_4GB;
+ break;
+ case RTE_PGSIZE_16G:
+ check_flag = RTE_MEMZONE_16GB;
+ }
+
+ return check_flag & flags;
+}
+
+int
+malloc_socket_to_heap_id(unsigned int socket_id)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i;
+
+ for (i = 0; i < RTE_MAX_HEAPS; i++) {
+ struct malloc_heap *heap = &mcfg->malloc_heaps[i];
+
+ if (heap->socket_id == socket_id)
+ return i;
+ }
+ return -1;
+}
+
+/*
+ * Expand the heap with a memory area.
+ */
+static struct malloc_elem *
+malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl,
+ void *start, size_t len)
+{
+ struct malloc_elem *elem = start;
+
+ malloc_elem_init(elem, heap, msl, len, elem, len);
+
+ malloc_elem_insert(elem);
+
+ elem = malloc_elem_join_adjacent_free(elem);
+
+ malloc_elem_free_list_insert(elem);
+
+ return elem;
+}
+
+static int
+malloc_add_seg(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, size_t len, void *arg __rte_unused)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *found_msl;
+ struct malloc_heap *heap;
+ int msl_idx, heap_idx;
+
+ if (msl->external)
+ return 0;
+
+ heap_idx = malloc_socket_to_heap_id(msl->socket_id);
+ if (heap_idx < 0) {
+ RTE_LOG(ERR, EAL, "Memseg list has invalid socket id\n");
+ return -1;
+ }
+ heap = &mcfg->malloc_heaps[heap_idx];
+
+ /* msl is const, so find it */
+ msl_idx = msl - mcfg->memsegs;
+
+ if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
+ return -1;
+
+ found_msl = &mcfg->memsegs[msl_idx];
+
+ malloc_heap_add_memory(heap, found_msl, ms->addr, len);
+
+ heap->total_size += len;
+
+ RTE_LOG(DEBUG, EAL, "Added %zuM to heap on socket %i\n", len >> 20,
+ msl->socket_id);
+ return 0;
+}
+
+/*
+ * Iterates through the freelist for a heap to find a free element
+ * which can store data of the required size and with the requested alignment.
+ * If size is 0, find the biggest available elem.
+ * Returns null on failure, or pointer to element on success.
+ */
+static struct malloc_elem *
+find_suitable_element(struct malloc_heap *heap, size_t size,
+ unsigned int flags, size_t align, size_t bound, bool contig)
+{
+ size_t idx;
+ struct malloc_elem *elem, *alt_elem = NULL;
+
+ for (idx = malloc_elem_free_list_index(size);
+ idx < RTE_HEAP_NUM_FREELISTS; idx++) {
+ for (elem = LIST_FIRST(&heap->free_head[idx]);
+ !!elem; elem = LIST_NEXT(elem, free_list)) {
+ if (malloc_elem_can_hold(elem, size, align, bound,
+ contig)) {
+ if (check_hugepage_sz(flags,
+ elem->msl->page_sz))
+ return elem;
+ if (alt_elem == NULL)
+ alt_elem = elem;
+ }
+ }
+ }
+
+ if ((alt_elem != NULL) && (flags & RTE_MEMZONE_SIZE_HINT_ONLY))
+ return alt_elem;
+
+ return NULL;
+}
+
+/*
+ * Iterates through the freelist for a heap to find a free element with the
+ * biggest size and requested alignment. Will also set size to whatever element
+ * size that was found.
+ * Returns null on failure, or pointer to element on success.
+ */
+static struct malloc_elem *
+find_biggest_element(struct malloc_heap *heap, size_t *size,
+ unsigned int flags, size_t align, bool contig)
+{
+ struct malloc_elem *elem, *max_elem = NULL;
+ size_t idx, max_size = 0;
+
+ for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
+ for (elem = LIST_FIRST(&heap->free_head[idx]);
+ !!elem; elem = LIST_NEXT(elem, free_list)) {
+ size_t cur_size;
+ if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) == 0 &&
+ !check_hugepage_sz(flags,
+ elem->msl->page_sz))
+ continue;
+ if (contig) {
+ cur_size =
+ malloc_elem_find_max_iova_contig(elem,
+ align);
+ } else {
+ void *data_start = RTE_PTR_ADD(elem,
+ MALLOC_ELEM_HEADER_LEN);
+ void *data_end = RTE_PTR_ADD(elem, elem->size -
+ MALLOC_ELEM_TRAILER_LEN);
+ void *aligned = RTE_PTR_ALIGN_CEIL(data_start,
+ align);
+ /* check if aligned data start is beyond end */
+ if (aligned >= data_end)
+ continue;
+ cur_size = RTE_PTR_DIFF(data_end, aligned);
+ }
+ if (cur_size > max_size) {
+ max_size = cur_size;
+ max_elem = elem;
+ }
+ }
+ }
+
+ *size = max_size;
+ return max_elem;
+}
+
+/*
+ * Main function to allocate a block of memory from the heap.
+ * It locks the free list, scans it, and adds a new memseg if the
+ * scan fails. Once the new memseg is added, it re-scans and should return
+ * the new element after releasing the lock.
+ */
+static void *
+heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size,
+ unsigned int flags, size_t align, size_t bound, bool contig)
+{
+ struct malloc_elem *elem;
+
+ size = RTE_CACHE_LINE_ROUNDUP(size);
+ align = RTE_CACHE_LINE_ROUNDUP(align);
+
+ /* roundup might cause an overflow */
+ if (size == 0)
+ return NULL;
+ elem = find_suitable_element(heap, size, flags, align, bound, contig);
+ if (elem != NULL) {
+ elem = malloc_elem_alloc(elem, size, align, bound, contig);
+
+ /* increase heap's count of allocated elements */
+ heap->alloc_count++;
+ }
+
+ return elem == NULL ? NULL : (void *)(&elem[1]);
+}
+
+static void *
+heap_alloc_biggest(struct malloc_heap *heap, const char *type __rte_unused,
+ unsigned int flags, size_t align, bool contig)
+{
+ struct malloc_elem *elem;
+ size_t size;
+
+ align = RTE_CACHE_LINE_ROUNDUP(align);
+
+ elem = find_biggest_element(heap, &size, flags, align, contig);
+ if (elem != NULL) {
+ elem = malloc_elem_alloc(elem, size, align, 0, contig);
+
+ /* increase heap's count of allocated elements */
+ heap->alloc_count++;
+ }
+
+ return elem == NULL ? NULL : (void *)(&elem[1]);
+}
+
+/* this function is exposed in malloc_mp.h */
+void
+rollback_expand_heap(struct rte_memseg **ms, int n_segs,
+ struct malloc_elem *elem, void *map_addr, size_t map_len)
+{
+ if (elem != NULL) {
+ malloc_elem_free_list_remove(elem);
+ malloc_elem_hide_region(elem, map_addr, map_len);
+ }
+
+ eal_memalloc_free_seg_bulk(ms, n_segs);
+}
+
+/* this function is exposed in malloc_mp.h */
+struct malloc_elem *
+alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
+ int socket, unsigned int flags, size_t align, size_t bound,
+ bool contig, struct rte_memseg **ms, int n_segs)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl;
+ struct malloc_elem *elem = NULL;
+ size_t alloc_sz;
+ int allocd_pages;
+ void *ret, *map_addr;
+
+ alloc_sz = (size_t)pg_sz * n_segs;
+
+ /* first, check if we're allowed to allocate this memory */
+ if (eal_memalloc_mem_alloc_validate(socket,
+ heap->total_size + alloc_sz) < 0) {
+ RTE_LOG(DEBUG, EAL, "User has disallowed allocation\n");
+ return NULL;
+ }
+
+ allocd_pages = eal_memalloc_alloc_seg_bulk(ms, n_segs, pg_sz,
+ socket, true);
+
+ /* make sure we've allocated our pages... */
+ if (allocd_pages < 0)
+ return NULL;
+
+ map_addr = ms[0]->addr;
+ msl = rte_mem_virt2memseg_list(map_addr);
+
+ /* check if we wanted contiguous memory but didn't get it */
+ if (contig && !eal_memalloc_is_contig(msl, map_addr, alloc_sz)) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't allocate physically contiguous space\n",
+ __func__);
+ goto fail;
+ }
+
+ /*
+ * Once we have all the memseg lists configured, if there is a dma mask
+ * set, check iova addresses are not out of range. Otherwise the device
+ * setting the dma mask could have problems with the mapped memory.
+ *
+ * There are two situations when this can happen:
+ * 1) memory initialization
+ * 2) dynamic memory allocation
+ *
+ * For 1), an error when checking dma mask implies app can not be
+ * executed. For 2) implies the new memory can not be added.
+ */
+ if (mcfg->dma_maskbits &&
+ rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
+ /*
+ * Currently this can only happen if IOMMU is enabled
+ * and the address width supported by the IOMMU hw is
+ * not enough for using the memory mapped IOVAs.
+ *
+ * If IOVA is VA, advice to try with '--iova-mode pa'
+ * which could solve some situations when IOVA VA is not
+ * really needed.
+ */
+ RTE_LOG(ERR, EAL,
+ "%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask\n",
+ __func__);
+
+ /*
+ * If IOVA is VA and it is possible to run with IOVA PA,
+ * because user is root, give and advice for solving the
+ * problem.
+ */
+ if ((rte_eal_iova_mode() == RTE_IOVA_VA) &&
+ rte_eal_using_phys_addrs())
+ RTE_LOG(ERR, EAL,
+ "%s(): Please try initializing EAL with --iova-mode=pa parameter\n",
+ __func__);
+ goto fail;
+ }
+
+ /* add newly minted memsegs to malloc heap */
+ elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz);
+
+ /* try once more, as now we have allocated new memory */
+ ret = find_suitable_element(heap, elt_size, flags, align, bound,
+ contig);
+
+ if (ret == NULL)
+ goto fail;
+
+ return elem;
+
+fail:
+ rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);
+ return NULL;
+}
+
+static int
+try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
+ size_t elt_size, int socket, unsigned int flags, size_t align,
+ size_t bound, bool contig)
+{
+ struct malloc_elem *elem;
+ struct rte_memseg **ms;
+ void *map_addr;
+ size_t alloc_sz;
+ int n_segs;
+ bool callback_triggered = false;
+
+ alloc_sz = RTE_ALIGN_CEIL(align + elt_size +
+ MALLOC_ELEM_TRAILER_LEN, pg_sz);
+ n_segs = alloc_sz / pg_sz;
+
+ /* we can't know in advance how many pages we'll need, so we malloc */
+ ms = malloc(sizeof(*ms) * n_segs);
+ if (ms == NULL)
+ return -1;
+ memset(ms, 0, sizeof(*ms) * n_segs);
+
+ elem = alloc_pages_on_heap(heap, pg_sz, elt_size, socket, flags, align,
+ bound, contig, ms, n_segs);
+
+ if (elem == NULL)
+ goto free_ms;
+
+ map_addr = ms[0]->addr;
+
+ /* notify user about changes in memory map */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz);
+
+ /* notify other processes that this has happened */
+ if (request_sync()) {
+ /* we couldn't ensure all processes have mapped memory,
+ * so free it back and notify everyone that it's been
+ * freed back.
+ *
+ * technically, we could've avoided adding memory addresses to
+ * the map, but that would've led to inconsistent behavior
+ * between primary and secondary processes, as those get
+ * callbacks during sync. therefore, force primary process to
+ * do alloc-and-rollback syncs as well.
+ */
+ callback_triggered = true;
+ goto free_elem;
+ }
+ heap->total_size += alloc_sz;
+
+ RTE_LOG(DEBUG, EAL, "Heap on socket %d was expanded by %zdMB\n",
+ socket, alloc_sz >> 20ULL);
+
+ free(ms);
+
+ return 0;
+
+free_elem:
+ if (callback_triggered)
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ map_addr, alloc_sz);
+
+ rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);
+
+ request_sync();
+free_ms:
+ free(ms);
+
+ return -1;
+}
+
+static int
+try_expand_heap_secondary(struct malloc_heap *heap, uint64_t pg_sz,
+ size_t elt_size, int socket, unsigned int flags, size_t align,
+ size_t bound, bool contig)
+{
+ struct malloc_mp_req req;
+ int req_result;
+
+ memset(&req, 0, sizeof(req));
+
+ req.t = REQ_TYPE_ALLOC;
+ req.alloc_req.align = align;
+ req.alloc_req.bound = bound;
+ req.alloc_req.contig = contig;
+ req.alloc_req.flags = flags;
+ req.alloc_req.elt_size = elt_size;
+ req.alloc_req.page_sz = pg_sz;
+ req.alloc_req.socket = socket;
+ req.alloc_req.heap = heap; /* it's in shared memory */
+
+ req_result = request_to_primary(&req);
+
+ if (req_result != 0)
+ return -1;
+
+ if (req.result != REQ_RESULT_SUCCESS)
+ return -1;
+
+ return 0;
+}
+
+static int
+try_expand_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
+ int socket, unsigned int flags, size_t align, size_t bound,
+ bool contig)
+{
+ int ret;
+
+ rte_mcfg_mem_write_lock();
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ ret = try_expand_heap_primary(heap, pg_sz, elt_size, socket,
+ flags, align, bound, contig);
+ } else {
+ ret = try_expand_heap_secondary(heap, pg_sz, elt_size, socket,
+ flags, align, bound, contig);
+ }
+
+ rte_mcfg_mem_write_unlock();
+ return ret;
+}
+
+static int
+compare_pagesz(const void *a, const void *b)
+{
+ const struct rte_memseg_list * const*mpa = a;
+ const struct rte_memseg_list * const*mpb = b;
+ const struct rte_memseg_list *msla = *mpa;
+ const struct rte_memseg_list *mslb = *mpb;
+ uint64_t pg_sz_a = msla->page_sz;
+ uint64_t pg_sz_b = mslb->page_sz;
+
+ if (pg_sz_a < pg_sz_b)
+ return -1;
+ if (pg_sz_a > pg_sz_b)
+ return 1;
+ return 0;
+}
+
+static int
+alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket,
+ unsigned int flags, size_t align, size_t bound, bool contig)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *requested_msls[RTE_MAX_MEMSEG_LISTS];
+ struct rte_memseg_list *other_msls[RTE_MAX_MEMSEG_LISTS];
+ uint64_t requested_pg_sz[RTE_MAX_MEMSEG_LISTS];
+ uint64_t other_pg_sz[RTE_MAX_MEMSEG_LISTS];
+ uint64_t prev_pg_sz;
+ int i, n_other_msls, n_other_pg_sz, n_requested_msls, n_requested_pg_sz;
+ bool size_hint = (flags & RTE_MEMZONE_SIZE_HINT_ONLY) > 0;
+ unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
+ void *ret;
+
+ memset(requested_msls, 0, sizeof(requested_msls));
+ memset(other_msls, 0, sizeof(other_msls));
+ memset(requested_pg_sz, 0, sizeof(requested_pg_sz));
+ memset(other_pg_sz, 0, sizeof(other_pg_sz));
+
+ /*
+ * go through memseg list and take note of all the page sizes available,
+ * and if any of them were specifically requested by the user.
+ */
+ n_requested_msls = 0;
+ n_other_msls = 0;
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+
+ if (msl->socket_id != socket)
+ continue;
+
+ if (msl->base_va == NULL)
+ continue;
+
+ /* if pages of specific size were requested */
+ if (size_flags != 0 && check_hugepage_sz(size_flags,
+ msl->page_sz))
+ requested_msls[n_requested_msls++] = msl;
+ else if (size_flags == 0 || size_hint)
+ other_msls[n_other_msls++] = msl;
+ }
+
+ /* sort the lists, smallest first */
+ qsort(requested_msls, n_requested_msls, sizeof(requested_msls[0]),
+ compare_pagesz);
+ qsort(other_msls, n_other_msls, sizeof(other_msls[0]),
+ compare_pagesz);
+
+ /* now, extract page sizes we are supposed to try */
+ prev_pg_sz = 0;
+ n_requested_pg_sz = 0;
+ for (i = 0; i < n_requested_msls; i++) {
+ uint64_t pg_sz = requested_msls[i]->page_sz;
+
+ if (prev_pg_sz != pg_sz) {
+ requested_pg_sz[n_requested_pg_sz++] = pg_sz;
+ prev_pg_sz = pg_sz;
+ }
+ }
+ prev_pg_sz = 0;
+ n_other_pg_sz = 0;
+ for (i = 0; i < n_other_msls; i++) {
+ uint64_t pg_sz = other_msls[i]->page_sz;
+
+ if (prev_pg_sz != pg_sz) {
+ other_pg_sz[n_other_pg_sz++] = pg_sz;
+ prev_pg_sz = pg_sz;
+ }
+ }
+
+ /* finally, try allocating memory of specified page sizes, starting from
+ * the smallest sizes
+ */
+ for (i = 0; i < n_requested_pg_sz; i++) {
+ uint64_t pg_sz = requested_pg_sz[i];
+
+ /*
+ * do not pass the size hint here, as user expects other page
+ * sizes first, before resorting to best effort allocation.
+ */
+ if (!try_expand_heap(heap, pg_sz, size, socket, size_flags,
+ align, bound, contig))
+ return 0;
+ }
+ if (n_other_pg_sz == 0)
+ return -1;
+
+ /* now, check if we can reserve anything with size hint */
+ ret = find_suitable_element(heap, size, flags, align, bound, contig);
+ if (ret != NULL)
+ return 0;
+
+ /*
+ * we still couldn't reserve memory, so try expanding heap with other
+ * page sizes, if there are any
+ */
+ for (i = 0; i < n_other_pg_sz; i++) {
+ uint64_t pg_sz = other_pg_sz[i];
+
+ if (!try_expand_heap(heap, pg_sz, size, socket, flags,
+ align, bound, contig))
+ return 0;
+ }
+ return -1;
+}
+
+/* this will try lower page sizes first */
+static void *
+malloc_heap_alloc_on_heap_id(const char *type, size_t size,
+ unsigned int heap_id, unsigned int flags, size_t align,
+ size_t bound, bool contig)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
+ unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
+ int socket_id;
+ void *ret;
+
+ rte_spinlock_lock(&(heap->lock));
+
+ align = align == 0 ? 1 : align;
+
+ /* for legacy mode, try once and with all flags */
+ if (internal_config.legacy_mem) {
+ ret = heap_alloc(heap, type, size, flags, align, bound, contig);
+ goto alloc_unlock;
+ }
+
+ /*
+ * we do not pass the size hint here, because even if allocation fails,
+ * we may still be able to allocate memory from appropriate page sizes,
+ * we just need to request more memory first.
+ */
+
+ socket_id = rte_socket_id_by_idx(heap_id);
+ /*
+ * if socket ID is negative, we cannot find a socket ID for this heap -
+ * which means it's an external heap. those can have unexpected page
+ * sizes, so if the user asked to allocate from there - assume user
+ * knows what they're doing, and allow allocating from there with any
+ * page size flags.
+ */
+ if (socket_id < 0)
+ size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY;
+
+ ret = heap_alloc(heap, type, size, size_flags, align, bound, contig);
+ if (ret != NULL)
+ goto alloc_unlock;
+
+ /* if socket ID is invalid, this is an external heap */
+ if (socket_id < 0)
+ goto alloc_unlock;
+
+ if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align,
+ bound, contig)) {
+ ret = heap_alloc(heap, type, size, flags, align, bound, contig);
+
+ /* this should have succeeded */
+ if (ret == NULL)
+ RTE_LOG(ERR, EAL, "Error allocating from heap\n");
+ }
+alloc_unlock:
+ rte_spinlock_unlock(&(heap->lock));
+ return ret;
+}
+
+void *
+malloc_heap_alloc(const char *type, size_t size, int socket_arg,
+ unsigned int flags, size_t align, size_t bound, bool contig)
+{
+ int socket, heap_id, i;
+ void *ret;
+
+ /* return NULL if size is 0 or alignment is not power-of-2 */
+ if (size == 0 || (align && !rte_is_power_of_2(align)))
+ return NULL;
+
+ if (!rte_eal_has_hugepages() && socket_arg < RTE_MAX_NUMA_NODES)
+ socket_arg = SOCKET_ID_ANY;
+
+ if (socket_arg == SOCKET_ID_ANY)
+ socket = malloc_get_numa_socket();
+ else
+ socket = socket_arg;
+
+ /* turn socket ID into heap ID */
+ heap_id = malloc_socket_to_heap_id(socket);
+ /* if heap id is negative, socket ID was invalid */
+ if (heap_id < 0)
+ return NULL;
+
+ ret = malloc_heap_alloc_on_heap_id(type, size, heap_id, flags, align,
+ bound, contig);
+ if (ret != NULL || socket_arg != SOCKET_ID_ANY)
+ return ret;
+
+ /* try other heaps. we are only iterating through native DPDK sockets,
+ * so external heaps won't be included.
+ */
+ for (i = 0; i < (int) rte_socket_count(); i++) {
+ if (i == heap_id)
+ continue;
+ ret = malloc_heap_alloc_on_heap_id(type, size, i, flags, align,
+ bound, contig);
+ if (ret != NULL)
+ return ret;
+ }
+ return NULL;
+}
+
+static void *
+heap_alloc_biggest_on_heap_id(const char *type, unsigned int heap_id,
+ unsigned int flags, size_t align, bool contig)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
+ void *ret;
+
+ rte_spinlock_lock(&(heap->lock));
+
+ align = align == 0 ? 1 : align;
+
+ ret = heap_alloc_biggest(heap, type, flags, align, contig);
+
+ rte_spinlock_unlock(&(heap->lock));
+
+ return ret;
+}
+
+void *
+malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
+ size_t align, bool contig)
+{
+ int socket, i, cur_socket, heap_id;
+ void *ret;
+
+ /* return NULL if align is not power-of-2 */
+ if ((align && !rte_is_power_of_2(align)))
+ return NULL;
+
+ if (!rte_eal_has_hugepages())
+ socket_arg = SOCKET_ID_ANY;
+
+ if (socket_arg == SOCKET_ID_ANY)
+ socket = malloc_get_numa_socket();
+ else
+ socket = socket_arg;
+
+ /* turn socket ID into heap ID */
+ heap_id = malloc_socket_to_heap_id(socket);
+ /* if heap id is negative, socket ID was invalid */
+ if (heap_id < 0)
+ return NULL;
+
+ ret = heap_alloc_biggest_on_heap_id(type, heap_id, flags, align,
+ contig);
+ if (ret != NULL || socket_arg != SOCKET_ID_ANY)
+ return ret;
+
+ /* try other heaps */
+ for (i = 0; i < (int) rte_socket_count(); i++) {
+ cur_socket = rte_socket_id_by_idx(i);
+ if (cur_socket == socket)
+ continue;
+ ret = heap_alloc_biggest_on_heap_id(type, i, flags, align,
+ contig);
+ if (ret != NULL)
+ return ret;
+ }
+ return NULL;
+}
+
+/* this function is exposed in malloc_mp.h */
+int
+malloc_heap_free_pages(void *aligned_start, size_t aligned_len)
+{
+ int n_segs, seg_idx, max_seg_idx;
+ struct rte_memseg_list *msl;
+ size_t page_sz;
+
+ msl = rte_mem_virt2memseg_list(aligned_start);
+ if (msl == NULL)
+ return -1;
+
+ page_sz = (size_t)msl->page_sz;
+ n_segs = aligned_len / page_sz;
+ seg_idx = RTE_PTR_DIFF(aligned_start, msl->base_va) / page_sz;
+ max_seg_idx = seg_idx + n_segs;
+
+ for (; seg_idx < max_seg_idx; seg_idx++) {
+ struct rte_memseg *ms;
+
+ ms = rte_fbarray_get(&msl->memseg_arr, seg_idx);
+ eal_memalloc_free_seg(ms);
+ }
+ return 0;
+}
+
+int
+malloc_heap_free(struct malloc_elem *elem)
+{
+ struct malloc_heap *heap;
+ void *start, *aligned_start, *end, *aligned_end;
+ size_t len, aligned_len, page_sz;
+ struct rte_memseg_list *msl;
+ unsigned int i, n_segs, before_space, after_space;
+ int ret;
+
+ if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
+ return -1;
+
+ /* elem may be merged with previous element, so keep heap address */
+ heap = elem->heap;
+ msl = elem->msl;
+ page_sz = (size_t)msl->page_sz;
+
+ rte_spinlock_lock(&(heap->lock));
+
+ /* mark element as free */
+ elem->state = ELEM_FREE;
+
+ elem = malloc_elem_free(elem);
+
+ /* anything after this is a bonus */
+ ret = 0;
+
+ /* ...of which we can't avail if we are in legacy mode, or if this is an
+ * externally allocated segment.
+ */
+ if (internal_config.legacy_mem || (msl->external > 0))
+ goto free_unlock;
+
+ /* check if we can free any memory back to the system */
+ if (elem->size < page_sz)
+ goto free_unlock;
+
+ /* if user requested to match allocations, the sizes must match - if not,
+ * we will defer freeing these hugepages until the entire original allocation
+ * can be freed
+ */
+ if (internal_config.match_allocations && elem->size != elem->orig_size)
+ goto free_unlock;
+
+ /* probably, but let's make sure, as we may not be using up full page */
+ start = elem;
+ len = elem->size;
+ aligned_start = RTE_PTR_ALIGN_CEIL(start, page_sz);
+ end = RTE_PTR_ADD(elem, len);
+ aligned_end = RTE_PTR_ALIGN_FLOOR(end, page_sz);
+
+ aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
+
+ /* can't free anything */
+ if (aligned_len < page_sz)
+ goto free_unlock;
+
+ /* we can free something. however, some of these pages may be marked as
+ * unfreeable, so also check that as well
+ */
+ n_segs = aligned_len / page_sz;
+ for (i = 0; i < n_segs; i++) {
+ const struct rte_memseg *tmp =
+ rte_mem_virt2memseg(aligned_start, msl);
+
+ if (tmp->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) {
+ /* this is an unfreeable segment, so move start */
+ aligned_start = RTE_PTR_ADD(tmp->addr, tmp->len);
+ }
+ }
+
+ /* recalculate length and number of segments */
+ aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
+ n_segs = aligned_len / page_sz;
+
+ /* check if we can still free some pages */
+ if (n_segs == 0)
+ goto free_unlock;
+
+ /* We're not done yet. We also have to check if by freeing space we will
+ * be leaving free elements that are too small to store new elements.
+ * Check if we have enough space in the beginning and at the end, or if
+ * start/end are exactly page aligned.
+ */
+ before_space = RTE_PTR_DIFF(aligned_start, elem);
+ after_space = RTE_PTR_DIFF(end, aligned_end);
+ if (before_space != 0 &&
+ before_space < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* There is not enough space before start, but we may be able to
+ * move the start forward by one page.
+ */
+ if (n_segs == 1)
+ goto free_unlock;
+
+ /* move start */
+ aligned_start = RTE_PTR_ADD(aligned_start, page_sz);
+ aligned_len -= page_sz;
+ n_segs--;
+ }
+ if (after_space != 0 && after_space <
+ MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* There is not enough space after end, but we may be able to
+ * move the end backwards by one page.
+ */
+ if (n_segs == 1)
+ goto free_unlock;
+
+ /* move end */
+ aligned_end = RTE_PTR_SUB(aligned_end, page_sz);
+ aligned_len -= page_sz;
+ n_segs--;
+ }
+
+ /* now we can finally free us some pages */
+
+ rte_mcfg_mem_write_lock();
+
+ /*
+ * we allow secondary processes to clear the heap of this allocated
+ * memory because it is safe to do so, as even if notifications about
+ * unmapped pages don't make it to other processes, heap is shared
+ * across all processes, and will become empty of this memory anyway,
+ * and nothing can allocate it back unless primary process will be able
+ * to deliver allocation message to every single running process.
+ */
+
+ malloc_elem_free_list_remove(elem);
+
+ malloc_elem_hide_region(elem, (void *) aligned_start, aligned_len);
+
+ heap->total_size -= aligned_len;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* notify user about changes in memory map */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ aligned_start, aligned_len);
+
+ /* don't care if any of this fails */
+ malloc_heap_free_pages(aligned_start, aligned_len);
+
+ request_sync();
+ } else {
+ struct malloc_mp_req req;
+
+ memset(&req, 0, sizeof(req));
+
+ req.t = REQ_TYPE_FREE;
+ req.free_req.addr = aligned_start;
+ req.free_req.len = aligned_len;
+
+ /*
+ * we request primary to deallocate pages, but we don't do it
+ * in this thread. instead, we notify primary that we would like
+ * to deallocate pages, and this process will receive another
+ * request (in parallel) that will do it for us on another
+ * thread.
+ *
+ * we also don't really care if this succeeds - the data is
+ * already removed from the heap, so it is, for all intents and
+ * purposes, hidden from the rest of DPDK even if some other
+ * process (including this one) may have these pages mapped.
+ *
+ * notifications about deallocated memory happen during sync.
+ */
+ request_to_primary(&req);
+ }
+
+ RTE_LOG(DEBUG, EAL, "Heap on socket %d was shrunk by %zdMB\n",
+ msl->socket_id, aligned_len >> 20ULL);
+
+ rte_mcfg_mem_write_unlock();
+free_unlock:
+ rte_spinlock_unlock(&(heap->lock));
+ return ret;
+}
+
+int
+malloc_heap_resize(struct malloc_elem *elem, size_t size)
+{
+ int ret;
+
+ if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
+ return -1;
+
+ rte_spinlock_lock(&(elem->heap->lock));
+
+ ret = malloc_elem_resize(elem, size);
+
+ rte_spinlock_unlock(&(elem->heap->lock));
+
+ return ret;
+}
+
+/*
+ * Function to retrieve data for a given heap
+ */
+int
+malloc_heap_get_stats(struct malloc_heap *heap,
+ struct rte_malloc_socket_stats *socket_stats)
+{
+ size_t idx;
+ struct malloc_elem *elem;
+
+ rte_spinlock_lock(&heap->lock);
+
+ /* Initialise variables for heap */
+ socket_stats->free_count = 0;
+ socket_stats->heap_freesz_bytes = 0;
+ socket_stats->greatest_free_size = 0;
+
+ /* Iterate through free list */
+ for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
+ for (elem = LIST_FIRST(&heap->free_head[idx]);
+ !!elem; elem = LIST_NEXT(elem, free_list))
+ {
+ socket_stats->free_count++;
+ socket_stats->heap_freesz_bytes += elem->size;
+ if (elem->size > socket_stats->greatest_free_size)
+ socket_stats->greatest_free_size = elem->size;
+ }
+ }
+ /* Get stats on overall heap and allocated memory on this heap */
+ socket_stats->heap_totalsz_bytes = heap->total_size;
+ socket_stats->heap_allocsz_bytes = (socket_stats->heap_totalsz_bytes -
+ socket_stats->heap_freesz_bytes);
+ socket_stats->alloc_count = heap->alloc_count;
+
+ rte_spinlock_unlock(&heap->lock);
+ return 0;
+}
+
+/*
+ * Function to retrieve data for a given heap
+ */
+void
+malloc_heap_dump(struct malloc_heap *heap, FILE *f)
+{
+ struct malloc_elem *elem;
+
+ rte_spinlock_lock(&heap->lock);
+
+ fprintf(f, "Heap size: 0x%zx\n", heap->total_size);
+ fprintf(f, "Heap alloc count: %u\n", heap->alloc_count);
+
+ elem = heap->first;
+ while (elem) {
+ malloc_elem_dump(elem, f);
+ elem = elem->next;
+ }
+
+ rte_spinlock_unlock(&heap->lock);
+}
+
+static int
+destroy_elem(struct malloc_elem *elem, size_t len)
+{
+ struct malloc_heap *heap = elem->heap;
+
+ /* notify all subscribers that a memory area is going to be removed */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, elem, len);
+
+ /* this element can be removed */
+ malloc_elem_free_list_remove(elem);
+ malloc_elem_hide_region(elem, elem, len);
+
+ heap->total_size -= len;
+
+ memset(elem, 0, sizeof(*elem));
+
+ return 0;
+}
+
+struct rte_memseg_list *
+malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[],
+ unsigned int n_pages, size_t page_sz, const char *seg_name,
+ unsigned int socket_id)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ char fbarray_name[RTE_FBARRAY_NAME_LEN];
+ struct rte_memseg_list *msl = NULL;
+ struct rte_fbarray *arr;
+ size_t seg_len = n_pages * page_sz;
+ unsigned int i;
+
+ /* first, find a free memseg list */
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *tmp = &mcfg->memsegs[i];
+ if (tmp->base_va == NULL) {
+ msl = tmp;
+ break;
+ }
+ }
+ if (msl == NULL) {
+ RTE_LOG(ERR, EAL, "Couldn't find empty memseg list\n");
+ rte_errno = ENOSPC;
+ return NULL;
+ }
+
+ snprintf(fbarray_name, sizeof(fbarray_name), "%s_%p",
+ seg_name, va_addr);
+
+ /* create the backing fbarray */
+ if (rte_fbarray_init(&msl->memseg_arr, fbarray_name, n_pages,
+ sizeof(struct rte_memseg)) < 0) {
+ RTE_LOG(ERR, EAL, "Couldn't create fbarray backing the memseg list\n");
+ return NULL;
+ }
+ arr = &msl->memseg_arr;
+
+ /* fbarray created, fill it up */
+ for (i = 0; i < n_pages; i++) {
+ struct rte_memseg *ms;
+
+ rte_fbarray_set_used(arr, i);
+ ms = rte_fbarray_get(arr, i);
+ ms->addr = RTE_PTR_ADD(va_addr, i * page_sz);
+ ms->iova = iova_addrs == NULL ? RTE_BAD_IOVA : iova_addrs[i];
+ ms->hugepage_sz = page_sz;
+ ms->len = page_sz;
+ ms->nchannel = rte_memory_get_nchannel();
+ ms->nrank = rte_memory_get_nrank();
+ ms->socket_id = socket_id;
+ }
+
+ /* set up the memseg list */
+ msl->base_va = va_addr;
+ msl->page_sz = page_sz;
+ msl->socket_id = socket_id;
+ msl->len = seg_len;
+ msl->version = 0;
+ msl->external = 1;
+
+ return msl;
+}
+
+struct extseg_walk_arg {
+ void *va_addr;
+ size_t len;
+ struct rte_memseg_list *msl;
+};
+
+static int
+extseg_walk(const struct rte_memseg_list *msl, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct extseg_walk_arg *wa = arg;
+
+ if (msl->base_va == wa->va_addr && msl->len == wa->len) {
+ unsigned int found_idx;
+
+ /* msl is const */
+ found_idx = msl - mcfg->memsegs;
+ wa->msl = &mcfg->memsegs[found_idx];
+ return 1;
+ }
+ return 0;
+}
+
+struct rte_memseg_list *
+malloc_heap_find_external_seg(void *va_addr, size_t len)
+{
+ struct extseg_walk_arg wa;
+ int res;
+
+ wa.va_addr = va_addr;
+ wa.len = len;
+
+ res = rte_memseg_list_walk_thread_unsafe(extseg_walk, &wa);
+
+ if (res != 1) {
+ /* 0 means nothing was found, -1 shouldn't happen */
+ if (res == 0)
+ rte_errno = ENOENT;
+ return NULL;
+ }
+ return wa.msl;
+}
+
+int
+malloc_heap_destroy_external_seg(struct rte_memseg_list *msl)
+{
+ /* destroy the fbarray backing this memory */
+ if (rte_fbarray_destroy(&msl->memseg_arr) < 0)
+ return -1;
+
+ /* reset the memseg list */
+ memset(msl, 0, sizeof(*msl));
+
+ return 0;
+}
+
+int
+malloc_heap_add_external_memory(struct malloc_heap *heap,
+ struct rte_memseg_list *msl)
+{
+ /* erase contents of new memory */
+ memset(msl->base_va, 0, msl->len);
+
+ /* now, add newly minted memory to the malloc heap */
+ malloc_heap_add_memory(heap, msl, msl->base_va, msl->len);
+
+ heap->total_size += msl->len;
+
+ /* all done! */
+ RTE_LOG(DEBUG, EAL, "Added segment for heap %s starting at %p\n",
+ heap->name, msl->base_va);
+
+ /* notify all subscribers that a new memory area has been added */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC,
+ msl->base_va, msl->len);
+
+ return 0;
+}
+
+int
+malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr,
+ size_t len)
+{
+ struct malloc_elem *elem = heap->first;
+
+ /* find element with specified va address */
+ while (elem != NULL && elem != va_addr) {
+ elem = elem->next;
+ /* stop if we've blown past our VA */
+ if (elem > (struct malloc_elem *)va_addr) {
+ rte_errno = ENOENT;
+ return -1;
+ }
+ }
+ /* check if element was found */
+ if (elem == NULL || elem->msl->len != len) {
+ rte_errno = ENOENT;
+ return -1;
+ }
+ /* if element's size is not equal to segment len, segment is busy */
+ if (elem->state == ELEM_BUSY || elem->size != len) {
+ rte_errno = EBUSY;
+ return -1;
+ }
+ return destroy_elem(elem, len);
+}
+
+int
+malloc_heap_create(struct malloc_heap *heap, const char *heap_name)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ uint32_t next_socket_id = mcfg->next_socket_id;
+
+ /* prevent overflow. did you really create 2 billion heaps??? */
+ if (next_socket_id > INT32_MAX) {
+ RTE_LOG(ERR, EAL, "Cannot assign new socket ID's\n");
+ rte_errno = ENOSPC;
+ return -1;
+ }
+
+ /* initialize empty heap */
+ heap->alloc_count = 0;
+ heap->first = NULL;
+ heap->last = NULL;
+ LIST_INIT(heap->free_head);
+ rte_spinlock_init(&heap->lock);
+ heap->total_size = 0;
+ heap->socket_id = next_socket_id;
+
+ /* we hold a global mem hotplug writelock, so it's safe to increment */
+ mcfg->next_socket_id++;
+
+ /* set up name */
+ strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
+ return 0;
+}
+
+int
+malloc_heap_destroy(struct malloc_heap *heap)
+{
+ if (heap->alloc_count != 0) {
+ RTE_LOG(ERR, EAL, "Heap is still in use\n");
+ rte_errno = EBUSY;
+ return -1;
+ }
+ if (heap->first != NULL || heap->last != NULL) {
+ RTE_LOG(ERR, EAL, "Heap still contains memory segments\n");
+ rte_errno = EBUSY;
+ return -1;
+ }
+ if (heap->total_size != 0)
+ RTE_LOG(ERR, EAL, "Total size not zero, heap is likely corrupt\n");
+
+ /* after this, the lock will be dropped */
+ memset(heap, 0, sizeof(*heap));
+
+ return 0;
+}
+
+int
+rte_eal_malloc_heap_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int i;
+
+ if (internal_config.match_allocations) {
+ RTE_LOG(DEBUG, EAL, "Hugepages will be freed exactly as allocated.\n");
+ }
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* assign min socket ID to external heaps */
+ mcfg->next_socket_id = EXTERNAL_HEAP_MIN_SOCKET_ID;
+
+ /* assign names to default DPDK heaps */
+ for (i = 0; i < rte_socket_count(); i++) {
+ struct malloc_heap *heap = &mcfg->malloc_heaps[i];
+ char heap_name[RTE_HEAP_NAME_MAX_LEN];
+ int socket_id = rte_socket_id_by_idx(i);
+
+ snprintf(heap_name, sizeof(heap_name),
+ "socket_%i", socket_id);
+ strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
+ heap->socket_id = socket_id;
+ }
+ }
+
+
+ if (register_mp_requests()) {
+ RTE_LOG(ERR, EAL, "Couldn't register malloc multiprocess actions\n");
+ rte_mcfg_mem_read_unlock();
+ return -1;
+ }
+
+ /* unlock mem hotplug here. it's safe for primary as no requests can
+ * even come before primary itself is fully initialized, and secondaries
+ * do not need to initialize the heap.
+ */
+ rte_mcfg_mem_read_unlock();
+
+ /* secondary process does not need to initialize anything */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return 0;
+
+ /* add all IOVA-contiguous areas to the heap */
+ return rte_memseg_contig_walk(malloc_add_seg, NULL);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.h b/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.h
new file mode 100644
index 000000000..772736b53
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_heap.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef MALLOC_HEAP_H_
+#define MALLOC_HEAP_H_
+
+#include <stdbool.h>
+#include <sys/queue.h>
+
+#include <rte_malloc.h>
+#include <rte_spinlock.h>
+
+/* Number of free lists per heap, grouped by size. */
+#define RTE_HEAP_NUM_FREELISTS 13
+#define RTE_HEAP_NAME_MAX_LEN 32
+
+/* dummy definition, for pointers */
+struct malloc_elem;
+
+/**
+ * Structure to hold malloc heap
+ */
+struct malloc_heap {
+ rte_spinlock_t lock;
+ LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
+ struct malloc_elem *volatile first;
+ struct malloc_elem *volatile last;
+
+ unsigned int alloc_count;
+ unsigned int socket_id;
+ size_t total_size;
+ char name[RTE_HEAP_NAME_MAX_LEN];
+} __rte_cache_aligned;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline unsigned
+malloc_get_numa_socket(void)
+{
+ unsigned socket_id = rte_socket_id();
+
+ if (socket_id == (unsigned)SOCKET_ID_ANY)
+ return 0;
+
+ return socket_id;
+}
+
+void *
+malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags,
+ size_t align, size_t bound, bool contig);
+
+void *
+malloc_heap_alloc_biggest(const char *type, int socket, unsigned int flags,
+ size_t align, bool contig);
+
+int
+malloc_heap_create(struct malloc_heap *heap, const char *heap_name);
+
+int
+malloc_heap_destroy(struct malloc_heap *heap);
+
+struct rte_memseg_list *
+malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[],
+ unsigned int n_pages, size_t page_sz, const char *seg_name,
+ unsigned int socket_id);
+
+struct rte_memseg_list *
+malloc_heap_find_external_seg(void *va_addr, size_t len);
+
+int
+malloc_heap_destroy_external_seg(struct rte_memseg_list *msl);
+
+int
+malloc_heap_add_external_memory(struct malloc_heap *heap,
+ struct rte_memseg_list *msl);
+
+int
+malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr,
+ size_t len);
+
+int
+malloc_heap_free(struct malloc_elem *elem);
+
+int
+malloc_heap_resize(struct malloc_elem *elem, size_t size);
+
+int
+malloc_heap_get_stats(struct malloc_heap *heap,
+ struct rte_malloc_socket_stats *socket_stats);
+
+void
+malloc_heap_dump(struct malloc_heap *heap, FILE *f);
+
+int
+malloc_socket_to_heap_id(unsigned int socket_id);
+
+int
+rte_eal_malloc_heap_init(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* MALLOC_HEAP_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.c b/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.c
new file mode 100644
index 000000000..1f212f834
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.c
@@ -0,0 +1,751 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <string.h>
+#include <sys/time.h>
+
+#include <rte_alarm.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+
+#include "eal_memalloc.h"
+#include "eal_memcfg.h"
+
+#include "malloc_elem.h"
+#include "malloc_mp.h"
+
+#define MP_ACTION_SYNC "mp_malloc_sync"
+/**< request sent by primary process to notify of changes in memory map */
+#define MP_ACTION_ROLLBACK "mp_malloc_rollback"
+/**< request sent by primary process to notify of changes in memory map. this is
+ * essentially a regular sync request, but we cannot send sync requests while
+ * another one is in progress, and we might have to - therefore, we do this as
+ * a separate callback.
+ */
+#define MP_ACTION_REQUEST "mp_malloc_request"
+/**< request sent by secondary process to ask for allocation/deallocation */
+#define MP_ACTION_RESPONSE "mp_malloc_response"
+/**< response sent to secondary process to indicate result of request */
+
+/* forward declarations */
+static int
+handle_sync_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply);
+static int
+handle_rollback_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply);
+
+#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */
+
+/* when we're allocating, we need to store some state to ensure that we can
+ * roll back later
+ */
+struct primary_alloc_req_state {
+ struct malloc_heap *heap;
+ struct rte_memseg **ms;
+ int ms_len;
+ struct malloc_elem *elem;
+ void *map_addr;
+ size_t map_len;
+};
+
+enum req_state {
+ REQ_STATE_INACTIVE = 0,
+ REQ_STATE_ACTIVE,
+ REQ_STATE_COMPLETE
+};
+
+struct mp_request {
+ TAILQ_ENTRY(mp_request) next;
+ struct malloc_mp_req user_req; /**< contents of request */
+ pthread_cond_t cond; /**< variable we use to time out on this request */
+ enum req_state state; /**< indicate status of this request */
+ struct primary_alloc_req_state alloc_state;
+};
+
+/*
+ * We could've used just a single request, but it may be possible for
+ * secondaries to timeout earlier than the primary, and send a new request while
+ * primary is still expecting replies to the old one. Therefore, each new
+ * request will get assigned a new ID, which is how we will distinguish between
+ * expected and unexpected messages.
+ */
+TAILQ_HEAD(mp_request_list, mp_request);
+static struct {
+ struct mp_request_list list;
+ pthread_mutex_t lock;
+} mp_request_list = {
+ .list = TAILQ_HEAD_INITIALIZER(mp_request_list.list),
+ .lock = PTHREAD_MUTEX_INITIALIZER
+};
+
+/**
+ * General workflow is the following:
+ *
+ * Allocation:
+ * S: send request to primary
+ * P: attempt to allocate memory
+ * if failed, sendmsg failure
+ * if success, send sync request
+ * S: if received msg of failure, quit
+ * if received sync request, synchronize memory map and reply with result
+ * P: if received sync request result
+ * if success, sendmsg success
+ * if failure, roll back allocation and send a rollback request
+ * S: if received msg of success, quit
+ * if received rollback request, synchronize memory map and reply with result
+ * P: if received sync request result
+ * sendmsg sync request result
+ * S: if received msg, quit
+ *
+ * Aside from timeouts, there are three points where we can quit:
+ * - if allocation failed straight away
+ * - if allocation and sync request succeeded
+ * - if allocation succeeded, sync request failed, allocation rolled back and
+ * rollback request received (irrespective of whether it succeeded or failed)
+ *
+ * Deallocation:
+ * S: send request to primary
+ * P: attempt to deallocate memory
+ * if failed, sendmsg failure
+ * if success, send sync request
+ * S: if received msg of failure, quit
+ * if received sync request, synchronize memory map and reply with result
+ * P: if received sync request result
+ * sendmsg sync request result
+ * S: if received msg, quit
+ *
+ * There is no "rollback" from deallocation, as it's safe to have some memory
+ * mapped in some processes - it's absent from the heap, so it won't get used.
+ */
+
+static struct mp_request *
+find_request_by_id(uint64_t id)
+{
+ struct mp_request *req;
+ TAILQ_FOREACH(req, &mp_request_list.list, next) {
+ if (req->user_req.id == id)
+ break;
+ }
+ return req;
+}
+
+/* this ID is, like, totally guaranteed to be absolutely unique. pinky swear. */
+static uint64_t
+get_unique_id(void)
+{
+ uint64_t id;
+ do {
+ id = rte_rand();
+ } while (find_request_by_id(id) != NULL);
+ return id;
+}
+
+/* secondary will respond to sync requests thusly */
+static int
+handle_sync(const struct rte_mp_msg *msg, const void *peer)
+{
+ struct rte_mp_msg reply;
+ const struct malloc_mp_req *req =
+ (const struct malloc_mp_req *)msg->param;
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)reply.param;
+ int ret;
+
+ if (req->t != REQ_TYPE_SYNC) {
+ RTE_LOG(ERR, EAL, "Unexpected request from primary\n");
+ return -1;
+ }
+
+ memset(&reply, 0, sizeof(reply));
+
+ reply.num_fds = 0;
+ strlcpy(reply.name, msg->name, sizeof(reply.name));
+ reply.len_param = sizeof(*resp);
+
+ ret = eal_memalloc_sync_with_primary();
+
+ resp->t = REQ_TYPE_SYNC;
+ resp->id = req->id;
+ resp->result = ret == 0 ? REQ_RESULT_SUCCESS : REQ_RESULT_FAIL;
+
+ rte_mp_reply(&reply, peer);
+
+ return 0;
+}
+
+static int
+handle_alloc_request(const struct malloc_mp_req *m,
+ struct mp_request *req)
+{
+ const struct malloc_req_alloc *ar = &m->alloc_req;
+ struct malloc_heap *heap;
+ struct malloc_elem *elem;
+ struct rte_memseg **ms;
+ size_t alloc_sz;
+ int n_segs;
+ void *map_addr;
+
+ alloc_sz = RTE_ALIGN_CEIL(ar->align + ar->elt_size +
+ MALLOC_ELEM_TRAILER_LEN, ar->page_sz);
+ n_segs = alloc_sz / ar->page_sz;
+
+ heap = ar->heap;
+
+ /* we can't know in advance how many pages we'll need, so we malloc */
+ ms = malloc(sizeof(*ms) * n_segs);
+ if (ms == NULL) {
+ RTE_LOG(ERR, EAL, "Couldn't allocate memory for request state\n");
+ goto fail;
+ }
+ memset(ms, 0, sizeof(*ms) * n_segs);
+
+ elem = alloc_pages_on_heap(heap, ar->page_sz, ar->elt_size, ar->socket,
+ ar->flags, ar->align, ar->bound, ar->contig, ms,
+ n_segs);
+
+ if (elem == NULL)
+ goto fail;
+
+ map_addr = ms[0]->addr;
+
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz);
+
+ /* we have succeeded in allocating memory, but we still need to sync
+ * with other processes. however, since DPDK IPC is single-threaded, we
+ * send an asynchronous request and exit this callback.
+ */
+
+ req->alloc_state.ms = ms;
+ req->alloc_state.ms_len = n_segs;
+ req->alloc_state.map_addr = map_addr;
+ req->alloc_state.map_len = alloc_sz;
+ req->alloc_state.elem = elem;
+ req->alloc_state.heap = heap;
+
+ return 0;
+fail:
+ free(ms);
+ return -1;
+}
+
+/* first stage of primary handling requests from secondary */
+static int
+handle_request(const struct rte_mp_msg *msg, const void *peer __rte_unused)
+{
+ const struct malloc_mp_req *m =
+ (const struct malloc_mp_req *)msg->param;
+ struct mp_request *entry;
+ int ret;
+
+ /* lock access to request */
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ /* make sure it's not a dupe */
+ entry = find_request_by_id(m->id);
+ if (entry != NULL) {
+ RTE_LOG(ERR, EAL, "Duplicate request id\n");
+ goto fail;
+ }
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ RTE_LOG(ERR, EAL, "Unable to allocate memory for request\n");
+ goto fail;
+ }
+
+ /* erase all data */
+ memset(entry, 0, sizeof(*entry));
+
+ if (m->t == REQ_TYPE_ALLOC) {
+ ret = handle_alloc_request(m, entry);
+ } else if (m->t == REQ_TYPE_FREE) {
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ m->free_req.addr, m->free_req.len);
+
+ ret = malloc_heap_free_pages(m->free_req.addr,
+ m->free_req.len);
+ } else {
+ RTE_LOG(ERR, EAL, "Unexpected request from secondary\n");
+ goto fail;
+ }
+
+ if (ret != 0) {
+ struct rte_mp_msg resp_msg;
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)resp_msg.param;
+
+ /* send failure message straight away */
+ resp_msg.num_fds = 0;
+ resp_msg.len_param = sizeof(*resp);
+ strlcpy(resp_msg.name, MP_ACTION_RESPONSE,
+ sizeof(resp_msg.name));
+
+ resp->t = m->t;
+ resp->result = REQ_RESULT_FAIL;
+ resp->id = m->id;
+
+ if (rte_mp_sendmsg(&resp_msg)) {
+ RTE_LOG(ERR, EAL, "Couldn't send response\n");
+ goto fail;
+ }
+ /* we did not modify the request */
+ free(entry);
+ } else {
+ struct rte_mp_msg sr_msg;
+ struct malloc_mp_req *sr =
+ (struct malloc_mp_req *)sr_msg.param;
+ struct timespec ts;
+
+ memset(&sr_msg, 0, sizeof(sr_msg));
+
+ /* we can do something, so send sync request asynchronously */
+ sr_msg.num_fds = 0;
+ sr_msg.len_param = sizeof(*sr);
+ strlcpy(sr_msg.name, MP_ACTION_SYNC, sizeof(sr_msg.name));
+
+ ts.tv_nsec = 0;
+ ts.tv_sec = MP_TIMEOUT_S;
+
+ /* sync requests carry no data */
+ sr->t = REQ_TYPE_SYNC;
+ sr->id = m->id;
+
+ /* there may be stray timeout still waiting */
+ do {
+ ret = rte_mp_request_async(&sr_msg, &ts,
+ handle_sync_response);
+ } while (ret != 0 && rte_errno == EEXIST);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Couldn't send sync request\n");
+ if (m->t == REQ_TYPE_ALLOC)
+ free(entry->alloc_state.ms);
+ goto fail;
+ }
+
+ /* mark request as in progress */
+ memcpy(&entry->user_req, m, sizeof(*m));
+ entry->state = REQ_STATE_ACTIVE;
+
+ TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);
+ }
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return 0;
+fail:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ free(entry);
+ return -1;
+}
+
+/* callback for asynchronous sync requests for primary. this will either do a
+ * sendmsg with results, or trigger rollback request.
+ */
+static int
+handle_sync_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply)
+{
+ enum malloc_req_result result;
+ struct mp_request *entry;
+ const struct malloc_mp_req *mpreq =
+ (const struct malloc_mp_req *)request->param;
+ int i;
+
+ /* lock the request */
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(mpreq->id);
+ if (entry == NULL) {
+ RTE_LOG(ERR, EAL, "Wrong request ID\n");
+ goto fail;
+ }
+
+ result = REQ_RESULT_SUCCESS;
+
+ if (reply->nb_received != reply->nb_sent)
+ result = REQ_RESULT_FAIL;
+
+ for (i = 0; i < reply->nb_received; i++) {
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)reply->msgs[i].param;
+
+ if (resp->t != REQ_TYPE_SYNC) {
+ RTE_LOG(ERR, EAL, "Unexpected response to sync request\n");
+ result = REQ_RESULT_FAIL;
+ break;
+ }
+ if (resp->id != entry->user_req.id) {
+ RTE_LOG(ERR, EAL, "Response to wrong sync request\n");
+ result = REQ_RESULT_FAIL;
+ break;
+ }
+ if (resp->result == REQ_RESULT_FAIL) {
+ result = REQ_RESULT_FAIL;
+ break;
+ }
+ }
+
+ if (entry->user_req.t == REQ_TYPE_FREE) {
+ struct rte_mp_msg msg;
+ struct malloc_mp_req *resp = (struct malloc_mp_req *)msg.param;
+
+ memset(&msg, 0, sizeof(msg));
+
+ /* this is a free request, just sendmsg result */
+ resp->t = REQ_TYPE_FREE;
+ resp->result = result;
+ resp->id = entry->user_req.id;
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*resp);
+ strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));
+
+ if (rte_mp_sendmsg(&msg))
+ RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");
+
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+ } else if (entry->user_req.t == REQ_TYPE_ALLOC &&
+ result == REQ_RESULT_SUCCESS) {
+ struct malloc_heap *heap = entry->alloc_state.heap;
+ struct rte_mp_msg msg;
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)msg.param;
+
+ memset(&msg, 0, sizeof(msg));
+
+ heap->total_size += entry->alloc_state.map_len;
+
+ /* result is success, so just notify secondary about this */
+ resp->t = REQ_TYPE_ALLOC;
+ resp->result = result;
+ resp->id = entry->user_req.id;
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*resp);
+ strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));
+
+ if (rte_mp_sendmsg(&msg))
+ RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");
+
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry->alloc_state.ms);
+ free(entry);
+ } else if (entry->user_req.t == REQ_TYPE_ALLOC &&
+ result == REQ_RESULT_FAIL) {
+ struct rte_mp_msg rb_msg;
+ struct malloc_mp_req *rb =
+ (struct malloc_mp_req *)rb_msg.param;
+ struct timespec ts;
+ struct primary_alloc_req_state *state =
+ &entry->alloc_state;
+ int ret;
+
+ memset(&rb_msg, 0, sizeof(rb_msg));
+
+ /* we've failed to sync, so do a rollback */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ state->map_addr, state->map_len);
+
+ rollback_expand_heap(state->ms, state->ms_len, state->elem,
+ state->map_addr, state->map_len);
+
+ /* send rollback request */
+ rb_msg.num_fds = 0;
+ rb_msg.len_param = sizeof(*rb);
+ strlcpy(rb_msg.name, MP_ACTION_ROLLBACK, sizeof(rb_msg.name));
+
+ ts.tv_nsec = 0;
+ ts.tv_sec = MP_TIMEOUT_S;
+
+ /* sync requests carry no data */
+ rb->t = REQ_TYPE_SYNC;
+ rb->id = entry->user_req.id;
+
+ /* there may be stray timeout still waiting */
+ do {
+ ret = rte_mp_request_async(&rb_msg, &ts,
+ handle_rollback_response);
+ } while (ret != 0 && rte_errno == EEXIST);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Could not send rollback request to secondary process\n");
+
+ /* we couldn't send rollback request, but that's OK -
+ * secondary will time out, and memory has been removed
+ * from heap anyway.
+ */
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(state->ms);
+ free(entry);
+ goto fail;
+ }
+ } else {
+ RTE_LOG(ERR, EAL, " to sync request of unknown type\n");
+ goto fail;
+ }
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return 0;
+fail:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return -1;
+}
+
+static int
+handle_rollback_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply __rte_unused)
+{
+ struct rte_mp_msg msg;
+ struct malloc_mp_req *resp = (struct malloc_mp_req *)msg.param;
+ const struct malloc_mp_req *mpreq =
+ (const struct malloc_mp_req *)request->param;
+ struct mp_request *entry;
+
+ /* lock the request */
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ memset(&msg, 0, sizeof(msg));
+
+ entry = find_request_by_id(mpreq->id);
+ if (entry == NULL) {
+ RTE_LOG(ERR, EAL, "Wrong request ID\n");
+ goto fail;
+ }
+
+ if (entry->user_req.t != REQ_TYPE_ALLOC) {
+ RTE_LOG(ERR, EAL, "Unexpected active request\n");
+ goto fail;
+ }
+
+ /* we don't care if rollback succeeded, request still failed */
+ resp->t = REQ_TYPE_ALLOC;
+ resp->result = REQ_RESULT_FAIL;
+ resp->id = mpreq->id;
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*resp);
+ strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));
+
+ if (rte_mp_sendmsg(&msg))
+ RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");
+
+ /* clean up */
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry->alloc_state.ms);
+ free(entry);
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return 0;
+fail:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return -1;
+}
+
+/* final stage of the request from secondary */
+static int
+handle_response(const struct rte_mp_msg *msg, const void *peer __rte_unused)
+{
+ const struct malloc_mp_req *m =
+ (const struct malloc_mp_req *)msg->param;
+ struct mp_request *entry;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(m->id);
+ if (entry != NULL) {
+ /* update request status */
+ entry->user_req.result = m->result;
+
+ entry->state = REQ_STATE_COMPLETE;
+
+ /* trigger thread wakeup */
+ pthread_cond_signal(&entry->cond);
+ }
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+
+ return 0;
+}
+
+/* synchronously request memory map sync, this is only called whenever primary
+ * process initiates the allocation.
+ */
+int
+request_sync(void)
+{
+ struct rte_mp_msg msg;
+ struct rte_mp_reply reply;
+ struct malloc_mp_req *req = (struct malloc_mp_req *)msg.param;
+ struct timespec ts;
+ int i, ret = -1;
+
+ memset(&msg, 0, sizeof(msg));
+ memset(&reply, 0, sizeof(reply));
+
+ /* no need to create tailq entries as this is entirely synchronous */
+
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*req);
+ strlcpy(msg.name, MP_ACTION_SYNC, sizeof(msg.name));
+
+ /* sync request carries no data */
+ req->t = REQ_TYPE_SYNC;
+ req->id = get_unique_id();
+
+ ts.tv_nsec = 0;
+ ts.tv_sec = MP_TIMEOUT_S;
+
+ /* there may be stray timeout still waiting */
+ do {
+ ret = rte_mp_request_sync(&msg, &reply, &ts);
+ } while (ret != 0 && rte_errno == EEXIST);
+ if (ret != 0) {
+ /* if IPC is unsupported, behave as if the call succeeded */
+ if (rte_errno != ENOTSUP)
+ RTE_LOG(ERR, EAL, "Could not send sync request to secondary process\n");
+ else
+ ret = 0;
+ goto out;
+ }
+
+ if (reply.nb_received != reply.nb_sent) {
+ RTE_LOG(ERR, EAL, "Not all secondaries have responded\n");
+ goto out;
+ }
+
+ for (i = 0; i < reply.nb_received; i++) {
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)reply.msgs[i].param;
+ if (resp->t != REQ_TYPE_SYNC) {
+ RTE_LOG(ERR, EAL, "Unexpected response from secondary\n");
+ goto out;
+ }
+ if (resp->id != req->id) {
+ RTE_LOG(ERR, EAL, "Wrong request ID\n");
+ goto out;
+ }
+ if (resp->result != REQ_RESULT_SUCCESS) {
+ RTE_LOG(ERR, EAL, "Secondary process failed to synchronize\n");
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ free(reply.msgs);
+ return ret;
+}
+
+/* this is a synchronous wrapper around a bunch of asynchronous requests to
+ * primary process. this will initiate a request and wait until responses come.
+ */
+int
+request_to_primary(struct malloc_mp_req *user_req)
+{
+ struct rte_mp_msg msg;
+ struct malloc_mp_req *msg_req = (struct malloc_mp_req *)msg.param;
+ struct mp_request *entry;
+ struct timespec ts;
+ struct timeval now;
+ int ret;
+
+ memset(&msg, 0, sizeof(msg));
+ memset(&ts, 0, sizeof(ts));
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memory for request\n");
+ goto fail;
+ }
+
+ memset(entry, 0, sizeof(*entry));
+
+ if (gettimeofday(&now, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot get current time\n");
+ goto fail;
+ }
+
+ ts.tv_nsec = (now.tv_usec * 1000) % 1000000000;
+ ts.tv_sec = now.tv_sec + MP_TIMEOUT_S +
+ (now.tv_usec * 1000) / 1000000000;
+
+ /* initialize the request */
+ pthread_cond_init(&entry->cond, NULL);
+
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*msg_req);
+ strlcpy(msg.name, MP_ACTION_REQUEST, sizeof(msg.name));
+
+ /* (attempt to) get a unique id */
+ user_req->id = get_unique_id();
+
+ /* copy contents of user request into the message */
+ memcpy(msg_req, user_req, sizeof(*msg_req));
+
+ if (rte_mp_sendmsg(&msg)) {
+ RTE_LOG(ERR, EAL, "Cannot send message to primary\n");
+ goto fail;
+ }
+
+ /* copy contents of user request into active request */
+ memcpy(&entry->user_req, user_req, sizeof(*user_req));
+
+ /* mark request as in progress */
+ entry->state = REQ_STATE_ACTIVE;
+
+ TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);
+
+ /* finally, wait on timeout */
+ do {
+ ret = pthread_cond_timedwait(&entry->cond,
+ &mp_request_list.lock, &ts);
+ } while (ret != 0 && ret != ETIMEDOUT);
+
+ if (entry->state != REQ_STATE_COMPLETE) {
+ RTE_LOG(ERR, EAL, "Request timed out\n");
+ ret = -1;
+ } else {
+ ret = 0;
+ user_req->result = entry->user_req.result;
+ }
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return ret;
+fail:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ free(entry);
+ return -1;
+}
+
+int
+register_mp_requests(void)
+{
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* it's OK for primary to not support IPC */
+ if (rte_mp_action_register(MP_ACTION_REQUEST, handle_request) &&
+ rte_errno != ENOTSUP) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ MP_ACTION_REQUEST);
+ return -1;
+ }
+ } else {
+ if (rte_mp_action_register(MP_ACTION_SYNC, handle_sync)) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ MP_ACTION_SYNC);
+ return -1;
+ }
+ if (rte_mp_action_register(MP_ACTION_ROLLBACK, handle_sync)) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ MP_ACTION_SYNC);
+ return -1;
+ }
+ if (rte_mp_action_register(MP_ACTION_RESPONSE,
+ handle_response)) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ MP_ACTION_RESPONSE);
+ return -1;
+ }
+ }
+ return 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.h b/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.h
new file mode 100644
index 000000000..2b86b76f6
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/malloc_mp.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef MALLOC_MP_H
+#define MALLOC_MP_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <rte_common.h>
+#include <rte_random.h>
+#include <rte_spinlock.h>
+#include <rte_tailq.h>
+
+/* forward declarations */
+struct malloc_heap;
+struct rte_memseg;
+
+/* multiprocess synchronization structures for malloc */
+enum malloc_req_type {
+ REQ_TYPE_ALLOC, /**< ask primary to allocate */
+ REQ_TYPE_FREE, /**< ask primary to free */
+ REQ_TYPE_SYNC /**< ask secondary to synchronize its memory map */
+};
+
+enum malloc_req_result {
+ REQ_RESULT_SUCCESS,
+ REQ_RESULT_FAIL
+};
+
+struct malloc_req_alloc {
+ struct malloc_heap *heap;
+ uint64_t page_sz;
+ size_t elt_size;
+ int socket;
+ unsigned int flags;
+ size_t align;
+ size_t bound;
+ bool contig;
+};
+
+struct malloc_req_free {
+ RTE_STD_C11
+ union {
+ void *addr;
+ uint64_t addr_64;
+ };
+ uint64_t len;
+};
+
+struct malloc_mp_req {
+ enum malloc_req_type t;
+ RTE_STD_C11
+ union {
+ struct malloc_req_alloc alloc_req;
+ struct malloc_req_free free_req;
+ };
+ uint64_t id; /**< not to be populated by caller */
+ enum malloc_req_result result;
+};
+
+int
+register_mp_requests(void);
+
+int
+request_to_primary(struct malloc_mp_req *req);
+
+/* synchronous memory map sync request */
+int
+request_sync(void);
+
+/* functions from malloc_heap exposed here */
+int
+malloc_heap_free_pages(void *aligned_start, size_t aligned_len);
+
+struct malloc_elem *
+alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
+ int socket, unsigned int flags, size_t align, size_t bound,
+ bool contig, struct rte_memseg **ms, int n_segs);
+
+void
+rollback_expand_heap(struct rte_memseg **ms, int n_segs,
+ struct malloc_elem *elem, void *map_addr, size_t map_len);
+
+#endif /* MALLOC_MP_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/common/meson.build b/src/spdk/dpdk/lib/librte_eal/common/meson.build
new file mode 100644
index 000000000..55aaeb18e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/meson.build
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+
+includes += include_directories('.')
+
+if is_windows
+ sources += files(
+ 'eal_common_bus.c',
+ 'eal_common_class.c',
+ 'eal_common_devargs.c',
+ 'eal_common_errno.c',
+ 'eal_common_launch.c',
+ 'eal_common_lcore.c',
+ 'eal_common_log.c',
+ 'eal_common_options.c',
+ 'eal_common_thread.c',
+ )
+ subdir_done()
+endif
+
+sources += files(
+ 'eal_common_bus.c',
+ 'eal_common_cpuflags.c',
+ 'eal_common_class.c',
+ 'eal_common_devargs.c',
+ 'eal_common_dev.c',
+ 'eal_common_errno.c',
+ 'eal_common_fbarray.c',
+ 'eal_common_hexdump.c',
+ 'eal_common_hypervisor.c',
+ 'eal_common_launch.c',
+ 'eal_common_lcore.c',
+ 'eal_common_log.c',
+ 'eal_common_mcfg.c',
+ 'eal_common_memalloc.c',
+ 'eal_common_memory.c',
+ 'eal_common_memzone.c',
+ 'eal_common_options.c',
+ 'eal_common_proc.c',
+ 'eal_common_string_fns.c',
+ 'eal_common_tailqs.c',
+ 'eal_common_thread.c',
+ 'eal_common_timer.c',
+ 'eal_common_trace.c',
+ 'eal_common_trace_ctf.c',
+ 'eal_common_trace_points.c',
+ 'eal_common_trace_utils.c',
+ 'eal_common_uuid.c',
+ 'hotplug_mp.c',
+ 'malloc_elem.c',
+ 'malloc_heap.c',
+ 'malloc_mp.c',
+ 'rte_keepalive.c',
+ 'rte_malloc.c',
+ 'rte_random.c',
+ 'rte_reciprocal.c',
+ 'rte_service.c',
+)
diff --git a/src/spdk/dpdk/lib/librte_eal/common/rte_keepalive.c b/src/spdk/dpdk/lib/librte_eal/common/rte_keepalive.c
new file mode 100644
index 000000000..e0494b201
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/rte_keepalive.c
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015-2016 Intel Corporation
+ */
+
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_keepalive.h>
+#include <rte_malloc.h>
+
+struct rte_keepalive {
+ /** Core Liveness. */
+ struct {
+ /*
+ * Each element must be cache aligned to prevent false sharing.
+ */
+ enum rte_keepalive_state core_state __rte_cache_aligned;
+ } live_data[RTE_KEEPALIVE_MAXCORES];
+
+ /** Last-seen-alive timestamps */
+ uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];
+
+ /**
+ * Cores to check.
+ * Indexed by core id, non-zero if the core should be checked.
+ */
+ uint8_t active_cores[RTE_KEEPALIVE_MAXCORES];
+
+ /** Dead core handler. */
+ rte_keepalive_failure_callback_t callback;
+
+ /**
+ * Dead core handler app data.
+ * Pointer is passed to dead core handler.
+ */
+ void *callback_data;
+ uint64_t tsc_initial;
+ uint64_t tsc_mhz;
+
+ /** Core state relay handler. */
+ rte_keepalive_relay_callback_t relay_callback;
+
+ /**
+ * Core state relay handler app data.
+ * Pointer is passed to live core handler.
+ */
+ void *relay_callback_data;
+};
+
+static void
+print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core)
+{
+ RTE_LOG(INFO, EAL, "%sLast seen %" PRId64 "ms ago.\n",
+ msg,
+ ((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)
+ / rte_get_tsc_hz()
+ );
+}
+
+void
+rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer,
+ void *ptr_data)
+{
+ struct rte_keepalive *keepcfg = ptr_data;
+ int idx_core;
+
+ for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
+ if (keepcfg->active_cores[idx_core] == 0)
+ continue;
+
+ switch (keepcfg->live_data[idx_core].core_state) {
+ case RTE_KA_STATE_UNUSED:
+ break;
+ case RTE_KA_STATE_ALIVE: /* Alive */
+ keepcfg->live_data[idx_core].core_state =
+ RTE_KA_STATE_MISSING;
+ keepcfg->last_alive[idx_core] = rte_rdtsc();
+ break;
+ case RTE_KA_STATE_MISSING: /* MIA */
+ print_trace("Core MIA. ", keepcfg, idx_core);
+ keepcfg->live_data[idx_core].core_state =
+ RTE_KA_STATE_DEAD;
+ break;
+ case RTE_KA_STATE_DEAD: /* Dead */
+ keepcfg->live_data[idx_core].core_state =
+ RTE_KA_STATE_GONE;
+ print_trace("Core died. ", keepcfg, idx_core);
+ if (keepcfg->callback)
+ keepcfg->callback(
+ keepcfg->callback_data,
+ idx_core
+ );
+ break;
+ case RTE_KA_STATE_GONE: /* Buried */
+ break;
+ case RTE_KA_STATE_DOZING: /* Core going idle */
+ keepcfg->live_data[idx_core].core_state =
+ RTE_KA_STATE_SLEEP;
+ keepcfg->last_alive[idx_core] = rte_rdtsc();
+ break;
+ case RTE_KA_STATE_SLEEP: /* Idled core */
+ break;
+ }
+ if (keepcfg->relay_callback)
+ keepcfg->relay_callback(
+ keepcfg->relay_callback_data,
+ idx_core,
+ keepcfg->live_data[idx_core].core_state,
+ keepcfg->last_alive[idx_core]
+ );
+ }
+}
+
+struct rte_keepalive *
+rte_keepalive_create(rte_keepalive_failure_callback_t callback,
+ void *data)
+{
+ struct rte_keepalive *keepcfg;
+
+ keepcfg = rte_zmalloc("RTE_EAL_KEEPALIVE",
+ sizeof(struct rte_keepalive),
+ RTE_CACHE_LINE_SIZE);
+ if (keepcfg != NULL) {
+ keepcfg->callback = callback;
+ keepcfg->callback_data = data;
+ keepcfg->tsc_initial = rte_rdtsc();
+ keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000;
+ }
+ return keepcfg;
+}
+
+void rte_keepalive_register_relay_callback(struct rte_keepalive *keepcfg,
+ rte_keepalive_relay_callback_t callback,
+ void *data)
+{
+ keepcfg->relay_callback = callback;
+ keepcfg->relay_callback_data = data;
+}
+
+void
+rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)
+{
+ if (id_core < RTE_KEEPALIVE_MAXCORES) {
+ keepcfg->active_cores[id_core] = RTE_KA_STATE_ALIVE;
+ keepcfg->last_alive[id_core] = rte_rdtsc();
+ }
+}
+
+void
+rte_keepalive_mark_alive(struct rte_keepalive *keepcfg)
+{
+ keepcfg->live_data[rte_lcore_id()].core_state = RTE_KA_STATE_ALIVE;
+}
+
+void
+rte_keepalive_mark_sleep(struct rte_keepalive *keepcfg)
+{
+ keepcfg->live_data[rte_lcore_id()].core_state = RTE_KA_STATE_DOZING;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/rte_malloc.c b/src/spdk/dpdk/lib/librte_eal/common/rte_malloc.c
new file mode 100644
index 000000000..f1b73168b
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/rte_malloc.c
@@ -0,0 +1,668 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2019 Intel Corporation
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_errno.h>
+#include <rte_memcpy.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_branch_prediction.h>
+#include <rte_debug.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_spinlock.h>
+#include <rte_eal_trace.h>
+
+#include <rte_malloc.h>
+#include "malloc_elem.h"
+#include "malloc_heap.h"
+#include "eal_memalloc.h"
+#include "eal_memcfg.h"
+#include "eal_private.h"
+
+
+/* Free the memory space back to heap */
+static void
+mem_free(void *addr, const bool trace_ena)
+{
+ if (trace_ena)
+ rte_eal_trace_mem_free(addr);
+
+ if (addr == NULL) return;
+ if (malloc_heap_free(malloc_elem_from_data(addr)) < 0)
+ RTE_LOG(ERR, EAL, "Error: Invalid memory\n");
+}
+
+void
+rte_free(void *addr)
+{
+ return mem_free(addr, true);
+}
+
+void
+eal_free_no_trace(void *addr)
+{
+ return mem_free(addr, false);
+}
+
+static void *
+malloc_socket(const char *type, size_t size, unsigned int align,
+ int socket_arg, const bool trace_ena)
+{
+ void *ptr;
+
+ /* return NULL if size is 0 or alignment is not power-of-2 */
+ if (size == 0 || (align && !rte_is_power_of_2(align)))
+ return NULL;
+
+ /* if there are no hugepages and if we are not allocating from an
+ * external heap, use memory from any socket available. checking for
+ * socket being external may return -1 in case of invalid socket, but
+ * that's OK - if there are no hugepages, it doesn't matter.
+ */
+ if (rte_malloc_heap_socket_is_external(socket_arg) != 1 &&
+ !rte_eal_has_hugepages())
+ socket_arg = SOCKET_ID_ANY;
+
+ ptr = malloc_heap_alloc(type, size, socket_arg, 0,
+ align == 0 ? 1 : align, 0, false);
+
+ if (trace_ena)
+ rte_eal_trace_mem_malloc(type, size, align, socket_arg, ptr);
+ return ptr;
+}
+
+/*
+ * Allocate memory on specified heap.
+ */
+void *
+rte_malloc_socket(const char *type, size_t size, unsigned int align,
+ int socket_arg)
+{
+ return malloc_socket(type, size, align, socket_arg, true);
+}
+
+void *
+eal_malloc_no_trace(const char *type, size_t size, unsigned int align)
+{
+ return malloc_socket(type, size, align, SOCKET_ID_ANY, false);
+}
+
+/*
+ * Allocate memory on default heap.
+ */
+void *
+rte_malloc(const char *type, size_t size, unsigned align)
+{
+ return rte_malloc_socket(type, size, align, SOCKET_ID_ANY);
+}
+
+/*
+ * Allocate zero'd memory on specified heap.
+ */
+void *
+rte_zmalloc_socket(const char *type, size_t size, unsigned align, int socket)
+{
+ void *ptr = rte_malloc_socket(type, size, align, socket);
+
+#ifdef RTE_MALLOC_DEBUG
+ /*
+ * If DEBUG is enabled, then freed memory is marked with poison
+ * value and set to zero on allocation.
+ * If DEBUG is not enabled then memory is already zeroed.
+ */
+ if (ptr != NULL)
+ memset(ptr, 0, size);
+#endif
+
+ rte_eal_trace_mem_zmalloc(type, size, align, socket, ptr);
+ return ptr;
+}
+
+/*
+ * Allocate zero'd memory on default heap.
+ */
+void *
+rte_zmalloc(const char *type, size_t size, unsigned align)
+{
+ return rte_zmalloc_socket(type, size, align, SOCKET_ID_ANY);
+}
+
+/*
+ * Allocate zero'd memory on specified heap.
+ */
+void *
+rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int socket)
+{
+ return rte_zmalloc_socket(type, num * size, align, socket);
+}
+
+/*
+ * Allocate zero'd memory on default heap.
+ */
+void *
+rte_calloc(const char *type, size_t num, size_t size, unsigned align)
+{
+ return rte_zmalloc(type, num * size, align);
+}
+
+/*
+ * Resize allocated memory on specified heap.
+ */
+void *
+rte_realloc_socket(void *ptr, size_t size, unsigned int align, int socket)
+{
+ if (ptr == NULL)
+ return rte_malloc_socket(NULL, size, align, socket);
+
+ struct malloc_elem *elem = malloc_elem_from_data(ptr);
+ if (elem == NULL) {
+ RTE_LOG(ERR, EAL, "Error: memory corruption detected\n");
+ return NULL;
+ }
+
+ size = RTE_CACHE_LINE_ROUNDUP(size), align = RTE_CACHE_LINE_ROUNDUP(align);
+
+ /* check requested socket id and alignment matches first, and if ok,
+ * see if we can resize block
+ */
+ if ((socket == SOCKET_ID_ANY ||
+ (unsigned int)socket == elem->heap->socket_id) &&
+ RTE_PTR_ALIGN(ptr, align) == ptr &&
+ malloc_heap_resize(elem, size) == 0) {
+ rte_eal_trace_mem_realloc(size, align, socket, ptr);
+ return ptr;
+ }
+
+ /* either requested socket id doesn't match, alignment is off
+ * or we have no room to expand,
+ * so move the data.
+ */
+ void *new_ptr = rte_malloc_socket(NULL, size, align, socket);
+ if (new_ptr == NULL)
+ return NULL;
+ /* elem: |pad|data_elem|data|trailer| */
+ const size_t old_size = elem->size - elem->pad - MALLOC_ELEM_OVERHEAD;
+ rte_memcpy(new_ptr, ptr, old_size < size ? old_size : size);
+ rte_free(ptr);
+
+ rte_eal_trace_mem_realloc(size, align, socket, new_ptr);
+ return new_ptr;
+}
+
+/*
+ * Resize allocated memory.
+ */
+void *
+rte_realloc(void *ptr, size_t size, unsigned int align)
+{
+ return rte_realloc_socket(ptr, size, align, SOCKET_ID_ANY);
+}
+
+int
+rte_malloc_validate(const void *ptr, size_t *size)
+{
+ const struct malloc_elem *elem = malloc_elem_from_data(ptr);
+ if (!malloc_elem_cookies_ok(elem))
+ return -1;
+ if (size != NULL)
+ *size = elem->size - elem->pad - MALLOC_ELEM_OVERHEAD;
+ return 0;
+}
+
+/*
+ * Function to retrieve data for heap on given socket
+ */
+int
+rte_malloc_get_socket_stats(int socket,
+ struct rte_malloc_socket_stats *socket_stats)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int heap_idx;
+
+ heap_idx = malloc_socket_to_heap_id(socket);
+ if (heap_idx < 0)
+ return -1;
+
+ return malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx],
+ socket_stats);
+}
+
+/*
+ * Function to dump contents of all heaps
+ */
+void
+rte_malloc_dump_heaps(FILE *f)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int idx;
+
+ for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
+ fprintf(f, "Heap id: %u\n", idx);
+ malloc_heap_dump(&mcfg->malloc_heaps[idx], f);
+ }
+}
+
+int
+rte_malloc_heap_get_socket(const char *name)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = NULL;
+ unsigned int idx;
+ int ret;
+
+ if (name == NULL ||
+ strnlen(name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_mcfg_mem_read_lock();
+ for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
+ struct malloc_heap *tmp = &mcfg->malloc_heaps[idx];
+
+ if (!strncmp(name, tmp->name, RTE_HEAP_NAME_MAX_LEN)) {
+ heap = tmp;
+ break;
+ }
+ }
+
+ if (heap != NULL) {
+ ret = heap->socket_id;
+ } else {
+ rte_errno = ENOENT;
+ ret = -1;
+ }
+ rte_mcfg_mem_read_unlock();
+
+ return ret;
+}
+
+int
+rte_malloc_heap_socket_is_external(int socket_id)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int idx;
+ int ret = -1;
+
+ if (socket_id == SOCKET_ID_ANY)
+ return 0;
+
+ rte_mcfg_mem_read_lock();
+ for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
+ struct malloc_heap *tmp = &mcfg->malloc_heaps[idx];
+
+ if ((int)tmp->socket_id == socket_id) {
+ /* external memory always has large socket ID's */
+ ret = tmp->socket_id >= RTE_MAX_NUMA_NODES;
+ break;
+ }
+ }
+ rte_mcfg_mem_read_unlock();
+
+ return ret;
+}
+
+/*
+ * Print stats on memory type. If type is NULL, info on all types is printed
+ */
+void
+rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int heap_id;
+ struct rte_malloc_socket_stats sock_stats;
+
+ /* Iterate through all initialised heaps */
+ for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) {
+ struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
+
+ malloc_heap_get_stats(heap, &sock_stats);
+
+ fprintf(f, "Heap id:%u\n", heap_id);
+ fprintf(f, "\tHeap name:%s\n", heap->name);
+ fprintf(f, "\tHeap_size:%zu,\n", sock_stats.heap_totalsz_bytes);
+ fprintf(f, "\tFree_size:%zu,\n", sock_stats.heap_freesz_bytes);
+ fprintf(f, "\tAlloc_size:%zu,\n", sock_stats.heap_allocsz_bytes);
+ fprintf(f, "\tGreatest_free_size:%zu,\n",
+ sock_stats.greatest_free_size);
+ fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count);
+ fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count);
+ }
+ return;
+}
+
+/*
+ * TODO: Set limit to memory that can be allocated to memory type
+ */
+int
+rte_malloc_set_limit(__rte_unused const char *type,
+ __rte_unused size_t max)
+{
+ return 0;
+}
+
+/*
+ * Return the IO address of a virtual address obtained through rte_malloc
+ */
+rte_iova_t
+rte_malloc_virt2iova(const void *addr)
+{
+ const struct rte_memseg *ms;
+ struct malloc_elem *elem = malloc_elem_from_data(addr);
+
+ if (elem == NULL)
+ return RTE_BAD_IOVA;
+
+ if (!elem->msl->external && rte_eal_iova_mode() == RTE_IOVA_VA)
+ return (uintptr_t) addr;
+
+ ms = rte_mem_virt2memseg(addr, elem->msl);
+ if (ms == NULL)
+ return RTE_BAD_IOVA;
+
+ if (ms->iova == RTE_BAD_IOVA)
+ return RTE_BAD_IOVA;
+
+ return ms->iova + RTE_PTR_DIFF(addr, ms->addr);
+}
+
+static struct malloc_heap *
+find_named_heap(const char *name)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int i;
+
+ for (i = 0; i < RTE_MAX_HEAPS; i++) {
+ struct malloc_heap *heap = &mcfg->malloc_heaps[i];
+
+ if (!strncmp(name, heap->name, RTE_HEAP_NAME_MAX_LEN))
+ return heap;
+ }
+ return NULL;
+}
+
+int
+rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len,
+ rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz)
+{
+ struct malloc_heap *heap = NULL;
+ struct rte_memseg_list *msl;
+ unsigned int n;
+ int ret;
+
+ if (heap_name == NULL || va_addr == NULL ||
+ page_sz == 0 || !rte_is_power_of_2(page_sz) ||
+ RTE_ALIGN(len, page_sz) != len ||
+ !rte_is_aligned(va_addr, page_sz) ||
+ ((len / page_sz) != n_pages && iova_addrs != NULL) ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_mcfg_mem_write_lock();
+
+ /* find our heap */
+ heap = find_named_heap(heap_name);
+ if (heap == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ if (heap->socket_id < RTE_MAX_NUMA_NODES) {
+ /* cannot add memory to internal heaps */
+ rte_errno = EPERM;
+ ret = -1;
+ goto unlock;
+ }
+ n = len / page_sz;
+
+ msl = malloc_heap_create_external_seg(va_addr, iova_addrs, n, page_sz,
+ heap_name, heap->socket_id);
+ if (msl == NULL) {
+ ret = -1;
+ goto unlock;
+ }
+
+ rte_spinlock_lock(&heap->lock);
+ ret = malloc_heap_add_external_memory(heap, msl);
+ msl->heap = 1; /* mark it as heap segment */
+ rte_spinlock_unlock(&heap->lock);
+
+unlock:
+ rte_mcfg_mem_write_unlock();
+
+ return ret;
+}
+
+int
+rte_malloc_heap_memory_remove(const char *heap_name, void *va_addr, size_t len)
+{
+ struct malloc_heap *heap = NULL;
+ struct rte_memseg_list *msl;
+ int ret;
+
+ if (heap_name == NULL || va_addr == NULL || len == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_mcfg_mem_write_lock();
+ /* find our heap */
+ heap = find_named_heap(heap_name);
+ if (heap == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ if (heap->socket_id < RTE_MAX_NUMA_NODES) {
+ /* cannot remove memory from internal heaps */
+ rte_errno = EPERM;
+ ret = -1;
+ goto unlock;
+ }
+
+ msl = malloc_heap_find_external_seg(va_addr, len);
+ if (msl == NULL) {
+ ret = -1;
+ goto unlock;
+ }
+
+ rte_spinlock_lock(&heap->lock);
+ ret = malloc_heap_remove_external_memory(heap, va_addr, len);
+ rte_spinlock_unlock(&heap->lock);
+ if (ret != 0)
+ goto unlock;
+
+ ret = malloc_heap_destroy_external_seg(msl);
+
+unlock:
+ rte_mcfg_mem_write_unlock();
+
+ return ret;
+}
+
+static int
+sync_memory(const char *heap_name, void *va_addr, size_t len, bool attach)
+{
+ struct malloc_heap *heap = NULL;
+ struct rte_memseg_list *msl;
+ int ret;
+
+ if (heap_name == NULL || va_addr == NULL || len == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_mcfg_mem_read_lock();
+
+ /* find our heap */
+ heap = find_named_heap(heap_name);
+ if (heap == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ /* we shouldn't be able to sync to internal heaps */
+ if (heap->socket_id < RTE_MAX_NUMA_NODES) {
+ rte_errno = EPERM;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* find corresponding memseg list to sync to */
+ msl = malloc_heap_find_external_seg(va_addr, len);
+ if (msl == NULL) {
+ ret = -1;
+ goto unlock;
+ }
+
+ if (attach) {
+ ret = rte_fbarray_attach(&msl->memseg_arr);
+ if (ret == 0) {
+ /* notify all subscribers that a new memory area was
+ * added.
+ */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC,
+ va_addr, len);
+ } else {
+ ret = -1;
+ goto unlock;
+ }
+ } else {
+ /* notify all subscribers that a memory area is about to
+ * be removed.
+ */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ msl->base_va, msl->len);
+ ret = rte_fbarray_detach(&msl->memseg_arr);
+ if (ret < 0) {
+ ret = -1;
+ goto unlock;
+ }
+ }
+unlock:
+ rte_mcfg_mem_read_unlock();
+ return ret;
+}
+
+int
+rte_malloc_heap_memory_attach(const char *heap_name, void *va_addr, size_t len)
+{
+ return sync_memory(heap_name, va_addr, len, true);
+}
+
+int
+rte_malloc_heap_memory_detach(const char *heap_name, void *va_addr, size_t len)
+{
+ return sync_memory(heap_name, va_addr, len, false);
+}
+
+int
+rte_malloc_heap_create(const char *heap_name)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = NULL;
+ int i, ret;
+
+ if (heap_name == NULL ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ /* check if there is space in the heap list, or if heap with this name
+ * already exists.
+ */
+ rte_mcfg_mem_write_lock();
+
+ for (i = 0; i < RTE_MAX_HEAPS; i++) {
+ struct malloc_heap *tmp = &mcfg->malloc_heaps[i];
+ /* existing heap */
+ if (strncmp(heap_name, tmp->name,
+ RTE_HEAP_NAME_MAX_LEN) == 0) {
+ RTE_LOG(ERR, EAL, "Heap %s already exists\n",
+ heap_name);
+ rte_errno = EEXIST;
+ ret = -1;
+ goto unlock;
+ }
+ /* empty heap */
+ if (strnlen(tmp->name, RTE_HEAP_NAME_MAX_LEN) == 0) {
+ heap = tmp;
+ break;
+ }
+ }
+ if (heap == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot create new heap: no space\n");
+ rte_errno = ENOSPC;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* we're sure that we can create a new heap, so do it */
+ ret = malloc_heap_create(heap, heap_name);
+unlock:
+ rte_mcfg_mem_write_unlock();
+
+ return ret;
+}
+
+int
+rte_malloc_heap_destroy(const char *heap_name)
+{
+ struct malloc_heap *heap = NULL;
+ int ret;
+
+ if (heap_name == NULL ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_mcfg_mem_write_lock();
+
+ /* start from non-socket heaps */
+ heap = find_named_heap(heap_name);
+ if (heap == NULL) {
+ RTE_LOG(ERR, EAL, "Heap %s not found\n", heap_name);
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ /* we shouldn't be able to destroy internal heaps */
+ if (heap->socket_id < RTE_MAX_NUMA_NODES) {
+ rte_errno = EPERM;
+ ret = -1;
+ goto unlock;
+ }
+ /* sanity checks done, now we can destroy the heap */
+ rte_spinlock_lock(&heap->lock);
+ ret = malloc_heap_destroy(heap);
+
+ /* if we failed, lock is still active */
+ if (ret < 0)
+ rte_spinlock_unlock(&heap->lock);
+unlock:
+ rte_mcfg_mem_write_unlock();
+
+ return ret;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/rte_random.c b/src/spdk/dpdk/lib/librte_eal/common/rte_random.c
new file mode 100644
index 000000000..b7a089ac4
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/rte_random.c
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Ericsson AB
+ */
+
+#ifdef RTE_MACHINE_CPUFLAG_RDSEED
+#include <x86intrin.h>
+#endif
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <rte_branch_prediction.h>
+#include <rte_cycles.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_random.h>
+
+struct rte_rand_state {
+ uint64_t z1;
+ uint64_t z2;
+ uint64_t z3;
+ uint64_t z4;
+ uint64_t z5;
+} __rte_cache_aligned;
+
+static struct rte_rand_state rand_states[RTE_MAX_LCORE];
+
+static uint32_t
+__rte_rand_lcg32(uint32_t *seed)
+{
+ *seed = 1103515245U * *seed + 12345U;
+
+ return *seed;
+}
+
+static uint64_t
+__rte_rand_lcg64(uint32_t *seed)
+{
+ uint64_t low;
+ uint64_t high;
+
+ /* A 64-bit LCG would have been much cleaner, but good
+ * multiplier/increments for such seem hard to come by.
+ */
+
+ low = __rte_rand_lcg32(seed);
+ high = __rte_rand_lcg32(seed);
+
+ return low | (high << 32);
+}
+
+static uint64_t
+__rte_rand_lfsr258_gen_seed(uint32_t *seed, uint64_t min_value)
+{
+ uint64_t res;
+
+ res = __rte_rand_lcg64(seed);
+
+ if (res < min_value)
+ res += min_value;
+
+ return res;
+}
+
+static void
+__rte_srand_lfsr258(uint64_t seed, struct rte_rand_state *state)
+{
+ uint32_t lcg_seed;
+
+ lcg_seed = (uint32_t)(seed ^ (seed >> 32));
+
+ state->z1 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 2UL);
+ state->z2 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 512UL);
+ state->z3 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 4096UL);
+ state->z4 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 131072UL);
+ state->z5 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 8388608UL);
+}
+
+void
+rte_srand(uint64_t seed)
+{
+ unsigned int lcore_id;
+
+ /* add lcore_id to seed to avoid having the same sequence */
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+ __rte_srand_lfsr258(seed + lcore_id, &rand_states[lcore_id]);
+}
+
+static __rte_always_inline uint64_t
+__rte_rand_lfsr258_comp(uint64_t z, uint64_t a, uint64_t b, uint64_t c,
+ uint64_t d)
+{
+ return ((z & c) << d) ^ (((z << a) ^ z) >> b);
+}
+
+/* Based on L’Ecuyer, P.: Tables of maximally equidistributed combined
+ * LFSR generators.
+ */
+
+static __rte_always_inline uint64_t
+__rte_rand_lfsr258(struct rte_rand_state *state)
+{
+ state->z1 = __rte_rand_lfsr258_comp(state->z1, 1UL, 53UL,
+ 18446744073709551614UL, 10UL);
+ state->z2 = __rte_rand_lfsr258_comp(state->z2, 24UL, 50UL,
+ 18446744073709551104UL, 5UL);
+ state->z3 = __rte_rand_lfsr258_comp(state->z3, 3UL, 23UL,
+ 18446744073709547520UL, 29UL);
+ state->z4 = __rte_rand_lfsr258_comp(state->z4, 5UL, 24UL,
+ 18446744073709420544UL, 23UL);
+ state->z5 = __rte_rand_lfsr258_comp(state->z5, 3UL, 33UL,
+ 18446744073701163008UL, 8UL);
+
+ return state->z1 ^ state->z2 ^ state->z3 ^ state->z4 ^ state->z5;
+}
+
+static __rte_always_inline
+struct rte_rand_state *__rte_rand_get_state(void)
+{
+ unsigned int lcore_id;
+
+ lcore_id = rte_lcore_id();
+
+ if (unlikely(lcore_id == LCORE_ID_ANY))
+ lcore_id = rte_get_master_lcore();
+
+ return &rand_states[lcore_id];
+}
+
+uint64_t
+rte_rand(void)
+{
+ struct rte_rand_state *state;
+
+ state = __rte_rand_get_state();
+
+ return __rte_rand_lfsr258(state);
+}
+
+uint64_t
+rte_rand_max(uint64_t upper_bound)
+{
+ struct rte_rand_state *state;
+ uint8_t ones;
+ uint8_t leading_zeros;
+ uint64_t mask = ~((uint64_t)0);
+ uint64_t res;
+
+ if (unlikely(upper_bound < 2))
+ return 0;
+
+ state = __rte_rand_get_state();
+
+ ones = __builtin_popcountll(upper_bound);
+
+ /* Handle power-of-2 upper_bound as a special case, since it
+ * has no bias issues.
+ */
+ if (unlikely(ones == 1))
+ return __rte_rand_lfsr258(state) & (upper_bound - 1);
+
+ /* The approach to avoiding bias is to create a mask that
+ * stretches beyond the request value range, and up to the
+ * next power-of-2. In case the masked generated random value
+ * is equal to or greater than the upper bound, just discard
+ * the value and generate a new one.
+ */
+
+ leading_zeros = __builtin_clzll(upper_bound);
+ mask >>= leading_zeros;
+
+ do {
+ res = __rte_rand_lfsr258(state) & mask;
+ } while (unlikely(res >= upper_bound));
+
+ return res;
+}
+
+static uint64_t
+__rte_random_initial_seed(void)
+{
+#ifdef RTE_LIBEAL_USE_GETENTROPY
+ int ge_rc;
+ uint64_t ge_seed;
+
+ ge_rc = getentropy(&ge_seed, sizeof(ge_seed));
+
+ if (ge_rc == 0)
+ return ge_seed;
+#endif
+#ifdef RTE_MACHINE_CPUFLAG_RDSEED
+ unsigned int rdseed_low;
+ unsigned int rdseed_high;
+
+ /* first fallback: rdseed instruction, if available */
+ if (_rdseed32_step(&rdseed_low) == 1 &&
+ _rdseed32_step(&rdseed_high) == 1)
+ return (uint64_t)rdseed_low | ((uint64_t)rdseed_high << 32);
+#endif
+ /* second fallback: seed using rdtsc */
+ return rte_get_tsc_cycles();
+}
+
+RTE_INIT(rte_rand_init)
+{
+ uint64_t seed;
+
+ seed = __rte_random_initial_seed();
+
+ rte_srand(seed);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/rte_reciprocal.c b/src/spdk/dpdk/lib/librte_eal/common/rte_reciprocal.c
new file mode 100644
index 000000000..42dfa44eb
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/rte_reciprocal.c
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Cavium, Inc
+ * Copyright(c) Hannes Frederic Sowa
+ * All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+
+#include <rte_common.h>
+
+#include "rte_reciprocal.h"
+
+struct rte_reciprocal rte_reciprocal_value(uint32_t d)
+{
+ struct rte_reciprocal R;
+ uint64_t m;
+ int l;
+
+ l = rte_fls_u32(d - 1);
+ m = ((1ULL << 32) * ((1ULL << l) - d));
+ m /= d;
+
+ ++m;
+ R.m = m;
+ R.sh1 = RTE_MIN(l, 1);
+ R.sh2 = RTE_MAX(l - 1, 0);
+
+ return R;
+}
+
+/*
+ * Code taken from Hacker's Delight:
+ * http://www.hackersdelight.org/hdcodetxt/divlu.c.txt
+ * License permits inclusion here per:
+ * http://www.hackersdelight.org/permissions.htm
+ */
+static uint64_t
+divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r)
+{
+ const uint64_t b = (1ULL << 32); /* Number base (16 bits). */
+ uint64_t un1, un0, /* Norm. dividend LSD's. */
+ vn1, vn0, /* Norm. divisor digits. */
+ q1, q0, /* Quotient digits. */
+ un64, un21, un10, /* Dividend digit pairs. */
+ rhat; /* A remainder. */
+ int s; /* Shift amount for norm. */
+
+ /* If overflow, set rem. to an impossible value. */
+ if (u1 >= v) {
+ if (r != NULL)
+ *r = (uint64_t) -1;
+ return (uint64_t) -1;
+ }
+
+ /* Count leading zeros. */
+ s = __builtin_clzll(v);
+ if (s > 0) {
+ v = v << s;
+ un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
+ un10 = u0 << s;
+ } else {
+
+ un64 = u1 | u0;
+ un10 = u0;
+ }
+
+ vn1 = v >> 32;
+ vn0 = v & 0xFFFFFFFF;
+
+ un1 = un10 >> 32;
+ un0 = un10 & 0xFFFFFFFF;
+
+ q1 = un64/vn1;
+ rhat = un64 - q1*vn1;
+again1:
+ if (q1 >= b || q1*vn0 > b*rhat + un1) {
+ q1 = q1 - 1;
+ rhat = rhat + vn1;
+ if (rhat < b)
+ goto again1;
+ }
+
+ un21 = un64*b + un1 - q1*v;
+
+ q0 = un21/vn1;
+ rhat = un21 - q0*vn1;
+again2:
+ if (q0 >= b || q0*vn0 > b*rhat + un0) {
+ q0 = q0 - 1;
+ rhat = rhat + vn1;
+ if (rhat < b)
+ goto again2;
+ }
+
+ if (r != NULL)
+ *r = (un21*b + un0 - q0*v) >> s;
+ return q1*b + q0;
+}
+
+struct rte_reciprocal_u64
+rte_reciprocal_value_u64(uint64_t d)
+{
+ struct rte_reciprocal_u64 R;
+ uint64_t m;
+ uint64_t r;
+ int l;
+
+ l = 63 - __builtin_clzll(d);
+
+ m = divide_128_div_64_to_64((1ULL << l), 0, d, &r) << 1;
+ if (r << 1 < r || r << 1 >= d)
+ m++;
+ m = (1ULL << l) - d ? m + 1 : 1;
+ R.m = m;
+
+ R.sh1 = l > 1 ? 1 : l;
+ R.sh2 = (l > 0) ? l : 0;
+ R.sh2 -= R.sh2 && (m == 1) ? 1 : 0;
+
+ return R;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/common/rte_service.c b/src/spdk/dpdk/lib/librte_eal/common/rte_service.c
new file mode 100644
index 000000000..6123a2124
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/common/rte_service.c
@@ -0,0 +1,919 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <string.h>
+
+#include <rte_compat.h>
+#include <rte_service.h>
+#include <rte_service_component.h>
+
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_cycles.h>
+#include <rte_atomic.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_spinlock.h>
+
+#include "eal_private.h"
+
+#define RTE_SERVICE_NUM_MAX 64
+
+#define SERVICE_F_REGISTERED (1 << 0)
+#define SERVICE_F_STATS_ENABLED (1 << 1)
+#define SERVICE_F_START_CHECK (1 << 2)
+
+/* runstates for services and lcores, denoting if they are active or not */
+#define RUNSTATE_STOPPED 0
+#define RUNSTATE_RUNNING 1
+
+/* internal representation of a service */
+struct rte_service_spec_impl {
+ /* public part of the struct */
+ struct rte_service_spec spec;
+
+ /* spin lock that when set indicates a service core is currently
+ * running this service callback. When not set, a core may take the
+ * lock and then run the service callback.
+ */
+ rte_spinlock_t execute_lock;
+
+ /* API set/get-able variables */
+ int8_t app_runstate;
+ int8_t comp_runstate;
+ uint8_t internal_flags;
+
+ /* per service statistics */
+ /* Indicates how many cores the service is mapped to run on.
+ * It does not indicate the number of cores the service is running
+ * on currently.
+ */
+ uint32_t num_mapped_cores;
+ uint64_t calls;
+ uint64_t cycles_spent;
+} __rte_cache_aligned;
+
+/* the internal values of a service core */
+struct core_state {
+ /* map of services IDs are run on this core */
+ uint64_t service_mask;
+ uint8_t runstate; /* running or stopped */
+ uint8_t is_service_core; /* set if core is currently a service core */
+ uint8_t service_active_on_lcore[RTE_SERVICE_NUM_MAX];
+ uint64_t loops;
+ uint64_t calls_per_service[RTE_SERVICE_NUM_MAX];
+} __rte_cache_aligned;
+
+static uint32_t rte_service_count;
+static struct rte_service_spec_impl *rte_services;
+static struct core_state *lcore_states;
+static uint32_t rte_service_library_initialized;
+
+int32_t
+rte_service_init(void)
+{
+ if (rte_service_library_initialized) {
+ RTE_LOG(NOTICE, EAL,
+ "service library init() called, init flag %d\n",
+ rte_service_library_initialized);
+ return -EALREADY;
+ }
+
+ rte_services = rte_calloc("rte_services", RTE_SERVICE_NUM_MAX,
+ sizeof(struct rte_service_spec_impl),
+ RTE_CACHE_LINE_SIZE);
+ if (!rte_services) {
+ RTE_LOG(ERR, EAL, "error allocating rte services array\n");
+ goto fail_mem;
+ }
+
+ lcore_states = rte_calloc("rte_service_core_states", RTE_MAX_LCORE,
+ sizeof(struct core_state), RTE_CACHE_LINE_SIZE);
+ if (!lcore_states) {
+ RTE_LOG(ERR, EAL, "error allocating core states array\n");
+ goto fail_mem;
+ }
+
+ int i;
+ int count = 0;
+ struct rte_config *cfg = rte_eal_get_configuration();
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (lcore_config[i].core_role == ROLE_SERVICE) {
+ if ((unsigned int)i == cfg->master_lcore)
+ continue;
+ rte_service_lcore_add(i);
+ count++;
+ }
+ }
+
+ rte_service_library_initialized = 1;
+ return 0;
+fail_mem:
+ rte_free(rte_services);
+ rte_free(lcore_states);
+ return -ENOMEM;
+}
+
+void
+rte_service_finalize(void)
+{
+ if (!rte_service_library_initialized)
+ return;
+
+ rte_service_lcore_reset_all();
+ rte_eal_mp_wait_lcore();
+
+ rte_free(rte_services);
+ rte_free(lcore_states);
+
+ rte_service_library_initialized = 0;
+}
+
+/* returns 1 if service is registered and has not been unregistered
+ * Returns 0 if service never registered, or has been unregistered
+ */
+static inline int
+service_valid(uint32_t id)
+{
+ return !!(rte_services[id].internal_flags & SERVICE_F_REGISTERED);
+}
+
+static struct rte_service_spec_impl *
+service_get(uint32_t id)
+{
+ return &rte_services[id];
+}
+
+/* validate ID and retrieve service pointer, or return error value */
+#define SERVICE_VALID_GET_OR_ERR_RET(id, service, retval) do { \
+ if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id)) \
+ return retval; \
+ service = &rte_services[id]; \
+} while (0)
+
+/* returns 1 if statistics should be collected for service
+ * Returns 0 if statistics should not be collected for service
+ */
+static inline int
+service_stats_enabled(struct rte_service_spec_impl *impl)
+{
+ return !!(impl->internal_flags & SERVICE_F_STATS_ENABLED);
+}
+
+static inline int
+service_mt_safe(struct rte_service_spec_impl *s)
+{
+ return !!(s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE);
+}
+
+int32_t
+rte_service_set_stats_enable(uint32_t id, int32_t enabled)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, 0);
+
+ if (enabled)
+ s->internal_flags |= SERVICE_F_STATS_ENABLED;
+ else
+ s->internal_flags &= ~(SERVICE_F_STATS_ENABLED);
+
+ return 0;
+}
+
+int32_t
+rte_service_set_runstate_mapped_check(uint32_t id, int32_t enabled)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, 0);
+
+ if (enabled)
+ s->internal_flags |= SERVICE_F_START_CHECK;
+ else
+ s->internal_flags &= ~(SERVICE_F_START_CHECK);
+
+ return 0;
+}
+
+uint32_t
+rte_service_get_count(void)
+{
+ return rte_service_count;
+}
+
+int32_t
+rte_service_get_by_name(const char *name, uint32_t *service_id)
+{
+ if (!service_id)
+ return -EINVAL;
+
+ int i;
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ if (service_valid(i) &&
+ strcmp(name, rte_services[i].spec.name) == 0) {
+ *service_id = i;
+ return 0;
+ }
+ }
+
+ return -ENODEV;
+}
+
+const char *
+rte_service_get_name(uint32_t id)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, 0);
+ return s->spec.name;
+}
+
+int32_t
+rte_service_probe_capability(uint32_t id, uint32_t capability)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+ return !!(s->spec.capabilities & capability);
+}
+
+int32_t
+rte_service_component_register(const struct rte_service_spec *spec,
+ uint32_t *id_ptr)
+{
+ uint32_t i;
+ int32_t free_slot = -1;
+
+ if (spec->callback == NULL || strlen(spec->name) == 0)
+ return -EINVAL;
+
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ if (!service_valid(i)) {
+ free_slot = i;
+ break;
+ }
+ }
+
+ if ((free_slot < 0) || (i == RTE_SERVICE_NUM_MAX))
+ return -ENOSPC;
+
+ struct rte_service_spec_impl *s = &rte_services[free_slot];
+ s->spec = *spec;
+ s->internal_flags |= SERVICE_F_REGISTERED | SERVICE_F_START_CHECK;
+
+ rte_service_count++;
+
+ if (id_ptr)
+ *id_ptr = free_slot;
+
+ return 0;
+}
+
+int32_t
+rte_service_component_unregister(uint32_t id)
+{
+ uint32_t i;
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ rte_service_count--;
+
+ s->internal_flags &= ~(SERVICE_F_REGISTERED);
+
+ /* clear the run-bit in all cores */
+ for (i = 0; i < RTE_MAX_LCORE; i++)
+ lcore_states[i].service_mask &= ~(UINT64_C(1) << id);
+
+ memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl));
+
+ return 0;
+}
+
+int32_t
+rte_service_component_runstate_set(uint32_t id, uint32_t runstate)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ /* comp_runstate act as the guard variable. Use store-release
+ * memory order. This synchronizes with load-acquire in
+ * service_run and service_runstate_get function.
+ */
+ if (runstate)
+ __atomic_store_n(&s->comp_runstate, RUNSTATE_RUNNING,
+ __ATOMIC_RELEASE);
+ else
+ __atomic_store_n(&s->comp_runstate, RUNSTATE_STOPPED,
+ __ATOMIC_RELEASE);
+
+ return 0;
+}
+
+int32_t
+rte_service_runstate_set(uint32_t id, uint32_t runstate)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ /* app_runstate act as the guard variable. Use store-release
+ * memory order. This synchronizes with load-acquire in
+ * service_run runstate_get function.
+ */
+ if (runstate)
+ __atomic_store_n(&s->app_runstate, RUNSTATE_RUNNING,
+ __ATOMIC_RELEASE);
+ else
+ __atomic_store_n(&s->app_runstate, RUNSTATE_STOPPED,
+ __ATOMIC_RELEASE);
+
+ return 0;
+}
+
+int32_t
+rte_service_runstate_get(uint32_t id)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ /* comp_runstate and app_runstate act as the guard variables.
+ * Use load-acquire memory order. This synchronizes with
+ * store-release in service state set functions.
+ */
+ if (__atomic_load_n(&s->comp_runstate, __ATOMIC_ACQUIRE) ==
+ RUNSTATE_RUNNING &&
+ __atomic_load_n(&s->app_runstate, __ATOMIC_ACQUIRE) ==
+ RUNSTATE_RUNNING) {
+ int check_disabled = !(s->internal_flags &
+ SERVICE_F_START_CHECK);
+ int lcore_mapped = (__atomic_load_n(&s->num_mapped_cores,
+ __ATOMIC_RELAXED) > 0);
+
+ return (check_disabled | lcore_mapped);
+ } else
+ return 0;
+
+}
+
+static inline void
+service_runner_do_callback(struct rte_service_spec_impl *s,
+ struct core_state *cs, uint32_t service_idx)
+{
+ void *userdata = s->spec.callback_userdata;
+
+ if (service_stats_enabled(s)) {
+ uint64_t start = rte_rdtsc();
+ s->spec.callback(userdata);
+ uint64_t end = rte_rdtsc();
+ s->cycles_spent += end - start;
+ cs->calls_per_service[service_idx]++;
+ s->calls++;
+ } else
+ s->spec.callback(userdata);
+}
+
+
+/* Expects the service 's' is valid. */
+static int32_t
+service_run(uint32_t i, struct core_state *cs, uint64_t service_mask,
+ struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe)
+{
+ if (!s)
+ return -EINVAL;
+
+ /* comp_runstate and app_runstate act as the guard variables.
+ * Use load-acquire memory order. This synchronizes with
+ * store-release in service state set functions.
+ */
+ if (__atomic_load_n(&s->comp_runstate, __ATOMIC_ACQUIRE) !=
+ RUNSTATE_RUNNING ||
+ __atomic_load_n(&s->app_runstate, __ATOMIC_ACQUIRE) !=
+ RUNSTATE_RUNNING ||
+ !(service_mask & (UINT64_C(1) << i))) {
+ cs->service_active_on_lcore[i] = 0;
+ return -ENOEXEC;
+ }
+
+ cs->service_active_on_lcore[i] = 1;
+
+ if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) {
+ if (!rte_spinlock_trylock(&s->execute_lock))
+ return -EBUSY;
+
+ service_runner_do_callback(s, cs, i);
+ rte_spinlock_unlock(&s->execute_lock);
+ } else
+ service_runner_do_callback(s, cs, i);
+
+ return 0;
+}
+
+int32_t
+rte_service_may_be_active(uint32_t id)
+{
+ uint32_t ids[RTE_MAX_LCORE] = {0};
+ int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE);
+ int i;
+
+ if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id))
+ return -EINVAL;
+
+ for (i = 0; i < lcore_count; i++) {
+ if (lcore_states[i].service_active_on_lcore[id])
+ return 1;
+ }
+
+ return 0;
+}
+
+int32_t
+rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe)
+{
+ struct core_state *cs = &lcore_states[rte_lcore_id()];
+ struct rte_service_spec_impl *s;
+
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ /* Increment num_mapped_cores to reflect that this core is
+ * now mapped capable of running the service.
+ */
+ __atomic_add_fetch(&s->num_mapped_cores, 1, __ATOMIC_RELAXED);
+
+ int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe);
+
+ __atomic_sub_fetch(&s->num_mapped_cores, 1, __ATOMIC_RELAXED);
+
+ return ret;
+}
+
+static int32_t
+service_runner_func(void *arg)
+{
+ RTE_SET_USED(arg);
+ uint32_t i;
+ const int lcore = rte_lcore_id();
+ struct core_state *cs = &lcore_states[lcore];
+
+ /* runstate act as the guard variable. Use load-acquire
+ * memory order here to synchronize with store-release
+ * in runstate update functions.
+ */
+ while (__atomic_load_n(&cs->runstate, __ATOMIC_ACQUIRE) ==
+ RUNSTATE_RUNNING) {
+ const uint64_t service_mask = cs->service_mask;
+
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ if (!service_valid(i))
+ continue;
+ /* return value ignored as no change to code flow */
+ service_run(i, cs, service_mask, service_get(i), 1);
+ }
+
+ cs->loops++;
+ }
+
+ lcore_config[lcore].state = WAIT;
+
+ return 0;
+}
+
+int32_t
+rte_service_lcore_count(void)
+{
+ int32_t count = 0;
+ uint32_t i;
+ for (i = 0; i < RTE_MAX_LCORE; i++)
+ count += lcore_states[i].is_service_core;
+ return count;
+}
+
+int32_t
+rte_service_lcore_list(uint32_t array[], uint32_t n)
+{
+ uint32_t count = rte_service_lcore_count();
+ if (count > n)
+ return -ENOMEM;
+
+ if (!array)
+ return -EINVAL;
+
+ uint32_t i;
+ uint32_t idx = 0;
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ struct core_state *cs = &lcore_states[i];
+ if (cs->is_service_core) {
+ array[idx] = i;
+ idx++;
+ }
+ }
+
+ return count;
+}
+
+int32_t
+rte_service_lcore_count_services(uint32_t lcore)
+{
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ struct core_state *cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ return __builtin_popcountll(cs->service_mask);
+}
+
+int32_t
+rte_service_start_with_defaults(void)
+{
+ /* create a default mapping from cores to services, then start the
+ * services to make them transparent to unaware applications.
+ */
+ uint32_t i;
+ int ret;
+ uint32_t count = rte_service_get_count();
+
+ int32_t lcore_iter = 0;
+ uint32_t ids[RTE_MAX_LCORE] = {0};
+ int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE);
+
+ if (lcore_count == 0)
+ return -ENOTSUP;
+
+ for (i = 0; (int)i < lcore_count; i++)
+ rte_service_lcore_start(ids[i]);
+
+ for (i = 0; i < count; i++) {
+ /* do 1:1 core mapping here, with each service getting
+ * assigned a single core by default. Adding multiple services
+ * should multiplex to a single core, or 1:1 if there are the
+ * same amount of services as service-cores
+ */
+ ret = rte_service_map_lcore_set(i, ids[lcore_iter], 1);
+ if (ret)
+ return -ENODEV;
+
+ lcore_iter++;
+ if (lcore_iter >= lcore_count)
+ lcore_iter = 0;
+
+ ret = rte_service_runstate_set(i, 1);
+ if (ret)
+ return -ENOEXEC;
+ }
+
+ return 0;
+}
+
+static int32_t
+service_update(uint32_t sid, uint32_t lcore, uint32_t *set, uint32_t *enabled)
+{
+ /* validate ID, or return error value */
+ if (sid >= RTE_SERVICE_NUM_MAX || !service_valid(sid) ||
+ lcore >= RTE_MAX_LCORE || !lcore_states[lcore].is_service_core)
+ return -EINVAL;
+
+ uint64_t sid_mask = UINT64_C(1) << sid;
+ if (set) {
+ uint64_t lcore_mapped = lcore_states[lcore].service_mask &
+ sid_mask;
+
+ if (*set && !lcore_mapped) {
+ lcore_states[lcore].service_mask |= sid_mask;
+ __atomic_add_fetch(&rte_services[sid].num_mapped_cores,
+ 1, __ATOMIC_RELAXED);
+ }
+ if (!*set && lcore_mapped) {
+ lcore_states[lcore].service_mask &= ~(sid_mask);
+ __atomic_sub_fetch(&rte_services[sid].num_mapped_cores,
+ 1, __ATOMIC_RELAXED);
+ }
+ }
+
+ if (enabled)
+ *enabled = !!(lcore_states[lcore].service_mask & (sid_mask));
+
+ return 0;
+}
+
+int32_t
+rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled)
+{
+ uint32_t on = enabled > 0;
+ return service_update(id, lcore, &on, 0);
+}
+
+int32_t
+rte_service_map_lcore_get(uint32_t id, uint32_t lcore)
+{
+ uint32_t enabled;
+ int ret = service_update(id, lcore, 0, &enabled);
+ if (ret == 0)
+ return enabled;
+ return ret;
+}
+
+static void
+set_lcore_state(uint32_t lcore, int32_t state)
+{
+ /* mark core state in hugepage backed config */
+ struct rte_config *cfg = rte_eal_get_configuration();
+ cfg->lcore_role[lcore] = state;
+
+ /* mark state in process local lcore_config */
+ lcore_config[lcore].core_role = state;
+
+ /* update per-lcore optimized state tracking */
+ lcore_states[lcore].is_service_core = (state == ROLE_SERVICE);
+}
+
+int32_t
+rte_service_lcore_reset_all(void)
+{
+ /* loop over cores, reset all to mask 0 */
+ uint32_t i;
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (lcore_states[i].is_service_core) {
+ lcore_states[i].service_mask = 0;
+ set_lcore_state(i, ROLE_RTE);
+ /* runstate act as guard variable Use
+ * store-release memory order here to synchronize
+ * with load-acquire in runstate read functions.
+ */
+ __atomic_store_n(&lcore_states[i].runstate,
+ RUNSTATE_STOPPED, __ATOMIC_RELEASE);
+ }
+ }
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++)
+ __atomic_store_n(&rte_services[i].num_mapped_cores, 0,
+ __ATOMIC_RELAXED);
+
+ return 0;
+}
+
+int32_t
+rte_service_lcore_add(uint32_t lcore)
+{
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+ if (lcore_states[lcore].is_service_core)
+ return -EALREADY;
+
+ set_lcore_state(lcore, ROLE_SERVICE);
+
+ /* ensure that after adding a core the mask and state are defaults */
+ lcore_states[lcore].service_mask = 0;
+ /* Use store-release memory order here to synchronize with
+ * load-acquire in runstate read functions.
+ */
+ __atomic_store_n(&lcore_states[lcore].runstate, RUNSTATE_STOPPED,
+ __ATOMIC_RELEASE);
+
+ return rte_eal_wait_lcore(lcore);
+}
+
+int32_t
+rte_service_lcore_del(uint32_t lcore)
+{
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ struct core_state *cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -EINVAL;
+
+ /* runstate act as the guard variable. Use load-acquire
+ * memory order here to synchronize with store-release
+ * in runstate update functions.
+ */
+ if (__atomic_load_n(&cs->runstate, __ATOMIC_ACQUIRE) !=
+ RUNSTATE_STOPPED)
+ return -EBUSY;
+
+ set_lcore_state(lcore, ROLE_RTE);
+
+ rte_smp_wmb();
+ return 0;
+}
+
+int32_t
+rte_service_lcore_start(uint32_t lcore)
+{
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ struct core_state *cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -EINVAL;
+
+ /* runstate act as the guard variable. Use load-acquire
+ * memory order here to synchronize with store-release
+ * in runstate update functions.
+ */
+ if (__atomic_load_n(&cs->runstate, __ATOMIC_ACQUIRE) ==
+ RUNSTATE_RUNNING)
+ return -EALREADY;
+
+ /* set core to run state first, and then launch otherwise it will
+ * return immediately as runstate keeps it in the service poll loop
+ */
+ /* Use load-acquire memory order here to synchronize with
+ * store-release in runstate update functions.
+ */
+ __atomic_store_n(&cs->runstate, RUNSTATE_RUNNING, __ATOMIC_RELEASE);
+
+ int ret = rte_eal_remote_launch(service_runner_func, 0, lcore);
+ /* returns -EBUSY if the core is already launched, 0 on success */
+ return ret;
+}
+
+int32_t
+rte_service_lcore_stop(uint32_t lcore)
+{
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ /* runstate act as the guard variable. Use load-acquire
+ * memory order here to synchronize with store-release
+ * in runstate update functions.
+ */
+ if (__atomic_load_n(&lcore_states[lcore].runstate, __ATOMIC_ACQUIRE) ==
+ RUNSTATE_STOPPED)
+ return -EALREADY;
+
+ uint32_t i;
+ uint64_t service_mask = lcore_states[lcore].service_mask;
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ int32_t enabled = service_mask & (UINT64_C(1) << i);
+ int32_t service_running = rte_service_runstate_get(i);
+ int32_t only_core = (1 ==
+ __atomic_load_n(&rte_services[i].num_mapped_cores,
+ __ATOMIC_RELAXED));
+
+ /* if the core is mapped, and the service is running, and this
+ * is the only core that is mapped, the service would cease to
+ * run if this core stopped, so fail instead.
+ */
+ if (enabled && service_running && only_core)
+ return -EBUSY;
+ }
+
+ /* Use store-release memory order here to synchronize with
+ * load-acquire in runstate read functions.
+ */
+ __atomic_store_n(&lcore_states[lcore].runstate, RUNSTATE_STOPPED,
+ __ATOMIC_RELEASE);
+
+ return 0;
+}
+
+int32_t
+rte_service_attr_get(uint32_t id, uint32_t attr_id, uint64_t *attr_value)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ if (!attr_value)
+ return -EINVAL;
+
+ switch (attr_id) {
+ case RTE_SERVICE_ATTR_CYCLES:
+ *attr_value = s->cycles_spent;
+ return 0;
+ case RTE_SERVICE_ATTR_CALL_COUNT:
+ *attr_value = s->calls;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+int32_t
+rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
+ uint64_t *attr_value)
+{
+ struct core_state *cs;
+
+ if (lcore >= RTE_MAX_LCORE || !attr_value)
+ return -EINVAL;
+
+ cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ switch (attr_id) {
+ case RTE_SERVICE_LCORE_ATTR_LOOPS:
+ *attr_value = cs->loops;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static void
+service_dump_one(FILE *f, struct rte_service_spec_impl *s, uint32_t reset)
+{
+ /* avoid divide by zero */
+ int calls = 1;
+ if (s->calls != 0)
+ calls = s->calls;
+
+ if (reset) {
+ s->cycles_spent = 0;
+ s->calls = 0;
+ return;
+ }
+
+ if (f == NULL)
+ return;
+
+ fprintf(f, " %s: stats %d\tcalls %"PRIu64"\tcycles %"
+ PRIu64"\tavg: %"PRIu64"\n",
+ s->spec.name, service_stats_enabled(s), s->calls,
+ s->cycles_spent, s->cycles_spent / calls);
+}
+
+int32_t
+rte_service_attr_reset_all(uint32_t id)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ int reset = 1;
+ service_dump_one(NULL, s, reset);
+ return 0;
+}
+
+int32_t
+rte_service_lcore_attr_reset_all(uint32_t lcore)
+{
+ struct core_state *cs;
+
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ cs->loops = 0;
+
+ return 0;
+}
+
+static void
+service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset)
+{
+ uint32_t i;
+ struct core_state *cs = &lcore_states[lcore];
+
+ fprintf(f, "%02d\t", lcore);
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ if (!service_valid(i))
+ continue;
+ fprintf(f, "%"PRIu64"\t", cs->calls_per_service[i]);
+ if (reset)
+ cs->calls_per_service[i] = 0;
+ }
+ fprintf(f, "\n");
+}
+
+int32_t
+rte_service_dump(FILE *f, uint32_t id)
+{
+ uint32_t i;
+ int print_one = (id != UINT32_MAX);
+
+ /* print only the specified service */
+ if (print_one) {
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+ fprintf(f, "Service %s Summary\n", s->spec.name);
+ uint32_t reset = 0;
+ service_dump_one(f, s, reset);
+ return 0;
+ }
+
+ /* print all services, as UINT32_MAX was passed as id */
+ fprintf(f, "Services Summary\n");
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ if (!service_valid(i))
+ continue;
+ uint32_t reset = 0;
+ service_dump_one(f, &rte_services[i], reset);
+ }
+
+ fprintf(f, "Service Cores Summary\n");
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (lcore_config[i].core_role != ROLE_SERVICE)
+ continue;
+
+ uint32_t reset = 0;
+ service_dump_calls_per_lcore(f, i, reset);
+ }
+
+ return 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/freebsd/Makefile b/src/spdk/dpdk/lib/librte_eal/freebsd/Makefile
new file mode 100644
index 000000000..af95386d4
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/freebsd/Makefile
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2019 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+LIB = librte_eal.a
+
+ARCH_DIR ?= $(RTE_ARCH)
+VPATH += $(RTE_SDK)/lib/librte_eal/$(ARCH_DIR)
+VPATH += $(RTE_SDK)/lib/librte_eal/common
+
+CFLAGS += -I$(SRCDIR)/include
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/include
+CFLAGS += $(WERROR_FLAGS) -O3
+
+LDLIBS += -lexecinfo
+LDLIBS += -lpthread
+LDLIBS += -lgcc_s
+LDLIBS += -lrte_kvargs
+LDLIBS += -lrte_telemetry
+
+EXPORT_MAP := ../rte_eal_version.map
+
+# specific to freebsd exec-env
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) := eal.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_cpuflags.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_memory.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_hugepage_info.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_thread.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_debug.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_memalloc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_lcore.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_timer.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_interrupts.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_alarm.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_dev.c
+
+# from common dir
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_lcore.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_timer.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_memzone.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_log.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_launch.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_mcfg.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_memalloc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_memory.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_tailqs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_errno.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_cpuflags.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_hypervisor.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_string_fns.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_hexdump.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_devargs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_class.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_bus.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_dev.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_options.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_thread.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_proc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_fbarray.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_uuid.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_trace.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_trace_ctf.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_trace_points.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_common_trace_utils.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += rte_malloc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += hotplug_mp.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += malloc_elem.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += malloc_heap.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += malloc_mp.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += rte_keepalive.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += rte_service.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += rte_random.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += rte_reciprocal.c
+
+# from arch dir
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += rte_cpuflags.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += rte_hypervisor.c
+SRCS-$(CONFIG_RTE_ARCH_X86) += rte_spinlock.c
+SRCS-y += rte_cycles.c
+
+CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_eal_thread.o += -Wno-return-type
+CFLAGS_eal_hpet.o += -Wno-return-type
+endif
+
+INC := rte_os.h
+
+SYMLINK-$(CONFIG_RTE_EXEC_ENV_FREEBSD)-include := $(addprefix include/,$(INC))
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/src/spdk/dpdk/lib/librte_eal/freebsd/eal.c b/src/spdk/dpdk/lib/librte_eal/freebsd/eal.c
new file mode 100644
index 000000000..c41f265fa
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/freebsd/eal.c
@@ -0,0 +1,1105 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation.
+ * Copyright(c) 2014 6WIND S.A.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <syslog.h>
+#include <getopt.h>
+#include <sys/file.h>
+#include <stddef.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+#include <sys/stat.h>
+
+#include <rte_compat.h>
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_errno.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_service_component.h>
+#include <rte_log.h>
+#include <rte_random.h>
+#include <rte_cycles.h>
+#include <rte_string_fns.h>
+#include <rte_cpuflags.h>
+#include <rte_interrupts.h>
+#include <rte_bus.h>
+#include <rte_dev.h>
+#include <rte_devargs.h>
+#include <rte_version.h>
+#include <rte_vfio.h>
+#include <rte_atomic.h>
+#include <malloc_heap.h>
+#include <rte_telemetry.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+#include "eal_hugepages.h"
+#include "eal_options.h"
+#include "eal_memcfg.h"
+#include "eal_trace.h"
+
+#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)
+
+/* Allow the application to print its usage message too if set */
+static rte_usage_hook_t rte_application_usage_hook = NULL;
+/* early configuration structure, when memory config is not mmapped */
+static struct rte_mem_config early_mem_config;
+
+/* define fd variable here, because file needs to be kept open for the
+ * duration of the program, as we hold a write lock on it in the primary proc */
+static int mem_cfg_fd = -1;
+
+static struct flock wr_lock = {
+ .l_type = F_WRLCK,
+ .l_whence = SEEK_SET,
+ .l_start = offsetof(struct rte_mem_config, memsegs),
+ .l_len = sizeof(early_mem_config.memsegs),
+};
+
+/* Address of global and public configuration */
+static struct rte_config rte_config = {
+ .mem_config = &early_mem_config,
+};
+
+/* internal configuration (per-core) */
+struct lcore_config lcore_config[RTE_MAX_LCORE];
+
+/* internal configuration */
+struct internal_config internal_config;
+
+/* used by rte_rdtsc() */
+int rte_cycles_vmware_tsc_map;
+
+/* platform-specific runtime dir */
+static char runtime_dir[PATH_MAX];
+
+static const char *default_runtime_dir = "/var/run";
+
+int
+eal_create_runtime_dir(void)
+{
+ const char *directory = default_runtime_dir;
+ const char *xdg_runtime_dir = getenv("XDG_RUNTIME_DIR");
+ const char *fallback = "/tmp";
+ char tmp[PATH_MAX];
+ int ret;
+
+ if (getuid() != 0) {
+ /* try XDG path first, fall back to /tmp */
+ if (xdg_runtime_dir != NULL)
+ directory = xdg_runtime_dir;
+ else
+ directory = fallback;
+ }
+ /* create DPDK subdirectory under runtime dir */
+ ret = snprintf(tmp, sizeof(tmp), "%s/dpdk", directory);
+ if (ret < 0 || ret == sizeof(tmp)) {
+ RTE_LOG(ERR, EAL, "Error creating DPDK runtime path name\n");
+ return -1;
+ }
+
+ /* create prefix-specific subdirectory under DPDK runtime dir */
+ ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s",
+ tmp, eal_get_hugefile_prefix());
+ if (ret < 0 || ret == sizeof(runtime_dir)) {
+ RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n");
+ return -1;
+ }
+
+ /* create the path if it doesn't exist. no "mkdir -p" here, so do it
+ * step by step.
+ */
+ ret = mkdir(tmp, 0700);
+ if (ret < 0 && errno != EEXIST) {
+ RTE_LOG(ERR, EAL, "Error creating '%s': %s\n",
+ tmp, strerror(errno));
+ return -1;
+ }
+
+ ret = mkdir(runtime_dir, 0700);
+ if (ret < 0 && errno != EEXIST) {
+ RTE_LOG(ERR, EAL, "Error creating '%s': %s\n",
+ runtime_dir, strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+eal_clean_runtime_dir(void)
+{
+ /* FreeBSD doesn't need this implemented for now, because, unlike Linux,
+ * FreeBSD doesn't create per-process files, so no need to clean up.
+ */
+ return 0;
+}
+
+
+const char *
+rte_eal_get_runtime_dir(void)
+{
+ return runtime_dir;
+}
+
+/* Return user provided mbuf pool ops name */
+const char *
+rte_eal_mbuf_user_pool_ops(void)
+{
+ return internal_config.user_mbuf_pool_ops_name;
+}
+
+/* Return a pointer to the configuration structure */
+struct rte_config *
+rte_eal_get_configuration(void)
+{
+ return &rte_config;
+}
+
+enum rte_iova_mode
+rte_eal_iova_mode(void)
+{
+ return rte_eal_get_configuration()->iova_mode;
+}
+
+/* parse a sysfs (or other) file containing one integer value */
+int
+eal_parse_sysfs_value(const char *filename, unsigned long *val)
+{
+ FILE *f;
+ char buf[BUFSIZ];
+ char *end = NULL;
+
+ if ((f = fopen(filename, "r")) == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+ __func__, filename);
+ return -1;
+ }
+
+ if (fgets(buf, sizeof(buf), f) == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+ __func__, filename);
+ fclose(f);
+ return -1;
+ }
+ *val = strtoul(buf, &end, 0);
+ if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
+ RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
+ __func__, filename);
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+ return 0;
+}
+
+
+/* create memory configuration in shared/mmap memory. Take out
+ * a write lock on the memsegs, so we can auto-detect primary/secondary.
+ * This means we never close the file while running (auto-close on exit).
+ * We also don't lock the whole file, so that in future we can use read-locks
+ * on other parts, e.g. memzones, to detect if there are running secondary
+ * processes. */
+static int
+rte_eal_config_create(void)
+{
+ size_t page_sz = sysconf(_SC_PAGE_SIZE);
+ size_t cfg_len = sizeof(*rte_config.mem_config);
+ size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz);
+ void *rte_mem_cfg_addr, *mapped_mem_cfg_addr;
+ int retval;
+
+ const char *pathname = eal_runtime_config_path();
+
+ if (internal_config.no_shconf)
+ return 0;
+
+ /* map the config before base address so that we don't waste a page */
+ if (internal_config.base_virtaddr != 0)
+ rte_mem_cfg_addr = (void *)
+ RTE_ALIGN_FLOOR(internal_config.base_virtaddr -
+ sizeof(struct rte_mem_config), page_sz);
+ else
+ rte_mem_cfg_addr = NULL;
+
+ if (mem_cfg_fd < 0){
+ mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0600);
+ if (mem_cfg_fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n",
+ pathname);
+ return -1;
+ }
+ }
+
+ retval = ftruncate(mem_cfg_fd, cfg_len);
+ if (retval < 0){
+ close(mem_cfg_fd);
+ mem_cfg_fd = -1;
+ RTE_LOG(ERR, EAL, "Cannot resize '%s' for rte_mem_config\n",
+ pathname);
+ return -1;
+ }
+
+ retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
+ if (retval < 0){
+ close(mem_cfg_fd);
+ mem_cfg_fd = -1;
+ RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another primary "
+ "process running?\n", pathname);
+ return -1;
+ }
+
+ /* reserve space for config */
+ rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr,
+ &cfg_len_aligned, page_sz, 0, 0);
+ if (rte_mem_cfg_addr == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n");
+ close(mem_cfg_fd);
+ mem_cfg_fd = -1;
+ return -1;
+ }
+
+ /* remap the actual file into the space we've just reserved */
+ mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr,
+ cfg_len_aligned, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0);
+ if (mapped_mem_cfg_addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n");
+ munmap(rte_mem_cfg_addr, cfg_len);
+ close(mem_cfg_fd);
+ mem_cfg_fd = -1;
+ return -1;
+ }
+
+ memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
+ rte_config.mem_config = rte_mem_cfg_addr;
+
+ /* store address of the config in the config itself so that secondary
+ * processes could later map the config into this exact location
+ */
+ rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
+
+ return 0;
+}
+
+/* attach to an existing shared memory config */
+static int
+rte_eal_config_attach(void)
+{
+ void *rte_mem_cfg_addr;
+ const char *pathname = eal_runtime_config_path();
+
+ if (internal_config.no_shconf)
+ return 0;
+
+ if (mem_cfg_fd < 0){
+ mem_cfg_fd = open(pathname, O_RDWR);
+ if (mem_cfg_fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n",
+ pathname);
+ return -1;
+ }
+ }
+
+ rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
+ PROT_READ, MAP_SHARED, mem_cfg_fd, 0);
+ /* don't close the fd here, it will be closed on reattach */
+ if (rte_mem_cfg_addr == MAP_FAILED) {
+ close(mem_cfg_fd);
+ mem_cfg_fd = -1;
+ RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+
+ rte_config.mem_config = rte_mem_cfg_addr;
+
+ return 0;
+}
+
+/* reattach the shared config at exact memory location primary process has it */
+static int
+rte_eal_config_reattach(void)
+{
+ struct rte_mem_config *mem_config;
+ void *rte_mem_cfg_addr;
+
+ if (internal_config.no_shconf)
+ return 0;
+
+ /* save the address primary process has mapped shared config to */
+ rte_mem_cfg_addr =
+ (void *)(uintptr_t)rte_config.mem_config->mem_cfg_addr;
+
+ /* unmap original config */
+ munmap(rte_config.mem_config, sizeof(struct rte_mem_config));
+
+ /* remap the config at proper address */
+ mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr,
+ sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED,
+ mem_cfg_fd, 0);
+ close(mem_cfg_fd);
+ mem_cfg_fd = -1;
+
+ if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) {
+ if (mem_config != MAP_FAILED) {
+ /* errno is stale, don't use */
+ RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config at [%p], got [%p]"
+ " - please use '--" OPT_BASE_VIRTADDR
+ "' option\n",
+ rte_mem_cfg_addr, mem_config);
+ munmap(mem_config, sizeof(struct rte_mem_config));
+ return -1;
+ }
+ RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+
+ rte_config.mem_config = mem_config;
+
+ return 0;
+}
+
+/* Detect if we are a primary or a secondary process */
+enum rte_proc_type_t
+eal_proc_type_detect(void)
+{
+ enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
+ const char *pathname = eal_runtime_config_path();
+
+ /* if there no shared config, there can be no secondary processes */
+ if (!internal_config.no_shconf) {
+ /* if we can open the file but not get a write-lock we are a
+ * secondary process. NOTE: if we get a file handle back, we
+ * keep that open and don't close it to prevent a race condition
+ * between multiple opens.
+ */
+ if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+ (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
+ ptype = RTE_PROC_SECONDARY;
+ }
+
+ RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
+ ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
+
+ return ptype;
+}
+
+/* Sets up rte_config structure with the pointer to shared memory config.*/
+static int
+rte_config_init(void)
+{
+ rte_config.process_type = internal_config.process_type;
+
+ switch (rte_config.process_type){
+ case RTE_PROC_PRIMARY:
+ if (rte_eal_config_create() < 0)
+ return -1;
+ eal_mcfg_update_from_internal();
+ break;
+ case RTE_PROC_SECONDARY:
+ if (rte_eal_config_attach() < 0)
+ return -1;
+ eal_mcfg_wait_complete();
+ if (eal_mcfg_check_version() < 0) {
+ RTE_LOG(ERR, EAL, "Primary and secondary process DPDK version mismatch\n");
+ return -1;
+ }
+ if (rte_eal_config_reattach() < 0)
+ return -1;
+ eal_mcfg_update_internal();
+ break;
+ case RTE_PROC_AUTO:
+ case RTE_PROC_INVALID:
+ RTE_LOG(ERR, EAL, "Invalid process type %d\n",
+ rte_config.process_type);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* display usage */
+static void
+eal_usage(const char *prgname)
+{
+ printf("\nUsage: %s ", prgname);
+ eal_common_usage();
+ /* Allow the application to print its usage message too if hook is set */
+ if ( rte_application_usage_hook ) {
+ printf("===== Application Usage =====\n\n");
+ rte_application_usage_hook(prgname);
+ }
+}
+
+/* Set a per-application usage message */
+rte_usage_hook_t
+rte_set_application_usage_hook( rte_usage_hook_t usage_func )
+{
+ rte_usage_hook_t old_func;
+
+ /* Will be NULL on the first call to denote the last usage routine. */
+ old_func = rte_application_usage_hook;
+ rte_application_usage_hook = usage_func;
+
+ return old_func;
+}
+
+static inline size_t
+eal_get_hugepage_mem_size(void)
+{
+ uint64_t size = 0;
+ unsigned i, j;
+
+ for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
+ struct hugepage_info *hpi = &internal_config.hugepage_info[i];
+ if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) {
+ for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
+ size += hpi->hugepage_sz * hpi->num_pages[j];
+ }
+ }
+ }
+
+ return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
+}
+
+/* Parse the arguments for --log-level only */
+static void
+eal_log_level_parse(int argc, char **argv)
+{
+ int opt;
+ char **argvopt;
+ int option_index;
+ const int old_optind = optind;
+ const int old_optopt = optopt;
+ const int old_optreset = optreset;
+ char * const old_optarg = optarg;
+
+ argvopt = argv;
+ optind = 1;
+ optreset = 1;
+
+ while ((opt = getopt_long(argc, argvopt, eal_short_options,
+ eal_long_options, &option_index)) != EOF) {
+
+ int ret;
+
+ /* getopt is not happy, stop right now */
+ if (opt == '?')
+ break;
+
+ ret = (opt == OPT_LOG_LEVEL_NUM) ?
+ eal_parse_common_option(opt, optarg, &internal_config) : 0;
+
+ /* common parser is not happy */
+ if (ret < 0)
+ break;
+ }
+
+ /* restore getopt lib */
+ optind = old_optind;
+ optopt = old_optopt;
+ optreset = old_optreset;
+ optarg = old_optarg;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+eal_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ const int old_optind = optind;
+ const int old_optopt = optopt;
+ const int old_optreset = optreset;
+ char * const old_optarg = optarg;
+
+ argvopt = argv;
+ optind = 1;
+ optreset = 1;
+
+ while ((opt = getopt_long(argc, argvopt, eal_short_options,
+ eal_long_options, &option_index)) != EOF) {
+
+ /* getopt didn't recognise the option */
+ if (opt == '?') {
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+
+ ret = eal_parse_common_option(opt, optarg, &internal_config);
+ /* common parser is not happy */
+ if (ret < 0) {
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ /* common parser handled this option */
+ if (ret == 0)
+ continue;
+
+ switch (opt) {
+ case OPT_MBUF_POOL_OPS_NAME_NUM:
+ {
+ char *ops_name = strdup(optarg);
+ if (ops_name == NULL)
+ RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n");
+ else {
+ /* free old ops name */
+ if (internal_config.user_mbuf_pool_ops_name !=
+ NULL)
+ free(internal_config.user_mbuf_pool_ops_name);
+
+ internal_config.user_mbuf_pool_ops_name =
+ ops_name;
+ }
+ break;
+ }
+ case 'h':
+ eal_usage(prgname);
+ exit(EXIT_SUCCESS);
+ default:
+ if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
+ RTE_LOG(ERR, EAL, "Option %c is not supported "
+ "on FreeBSD\n", opt);
+ } else if (opt >= OPT_LONG_MIN_NUM &&
+ opt < OPT_LONG_MAX_NUM) {
+ RTE_LOG(ERR, EAL, "Option %s is not supported "
+ "on FreeBSD\n",
+ eal_long_options[option_index].name);
+ } else {
+ RTE_LOG(ERR, EAL, "Option %d is not supported "
+ "on FreeBSD\n", opt);
+ }
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* create runtime data directory */
+ if (internal_config.no_shconf == 0 &&
+ eal_create_runtime_dir() < 0) {
+ RTE_LOG(ERR, EAL, "Cannot create runtime directory\n");
+ ret = -1;
+ goto out;
+ }
+
+ if (eal_adjust_config(&internal_config) != 0) {
+ ret = -1;
+ goto out;
+ }
+
+ /* sanity checks */
+ if (eal_check_common_options(&internal_config) != 0) {
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+ ret = optind-1;
+
+out:
+ /* restore getopt lib */
+ optind = old_optind;
+ optopt = old_optopt;
+ optreset = old_optreset;
+ optarg = old_optarg;
+
+ return ret;
+}
+
+static int
+check_socket(const struct rte_memseg_list *msl, void *arg)
+{
+ int *socket_id = arg;
+
+ if (msl->external)
+ return 0;
+
+ if (msl->socket_id == *socket_id && msl->memseg_arr.count != 0)
+ return 1;
+
+ return 0;
+}
+
+static void
+eal_check_mem_on_local_socket(void)
+{
+ int socket_id;
+
+ socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);
+
+ if (rte_memseg_list_walk(check_socket, &socket_id) == 0)
+ RTE_LOG(WARNING, EAL, "WARNING: Master core has no memory on local socket!\n");
+}
+
+
+static int
+sync_func(__rte_unused void *arg)
+{
+ return 0;
+}
+
+/* return non-zero if hugepages are enabled. */
+int rte_eal_has_hugepages(void)
+{
+ return !internal_config.no_hugetlbfs;
+}
+
+/* Abstraction for port I/0 privilege */
+int
+rte_eal_iopl_init(void)
+{
+ static int fd = -1;
+
+ if (fd < 0)
+ fd = open("/dev/io", O_RDWR);
+
+ if (fd < 0)
+ return -1;
+ /* keep fd open for iopl */
+ return 0;
+}
+
+static void rte_eal_init_alert(const char *msg)
+{
+ fprintf(stderr, "EAL: FATAL: %s\n", msg);
+ RTE_LOG(ERR, EAL, "%s\n", msg);
+}
+
+/* Launch threads, called at application init(). */
+int
+rte_eal_init(int argc, char **argv)
+{
+ int i, fctret, ret;
+ pthread_t thread_id;
+ static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
+ char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+ char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+ /* checks if the machine is adequate */
+ if (!rte_cpu_is_supported()) {
+ rte_eal_init_alert("unsupported cpu type.");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ if (!rte_atomic32_test_and_set(&run_once)) {
+ rte_eal_init_alert("already called initialization.");
+ rte_errno = EALREADY;
+ return -1;
+ }
+
+ thread_id = pthread_self();
+
+ eal_reset_internal_config(&internal_config);
+
+ /* clone argv to report out later in telemetry */
+ eal_save_args(argc, argv);
+
+ /* set log level as early as possible */
+ eal_log_level_parse(argc, argv);
+
+ if (rte_eal_cpu_init() < 0) {
+ rte_eal_init_alert("Cannot detect lcores.");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ fctret = eal_parse_args(argc, argv);
+ if (fctret < 0) {
+ rte_eal_init_alert("Invalid 'command line' arguments.");
+ rte_errno = EINVAL;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ /* FreeBSD always uses legacy memory model */
+ internal_config.legacy_mem = true;
+
+ if (eal_plugins_init() < 0) {
+ rte_eal_init_alert("Cannot init plugins");
+ rte_errno = EINVAL;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ if (eal_trace_init() < 0) {
+ rte_eal_init_alert("Cannot init trace");
+ rte_errno = EFAULT;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ if (eal_option_device_parse()) {
+ rte_errno = ENODEV;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ if (rte_config_init() < 0) {
+ rte_eal_init_alert("Cannot init config");
+ return -1;
+ }
+
+ if (rte_eal_intr_init() < 0) {
+ rte_eal_init_alert("Cannot init interrupt-handling thread");
+ return -1;
+ }
+
+ if (rte_eal_alarm_init() < 0) {
+ rte_eal_init_alert("Cannot init alarm");
+ /* rte_eal_alarm_init sets rte_errno on failure. */
+ return -1;
+ }
+
+ /* Put mp channel init before bus scan so that we can init the vdev
+ * bus through mp channel in the secondary process before the bus scan.
+ */
+ if (rte_mp_channel_init() < 0 && rte_errno != ENOTSUP) {
+ rte_eal_init_alert("failed to init mp channel");
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ rte_errno = EFAULT;
+ return -1;
+ }
+ }
+
+ if (rte_bus_scan()) {
+ rte_eal_init_alert("Cannot scan the buses for devices");
+ rte_errno = ENODEV;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ /* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */
+ if (internal_config.iova_mode == RTE_IOVA_DC) {
+ /* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */
+ enum rte_iova_mode iova_mode = rte_bus_get_iommu_class();
+
+ if (iova_mode == RTE_IOVA_DC)
+ iova_mode = RTE_IOVA_PA;
+ rte_eal_get_configuration()->iova_mode = iova_mode;
+ } else {
+ rte_eal_get_configuration()->iova_mode =
+ internal_config.iova_mode;
+ }
+
+ RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
+ rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");
+
+ if (internal_config.no_hugetlbfs == 0) {
+ /* rte_config isn't initialized yet */
+ ret = internal_config.process_type == RTE_PROC_PRIMARY ?
+ eal_hugepage_info_init() :
+ eal_hugepage_info_read();
+ if (ret < 0) {
+ rte_eal_init_alert("Cannot get hugepage information.");
+ rte_errno = EACCES;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+ }
+
+ if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
+ if (internal_config.no_hugetlbfs)
+ internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
+ else
+ internal_config.memory = eal_get_hugepage_mem_size();
+ }
+
+ if (internal_config.vmware_tsc_map == 1) {
+#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
+ rte_cycles_vmware_tsc_map = 1;
+ RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, "
+ "you must have monitor_control.pseudo_perfctr = TRUE\n");
+#else
+ RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because "
+ "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n");
+#endif
+ }
+
+ /* in secondary processes, memory init may allocate additional fbarrays
+ * not present in primary processes, so to avoid any potential issues,
+ * initialize memzones first.
+ */
+ if (rte_eal_memzone_init() < 0) {
+ rte_eal_init_alert("Cannot init memzone");
+ rte_errno = ENODEV;
+ return -1;
+ }
+
+ if (rte_eal_memory_init() < 0) {
+ rte_eal_init_alert("Cannot init memory");
+ rte_errno = ENOMEM;
+ return -1;
+ }
+
+ if (rte_eal_malloc_heap_init() < 0) {
+ rte_eal_init_alert("Cannot init malloc heap");
+ rte_errno = ENODEV;
+ return -1;
+ }
+
+ if (rte_eal_tailqs_init() < 0) {
+ rte_eal_init_alert("Cannot init tail queues for objects");
+ rte_errno = EFAULT;
+ return -1;
+ }
+
+ if (rte_eal_timer_init() < 0) {
+ rte_eal_init_alert("Cannot init HPET or TSC timers");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ eal_check_mem_on_local_socket();
+
+ eal_thread_init_master(rte_config.master_lcore);
+
+ ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+
+ RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
+ rte_config.master_lcore, thread_id, cpuset,
+ ret == 0 ? "" : "...");
+
+ RTE_LCORE_FOREACH_SLAVE(i) {
+
+ /*
+ * create communication pipes between master thread
+ * and children
+ */
+ if (pipe(lcore_config[i].pipe_master2slave) < 0)
+ rte_panic("Cannot create pipe\n");
+ if (pipe(lcore_config[i].pipe_slave2master) < 0)
+ rte_panic("Cannot create pipe\n");
+
+ lcore_config[i].state = WAIT;
+
+ /* create a thread for each lcore */
+ ret = pthread_create(&lcore_config[i].thread_id, NULL,
+ eal_thread_loop, NULL);
+ if (ret != 0)
+ rte_panic("Cannot create thread\n");
+
+ /* Set thread_name for aid in debugging. */
+ snprintf(thread_name, sizeof(thread_name),
+ "lcore-slave-%d", i);
+ rte_thread_setname(lcore_config[i].thread_id, thread_name);
+ }
+
+ /*
+ * Launch a dummy function on all slave lcores, so that master lcore
+ * knows they are all ready when this function returns.
+ */
+ rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
+ rte_eal_mp_wait_lcore();
+
+ /* initialize services so vdevs register service during bus_probe. */
+ ret = rte_service_init();
+ if (ret) {
+ rte_eal_init_alert("rte_service_init() failed");
+ rte_errno = ENOEXEC;
+ return -1;
+ }
+
+ /* Probe all the buses and devices/drivers on them */
+ if (rte_bus_probe()) {
+ rte_eal_init_alert("Cannot probe devices");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ /* initialize default service/lcore mappings and start running. Ignore
+ * -ENOTSUP, as it indicates no service coremask passed to EAL.
+ */
+ ret = rte_service_start_with_defaults();
+ if (ret < 0 && ret != -ENOTSUP) {
+ rte_errno = ENOEXEC;
+ return -1;
+ }
+
+ /*
+ * Clean up unused files in runtime directory. We do this at the end of
+ * init and not at the beginning because we want to clean stuff up
+ * whether we are primary or secondary process, but we cannot remove
+ * primary process' files because secondary should be able to run even
+ * if primary process is dead.
+ *
+ * In no_shconf mode, no runtime directory is created in the first
+ * place, so no cleanup needed.
+ */
+ if (!internal_config.no_shconf && eal_clean_runtime_dir() < 0) {
+ rte_eal_init_alert("Cannot clear runtime directory\n");
+ return -1;
+ }
+ if (!internal_config.no_telemetry) {
+ const char *error_str = NULL;
+ if (rte_telemetry_init(rte_eal_get_runtime_dir(),
+ &internal_config.ctrl_cpuset, &error_str)
+ != 0) {
+ rte_eal_init_alert(error_str);
+ return -1;
+ }
+ if (error_str != NULL)
+ RTE_LOG(NOTICE, EAL, "%s\n", error_str);
+ }
+
+ eal_mcfg_complete();
+
+ return fctret;
+}
+
+int
+rte_eal_cleanup(void)
+{
+ rte_service_finalize();
+ rte_mp_channel_cleanup();
+ rte_trace_save();
+ eal_trace_fini();
+ eal_cleanup_config(&internal_config);
+ return 0;
+}
+
+enum rte_proc_type_t
+rte_eal_process_type(void)
+{
+ return rte_config.process_type;
+}
+
+int rte_eal_has_pci(void)
+{
+ return !internal_config.no_pci;
+}
+
+int rte_eal_create_uio_dev(void)
+{
+ return internal_config.create_uio_dev;
+}
+
+enum rte_intr_mode
+rte_eal_vfio_intr_mode(void)
+{
+ return RTE_INTR_MODE_NONE;
+}
+
+int rte_vfio_setup_device(__rte_unused const char *sysfs_base,
+ __rte_unused const char *dev_addr,
+ __rte_unused int *vfio_dev_fd,
+ __rte_unused struct vfio_device_info *device_info)
+{
+ return -1;
+}
+
+int rte_vfio_release_device(__rte_unused const char *sysfs_base,
+ __rte_unused const char *dev_addr,
+ __rte_unused int fd)
+{
+ return -1;
+}
+
+int rte_vfio_enable(__rte_unused const char *modname)
+{
+ return -1;
+}
+
+int rte_vfio_is_enabled(__rte_unused const char *modname)
+{
+ return 0;
+}
+
+int rte_vfio_noiommu_is_enabled(void)
+{
+ return 0;
+}
+
+int rte_vfio_clear_group(__rte_unused int vfio_group_fd)
+{
+ return 0;
+}
+
+int
+rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
+ __rte_unused const char *dev_addr,
+ __rte_unused int *iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_get_container_fd(void)
+{
+ return -1;
+}
+
+int
+rte_vfio_get_group_fd(__rte_unused int iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_create(void)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_destroy(__rte_unused int container_fd)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_group_bind(__rte_unused int container_fd,
+ __rte_unused int iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_group_unbind(__rte_unused int container_fd,
+ __rte_unused int iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_dma_map(__rte_unused int container_fd,
+ __rte_unused uint64_t vaddr,
+ __rte_unused uint64_t iova,
+ __rte_unused uint64_t len)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_dma_unmap(__rte_unused int container_fd,
+ __rte_unused uint64_t vaddr,
+ __rte_unused uint64_t iova,
+ __rte_unused uint64_t len)
+{
+ return -1;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/freebsd/eal_alarm.c b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_alarm.c
new file mode 100644
index 000000000..c38b2e04f
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_alarm.c
@@ -0,0 +1,317 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <errno.h>
+
+#include <rte_alarm.h>
+#include <rte_cycles.h>
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_interrupts.h>
+#include <rte_spinlock.h>
+#include <rte_eal_trace.h>
+
+#include "eal_private.h"
+#include "eal_alarm_private.h"
+
+#define NS_PER_US 1000
+
+#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
+#else
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC
+#endif
+
+struct alarm_entry {
+ LIST_ENTRY(alarm_entry) next;
+ struct rte_intr_handle handle;
+ struct timespec time;
+ rte_eal_alarm_callback cb_fn;
+ void *cb_arg;
+ volatile uint8_t executing;
+ volatile pthread_t executing_id;
+};
+
+static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
+static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER;
+
+static struct rte_intr_handle intr_handle = {.fd = -1 };
+static void eal_alarm_callback(void *arg);
+
+int
+rte_eal_alarm_init(void)
+{
+ intr_handle.type = RTE_INTR_HANDLE_ALARM;
+
+ /* on FreeBSD, timers don't use fd's, and their identifiers are stored
+ * in separate namespace from fd's, so using any value is OK. however,
+ * EAL interrupts handler expects fd's to be unique, so use an actual fd
+ * to guarantee unique timer identifier.
+ */
+ intr_handle.fd = open("/dev/zero", O_RDONLY);
+
+ return 0;
+}
+
+static inline int
+timespec_cmp(const struct timespec *now, const struct timespec *at)
+{
+ if (now->tv_sec < at->tv_sec)
+ return -1;
+ if (now->tv_sec > at->tv_sec)
+ return 1;
+ if (now->tv_nsec < at->tv_nsec)
+ return -1;
+ if (now->tv_nsec > at->tv_nsec)
+ return 1;
+ return 0;
+}
+
+static inline uint64_t
+diff_ns(struct timespec *now, struct timespec *at)
+{
+ uint64_t now_ns, at_ns;
+
+ if (timespec_cmp(now, at) >= 0)
+ return 0;
+
+ now_ns = now->tv_sec * NS_PER_S + now->tv_nsec;
+ at_ns = at->tv_sec * NS_PER_S + at->tv_nsec;
+
+ return at_ns - now_ns;
+}
+
+int
+eal_alarm_get_timeout_ns(uint64_t *val)
+{
+ struct alarm_entry *ap;
+ struct timespec now;
+
+ if (clock_gettime(CLOCK_TYPE_ID, &now) < 0)
+ return -1;
+
+ if (LIST_EMPTY(&alarm_list))
+ return -1;
+
+ ap = LIST_FIRST(&alarm_list);
+
+ *val = diff_ns(&now, &ap->time);
+
+ return 0;
+}
+
+static int
+unregister_current_callback(void)
+{
+ struct alarm_entry *ap;
+ int ret = 0;
+
+ if (!LIST_EMPTY(&alarm_list)) {
+ ap = LIST_FIRST(&alarm_list);
+
+ do {
+ ret = rte_intr_callback_unregister(&intr_handle,
+ eal_alarm_callback, &ap->time);
+ } while (ret == -EAGAIN);
+ }
+
+ return ret;
+}
+
+static int
+register_first_callback(void)
+{
+ struct alarm_entry *ap;
+ int ret = 0;
+
+ if (!LIST_EMPTY(&alarm_list)) {
+ ap = LIST_FIRST(&alarm_list);
+
+ /* register a new callback */
+ ret = rte_intr_callback_register(&intr_handle,
+ eal_alarm_callback, &ap->time);
+ }
+ return ret;
+}
+
+static void
+eal_alarm_callback(void *arg __rte_unused)
+{
+ struct timespec now;
+ struct alarm_entry *ap;
+
+ rte_spinlock_lock(&alarm_list_lk);
+ ap = LIST_FIRST(&alarm_list);
+
+ if (clock_gettime(CLOCK_TYPE_ID, &now) < 0)
+ return;
+
+ while (ap != NULL && timespec_cmp(&now, &ap->time) >= 0) {
+ ap->executing = 1;
+ ap->executing_id = pthread_self();
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ ap->cb_fn(ap->cb_arg);
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ LIST_REMOVE(ap, next);
+ free(ap);
+
+ ap = LIST_FIRST(&alarm_list);
+ }
+
+ /* timer has been deleted from the kqueue, so recreate it if needed */
+ register_first_callback();
+
+ rte_spinlock_unlock(&alarm_list_lk);
+}
+
+
+int
+rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
+{
+ struct alarm_entry *ap, *new_alarm;
+ struct timespec now;
+ uint64_t ns;
+ int ret = 0;
+
+ /* check parameters, also ensure us won't cause a uint64_t overflow */
+ if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL)
+ return -EINVAL;
+
+ new_alarm = calloc(1, sizeof(*new_alarm));
+ if (new_alarm == NULL)
+ return -ENOMEM;
+
+ /* use current time to calculate absolute time of alarm */
+ clock_gettime(CLOCK_TYPE_ID, &now);
+
+ ns = us * NS_PER_US;
+
+ new_alarm->cb_fn = cb_fn;
+ new_alarm->cb_arg = cb_arg;
+ new_alarm->time.tv_nsec = (now.tv_nsec + ns) % NS_PER_S;
+ new_alarm->time.tv_sec = now.tv_sec + ((now.tv_nsec + ns) / NS_PER_S);
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ if (LIST_EMPTY(&alarm_list))
+ LIST_INSERT_HEAD(&alarm_list, new_alarm, next);
+ else {
+ LIST_FOREACH(ap, &alarm_list, next) {
+ if (timespec_cmp(&new_alarm->time, &ap->time) < 0) {
+ LIST_INSERT_BEFORE(ap, new_alarm, next);
+ break;
+ }
+ if (LIST_NEXT(ap, next) == NULL) {
+ LIST_INSERT_AFTER(ap, new_alarm, next);
+ break;
+ }
+ }
+ }
+
+ /* re-register first callback just in case */
+ register_first_callback();
+
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ rte_eal_trace_alarm_set(us, cb_fn, cb_arg, ret);
+ return ret;
+}
+
+int
+rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
+{
+ struct alarm_entry *ap, *ap_prev;
+ int count = 0;
+ int err = 0;
+ int executing;
+
+ if (!cb_fn) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ do {
+ executing = 0;
+ rte_spinlock_lock(&alarm_list_lk);
+ /* remove any matches at the start of the list */
+ while (1) {
+ ap = LIST_FIRST(&alarm_list);
+ if (ap == NULL)
+ break;
+ if (cb_fn != ap->cb_fn)
+ break;
+ if (cb_arg != ap->cb_arg && cb_arg != (void *) -1)
+ break;
+ if (ap->executing == 0) {
+ LIST_REMOVE(ap, next);
+ free(ap);
+ count++;
+ } else {
+ /* If calling from other context, mark that
+ * alarm is executing so loop can spin till it
+ * finish. Otherwise we are trying to cancel
+ * ourselves - mark it by EINPROGRESS.
+ */
+ if (pthread_equal(ap->executing_id,
+ pthread_self()) == 0)
+ executing++;
+ else
+ err = EINPROGRESS;
+
+ break;
+ }
+ }
+ ap_prev = ap;
+
+ /* now go through list, removing entries not at start */
+ LIST_FOREACH(ap, &alarm_list, next) {
+ /* this won't be true first time through */
+ if (cb_fn == ap->cb_fn &&
+ (cb_arg == (void *)-1 ||
+ cb_arg == ap->cb_arg)) {
+ if (ap->executing == 0) {
+ LIST_REMOVE(ap, next);
+ free(ap);
+ count++;
+ ap = ap_prev;
+ } else if (pthread_equal(ap->executing_id,
+ pthread_self()) == 0) {
+ executing++;
+ } else {
+ err = EINPROGRESS;
+ }
+ }
+ ap_prev = ap;
+ }
+ rte_spinlock_unlock(&alarm_list_lk);
+ } while (executing != 0);
+
+ if (count == 0 && err == 0)
+ rte_errno = ENOENT;
+ else if (err)
+ rte_errno = err;
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ /* unregister if no alarms left, otherwise re-register first */
+ if (LIST_EMPTY(&alarm_list))
+ unregister_current_callback();
+ else
+ register_first_callback();
+
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ rte_eal_trace_alarm_cancel(cb_fn, cb_arg, count);
+ return count;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/freebsd/eal_alarm_private.h b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_alarm_private.h
new file mode 100644
index 000000000..65c711518
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_alarm_private.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef EAL_ALARM_PRIVATE_H
+#define EAL_ALARM_PRIVATE_H
+
+#include <inttypes.h>
+
+/*
+ * FreeBSD needs a back-channel communication mechanism between interrupt and
+ * alarm thread, because on FreeBSD, timer period is set up inside the interrupt
+ * API and not inside alarm API like on Linux.
+ */
+
+int
+eal_alarm_get_timeout_ns(uint64_t *val);
+
+#endif // EAL_ALARM_PRIVATE_H
diff --git a/src/spdk/dpdk/lib/librte_eal/freebsd/eal_cpuflags.c b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_cpuflags.c
new file mode 100644
index 000000000..69b161ea6
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_cpuflags.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#include <rte_common.h>
+#include <rte_cpuflags.h>
+
+unsigned long
+rte_cpu_getauxval(unsigned long type __rte_unused)
+{
+ /* not implemented */
+ return 0;
+}
+
+int
+rte_cpu_strcmp_auxval(unsigned long type __rte_unused,
+ const char *str __rte_unused)
+{
+ /* not implemented */
+ return -1;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/freebsd/eal_debug.c b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_debug.c
new file mode 100644
index 000000000..5d92500bf
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_debug.c
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifdef RTE_BACKTRACE
+#include <execinfo.h>
+#endif
+#include <stdarg.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_common.h>
+#include <rte_eal.h>
+
+#define BACKTRACE_SIZE 256
+
+/* dump the stack of the calling core */
+void rte_dump_stack(void)
+{
+#ifdef RTE_BACKTRACE
+ void *func[BACKTRACE_SIZE];
+ char **symb = NULL;
+ int size;
+
+ size = backtrace(func, BACKTRACE_SIZE);
+ symb = backtrace_symbols(func, size);
+
+ if (symb == NULL)
+ return;
+
+ while (size > 0) {
+ rte_log(RTE_LOG_ERR, RTE_LOGTYPE_EAL,
+ "%d: [%s]\n", size, symb[size - 1]);
+ size --;
+ }
+
+ free(symb);
+#endif /* RTE_BACKTRACE */
+}
+
+/* not implemented in this environment */
+void rte_dump_registers(void)
+{
+ return;
+}
+
+/* call abort(), it will generate a coredump if enabled */
+void __rte_panic(const char *funcname, const char *format, ...)
+{
+ va_list ap;
+
+ rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname);
+ va_start(ap, format);
+ rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap);
+ va_end(ap);
+ rte_dump_stack();
+ rte_dump_registers();
+ abort();
+}
+
+/*
+ * Like rte_panic this terminates the application. However, no traceback is
+ * provided and no core-dump is generated.
+ */
+void
+rte_exit(int exit_code, const char *format, ...)
+{
+ va_list ap;
+
+ if (exit_code != 0)
+ RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\n"
+ " Cause: ", exit_code);
+
+ va_start(ap, format);
+ rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap);
+ va_end(ap);
+
+#ifndef RTE_EAL_ALWAYS_PANIC_ON_ERROR
+ if (rte_eal_cleanup() != 0)
+ RTE_LOG(CRIT, EAL,
+ "EAL could not release all resources\n");
+ exit(exit_code);
+#else
+ rte_dump_stack();
+ rte_dump_registers();
+ abort();
+#endif
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/freebsd/eal_dev.c b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_dev.c
new file mode 100644
index 000000000..8e06e7089
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_dev.c
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_log.h>
+#include <rte_compat.h>
+#include <rte_dev.h>
+
+int
+rte_dev_event_monitor_start(void)
+{
+ RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n");
+ return -1;
+}
+
+int
+rte_dev_event_monitor_stop(void)
+{
+ RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n");
+ return -1;
+}
+
+int
+rte_dev_hotplug_handle_enable(void)
+{
+ RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n");
+ return -1;
+}
+
+int
+rte_dev_hotplug_handle_disable(void)
+{
+ RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n");
+ return -1;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/freebsd/eal_hugepage_info.c b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_hugepage_info.c
new file mode 100644
index 000000000..32012e142
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_hugepage_info.c
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#include <rte_log.h>
+#include <fcntl.h>
+#include "eal_hugepages.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+
+#define CONTIGMEM_DEV "/dev/contigmem"
+
+/*
+ * Uses mmap to create a shared memory area for storage of data
+ * Used in this file to store the hugepage file map on disk
+ */
+static void *
+map_shared_memory(const char *filename, const size_t mem_size, int flags)
+{
+ void *retval;
+ int fd = open(filename, flags, 0600);
+ if (fd < 0)
+ return NULL;
+ if (ftruncate(fd, mem_size) < 0) {
+ close(fd);
+ return NULL;
+ }
+ retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ close(fd);
+ return retval;
+}
+
+static void *
+open_shared_memory(const char *filename, const size_t mem_size)
+{
+ return map_shared_memory(filename, mem_size, O_RDWR);
+}
+
+static void *
+create_shared_memory(const char *filename, const size_t mem_size)
+{
+ return map_shared_memory(filename, mem_size, O_RDWR | O_CREAT);
+}
+
+/*
+ * No hugepage support on freebsd, but we dummy it, using contigmem driver
+ */
+int
+eal_hugepage_info_init(void)
+{
+ size_t sysctl_size;
+ int num_buffers, fd, error;
+ int64_t buffer_size;
+ /* re-use the linux "internal config" structure for our memory data */
+ struct hugepage_info *hpi = &internal_config.hugepage_info[0];
+ struct hugepage_info *tmp_hpi;
+ unsigned int i;
+
+ internal_config.num_hugepage_sizes = 1;
+
+ sysctl_size = sizeof(num_buffers);
+ error = sysctlbyname("hw.contigmem.num_buffers", &num_buffers,
+ &sysctl_size, NULL, 0);
+
+ if (error != 0) {
+ RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.num_buffers\n");
+ return -1;
+ }
+
+ sysctl_size = sizeof(buffer_size);
+ error = sysctlbyname("hw.contigmem.buffer_size", &buffer_size,
+ &sysctl_size, NULL, 0);
+
+ if (error != 0) {
+ RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.buffer_size\n");
+ return -1;
+ }
+
+ fd = open(CONTIGMEM_DEV, O_RDWR);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "could not open "CONTIGMEM_DEV"\n");
+ return -1;
+ }
+
+ if (buffer_size >= 1<<30)
+ RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dGB\n",
+ num_buffers, (int)(buffer_size>>30));
+ else if (buffer_size >= 1<<20)
+ RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dMB\n",
+ num_buffers, (int)(buffer_size>>20));
+ else
+ RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dKB\n",
+ num_buffers, (int)(buffer_size>>10));
+
+ strlcpy(hpi->hugedir, CONTIGMEM_DEV, sizeof(hpi->hugedir));
+ hpi->hugepage_sz = buffer_size;
+ hpi->num_pages[0] = num_buffers;
+ hpi->lock_descriptor = fd;
+
+ /* for no shared files mode, do not create shared memory config */
+ if (internal_config.no_shconf)
+ return 0;
+
+ tmp_hpi = create_shared_memory(eal_hugepage_info_path(),
+ sizeof(internal_config.hugepage_info));
+ if (tmp_hpi == NULL ) {
+ RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
+ return -1;
+ }
+
+ memcpy(tmp_hpi, hpi, sizeof(internal_config.hugepage_info));
+
+ /* we've copied file descriptors along with everything else, but they
+ * will be invalid in secondary process, so overwrite them
+ */
+ for (i = 0; i < RTE_DIM(internal_config.hugepage_info); i++) {
+ struct hugepage_info *tmp = &tmp_hpi[i];
+ tmp->lock_descriptor = -1;
+ }
+
+ if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+/* copy stuff from shared info into internal config */
+int
+eal_hugepage_info_read(void)
+{
+ struct hugepage_info *hpi = &internal_config.hugepage_info[0];
+ struct hugepage_info *tmp_hpi;
+
+ internal_config.num_hugepage_sizes = 1;
+
+ tmp_hpi = open_shared_memory(eal_hugepage_info_path(),
+ sizeof(internal_config.hugepage_info));
+ if (tmp_hpi == NULL) {
+ RTE_LOG(ERR, EAL, "Failed to open shared memory!\n");
+ return -1;
+ }
+
+ memcpy(hpi, tmp_hpi, sizeof(internal_config.hugepage_info));
+
+ if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
+ return -1;
+ }
+ return 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/freebsd/eal_interrupts.c b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_interrupts.c
new file mode 100644
index 000000000..6d53d33c8
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_interrupts.c
@@ -0,0 +1,713 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#include <string.h>
+#include <sys/types.h>
+#include <sys/event.h>
+#include <sys/queue.h>
+#include <unistd.h>
+
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_spinlock.h>
+#include <rte_common.h>
+#include <rte_interrupts.h>
+#include <rte_eal_trace.h>
+
+#include "eal_private.h"
+#include "eal_alarm_private.h"
+
+#define MAX_INTR_EVENTS 16
+
+/**
+ * union buffer for reading on different devices
+ */
+union rte_intr_read_buffer {
+ char charbuf[16]; /* for others */
+};
+
+TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
+TAILQ_HEAD(rte_intr_source_list, rte_intr_source);
+
+struct rte_intr_callback {
+ TAILQ_ENTRY(rte_intr_callback) next;
+ rte_intr_callback_fn cb_fn; /**< callback address */
+ void *cb_arg; /**< parameter for callback */
+ uint8_t pending_delete; /**< delete after callback is called */
+ rte_intr_unregister_callback_fn ucb_fn; /**< fn to call before cb is deleted */
+};
+
+struct rte_intr_source {
+ TAILQ_ENTRY(rte_intr_source) next;
+ struct rte_intr_handle intr_handle; /**< interrupt handle */
+ struct rte_intr_cb_list callbacks; /**< user callbacks */
+ uint32_t active;
+};
+
+/* global spinlock for interrupt data operation */
+static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* interrupt sources list */
+static struct rte_intr_source_list intr_sources;
+
+/* interrupt handling thread */
+static pthread_t intr_thread;
+
+static volatile int kq = -1;
+
+static int
+intr_source_to_kevent(const struct rte_intr_handle *ih, struct kevent *ke)
+{
+ /* alarm callbacks are special case */
+ if (ih->type == RTE_INTR_HANDLE_ALARM) {
+ uint64_t timeout_ns;
+
+ /* get soonest alarm timeout */
+ if (eal_alarm_get_timeout_ns(&timeout_ns) < 0)
+ return -1;
+
+ ke->filter = EVFILT_TIMER;
+ /* timers are one shot */
+ ke->flags |= EV_ONESHOT;
+ ke->fflags = NOTE_NSECONDS;
+ ke->data = timeout_ns;
+ } else {
+ ke->filter = EVFILT_READ;
+ }
+ ke->ident = ih->fd;
+
+ return 0;
+}
+
+int
+rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
+ rte_intr_callback_fn cb, void *cb_arg)
+{
+ struct rte_intr_callback *callback;
+ struct rte_intr_source *src;
+ int ret = 0, add_event = 0;
+
+ /* first do parameter checking */
+ if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Registering with invalid input parameter\n");
+ return -EINVAL;
+ }
+ if (kq < 0) {
+ RTE_LOG(ERR, EAL, "Kqueue is not active: %d\n", kq);
+ return -ENODEV;
+ }
+
+ rte_spinlock_lock(&intr_lock);
+
+ /* find the source for this intr_handle */
+ TAILQ_FOREACH(src, &intr_sources, next) {
+ if (src->intr_handle.fd == intr_handle->fd)
+ break;
+ }
+
+ /* if this is an alarm interrupt and it already has a callback,
+ * then we don't want to create a new callback because the only
+ * thing on the list should be eal_alarm_callback() and we may
+ * be called just to reset the timer.
+ */
+ if (src != NULL && src->intr_handle.type == RTE_INTR_HANDLE_ALARM &&
+ !TAILQ_EMPTY(&src->callbacks)) {
+ callback = NULL;
+ } else {
+ /* allocate a new interrupt callback entity */
+ callback = calloc(1, sizeof(*callback));
+ if (callback == NULL) {
+ RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+ ret = -ENOMEM;
+ goto fail;
+ }
+ callback->cb_fn = cb;
+ callback->cb_arg = cb_arg;
+ callback->pending_delete = 0;
+ callback->ucb_fn = NULL;
+
+ if (src == NULL) {
+ src = calloc(1, sizeof(*src));
+ if (src == NULL) {
+ RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+ ret = -ENOMEM;
+ goto fail;
+ } else {
+ src->intr_handle = *intr_handle;
+ TAILQ_INIT(&src->callbacks);
+ TAILQ_INSERT_TAIL(&intr_sources, src, next);
+ }
+ }
+
+ /* we had no interrupts for this */
+ if (TAILQ_EMPTY(&src->callbacks))
+ add_event = 1;
+
+ TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
+ }
+
+ /* add events to the queue. timer events are special as we need to
+ * re-set the timer.
+ */
+ if (add_event || src->intr_handle.type == RTE_INTR_HANDLE_ALARM) {
+ struct kevent ke;
+
+ memset(&ke, 0, sizeof(ke));
+ ke.flags = EV_ADD; /* mark for addition to the queue */
+
+ if (intr_source_to_kevent(intr_handle, &ke) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot convert interrupt handle to kevent\n");
+ ret = -ENODEV;
+ goto fail;
+ }
+
+ /**
+ * add the intr file descriptor into wait list.
+ */
+ if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) {
+ /* currently, nic_uio does not support interrupts, so
+ * this error will always be triggered and output to the
+ * user. so, don't output it unless debug log level set.
+ */
+ if (errno == ENODEV)
+ RTE_LOG(DEBUG, EAL, "Interrupt handle %d not supported\n",
+ src->intr_handle.fd);
+ else
+ RTE_LOG(ERR, EAL, "Error adding fd %d "
+ "kevent, %s\n",
+ src->intr_handle.fd,
+ strerror(errno));
+ ret = -errno;
+ goto fail;
+ }
+ }
+ rte_eal_trace_intr_callback_register(intr_handle, cb, cb_arg, ret);
+ rte_spinlock_unlock(&intr_lock);
+
+ return 0;
+fail:
+ /* clean up */
+ if (src != NULL) {
+ if (callback != NULL)
+ TAILQ_REMOVE(&(src->callbacks), callback, next);
+ if (TAILQ_EMPTY(&(src->callbacks))) {
+ TAILQ_REMOVE(&intr_sources, src, next);
+ free(src);
+ }
+ }
+ free(callback);
+ rte_eal_trace_intr_callback_register(intr_handle, cb, cb_arg, ret);
+ rte_spinlock_unlock(&intr_lock);
+ return ret;
+}
+
+int
+rte_intr_callback_unregister_pending(const struct rte_intr_handle *intr_handle,
+ rte_intr_callback_fn cb_fn, void *cb_arg,
+ rte_intr_unregister_callback_fn ucb_fn)
+{
+ int ret;
+ struct rte_intr_source *src;
+ struct rte_intr_callback *cb, *next;
+
+ /* do parameter checking first */
+ if (intr_handle == NULL || intr_handle->fd < 0) {
+ RTE_LOG(ERR, EAL,
+ "Unregistering with invalid input parameter\n");
+ return -EINVAL;
+ }
+
+ if (kq < 0) {
+ RTE_LOG(ERR, EAL, "Kqueue is not active\n");
+ return -ENODEV;
+ }
+
+ rte_spinlock_lock(&intr_lock);
+
+ /* check if the insterrupt source for the fd is existent */
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd == intr_handle->fd)
+ break;
+
+ /* No interrupt source registered for the fd */
+ if (src == NULL) {
+ ret = -ENOENT;
+
+ /* only usable if the source is active */
+ } else if (src->active == 0) {
+ ret = -EAGAIN;
+
+ } else {
+ ret = 0;
+
+ /* walk through the callbacks and mark all that match. */
+ for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
+ next = TAILQ_NEXT(cb, next);
+ if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
+ cb->cb_arg == cb_arg)) {
+ cb->pending_delete = 1;
+ cb->ucb_fn = ucb_fn;
+ ret++;
+ }
+ }
+ }
+
+ rte_spinlock_unlock(&intr_lock);
+
+ return ret;
+}
+
+int
+rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
+ rte_intr_callback_fn cb_fn, void *cb_arg)
+{
+ int ret;
+ struct rte_intr_source *src;
+ struct rte_intr_callback *cb, *next;
+
+ /* do parameter checking first */
+ if (intr_handle == NULL || intr_handle->fd < 0) {
+ RTE_LOG(ERR, EAL,
+ "Unregistering with invalid input parameter\n");
+ return -EINVAL;
+ }
+ if (kq < 0) {
+ RTE_LOG(ERR, EAL, "Kqueue is not active\n");
+ return -ENODEV;
+ }
+
+ rte_spinlock_lock(&intr_lock);
+
+ /* check if the insterrupt source for the fd is existent */
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd == intr_handle->fd)
+ break;
+
+ /* No interrupt source registered for the fd */
+ if (src == NULL) {
+ ret = -ENOENT;
+
+ /* interrupt source has some active callbacks right now. */
+ } else if (src->active != 0) {
+ ret = -EAGAIN;
+
+ /* ok to remove. */
+ } else {
+ struct kevent ke;
+
+ ret = 0;
+
+ /* remove it from the kqueue */
+ memset(&ke, 0, sizeof(ke));
+ ke.flags = EV_DELETE; /* mark for deletion from the queue */
+
+ if (intr_source_to_kevent(intr_handle, &ke) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot convert to kevent\n");
+ ret = -ENODEV;
+ goto out;
+ }
+
+ /**
+ * remove intr file descriptor from wait list.
+ */
+ if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Error removing fd %d kevent, %s\n",
+ src->intr_handle.fd, strerror(errno));
+ /* removing non-existent even is an expected condition
+ * in some circumstances (e.g. oneshot events).
+ */
+ }
+
+ /*walk through the callbacks and remove all that match. */
+ for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
+ next = TAILQ_NEXT(cb, next);
+ if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
+ cb->cb_arg == cb_arg)) {
+ TAILQ_REMOVE(&src->callbacks, cb, next);
+ free(cb);
+ ret++;
+ }
+ }
+
+ /* all callbacks for that source are removed. */
+ if (TAILQ_EMPTY(&src->callbacks)) {
+ TAILQ_REMOVE(&intr_sources, src, next);
+ free(src);
+ }
+ }
+out:
+ rte_eal_trace_intr_callback_unregister(intr_handle, cb_fn, cb_arg,
+ ret);
+ rte_spinlock_unlock(&intr_lock);
+
+ return ret;
+}
+
+int
+rte_intr_enable(const struct rte_intr_handle *intr_handle)
+{
+ int rc = 0;
+
+ if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) {
+ rc = 0;
+ goto out;
+ }
+
+ if (!intr_handle || intr_handle->fd < 0 ||
+ intr_handle->uio_cfg_fd < 0) {
+ rc = -1;
+ goto out;
+ }
+
+ switch (intr_handle->type) {
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_ALARM:
+ rc = -1;
+ break;
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ rc = -1;
+ break;
+ /* unknown handle type */
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ rc = -1;
+ break;
+ }
+
+out:
+ rte_eal_trace_intr_enable(intr_handle, rc);
+ return rc;
+}
+
+int
+rte_intr_disable(const struct rte_intr_handle *intr_handle)
+{
+ int rc = 0;
+
+ if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) {
+ rc = 0;
+ goto out;
+ }
+
+ if (!intr_handle || intr_handle->fd < 0 ||
+ intr_handle->uio_cfg_fd < 0) {
+ rc = -1;
+ goto out;
+ }
+
+ switch (intr_handle->type) {
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_ALARM:
+ rc = -1;
+ break;
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ rc = -1;
+ break;
+ /* unknown handle type */
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ rc = -1;
+ break;
+ }
+out:
+ rte_eal_trace_intr_disable(intr_handle, rc);
+ return rc;
+}
+
+int
+rte_intr_ack(const struct rte_intr_handle *intr_handle)
+{
+ if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV)
+ return 0;
+
+ return -1;
+}
+
+static void
+eal_intr_process_interrupts(struct kevent *events, int nfds)
+{
+ struct rte_intr_callback active_cb;
+ union rte_intr_read_buffer buf;
+ struct rte_intr_callback *cb, *next;
+ struct rte_intr_source *src;
+ bool call = false;
+ int n, bytes_read;
+ struct kevent ke;
+
+ for (n = 0; n < nfds; n++) {
+ int event_fd = events[n].ident;
+
+ rte_spinlock_lock(&intr_lock);
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd == event_fd)
+ break;
+ if (src == NULL) {
+ rte_spinlock_unlock(&intr_lock);
+ continue;
+ }
+
+ /* mark this interrupt source as active and release the lock. */
+ src->active = 1;
+ rte_spinlock_unlock(&intr_lock);
+
+ /* set the length to be read dor different handle type */
+ switch (src->intr_handle.type) {
+ case RTE_INTR_HANDLE_ALARM:
+ bytes_read = 0;
+ call = true;
+ break;
+ case RTE_INTR_HANDLE_VDEV:
+ case RTE_INTR_HANDLE_EXT:
+ bytes_read = 0;
+ call = true;
+ break;
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ bytes_read = 0;
+ call = true;
+ break;
+ default:
+ bytes_read = 1;
+ break;
+ }
+
+ if (bytes_read > 0) {
+ /**
+ * read out to clear the ready-to-be-read flag
+ * for epoll_wait.
+ */
+ bytes_read = read(event_fd, &buf, bytes_read);
+ if (bytes_read < 0) {
+ if (errno == EINTR || errno == EWOULDBLOCK)
+ continue;
+
+ RTE_LOG(ERR, EAL, "Error reading from file "
+ "descriptor %d: %s\n",
+ event_fd,
+ strerror(errno));
+ } else if (bytes_read == 0)
+ RTE_LOG(ERR, EAL, "Read nothing from file "
+ "descriptor %d\n", event_fd);
+ else
+ call = true;
+ }
+
+ /* grab a lock, again to call callbacks and update status. */
+ rte_spinlock_lock(&intr_lock);
+
+ if (call) {
+ /* Finally, call all callbacks. */
+ TAILQ_FOREACH(cb, &src->callbacks, next) {
+
+ /* make a copy and unlock. */
+ active_cb = *cb;
+ rte_spinlock_unlock(&intr_lock);
+
+ /* call the actual callback */
+ active_cb.cb_fn(active_cb.cb_arg);
+
+ /*get the lock back. */
+ rte_spinlock_lock(&intr_lock);
+ }
+ }
+
+ /* we done with that interrupt source, release it. */
+ src->active = 0;
+
+ /* check if any callback are supposed to be removed */
+ for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
+ next = TAILQ_NEXT(cb, next);
+ if (cb->pending_delete) {
+ /* remove it from the kqueue */
+ memset(&ke, 0, sizeof(ke));
+ /* mark for deletion from the queue */
+ ke.flags = EV_DELETE;
+
+ if (intr_source_to_kevent(&src->intr_handle, &ke) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot convert to kevent\n");
+ rte_spinlock_unlock(&intr_lock);
+ return;
+ }
+
+ /**
+ * remove intr file descriptor from wait list.
+ */
+ if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Error removing fd %d kevent, "
+ "%s\n", src->intr_handle.fd,
+ strerror(errno));
+ /* removing non-existent even is an expected
+ * condition in some circumstances
+ * (e.g. oneshot events).
+ */
+ }
+
+ TAILQ_REMOVE(&src->callbacks, cb, next);
+ if (cb->ucb_fn)
+ cb->ucb_fn(&src->intr_handle, cb->cb_arg);
+ free(cb);
+ }
+ }
+
+ /* all callbacks for that source are removed. */
+ if (TAILQ_EMPTY(&src->callbacks)) {
+ TAILQ_REMOVE(&intr_sources, src, next);
+ free(src);
+ }
+
+ rte_spinlock_unlock(&intr_lock);
+ }
+}
+
+static void *
+eal_intr_thread_main(void *arg __rte_unused)
+{
+ struct kevent events[MAX_INTR_EVENTS];
+ int nfds;
+
+ /* host thread, never break out */
+ for (;;) {
+ /* do not change anything, just wait */
+ nfds = kevent(kq, NULL, 0, events, MAX_INTR_EVENTS, NULL);
+
+ /* kevent fail */
+ if (nfds < 0) {
+ if (errno == EINTR)
+ continue;
+ RTE_LOG(ERR, EAL,
+ "kevent returns with fail\n");
+ break;
+ }
+ /* kevent timeout, will never happen here */
+ else if (nfds == 0)
+ continue;
+
+ /* kevent has at least one fd ready to read */
+ eal_intr_process_interrupts(events, nfds);
+ }
+ close(kq);
+ kq = -1;
+ return NULL;
+}
+
+int
+rte_eal_intr_init(void)
+{
+ int ret = 0;
+
+ /* init the global interrupt source head */
+ TAILQ_INIT(&intr_sources);
+
+ kq = kqueue();
+ if (kq < 0) {
+ RTE_LOG(ERR, EAL, "Cannot create kqueue instance\n");
+ return -1;
+ }
+
+ /* create the host thread to wait/handle the interrupt */
+ ret = rte_ctrl_thread_create(&intr_thread, "eal-intr-thread", NULL,
+ eal_intr_thread_main, NULL);
+ if (ret != 0) {
+ rte_errno = -ret;
+ RTE_LOG(ERR, EAL,
+ "Failed to create thread for interrupt handling\n");
+ }
+
+ return ret;
+}
+
+int
+rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
+ int epfd, int op, unsigned int vec, void *data)
+{
+ RTE_SET_USED(intr_handle);
+ RTE_SET_USED(epfd);
+ RTE_SET_USED(op);
+ RTE_SET_USED(vec);
+ RTE_SET_USED(data);
+
+ return -ENOTSUP;
+}
+
+int
+rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
+{
+ RTE_SET_USED(intr_handle);
+ RTE_SET_USED(nb_efd);
+
+ return 0;
+}
+
+void
+rte_intr_efd_disable(struct rte_intr_handle *intr_handle)
+{
+ RTE_SET_USED(intr_handle);
+}
+
+int
+rte_intr_dp_is_en(struct rte_intr_handle *intr_handle)
+{
+ RTE_SET_USED(intr_handle);
+ return 0;
+}
+
+int
+rte_intr_allow_others(struct rte_intr_handle *intr_handle)
+{
+ RTE_SET_USED(intr_handle);
+ return 1;
+}
+
+int
+rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
+{
+ RTE_SET_USED(intr_handle);
+ return 0;
+}
+
+int
+rte_epoll_wait(int epfd, struct rte_epoll_event *events,
+ int maxevents, int timeout)
+{
+ RTE_SET_USED(epfd);
+ RTE_SET_USED(events);
+ RTE_SET_USED(maxevents);
+ RTE_SET_USED(timeout);
+
+ return -ENOTSUP;
+}
+
+int
+rte_epoll_ctl(int epfd, int op, int fd, struct rte_epoll_event *event)
+{
+ RTE_SET_USED(epfd);
+ RTE_SET_USED(op);
+ RTE_SET_USED(fd);
+ RTE_SET_USED(event);
+
+ return -ENOTSUP;
+}
+
+int
+rte_intr_tls_epfd(void)
+{
+ return -ENOTSUP;
+}
+
+void
+rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle)
+{
+ RTE_SET_USED(intr_handle);
+}
+
+int rte_thread_is_intr(void)
+{
+ return pthread_equal(intr_thread, pthread_self());
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/freebsd/eal_lcore.c b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_lcore.c
new file mode 100644
index 000000000..d9ef4bc9c
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_lcore.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <unistd.h>
+#include <sys/sysctl.h>
+
+#include <rte_log.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+
+/* No topology information available on FreeBSD including NUMA info */
+unsigned
+eal_cpu_core_id(__rte_unused unsigned lcore_id)
+{
+ return 0;
+}
+
+static int
+eal_get_ncpus(void)
+{
+ static int ncpu = -1;
+ int mib[2] = {CTL_HW, HW_NCPU};
+ size_t len = sizeof(ncpu);
+
+ if (ncpu < 0) {
+ sysctl(mib, 2, &ncpu, &len, NULL, 0);
+ RTE_LOG(INFO, EAL, "Sysctl reports %d cpus\n", ncpu);
+ }
+ return ncpu;
+}
+
+unsigned
+eal_cpu_socket_id(__rte_unused unsigned cpu_id)
+{
+ return 0;
+}
+
+/* Check if a cpu is present by the presence of the
+ * cpu information for it.
+ */
+int
+eal_cpu_detected(unsigned lcore_id)
+{
+ const unsigned ncpus = eal_get_ncpus();
+ return lcore_id < ncpus;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/freebsd/eal_memalloc.c b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_memalloc.c
new file mode 100644
index 000000000..6893448db
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_memalloc.c
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include <inttypes.h>
+
+#include <rte_errno.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+
+#include "eal_memalloc.h"
+
+int
+eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms __rte_unused,
+ int __rte_unused n_segs, size_t __rte_unused page_sz,
+ int __rte_unused socket, bool __rte_unused exact)
+{
+ RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
+ return -1;
+}
+
+struct rte_memseg *
+eal_memalloc_alloc_seg(size_t __rte_unused page_sz, int __rte_unused socket)
+{
+ RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
+ return NULL;
+}
+
+int
+eal_memalloc_free_seg(struct rte_memseg *ms __rte_unused)
+{
+ RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
+ return -1;
+}
+
+int
+eal_memalloc_free_seg_bulk(struct rte_memseg **ms __rte_unused,
+ int n_segs __rte_unused)
+{
+ RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
+ return -1;
+}
+
+int
+eal_memalloc_sync_with_primary(void)
+{
+ RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
+ return -1;
+}
+
+int
+eal_memalloc_get_seg_fd(int list_idx __rte_unused, int seg_idx __rte_unused)
+{
+ return -ENOTSUP;
+}
+
+int
+eal_memalloc_set_seg_fd(int list_idx __rte_unused, int seg_idx __rte_unused,
+ int fd __rte_unused)
+{
+ return -ENOTSUP;
+}
+
+int
+eal_memalloc_set_seg_list_fd(int list_idx __rte_unused, int fd __rte_unused)
+{
+ return -ENOTSUP;
+}
+
+int
+eal_memalloc_get_seg_fd_offset(int list_idx __rte_unused,
+ int seg_idx __rte_unused, size_t *offset __rte_unused)
+{
+ return -ENOTSUP;
+}
+
+int
+eal_memalloc_init(void)
+{
+ return 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/freebsd/eal_memory.c b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_memory.c
new file mode 100644
index 000000000..5bc2da160
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_memory.c
@@ -0,0 +1,536 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include <rte_eal.h>
+#include <rte_errno.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+#include "eal_memcfg.h"
+#include "eal_options.h"
+
+#define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE))
+
+uint64_t eal_get_baseaddr(void)
+{
+ /*
+ * FreeBSD may allocate something in the space we will be mapping things
+ * before we get a chance to do that, so use a base address that's far
+ * away from where malloc() et al usually map things.
+ */
+ return 0x1000000000ULL;
+}
+
+/*
+ * Get physical address of any mapped virtual address in the current process.
+ */
+phys_addr_t
+rte_mem_virt2phy(const void *virtaddr)
+{
+ /* XXX not implemented. This function is only used by
+ * rte_mempool_virt2iova() when hugepages are disabled. */
+ (void)virtaddr;
+ return RTE_BAD_IOVA;
+}
+rte_iova_t
+rte_mem_virt2iova(const void *virtaddr)
+{
+ return rte_mem_virt2phy(virtaddr);
+}
+
+int
+rte_eal_hugepage_init(void)
+{
+ struct rte_mem_config *mcfg;
+ uint64_t total_mem = 0;
+ void *addr;
+ unsigned int i, j, seg_idx = 0;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* for debug purposes, hugetlbfs can be disabled */
+ if (internal_config.no_hugetlbfs) {
+ struct rte_memseg_list *msl;
+ struct rte_fbarray *arr;
+ struct rte_memseg *ms;
+ uint64_t page_sz;
+ int n_segs, cur_seg;
+
+ /* create a memseg list */
+ msl = &mcfg->memsegs[0];
+
+ page_sz = RTE_PGSIZE_4K;
+ n_segs = internal_config.memory / page_sz;
+
+ if (rte_fbarray_init(&msl->memseg_arr, "nohugemem", n_segs,
+ sizeof(struct rte_memseg))) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
+ return -1;
+ }
+
+ addr = mmap(NULL, internal_config.memory,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
+ strerror(errno));
+ return -1;
+ }
+ msl->base_va = addr;
+ msl->page_sz = page_sz;
+ msl->len = internal_config.memory;
+ msl->socket_id = 0;
+ msl->heap = 1;
+
+ /* populate memsegs. each memseg is 1 page long */
+ for (cur_seg = 0; cur_seg < n_segs; cur_seg++) {
+ arr = &msl->memseg_arr;
+
+ ms = rte_fbarray_get(arr, cur_seg);
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ ms->iova = (uintptr_t)addr;
+ else
+ ms->iova = RTE_BAD_IOVA;
+ ms->addr = addr;
+ ms->hugepage_sz = page_sz;
+ ms->len = page_sz;
+ ms->socket_id = 0;
+
+ rte_fbarray_set_used(arr, cur_seg);
+
+ addr = RTE_PTR_ADD(addr, page_sz);
+ }
+ return 0;
+ }
+
+ /* map all hugepages and sort them */
+ for (i = 0; i < internal_config.num_hugepage_sizes; i ++){
+ struct hugepage_info *hpi;
+ rte_iova_t prev_end = 0;
+ int prev_ms_idx = -1;
+ uint64_t page_sz, mem_needed;
+ unsigned int n_pages, max_pages;
+
+ hpi = &internal_config.hugepage_info[i];
+ page_sz = hpi->hugepage_sz;
+ max_pages = hpi->num_pages[0];
+ mem_needed = RTE_ALIGN_CEIL(internal_config.memory - total_mem,
+ page_sz);
+
+ n_pages = RTE_MIN(mem_needed / page_sz, max_pages);
+
+ for (j = 0; j < n_pages; j++) {
+ struct rte_memseg_list *msl;
+ struct rte_fbarray *arr;
+ struct rte_memseg *seg;
+ int msl_idx, ms_idx;
+ rte_iova_t physaddr;
+ int error;
+ size_t sysctl_size = sizeof(physaddr);
+ char physaddr_str[64];
+ bool is_adjacent;
+
+ /* first, check if this segment is IOVA-adjacent to
+ * the previous one.
+ */
+ snprintf(physaddr_str, sizeof(physaddr_str),
+ "hw.contigmem.physaddr.%d", j);
+ error = sysctlbyname(physaddr_str, &physaddr,
+ &sysctl_size, NULL, 0);
+ if (error < 0) {
+ RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
+ "from %s\n", j, hpi->hugedir);
+ return -1;
+ }
+
+ is_adjacent = prev_end != 0 && physaddr == prev_end;
+ prev_end = physaddr + hpi->hugepage_sz;
+
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
+ msl_idx++) {
+ bool empty, need_hole;
+ msl = &mcfg->memsegs[msl_idx];
+ arr = &msl->memseg_arr;
+
+ if (msl->page_sz != page_sz)
+ continue;
+
+ empty = arr->count == 0;
+
+ /* we need a hole if this isn't an empty memseg
+ * list, and if previous segment was not
+ * adjacent to current one.
+ */
+ need_hole = !empty && !is_adjacent;
+
+ /* we need 1, plus hole if not adjacent */
+ ms_idx = rte_fbarray_find_next_n_free(arr,
+ 0, 1 + (need_hole ? 1 : 0));
+
+ /* memseg list is full? */
+ if (ms_idx < 0)
+ continue;
+
+ if (need_hole && prev_ms_idx == ms_idx - 1)
+ ms_idx++;
+ prev_ms_idx = ms_idx;
+
+ break;
+ }
+ if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_PER_TYPE),
+ RTE_STR(CONFIG_RTE_MAX_MEM_PER_TYPE));
+ return -1;
+ }
+ arr = &msl->memseg_arr;
+ seg = rte_fbarray_get(arr, ms_idx);
+
+ addr = RTE_PTR_ADD(msl->base_va,
+ (size_t)msl->page_sz * ms_idx);
+
+ /* address is already mapped in memseg list, so using
+ * MAP_FIXED here is safe.
+ */
+ addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE,
+ MAP_SHARED | MAP_FIXED,
+ hpi->lock_descriptor,
+ j * EAL_PAGE_SIZE);
+ if (addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
+ j, hpi->hugedir);
+ return -1;
+ }
+
+ seg->addr = addr;
+ seg->iova = physaddr;
+ seg->hugepage_sz = page_sz;
+ seg->len = page_sz;
+ seg->nchannel = mcfg->nchannel;
+ seg->nrank = mcfg->nrank;
+ seg->socket_id = 0;
+
+ rte_fbarray_set_used(arr, ms_idx);
+
+ RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
+ PRIx64", len %zu\n",
+ seg_idx++, addr, physaddr, page_sz);
+
+ total_mem += seg->len;
+ }
+ if (total_mem >= internal_config.memory)
+ break;
+ }
+ if (total_mem < internal_config.memory) {
+ RTE_LOG(ERR, EAL, "Couldn't reserve requested memory, "
+ "requested: %" PRIu64 "M "
+ "available: %" PRIu64 "M\n",
+ internal_config.memory >> 20, total_mem >> 20);
+ return -1;
+ }
+ return 0;
+}
+
+struct attach_walk_args {
+ int fd_hugepage;
+ int seg_idx;
+};
+static int
+attach_segment(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ void *arg)
+{
+ struct attach_walk_args *wa = arg;
+ void *addr;
+
+ if (msl->external)
+ return 0;
+
+ addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, wa->fd_hugepage,
+ wa->seg_idx * EAL_PAGE_SIZE);
+ if (addr == MAP_FAILED || addr != ms->addr)
+ return -1;
+ wa->seg_idx++;
+
+ return 0;
+}
+
+int
+rte_eal_hugepage_attach(void)
+{
+ const struct hugepage_info *hpi;
+ int fd_hugepage = -1;
+ unsigned int i;
+
+ hpi = &internal_config.hugepage_info[0];
+
+ for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
+ const struct hugepage_info *cur_hpi = &hpi[i];
+ struct attach_walk_args wa;
+
+ memset(&wa, 0, sizeof(wa));
+
+ /* Obtain a file descriptor for contiguous memory */
+ fd_hugepage = open(cur_hpi->hugedir, O_RDWR);
+ if (fd_hugepage < 0) {
+ RTE_LOG(ERR, EAL, "Could not open %s\n",
+ cur_hpi->hugedir);
+ goto error;
+ }
+ wa.fd_hugepage = fd_hugepage;
+ wa.seg_idx = 0;
+
+ /* Map the contiguous memory into each memory segment */
+ if (rte_memseg_walk(attach_segment, &wa) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
+ wa.seg_idx, cur_hpi->hugedir);
+ goto error;
+ }
+
+ close(fd_hugepage);
+ fd_hugepage = -1;
+ }
+
+ /* hugepage_info is no longer required */
+ return 0;
+
+error:
+ if (fd_hugepage >= 0)
+ close(fd_hugepage);
+ return -1;
+}
+
+int
+rte_eal_using_phys_addrs(void)
+{
+ return 0;
+}
+
+static uint64_t
+get_mem_amount(uint64_t page_sz, uint64_t max_mem)
+{
+ uint64_t area_sz, max_pages;
+
+ /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
+ max_pages = RTE_MAX_MEMSEG_PER_LIST;
+ max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);
+
+ area_sz = RTE_MIN(page_sz * max_pages, max_mem);
+
+ /* make sure the list isn't smaller than the page size */
+ area_sz = RTE_MAX(area_sz, page_sz);
+
+ return RTE_ALIGN(area_sz, page_sz);
+}
+
+#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
+static int
+alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
+ int n_segs, int socket_id, int type_msl_idx)
+{
+ char name[RTE_FBARRAY_NAME_LEN];
+
+ snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
+ type_msl_idx);
+ if (rte_fbarray_init(&msl->memseg_arr, name, n_segs,
+ sizeof(struct rte_memseg))) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
+ rte_strerror(rte_errno));
+ return -1;
+ }
+
+ msl->page_sz = page_sz;
+ msl->socket_id = socket_id;
+ msl->base_va = NULL;
+
+ RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
+ (size_t)page_sz >> 10, socket_id);
+
+ return 0;
+}
+
+static int
+alloc_va_space(struct rte_memseg_list *msl)
+{
+ uint64_t page_sz;
+ size_t mem_sz;
+ void *addr;
+ int flags = 0;
+
+#ifdef RTE_ARCH_PPC_64
+ flags |= MAP_HUGETLB;
+#endif
+
+ page_sz = msl->page_sz;
+ mem_sz = page_sz * msl->memseg_arr.len;
+
+ addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
+ if (addr == NULL) {
+ if (rte_errno == EADDRNOTAVAIL)
+ RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - "
+ "please use '--" OPT_BASE_VIRTADDR "' option\n",
+ (unsigned long long)mem_sz, msl->base_va);
+ else
+ RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
+ return -1;
+ }
+ msl->base_va = addr;
+ msl->len = mem_sz;
+
+ return 0;
+}
+
+
+static int
+memseg_primary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int hpi_idx, msl_idx = 0;
+ struct rte_memseg_list *msl;
+ uint64_t max_mem, total_mem;
+
+ /* no-huge does not need this at all */
+ if (internal_config.no_hugetlbfs)
+ return 0;
+
+ /* FreeBSD has an issue where core dump will dump the entire memory
+ * contents, including anonymous zero-page memory. Therefore, while we
+ * will be limiting total amount of memory to RTE_MAX_MEM_MB, we will
+ * also be further limiting total memory amount to whatever memory is
+ * available to us through contigmem driver (plus spacing blocks).
+ *
+ * so, at each stage, we will be checking how much memory we are
+ * preallocating, and adjust all the values accordingly.
+ */
+
+ max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
+ total_mem = 0;
+
+ /* create memseg lists */
+ for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
+ hpi_idx++) {
+ uint64_t max_type_mem, total_type_mem = 0;
+ uint64_t avail_mem;
+ int type_msl_idx, max_segs, avail_segs, total_segs = 0;
+ struct hugepage_info *hpi;
+ uint64_t hugepage_sz;
+
+ hpi = &internal_config.hugepage_info[hpi_idx];
+ hugepage_sz = hpi->hugepage_sz;
+
+ /* no NUMA support on FreeBSD */
+
+ /* check if we've already exceeded total memory amount */
+ if (total_mem >= max_mem)
+ break;
+
+ /* first, calculate theoretical limits according to config */
+ max_type_mem = RTE_MIN(max_mem - total_mem,
+ (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
+ max_segs = RTE_MAX_MEMSEG_PER_TYPE;
+
+ /* now, limit all of that to whatever will actually be
+ * available to us, because without dynamic allocation support,
+ * all of that extra memory will be sitting there being useless
+ * and slowing down core dumps in case of a crash.
+ *
+ * we need (N*2)-1 segments because we cannot guarantee that
+ * each segment will be IOVA-contiguous with the previous one,
+ * so we will allocate more and put spaces between segments
+ * that are non-contiguous.
+ */
+ avail_segs = (hpi->num_pages[0] * 2) - 1;
+ avail_mem = avail_segs * hugepage_sz;
+
+ max_type_mem = RTE_MIN(avail_mem, max_type_mem);
+ max_segs = RTE_MIN(avail_segs, max_segs);
+
+ type_msl_idx = 0;
+ while (total_type_mem < max_type_mem &&
+ total_segs < max_segs) {
+ uint64_t cur_max_mem, cur_mem;
+ unsigned int n_segs;
+
+ if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL,
+ "No more space in memseg lists, please increase %s\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+ return -1;
+ }
+
+ msl = &mcfg->memsegs[msl_idx++];
+
+ cur_max_mem = max_type_mem - total_type_mem;
+
+ cur_mem = get_mem_amount(hugepage_sz,
+ cur_max_mem);
+ n_segs = cur_mem / hugepage_sz;
+
+ if (alloc_memseg_list(msl, hugepage_sz, n_segs,
+ 0, type_msl_idx))
+ return -1;
+
+ total_segs += msl->memseg_arr.len;
+ total_type_mem = total_segs * hugepage_sz;
+ type_msl_idx++;
+
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
+ return -1;
+ }
+ }
+ total_mem += total_type_mem;
+ }
+ return 0;
+}
+
+static int
+memseg_secondary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int msl_idx = 0;
+ struct rte_memseg_list *msl;
+
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+
+ msl = &mcfg->memsegs[msl_idx];
+
+ /* skip empty memseg lists */
+ if (msl->memseg_arr.len == 0)
+ continue;
+
+ if (rte_fbarray_attach(&msl->memseg_arr)) {
+ RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
+ return -1;
+ }
+
+ /* preallocate VA space */
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int
+rte_eal_memseg_init(void)
+{
+ return rte_eal_process_type() == RTE_PROC_PRIMARY ?
+ memseg_primary_init() :
+ memseg_secondary_init();
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/freebsd/eal_thread.c b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_thread.c
new file mode 100644
index 000000000..b52019782
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_thread.c
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sched.h>
+#include <pthread_np.h>
+#include <sys/queue.h>
+#include <sys/thr.h>
+
+#include <rte_debug.h>
+#include <rte_atomic.h>
+#include <rte_launch.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_per_lcore.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_eal_trace.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+
+RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
+RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
+RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
+
+/*
+ * Send a message to a slave lcore identified by slave_id to call a
+ * function f with argument arg. Once the execution is done, the
+ * remote lcore switch in FINISHED state.
+ */
+int
+rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
+{
+ int n;
+ char c = 0;
+ int m2s = lcore_config[slave_id].pipe_master2slave[1];
+ int s2m = lcore_config[slave_id].pipe_slave2master[0];
+ int rc = -EBUSY;
+
+ if (lcore_config[slave_id].state != WAIT)
+ goto finish;
+
+ lcore_config[slave_id].f = f;
+ lcore_config[slave_id].arg = arg;
+
+ /* send message */
+ n = 0;
+ while (n == 0 || (n < 0 && errno == EINTR))
+ n = write(m2s, &c, 1);
+ if (n < 0)
+ rte_panic("cannot write on configuration pipe\n");
+
+ /* wait ack */
+ do {
+ n = read(s2m, &c, 1);
+ } while (n < 0 && errno == EINTR);
+
+ if (n <= 0)
+ rte_panic("cannot read on configuration pipe\n");
+
+ rc = 0;
+finish:
+ rte_eal_trace_thread_remote_launch(f, arg, slave_id, rc);
+ return rc;
+}
+
+/* set affinity for current thread */
+static int
+eal_thread_set_affinity(void)
+{
+ unsigned lcore_id = rte_lcore_id();
+
+ /* acquire system unique id */
+ rte_gettid();
+
+ /* update EAL thread core affinity */
+ return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
+}
+
+void eal_thread_init_master(unsigned lcore_id)
+{
+ /* set the lcore ID in per-lcore memory area */
+ RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+ /* set CPU affinity */
+ if (eal_thread_set_affinity() < 0)
+ rte_panic("cannot set affinity\n");
+}
+
+/* main loop of threads */
+__rte_noreturn void *
+eal_thread_loop(__rte_unused void *arg)
+{
+ char c;
+ int n, ret;
+ unsigned lcore_id;
+ pthread_t thread_id;
+ int m2s, s2m;
+ char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+
+ thread_id = pthread_self();
+
+ /* retrieve our lcore_id from the configuration structure */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (thread_id == lcore_config[lcore_id].thread_id)
+ break;
+ }
+ if (lcore_id == RTE_MAX_LCORE)
+ rte_panic("cannot retrieve lcore id\n");
+
+ m2s = lcore_config[lcore_id].pipe_master2slave[0];
+ s2m = lcore_config[lcore_id].pipe_slave2master[1];
+
+ /* set the lcore ID in per-lcore memory area */
+ RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+ /* set CPU affinity */
+ if (eal_thread_set_affinity() < 0)
+ rte_panic("cannot set affinity\n");
+
+ ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+
+ RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
+ lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
+
+ __rte_trace_mem_per_thread_alloc();
+ rte_eal_trace_thread_lcore_ready(lcore_id, cpuset);
+
+ /* read on our pipe to get commands */
+ while (1) {
+ void *fct_arg;
+
+ /* wait command */
+ do {
+ n = read(m2s, &c, 1);
+ } while (n < 0 && errno == EINTR);
+
+ if (n <= 0)
+ rte_panic("cannot read on configuration pipe\n");
+
+ lcore_config[lcore_id].state = RUNNING;
+
+ /* send ack */
+ n = 0;
+ while (n == 0 || (n < 0 && errno == EINTR))
+ n = write(s2m, &c, 1);
+ if (n < 0)
+ rte_panic("cannot write on configuration pipe\n");
+
+ if (lcore_config[lcore_id].f == NULL)
+ rte_panic("NULL function pointer\n");
+
+ /* call the function and store the return value */
+ fct_arg = lcore_config[lcore_id].arg;
+ ret = lcore_config[lcore_id].f(fct_arg);
+ lcore_config[lcore_id].ret = ret;
+ rte_wmb();
+ lcore_config[lcore_id].state = FINISHED;
+ }
+
+ /* never reached */
+ /* pthread_exit(NULL); */
+ /* return NULL; */
+}
+
+/* require calling thread tid by gettid() */
+int rte_sys_gettid(void)
+{
+ long lwpid;
+ thr_self(&lwpid);
+ return (int)lwpid;
+}
+
+int rte_thread_setname(pthread_t id, const char *name)
+{
+ /* this BSD function returns no error */
+ pthread_set_name_np(id, name);
+ return 0;
+}
+
+int rte_thread_getname(pthread_t id, char *name, size_t len)
+{
+ RTE_SET_USED(id);
+ RTE_SET_USED(name);
+ RTE_SET_USED(len);
+
+ return -ENOTSUP;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/freebsd/eal_timer.c b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_timer.c
new file mode 100644
index 000000000..beff755a4
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/freebsd/eal_timer.c
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <errno.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+
+#ifdef RTE_LIBEAL_USE_HPET
+#warning HPET is not supported in FreeBSD
+#endif
+
+enum timer_source eal_timer_source = EAL_TIMER_TSC;
+
+uint64_t
+get_tsc_freq(void)
+{
+ size_t sz;
+ int tmp;
+ uint64_t tsc_hz;
+
+ sz = sizeof(tmp);
+ tmp = 0;
+
+ if (sysctlbyname("kern.timecounter.smp_tsc", &tmp, &sz, NULL, 0))
+ RTE_LOG(WARNING, EAL, "%s\n", strerror(errno));
+ else if (tmp != 1)
+ RTE_LOG(WARNING, EAL, "TSC is not safe to use in SMP mode\n");
+
+ tmp = 0;
+
+ if (sysctlbyname("kern.timecounter.invariant_tsc", &tmp, &sz, NULL, 0))
+ RTE_LOG(WARNING, EAL, "%s\n", strerror(errno));
+ else if (tmp != 1)
+ RTE_LOG(WARNING, EAL, "TSC is not invariant\n");
+
+ sz = sizeof(tsc_hz);
+ if (sysctlbyname("machdep.tsc_freq", &tsc_hz, &sz, NULL, 0)) {
+ RTE_LOG(WARNING, EAL, "%s\n", strerror(errno));
+ return 0;
+ }
+
+ return tsc_hz;
+}
+
+int
+rte_eal_timer_init(void)
+{
+ set_tsc_freq();
+ return 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/freebsd/include/meson.build b/src/spdk/dpdk/lib/librte_eal/freebsd/include/meson.build
new file mode 100644
index 000000000..7d18dd52f
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/freebsd/include/meson.build
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2020 Mellanox Technologies, Ltd
+
+includes += include_directories('.')
+
+headers += files(
+ 'rte_os.h',
+)
diff --git a/src/spdk/dpdk/lib/librte_eal/freebsd/include/rte_os.h b/src/spdk/dpdk/lib/librte_eal/freebsd/include/rte_os.h
new file mode 100644
index 000000000..eeb750cd8
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/freebsd/include/rte_os.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2019 Intel Corporation
+ */
+
+#ifndef _RTE_OS_H_
+#define _RTE_OS_H_
+
+/**
+ * This is header should contain any function/macro definition
+ * which are not supported natively or named differently in the
+ * freebsd OS. Functions will be added in future releases.
+ */
+
+#include <pthread_np.h>
+
+typedef cpuset_t rte_cpuset_t;
+#define RTE_CPU_AND(dst, src1, src2) do \
+{ \
+ cpuset_t tmp; \
+ CPU_COPY(src1, &tmp); \
+ CPU_AND(&tmp, src2); \
+ CPU_COPY(&tmp, dst); \
+} while (0)
+#define RTE_CPU_OR(dst, src1, src2) do \
+{ \
+ cpuset_t tmp; \
+ CPU_COPY(src1, &tmp); \
+ CPU_OR(&tmp, src2); \
+ CPU_COPY(&tmp, dst); \
+} while (0)
+#define RTE_CPU_FILL(set) CPU_FILL(set)
+
+/* In FreeBSD 13 CPU_NAND macro is CPU_ANDNOT */
+#ifdef CPU_NAND
+#define RTE_CPU_NOT(dst, src) do \
+{ \
+ cpuset_t tmp; \
+ CPU_FILL(&tmp); \
+ CPU_NAND(&tmp, src); \
+ CPU_COPY(&tmp, dst); \
+} while (0)
+#else
+#define RTE_CPU_NOT(dst, src) do \
+{ \
+ cpuset_t tmp; \
+ CPU_FILL(&tmp); \
+ CPU_ANDNOT(&tmp, src); \
+ CPU_COPY(&tmp, dst); \
+} while (0)
+#endif
+
+#endif /* _RTE_OS_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/freebsd/meson.build b/src/spdk/dpdk/lib/librte_eal/freebsd/meson.build
new file mode 100644
index 000000000..e10fd8a16
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/freebsd/meson.build
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+
+subdir('include')
+
+sources += files(
+ 'eal.c',
+ 'eal_alarm.c',
+ 'eal_cpuflags.c',
+ 'eal_debug.c',
+ 'eal_dev.c',
+ 'eal_hugepage_info.c',
+ 'eal_interrupts.c',
+ 'eal_lcore.c',
+ 'eal_memalloc.c',
+ 'eal_memory.c',
+ 'eal_thread.c',
+ 'eal_timer.c',
+)
+
+deps += ['kvargs', 'telemetry']
diff --git a/src/spdk/dpdk/lib/librte_eal/include/Makefile b/src/spdk/dpdk/lib/librte_eal/include/Makefile
new file mode 100644
index 000000000..eb99190d1
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2014 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include := \
+ $(sort $(notdir \
+ $(wildcard $(RTE_SDK)/lib/librte_eal/include/*.h)))
+
+SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include/generic := \
+ $(sort $(addprefix generic/, $(notdir \
+ $(wildcard $(RTE_SDK)/lib/librte_eal/include/generic/*.h))))
+
+ARCH_DIR ?= $(RTE_ARCH)
+SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include += \
+ $(sort $(addprefix ../$(ARCH_DIR)/include/, $(notdir \
+ $(wildcard $(RTE_SDK)/lib/librte_eal/$(ARCH_DIR)/include/*.h))))
+
+include $(RTE_SDK)/mk/rte.install.mk
diff --git a/src/spdk/dpdk/lib/librte_eal/include/generic/rte_atomic.h b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_atomic.h
new file mode 100644
index 000000000..e6ab15a97
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_atomic.h
@@ -0,0 +1,1150 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_ATOMIC_H_
+#define _RTE_ATOMIC_H_
+
+/**
+ * @file
+ * Atomic Operations
+ *
+ * This file defines a generic API for atomic operations.
+ */
+
+#include <stdint.h>
+#include <rte_common.h>
+
+#ifdef __DOXYGEN__
+
+/** @name Memory Barrier
+ */
+///@{
+/**
+ * General memory barrier.
+ *
+ * Guarantees that the LOAD and STORE operations generated before the
+ * barrier occur before the LOAD and STORE operations generated after.
+ */
+static inline void rte_mb(void);
+
+/**
+ * Write memory barrier.
+ *
+ * Guarantees that the STORE operations generated before the barrier
+ * occur before the STORE operations generated after.
+ */
+static inline void rte_wmb(void);
+
+/**
+ * Read memory barrier.
+ *
+ * Guarantees that the LOAD operations generated before the barrier
+ * occur before the LOAD operations generated after.
+ */
+static inline void rte_rmb(void);
+///@}
+
+/** @name SMP Memory Barrier
+ */
+///@{
+/**
+ * General memory barrier between lcores
+ *
+ * Guarantees that the LOAD and STORE operations that precede the
+ * rte_smp_mb() call are globally visible across the lcores
+ * before the LOAD and STORE operations that follows it.
+ */
+static inline void rte_smp_mb(void);
+
+/**
+ * Write memory barrier between lcores
+ *
+ * Guarantees that the STORE operations that precede the
+ * rte_smp_wmb() call are globally visible across the lcores
+ * before the STORE operations that follows it.
+ */
+static inline void rte_smp_wmb(void);
+
+/**
+ * Read memory barrier between lcores
+ *
+ * Guarantees that the LOAD operations that precede the
+ * rte_smp_rmb() call are globally visible across the lcores
+ * before the LOAD operations that follows it.
+ */
+static inline void rte_smp_rmb(void);
+///@}
+
+/** @name I/O Memory Barrier
+ */
+///@{
+/**
+ * General memory barrier for I/O device
+ *
+ * Guarantees that the LOAD and STORE operations that precede the
+ * rte_io_mb() call are visible to I/O device or CPU before the
+ * LOAD and STORE operations that follow it.
+ */
+static inline void rte_io_mb(void);
+
+/**
+ * Write memory barrier for I/O device
+ *
+ * Guarantees that the STORE operations that precede the
+ * rte_io_wmb() call are visible to I/O device before the STORE
+ * operations that follow it.
+ */
+static inline void rte_io_wmb(void);
+
+/**
+ * Read memory barrier for IO device
+ *
+ * Guarantees that the LOAD operations on I/O device that precede the
+ * rte_io_rmb() call are visible to CPU before the LOAD
+ * operations that follow it.
+ */
+static inline void rte_io_rmb(void);
+///@}
+
+/** @name Coherent I/O Memory Barrier
+ *
+ * Coherent I/O memory barrier is a lightweight version of I/O memory
+ * barriers which are system-wide data synchronization barriers. This
+ * is for only coherent memory domain between lcore and I/O device but
+ * it is same as the I/O memory barriers in most of architectures.
+ * However, some architecture provides even lighter barriers which are
+ * somewhere in between I/O memory barriers and SMP memory barriers.
+ * For example, in case of ARMv8, DMB(data memory barrier) instruction
+ * can have different shareability domains - inner-shareable and
+ * outer-shareable. And inner-shareable DMB fits for SMP memory
+ * barriers and outer-shareable DMB for coherent I/O memory barriers,
+ * which acts on coherent memory.
+ *
+ * In most cases, I/O memory barriers are safer but if operations are
+ * on coherent memory instead of incoherent MMIO region of a device,
+ * then coherent I/O memory barriers can be used and this could bring
+ * performance gain depending on architectures.
+ */
+///@{
+/**
+ * Write memory barrier for coherent memory between lcore and I/O device
+ *
+ * Guarantees that the STORE operations on coherent memory that
+ * precede the rte_cio_wmb() call are visible to I/O device before the
+ * STORE operations that follow it.
+ */
+static inline void rte_cio_wmb(void);
+
+/**
+ * Read memory barrier for coherent memory between lcore and I/O device
+ *
+ * Guarantees that the LOAD operations on coherent memory updated by
+ * I/O device that precede the rte_cio_rmb() call are visible to CPU
+ * before the LOAD operations that follow it.
+ */
+static inline void rte_cio_rmb(void);
+///@}
+
+#endif /* __DOXYGEN__ */
+
+/**
+ * Compiler barrier.
+ *
+ * Guarantees that operation reordering does not occur at compile time
+ * for operations directly before and after the barrier.
+ */
+#define rte_compiler_barrier() do { \
+ asm volatile ("" : : : "memory"); \
+} while(0)
+
+/*------------------------- 16 bit atomic operations -------------------------*/
+
+/**
+ * Atomic compare and set.
+ *
+ * (atomic) equivalent to:
+ * if (*dst == exp)
+ * *dst = src (all 16-bit words)
+ *
+ * @param dst
+ * The destination location into which the value will be written.
+ * @param exp
+ * The expected value.
+ * @param src
+ * The new value.
+ * @return
+ * Non-zero on success; 0 on failure.
+ */
+static inline int
+rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
+{
+ return __sync_bool_compare_and_swap(dst, exp, src);
+}
+#endif
+
+/**
+ * Atomic exchange.
+ *
+ * (atomic) equivalent to:
+ * ret = *dst
+ * *dst = val;
+ * return ret;
+ *
+ * @param dst
+ * The destination location into which the value will be written.
+ * @param val
+ * The new value.
+ * @return
+ * The original value at that location
+ */
+static inline uint16_t
+rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline uint16_t
+rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val)
+{
+#if defined(__clang__)
+ return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
+#else
+ return __atomic_exchange_2(dst, val, __ATOMIC_SEQ_CST);
+#endif
+}
+#endif
+
+/**
+ * The atomic counter structure.
+ */
+typedef struct {
+ volatile int16_t cnt; /**< An internal counter value. */
+} rte_atomic16_t;
+
+/**
+ * Static initializer for an atomic counter.
+ */
+#define RTE_ATOMIC16_INIT(val) { (val) }
+
+/**
+ * Initialize an atomic counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic16_init(rte_atomic16_t *v)
+{
+ v->cnt = 0;
+}
+
+/**
+ * Atomically read a 16-bit value from a counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * The value of the counter.
+ */
+static inline int16_t
+rte_atomic16_read(const rte_atomic16_t *v)
+{
+ return v->cnt;
+}
+
+/**
+ * Atomically set a counter to a 16-bit value.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param new_value
+ * The new value for the counter.
+ */
+static inline void
+rte_atomic16_set(rte_atomic16_t *v, int16_t new_value)
+{
+ v->cnt = new_value;
+}
+
+/**
+ * Atomically add a 16-bit value to an atomic counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param inc
+ * The value to be added to the counter.
+ */
+static inline void
+rte_atomic16_add(rte_atomic16_t *v, int16_t inc)
+{
+ __sync_fetch_and_add(&v->cnt, inc);
+}
+
+/**
+ * Atomically subtract a 16-bit value from an atomic counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param dec
+ * The value to be subtracted from the counter.
+ */
+static inline void
+rte_atomic16_sub(rte_atomic16_t *v, int16_t dec)
+{
+ __sync_fetch_and_sub(&v->cnt, dec);
+}
+
+/**
+ * Atomically increment a counter by one.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic16_inc(rte_atomic16_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic16_inc(rte_atomic16_t *v)
+{
+ rte_atomic16_add(v, 1);
+}
+#endif
+
+/**
+ * Atomically decrement a counter by one.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic16_dec(rte_atomic16_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic16_dec(rte_atomic16_t *v)
+{
+ rte_atomic16_sub(v, 1);
+}
+#endif
+
+/**
+ * Atomically add a 16-bit value to a counter and return the result.
+ *
+ * Atomically adds the 16-bits value (inc) to the atomic counter (v) and
+ * returns the value of v after addition.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param inc
+ * The value to be added to the counter.
+ * @return
+ * The value of v after the addition.
+ */
+static inline int16_t
+rte_atomic16_add_return(rte_atomic16_t *v, int16_t inc)
+{
+ return __sync_add_and_fetch(&v->cnt, inc);
+}
+
+/**
+ * Atomically subtract a 16-bit value from a counter and return
+ * the result.
+ *
+ * Atomically subtracts the 16-bit value (inc) from the atomic counter
+ * (v) and returns the value of v after the subtraction.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param dec
+ * The value to be subtracted from the counter.
+ * @return
+ * The value of v after the subtraction.
+ */
+static inline int16_t
+rte_atomic16_sub_return(rte_atomic16_t *v, int16_t dec)
+{
+ return __sync_sub_and_fetch(&v->cnt, dec);
+}
+
+/**
+ * Atomically increment a 16-bit counter by one and test.
+ *
+ * Atomically increments the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * True if the result after the increment operation is 0; false otherwise.
+ */
+static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
+{
+ return __sync_add_and_fetch(&v->cnt, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically decrement a 16-bit counter by one and test.
+ *
+ * Atomically decrements the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * True if the result after the decrement operation is 0; false otherwise.
+ */
+static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
+{
+ return __sync_sub_and_fetch(&v->cnt, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically test and set a 16-bit atomic counter.
+ *
+ * If the counter value is already set, return 0 (failed). Otherwise, set
+ * the counter value to 1 and return 1 (success).
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * 0 if failed; else 1, success.
+ */
+static inline int rte_atomic16_test_and_set(rte_atomic16_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
+{
+ return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
+}
+#endif
+
+/**
+ * Atomically set a 16-bit counter to 0.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void rte_atomic16_clear(rte_atomic16_t *v)
+{
+ v->cnt = 0;
+}
+
+/*------------------------- 32 bit atomic operations -------------------------*/
+
+/**
+ * Atomic compare and set.
+ *
+ * (atomic) equivalent to:
+ * if (*dst == exp)
+ * *dst = src (all 32-bit words)
+ *
+ * @param dst
+ * The destination location into which the value will be written.
+ * @param exp
+ * The expected value.
+ * @param src
+ * The new value.
+ * @return
+ * Non-zero on success; 0 on failure.
+ */
+static inline int
+rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
+{
+ return __sync_bool_compare_and_swap(dst, exp, src);
+}
+#endif
+
+/**
+ * Atomic exchange.
+ *
+ * (atomic) equivalent to:
+ * ret = *dst
+ * *dst = val;
+ * return ret;
+ *
+ * @param dst
+ * The destination location into which the value will be written.
+ * @param val
+ * The new value.
+ * @return
+ * The original value at that location
+ */
+static inline uint32_t
+rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline uint32_t
+rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val)
+{
+#if defined(__clang__)
+ return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
+#else
+ return __atomic_exchange_4(dst, val, __ATOMIC_SEQ_CST);
+#endif
+}
+#endif
+
+/**
+ * The atomic counter structure.
+ */
+typedef struct {
+ volatile int32_t cnt; /**< An internal counter value. */
+} rte_atomic32_t;
+
+/**
+ * Static initializer for an atomic counter.
+ */
+#define RTE_ATOMIC32_INIT(val) { (val) }
+
+/**
+ * Initialize an atomic counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic32_init(rte_atomic32_t *v)
+{
+ v->cnt = 0;
+}
+
+/**
+ * Atomically read a 32-bit value from a counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * The value of the counter.
+ */
+static inline int32_t
+rte_atomic32_read(const rte_atomic32_t *v)
+{
+ return v->cnt;
+}
+
+/**
+ * Atomically set a counter to a 32-bit value.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param new_value
+ * The new value for the counter.
+ */
+static inline void
+rte_atomic32_set(rte_atomic32_t *v, int32_t new_value)
+{
+ v->cnt = new_value;
+}
+
+/**
+ * Atomically add a 32-bit value to an atomic counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param inc
+ * The value to be added to the counter.
+ */
+static inline void
+rte_atomic32_add(rte_atomic32_t *v, int32_t inc)
+{
+ __sync_fetch_and_add(&v->cnt, inc);
+}
+
+/**
+ * Atomically subtract a 32-bit value from an atomic counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param dec
+ * The value to be subtracted from the counter.
+ */
+static inline void
+rte_atomic32_sub(rte_atomic32_t *v, int32_t dec)
+{
+ __sync_fetch_and_sub(&v->cnt, dec);
+}
+
+/**
+ * Atomically increment a counter by one.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic32_inc(rte_atomic32_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic32_inc(rte_atomic32_t *v)
+{
+ rte_atomic32_add(v, 1);
+}
+#endif
+
+/**
+ * Atomically decrement a counter by one.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic32_dec(rte_atomic32_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic32_dec(rte_atomic32_t *v)
+{
+ rte_atomic32_sub(v,1);
+}
+#endif
+
+/**
+ * Atomically add a 32-bit value to a counter and return the result.
+ *
+ * Atomically adds the 32-bits value (inc) to the atomic counter (v) and
+ * returns the value of v after addition.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param inc
+ * The value to be added to the counter.
+ * @return
+ * The value of v after the addition.
+ */
+static inline int32_t
+rte_atomic32_add_return(rte_atomic32_t *v, int32_t inc)
+{
+ return __sync_add_and_fetch(&v->cnt, inc);
+}
+
+/**
+ * Atomically subtract a 32-bit value from a counter and return
+ * the result.
+ *
+ * Atomically subtracts the 32-bit value (inc) from the atomic counter
+ * (v) and returns the value of v after the subtraction.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param dec
+ * The value to be subtracted from the counter.
+ * @return
+ * The value of v after the subtraction.
+ */
+static inline int32_t
+rte_atomic32_sub_return(rte_atomic32_t *v, int32_t dec)
+{
+ return __sync_sub_and_fetch(&v->cnt, dec);
+}
+
+/**
+ * Atomically increment a 32-bit counter by one and test.
+ *
+ * Atomically increments the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * True if the result after the increment operation is 0; false otherwise.
+ */
+static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
+{
+ return __sync_add_and_fetch(&v->cnt, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically decrement a 32-bit counter by one and test.
+ *
+ * Atomically decrements the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * True if the result after the decrement operation is 0; false otherwise.
+ */
+static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
+{
+ return __sync_sub_and_fetch(&v->cnt, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically test and set a 32-bit atomic counter.
+ *
+ * If the counter value is already set, return 0 (failed). Otherwise, set
+ * the counter value to 1 and return 1 (success).
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * 0 if failed; else 1, success.
+ */
+static inline int rte_atomic32_test_and_set(rte_atomic32_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
+{
+ return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1);
+}
+#endif
+
+/**
+ * Atomically set a 32-bit counter to 0.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void rte_atomic32_clear(rte_atomic32_t *v)
+{
+ v->cnt = 0;
+}
+
+/*------------------------- 64 bit atomic operations -------------------------*/
+
+/**
+ * An atomic compare and set function used by the mutex functions.
+ * (atomic) equivalent to:
+ * if (*dst == exp)
+ * *dst = src (all 64-bit words)
+ *
+ * @param dst
+ * The destination into which the value will be written.
+ * @param exp
+ * The expected value.
+ * @param src
+ * The new value.
+ * @return
+ * Non-zero on success; 0 on failure.
+ */
+static inline int
+rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
+{
+ return __sync_bool_compare_and_swap(dst, exp, src);
+}
+#endif
+
+/**
+ * Atomic exchange.
+ *
+ * (atomic) equivalent to:
+ * ret = *dst
+ * *dst = val;
+ * return ret;
+ *
+ * @param dst
+ * The destination location into which the value will be written.
+ * @param val
+ * The new value.
+ * @return
+ * The original value at that location
+ */
+static inline uint64_t
+rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline uint64_t
+rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val)
+{
+#if defined(__clang__)
+ return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
+#else
+ return __atomic_exchange_8(dst, val, __ATOMIC_SEQ_CST);
+#endif
+}
+#endif
+
+/**
+ * The atomic counter structure.
+ */
+typedef struct {
+ volatile int64_t cnt; /**< Internal counter value. */
+} rte_atomic64_t;
+
+/**
+ * Static initializer for an atomic counter.
+ */
+#define RTE_ATOMIC64_INIT(val) { (val) }
+
+/**
+ * Initialize the atomic counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic64_init(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_init(rte_atomic64_t *v)
+{
+#ifdef __LP64__
+ v->cnt = 0;
+#else
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, 0);
+ }
+#endif
+}
+#endif
+
+/**
+ * Atomically read a 64-bit counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * The value of the counter.
+ */
+static inline int64_t
+rte_atomic64_read(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int64_t
+rte_atomic64_read(rte_atomic64_t *v)
+{
+#ifdef __LP64__
+ return v->cnt;
+#else
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ /* replace the value by itself */
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, tmp);
+ }
+ return tmp;
+#endif
+}
+#endif
+
+/**
+ * Atomically set a 64-bit counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param new_value
+ * The new value of the counter.
+ */
+static inline void
+rte_atomic64_set(rte_atomic64_t *v, int64_t new_value);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_set(rte_atomic64_t *v, int64_t new_value)
+{
+#ifdef __LP64__
+ v->cnt = new_value;
+#else
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, new_value);
+ }
+#endif
+}
+#endif
+
+/**
+ * Atomically add a 64-bit value to a counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param inc
+ * The value to be added to the counter.
+ */
+static inline void
+rte_atomic64_add(rte_atomic64_t *v, int64_t inc);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
+{
+ __sync_fetch_and_add(&v->cnt, inc);
+}
+#endif
+
+/**
+ * Atomically subtract a 64-bit value from a counter.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param dec
+ * The value to be subtracted from the counter.
+ */
+static inline void
+rte_atomic64_sub(rte_atomic64_t *v, int64_t dec);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
+{
+ __sync_fetch_and_sub(&v->cnt, dec);
+}
+#endif
+
+/**
+ * Atomically increment a 64-bit counter by one and test.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic64_inc(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_inc(rte_atomic64_t *v)
+{
+ rte_atomic64_add(v, 1);
+}
+#endif
+
+/**
+ * Atomically decrement a 64-bit counter by one and test.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void
+rte_atomic64_dec(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_atomic64_dec(rte_atomic64_t *v)
+{
+ rte_atomic64_sub(v, 1);
+}
+#endif
+
+/**
+ * Add a 64-bit value to an atomic counter and return the result.
+ *
+ * Atomically adds the 64-bit value (inc) to the atomic counter (v) and
+ * returns the value of v after the addition.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param inc
+ * The value to be added to the counter.
+ * @return
+ * The value of v after the addition.
+ */
+static inline int64_t
+rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int64_t
+rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
+{
+ return __sync_add_and_fetch(&v->cnt, inc);
+}
+#endif
+
+/**
+ * Subtract a 64-bit value from an atomic counter and return the result.
+ *
+ * Atomically subtracts the 64-bit value (dec) from the atomic counter (v)
+ * and returns the value of v after the subtraction.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @param dec
+ * The value to be subtracted from the counter.
+ * @return
+ * The value of v after the subtraction.
+ */
+static inline int64_t
+rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int64_t
+rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
+{
+ return __sync_sub_and_fetch(&v->cnt, dec);
+}
+#endif
+
+/**
+ * Atomically increment a 64-bit counter by one and test.
+ *
+ * Atomically increments the atomic counter (v) by one and returns
+ * true if the result is 0, or false in all other cases.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * True if the result after the addition is 0; false otherwise.
+ */
+static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v)
+{
+ return rte_atomic64_add_return(v, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically decrement a 64-bit counter by one and test.
+ *
+ * Atomically decrements the atomic counter (v) by one and returns true if
+ * the result is 0, or false in all other cases.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * True if the result after subtraction is 0; false otherwise.
+ */
+static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v)
+{
+ return rte_atomic64_sub_return(v, 1) == 0;
+}
+#endif
+
+/**
+ * Atomically test and set a 64-bit atomic counter.
+ *
+ * If the counter value is already set, return 0 (failed). Otherwise, set
+ * the counter value to 1 and return 1 (success).
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ * @return
+ * 0 if failed; else 1, success.
+ */
+static inline int rte_atomic64_test_and_set(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
+{
+ return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1);
+}
+#endif
+
+/**
+ * Atomically set a 64-bit counter to 0.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void rte_atomic64_clear(rte_atomic64_t *v);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void rte_atomic64_clear(rte_atomic64_t *v)
+{
+ rte_atomic64_set(v, 0);
+}
+#endif
+
+/*------------------------ 128 bit atomic operations -------------------------*/
+
+/**
+ * 128-bit integer structure.
+ */
+RTE_STD_C11
+typedef struct {
+ RTE_STD_C11
+ union {
+ uint64_t val[2];
+#ifdef RTE_ARCH_64
+ __extension__ __int128 int128;
+#endif
+ };
+} __rte_aligned(16) rte_int128_t;
+
+#ifdef __DOXYGEN__
+
+/**
+ * An atomic compare and set function used by the mutex functions.
+ * (Atomically) Equivalent to:
+ * @code
+ * if (*dst == *exp)
+ * *dst = *src
+ * else
+ * *exp = *dst
+ * @endcode
+ *
+ * @note This function is currently available for the x86-64 and aarch64
+ * platforms.
+ *
+ * @note The success and failure arguments must be one of the __ATOMIC_* values
+ * defined in the C++11 standard. For details on their behavior, refer to the
+ * standard.
+ *
+ * @param dst
+ * The destination into which the value will be written.
+ * @param exp
+ * Pointer to the expected value. If the operation fails, this memory is
+ * updated with the actual value.
+ * @param src
+ * Pointer to the new value.
+ * @param weak
+ * A value of true allows the comparison to spuriously fail and allows the
+ * 'exp' update to occur non-atomically (i.e. a torn read may occur).
+ * Implementations may ignore this argument and only implement the strong
+ * variant.
+ * @param success
+ * If successful, the operation's memory behavior conforms to this (or a
+ * stronger) model.
+ * @param failure
+ * If unsuccessful, the operation's memory behavior conforms to this (or a
+ * stronger) model. This argument cannot be __ATOMIC_RELEASE,
+ * __ATOMIC_ACQ_REL, or a stronger model than success.
+ * @return
+ * Non-zero on success; 0 on failure.
+ */
+__rte_experimental
+static inline int
+rte_atomic128_cmp_exchange(rte_int128_t *dst,
+ rte_int128_t *exp,
+ const rte_int128_t *src,
+ unsigned int weak,
+ int success,
+ int failure);
+
+#endif /* __DOXYGEN__ */
+
+#endif /* _RTE_ATOMIC_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/generic/rte_byteorder.h b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_byteorder.h
new file mode 100644
index 000000000..9ca960932
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_byteorder.h
@@ -0,0 +1,247 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_BYTEORDER_H_
+#define _RTE_BYTEORDER_H_
+
+/**
+ * @file
+ *
+ * Byte Swap Operations
+ *
+ * This file defines a generic API for byte swap operations. Part of
+ * the implementation is architecture-specific.
+ */
+
+#include <stdint.h>
+#ifdef RTE_EXEC_ENV_FREEBSD
+#include <sys/endian.h>
+#else
+#include <endian.h>
+#endif
+
+#include <rte_common.h>
+#include <rte_config.h>
+
+/*
+ * Compile-time endianness detection
+ */
+#define RTE_BIG_ENDIAN 1
+#define RTE_LITTLE_ENDIAN 2
+#if defined __BYTE_ORDER__
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define RTE_BYTE_ORDER RTE_BIG_ENDIAN
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
+#endif /* __BYTE_ORDER__ */
+#elif defined __BYTE_ORDER
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define RTE_BYTE_ORDER RTE_BIG_ENDIAN
+#elif __BYTE_ORDER == __LITTLE_ENDIAN
+#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
+#endif /* __BYTE_ORDER */
+#elif defined __BIG_ENDIAN__
+#define RTE_BYTE_ORDER RTE_BIG_ENDIAN
+#elif defined __LITTLE_ENDIAN__
+#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
+#endif
+#if !defined(RTE_BYTE_ORDER)
+#error Unknown endianness.
+#endif
+
+#define RTE_STATIC_BSWAP16(v) \
+ ((((uint16_t)(v) & UINT16_C(0x00ff)) << 8) | \
+ (((uint16_t)(v) & UINT16_C(0xff00)) >> 8))
+
+#define RTE_STATIC_BSWAP32(v) \
+ ((((uint32_t)(v) & UINT32_C(0x000000ff)) << 24) | \
+ (((uint32_t)(v) & UINT32_C(0x0000ff00)) << 8) | \
+ (((uint32_t)(v) & UINT32_C(0x00ff0000)) >> 8) | \
+ (((uint32_t)(v) & UINT32_C(0xff000000)) >> 24))
+
+#define RTE_STATIC_BSWAP64(v) \
+ ((((uint64_t)(v) & UINT64_C(0x00000000000000ff)) << 56) | \
+ (((uint64_t)(v) & UINT64_C(0x000000000000ff00)) << 40) | \
+ (((uint64_t)(v) & UINT64_C(0x0000000000ff0000)) << 24) | \
+ (((uint64_t)(v) & UINT64_C(0x00000000ff000000)) << 8) | \
+ (((uint64_t)(v) & UINT64_C(0x000000ff00000000)) >> 8) | \
+ (((uint64_t)(v) & UINT64_C(0x0000ff0000000000)) >> 24) | \
+ (((uint64_t)(v) & UINT64_C(0x00ff000000000000)) >> 40) | \
+ (((uint64_t)(v) & UINT64_C(0xff00000000000000)) >> 56))
+
+/*
+ * These macros are functionally similar to rte_cpu_to_(be|le)(16|32|64)(),
+ * they take values in host CPU order and return them converted to the
+ * intended endianness.
+ *
+ * They resolve at compilation time to integer constants which can safely be
+ * used with static initializers, since those cannot involve function calls.
+ *
+ * On the other hand, they are not as optimized as their rte_cpu_to_*()
+ * counterparts, therefore applications should refrain from using them on
+ * variable values, particularly inside performance-sensitive code.
+ */
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+#define RTE_BE16(v) (rte_be16_t)(v)
+#define RTE_BE32(v) (rte_be32_t)(v)
+#define RTE_BE64(v) (rte_be64_t)(v)
+#define RTE_LE16(v) (rte_le16_t)(RTE_STATIC_BSWAP16(v))
+#define RTE_LE32(v) (rte_le32_t)(RTE_STATIC_BSWAP32(v))
+#define RTE_LE64(v) (rte_le64_t)(RTE_STATIC_BSWAP64(v))
+#elif RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+#define RTE_BE16(v) (rte_be16_t)(RTE_STATIC_BSWAP16(v))
+#define RTE_BE32(v) (rte_be32_t)(RTE_STATIC_BSWAP32(v))
+#define RTE_BE64(v) (rte_be64_t)(RTE_STATIC_BSWAP64(v))
+#define RTE_LE16(v) (rte_le16_t)(v)
+#define RTE_LE32(v) (rte_le32_t)(v)
+#define RTE_LE64(v) (rte_le64_t)(v)
+#else
+#error Unsupported endianness.
+#endif
+
+/*
+ * The following types should be used when handling values according to a
+ * specific byte ordering, which may differ from that of the host CPU.
+ *
+ * Libraries, public APIs and applications are encouraged to use them for
+ * documentation purposes.
+ */
+typedef uint16_t rte_be16_t; /**< 16-bit big-endian value. */
+typedef uint32_t rte_be32_t; /**< 32-bit big-endian value. */
+typedef uint64_t rte_be64_t; /**< 64-bit big-endian value. */
+typedef uint16_t rte_le16_t; /**< 16-bit little-endian value. */
+typedef uint32_t rte_le32_t; /**< 32-bit little-endian value. */
+typedef uint64_t rte_le64_t; /**< 64-bit little-endian value. */
+
+/*
+ * An internal function to swap bytes in a 16-bit value.
+ *
+ * It is used by rte_bswap16() when the value is constant. Do not use
+ * this function directly; rte_bswap16() is preferred.
+ */
+static inline uint16_t
+rte_constant_bswap16(uint16_t x)
+{
+ return (uint16_t)RTE_STATIC_BSWAP16(x);
+}
+
+/*
+ * An internal function to swap bytes in a 32-bit value.
+ *
+ * It is used by rte_bswap32() when the value is constant. Do not use
+ * this function directly; rte_bswap32() is preferred.
+ */
+static inline uint32_t
+rte_constant_bswap32(uint32_t x)
+{
+ return (uint32_t)RTE_STATIC_BSWAP32(x);
+}
+
+/*
+ * An internal function to swap bytes of a 64-bit value.
+ *
+ * It is used by rte_bswap64() when the value is constant. Do not use
+ * this function directly; rte_bswap64() is preferred.
+ */
+static inline uint64_t
+rte_constant_bswap64(uint64_t x)
+{
+ return (uint64_t)RTE_STATIC_BSWAP64(x);
+}
+
+
+#ifdef __DOXYGEN__
+
+/**
+ * Swap bytes in a 16-bit value.
+ */
+static uint16_t rte_bswap16(uint16_t _x);
+
+/**
+ * Swap bytes in a 32-bit value.
+ */
+static uint32_t rte_bswap32(uint32_t x);
+
+/**
+ * Swap bytes in a 64-bit value.
+ */
+static uint64_t rte_bswap64(uint64_t x);
+
+/**
+ * Convert a 16-bit value from CPU order to little endian.
+ */
+static rte_le16_t rte_cpu_to_le_16(uint16_t x);
+
+/**
+ * Convert a 32-bit value from CPU order to little endian.
+ */
+static rte_le32_t rte_cpu_to_le_32(uint32_t x);
+
+/**
+ * Convert a 64-bit value from CPU order to little endian.
+ */
+static rte_le64_t rte_cpu_to_le_64(uint64_t x);
+
+
+/**
+ * Convert a 16-bit value from CPU order to big endian.
+ */
+static rte_be16_t rte_cpu_to_be_16(uint16_t x);
+
+/**
+ * Convert a 32-bit value from CPU order to big endian.
+ */
+static rte_be32_t rte_cpu_to_be_32(uint32_t x);
+
+/**
+ * Convert a 64-bit value from CPU order to big endian.
+ */
+static rte_be64_t rte_cpu_to_be_64(uint64_t x);
+
+
+/**
+ * Convert a 16-bit value from little endian to CPU order.
+ */
+static uint16_t rte_le_to_cpu_16(rte_le16_t x);
+
+/**
+ * Convert a 32-bit value from little endian to CPU order.
+ */
+static uint32_t rte_le_to_cpu_32(rte_le32_t x);
+
+/**
+ * Convert a 64-bit value from little endian to CPU order.
+ */
+static uint64_t rte_le_to_cpu_64(rte_le64_t x);
+
+
+/**
+ * Convert a 16-bit value from big endian to CPU order.
+ */
+static uint16_t rte_be_to_cpu_16(rte_be16_t x);
+
+/**
+ * Convert a 32-bit value from big endian to CPU order.
+ */
+static uint32_t rte_be_to_cpu_32(rte_be32_t x);
+
+/**
+ * Convert a 64-bit value from big endian to CPU order.
+ */
+static uint64_t rte_be_to_cpu_64(rte_be64_t x);
+
+#endif /* __DOXYGEN__ */
+
+#ifdef RTE_FORCE_INTRINSICS
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
+#define rte_bswap16(x) __builtin_bswap16(x)
+#endif
+
+#define rte_bswap32(x) __builtin_bswap32(x)
+
+#define rte_bswap64(x) __builtin_bswap64(x)
+
+#endif
+
+#endif /* _RTE_BYTEORDER_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/generic/rte_cpuflags.h b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_cpuflags.h
new file mode 100644
index 000000000..872f0ebe3
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_cpuflags.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_CPUFLAGS_H_
+#define _RTE_CPUFLAGS_H_
+
+/**
+ * @file
+ * Architecture specific API to determine available CPU features at runtime.
+ */
+
+#include "rte_common.h"
+#include <errno.h>
+
+/**
+ * Enumeration of all CPU features supported
+ */
+__extension__
+enum rte_cpu_flag_t;
+
+/**
+ * Get name of CPU flag
+ *
+ * @param feature
+ * CPU flag ID
+ * @return
+ * flag name
+ * NULL if flag ID is invalid
+ */
+__extension__
+const char *
+rte_cpu_get_flag_name(enum rte_cpu_flag_t feature);
+
+/**
+ * Function for checking a CPU flag availability
+ *
+ * @param feature
+ * CPU flag to query CPU for
+ * @return
+ * 1 if flag is available
+ * 0 if flag is not available
+ * -ENOENT if flag is invalid
+ */
+__extension__
+int
+rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature);
+
+/**
+ * This function checks that the currently used CPU supports the CPU features
+ * that were specified at compile time. It is called automatically within the
+ * EAL, so does not need to be used by applications. This version returns a
+ * result so that decisions may be made (for instance, graceful shutdowns).
+ */
+int
+rte_cpu_is_supported(void);
+
+/**
+ * This function attempts to retrieve a value from the auxiliary vector.
+ * If it is unsuccessful, the result will be 0, and errno will be set.
+ *
+ * @return A value from the auxiliary vector. When the value is 0, check
+ * errno to determine if an error occurred.
+ */
+unsigned long
+rte_cpu_getauxval(unsigned long type);
+
+/**
+ * This function retrieves a value from the auxiliary vector, and compares it
+ * as a string against the value retrieved.
+ *
+ * @return The result of calling strcmp() against the value retrieved from
+ * the auxiliary vector. When the value is 0 (meaning a match is found),
+ * check errno to determine if an error occurred.
+ */
+int
+rte_cpu_strcmp_auxval(unsigned long type, const char *str);
+
+#endif /* _RTE_CPUFLAGS_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/generic/rte_cycles.h b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_cycles.h
new file mode 100644
index 000000000..73d1fa7b9
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_cycles.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation.
+ * Copyright(c) 2013 6WIND S.A.
+ */
+
+#ifndef _RTE_CYCLES_H_
+#define _RTE_CYCLES_H_
+
+/**
+ * @file
+ *
+ * Simple Time Reference Functions (Cycles and HPET).
+ */
+
+#include <stdint.h>
+#include <rte_compat.h>
+#include <rte_debug.h>
+#include <rte_atomic.h>
+
+#define MS_PER_S 1000
+#define US_PER_S 1000000
+#define NS_PER_S 1000000000
+
+enum timer_source {
+ EAL_TIMER_TSC = 0,
+ EAL_TIMER_HPET
+};
+extern enum timer_source eal_timer_source;
+
+/**
+ * Get the measured frequency of the RDTSC counter
+ *
+ * @return
+ * The TSC frequency for this lcore
+ */
+uint64_t
+rte_get_tsc_hz(void);
+
+/**
+ * Return the number of TSC cycles since boot
+ *
+ * @return
+ * the number of cycles
+ */
+static inline uint64_t
+rte_get_tsc_cycles(void);
+
+#ifdef RTE_LIBEAL_USE_HPET
+/**
+ * Return the number of HPET cycles since boot
+ *
+ * This counter is global for all execution units. The number of
+ * cycles in one second can be retrieved using rte_get_hpet_hz().
+ *
+ * @return
+ * the number of cycles
+ */
+uint64_t
+rte_get_hpet_cycles(void);
+
+/**
+ * Get the number of HPET cycles in one second.
+ *
+ * @return
+ * The number of cycles in one second.
+ */
+uint64_t
+rte_get_hpet_hz(void);
+
+/**
+ * Initialise the HPET for use. This must be called before the rte_get_hpet_hz
+ * and rte_get_hpet_cycles APIs are called. If this function does not succeed,
+ * then the HPET functions are unavailable and should not be called.
+ *
+ * @param make_default
+ * If set, the hpet timer becomes the default timer whose values are
+ * returned by the rte_get_timer_hz/cycles API calls
+ *
+ * @return
+ * 0 on success,
+ * -1 on error, and the make_default parameter is ignored.
+ */
+int rte_eal_hpet_init(int make_default);
+
+#endif
+
+/**
+ * Get the number of cycles since boot from the default timer.
+ *
+ * @return
+ * The number of cycles
+ */
+static inline uint64_t
+rte_get_timer_cycles(void)
+{
+#ifdef RTE_LIBEAL_USE_HPET
+ switch(eal_timer_source) {
+ case EAL_TIMER_TSC:
+#endif
+ return rte_get_tsc_cycles();
+#ifdef RTE_LIBEAL_USE_HPET
+ case EAL_TIMER_HPET:
+ return rte_get_hpet_cycles();
+ default: rte_panic("Invalid timer source specified\n");
+ }
+#endif
+}
+
+/**
+ * Get the number of cycles in one second for the default timer.
+ *
+ * @return
+ * The number of cycles in one second.
+ */
+static inline uint64_t
+rte_get_timer_hz(void)
+{
+#ifdef RTE_LIBEAL_USE_HPET
+ switch(eal_timer_source) {
+ case EAL_TIMER_TSC:
+#endif
+ return rte_get_tsc_hz();
+#ifdef RTE_LIBEAL_USE_HPET
+ case EAL_TIMER_HPET:
+ return rte_get_hpet_hz();
+ default: rte_panic("Invalid timer source specified\n");
+ }
+#endif
+}
+/**
+ * Wait at least us microseconds.
+ * This function can be replaced with user-defined function.
+ * @see rte_delay_us_callback_register
+ *
+ * @param us
+ * The number of microseconds to wait.
+ */
+extern void
+(*rte_delay_us)(unsigned int us);
+
+/**
+ * Wait at least ms milliseconds.
+ *
+ * @param ms
+ * The number of milliseconds to wait.
+ */
+static inline void
+rte_delay_ms(unsigned ms)
+{
+ rte_delay_us(ms * 1000);
+}
+
+/**
+ * Blocking delay function.
+ *
+ * @param us
+ * Number of microseconds to wait.
+ */
+void rte_delay_us_block(unsigned int us);
+
+/**
+ * Delay function that uses system sleep.
+ * Does not block the CPU core.
+ *
+ * @param us
+ * Number of microseconds to wait.
+ */
+__rte_experimental
+void
+rte_delay_us_sleep(unsigned int us);
+
+/**
+ * Replace rte_delay_us with user defined function.
+ *
+ * @param userfunc
+ * User function which replaces rte_delay_us. rte_delay_us_block restores
+ * builtin block delay function.
+ */
+void rte_delay_us_callback_register(void(*userfunc)(unsigned int));
+
+#endif /* _RTE_CYCLES_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/generic/rte_io.h b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_io.h
new file mode 100644
index 000000000..da457f7f7
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_io.h
@@ -0,0 +1,350 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Cavium, Inc
+ */
+
+#ifndef _RTE_IO_H_
+#define _RTE_IO_H_
+
+/**
+ * @file
+ * I/O device memory operations
+ *
+ * This file defines the generic API for I/O device memory read/write operations
+ */
+
+#include <stdint.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+
+#ifdef __DOXYGEN__
+
+/**
+ * Read a 8-bit value from I/O device memory address *addr*.
+ *
+ * The relaxed version does not have additional I/O memory barrier, useful in
+ * accessing the device registers of integrated controllers which implicitly
+ * strongly ordered with respect to memory access.
+ *
+ * @param addr
+ * I/O memory address to read the value from
+ * @return
+ * read value
+ */
+static inline uint8_t
+rte_read8_relaxed(const volatile void *addr);
+
+/**
+ * Read a 16-bit value from I/O device memory address *addr*.
+ *
+ * The relaxed version does not have additional I/O memory barrier, useful in
+ * accessing the device registers of integrated controllers which implicitly
+ * strongly ordered with respect to memory access.
+ *
+ * @param addr
+ * I/O memory address to read the value from
+ * @return
+ * read value
+ */
+static inline uint16_t
+rte_read16_relaxed(const volatile void *addr);
+
+/**
+ * Read a 32-bit value from I/O device memory address *addr*.
+ *
+ * The relaxed version does not have additional I/O memory barrier, useful in
+ * accessing the device registers of integrated controllers which implicitly
+ * strongly ordered with respect to memory access.
+ *
+ * @param addr
+ * I/O memory address to read the value from
+ * @return
+ * read value
+ */
+static inline uint32_t
+rte_read32_relaxed(const volatile void *addr);
+
+/**
+ * Read a 64-bit value from I/O device memory address *addr*.
+ *
+ * The relaxed version does not have additional I/O memory barrier, useful in
+ * accessing the device registers of integrated controllers which implicitly
+ * strongly ordered with respect to memory access.
+ *
+ * @param addr
+ * I/O memory address to read the value from
+ * @return
+ * read value
+ */
+static inline uint64_t
+rte_read64_relaxed(const volatile void *addr);
+
+/**
+ * Write a 8-bit value to I/O device memory address *addr*.
+ *
+ * The relaxed version does not have additional I/O memory barrier, useful in
+ * accessing the device registers of integrated controllers which implicitly
+ * strongly ordered with respect to memory access.
+ *
+ * @param value
+ * Value to write
+ * @param addr
+ * I/O memory address to write the value to
+ */
+
+static inline void
+rte_write8_relaxed(uint8_t value, volatile void *addr);
+
+/**
+ * Write a 16-bit value to I/O device memory address *addr*.
+ *
+ * The relaxed version does not have additional I/O memory barrier, useful in
+ * accessing the device registers of integrated controllers which implicitly
+ * strongly ordered with respect to memory access.
+ *
+ * @param value
+ * Value to write
+ * @param addr
+ * I/O memory address to write the value to
+ */
+static inline void
+rte_write16_relaxed(uint16_t value, volatile void *addr);
+
+/**
+ * Write a 32-bit value to I/O device memory address *addr*.
+ *
+ * The relaxed version does not have additional I/O memory barrier, useful in
+ * accessing the device registers of integrated controllers which implicitly
+ * strongly ordered with respect to memory access.
+ *
+ * @param value
+ * Value to write
+ * @param addr
+ * I/O memory address to write the value to
+ */
+static inline void
+rte_write32_relaxed(uint32_t value, volatile void *addr);
+
+/**
+ * Write a 64-bit value to I/O device memory address *addr*.
+ *
+ * The relaxed version does not have additional I/O memory barrier, useful in
+ * accessing the device registers of integrated controllers which implicitly
+ * strongly ordered with respect to memory access.
+ *
+ * @param value
+ * Value to write
+ * @param addr
+ * I/O memory address to write the value to
+ */
+static inline void
+rte_write64_relaxed(uint64_t value, volatile void *addr);
+
+/**
+ * Read a 8-bit value from I/O device memory address *addr*.
+ *
+ * @param addr
+ * I/O memory address to read the value from
+ * @return
+ * read value
+ */
+static inline uint8_t
+rte_read8(const volatile void *addr);
+
+/**
+ * Read a 16-bit value from I/O device memory address *addr*.
+ *
+ *
+ * @param addr
+ * I/O memory address to read the value from
+ * @return
+ * read value
+ */
+static inline uint16_t
+rte_read16(const volatile void *addr);
+
+/**
+ * Read a 32-bit value from I/O device memory address *addr*.
+ *
+ * @param addr
+ * I/O memory address to read the value from
+ * @return
+ * read value
+ */
+static inline uint32_t
+rte_read32(const volatile void *addr);
+
+/**
+ * Read a 64-bit value from I/O device memory address *addr*.
+ *
+ * @param addr
+ * I/O memory address to read the value from
+ * @return
+ * read value
+ */
+static inline uint64_t
+rte_read64(const volatile void *addr);
+
+/**
+ * Write a 8-bit value to I/O device memory address *addr*.
+ *
+ * @param value
+ * Value to write
+ * @param addr
+ * I/O memory address to write the value to
+ */
+
+static inline void
+rte_write8(uint8_t value, volatile void *addr);
+
+/**
+ * Write a 16-bit value to I/O device memory address *addr*.
+ *
+ * @param value
+ * Value to write
+ * @param addr
+ * I/O memory address to write the value to
+ */
+static inline void
+rte_write16(uint16_t value, volatile void *addr);
+
+/**
+ * Write a 32-bit value to I/O device memory address *addr*.
+ *
+ * @param value
+ * Value to write
+ * @param addr
+ * I/O memory address to write the value to
+ */
+static inline void
+rte_write32(uint32_t value, volatile void *addr);
+
+/**
+ * Write a 64-bit value to I/O device memory address *addr*.
+ *
+ * @param value
+ * Value to write
+ * @param addr
+ * I/O memory address to write the value to
+ */
+static inline void
+rte_write64(uint64_t value, volatile void *addr);
+
+#endif /* __DOXYGEN__ */
+
+#ifndef RTE_OVERRIDE_IO_H
+
+static __rte_always_inline uint8_t
+rte_read8_relaxed(const volatile void *addr)
+{
+ return *(const volatile uint8_t *)addr;
+}
+
+static __rte_always_inline uint16_t
+rte_read16_relaxed(const volatile void *addr)
+{
+ return *(const volatile uint16_t *)addr;
+}
+
+static __rte_always_inline uint32_t
+rte_read32_relaxed(const volatile void *addr)
+{
+ return *(const volatile uint32_t *)addr;
+}
+
+static __rte_always_inline uint64_t
+rte_read64_relaxed(const volatile void *addr)
+{
+ return *(const volatile uint64_t *)addr;
+}
+
+static __rte_always_inline void
+rte_write8_relaxed(uint8_t value, volatile void *addr)
+{
+ *(volatile uint8_t *)addr = value;
+}
+
+static __rte_always_inline void
+rte_write16_relaxed(uint16_t value, volatile void *addr)
+{
+ *(volatile uint16_t *)addr = value;
+}
+
+static __rte_always_inline void
+rte_write32_relaxed(uint32_t value, volatile void *addr)
+{
+ *(volatile uint32_t *)addr = value;
+}
+
+static __rte_always_inline void
+rte_write64_relaxed(uint64_t value, volatile void *addr)
+{
+ *(volatile uint64_t *)addr = value;
+}
+
+static __rte_always_inline uint8_t
+rte_read8(const volatile void *addr)
+{
+ uint8_t val;
+ val = rte_read8_relaxed(addr);
+ rte_io_rmb();
+ return val;
+}
+
+static __rte_always_inline uint16_t
+rte_read16(const volatile void *addr)
+{
+ uint16_t val;
+ val = rte_read16_relaxed(addr);
+ rte_io_rmb();
+ return val;
+}
+
+static __rte_always_inline uint32_t
+rte_read32(const volatile void *addr)
+{
+ uint32_t val;
+ val = rte_read32_relaxed(addr);
+ rte_io_rmb();
+ return val;
+}
+
+static __rte_always_inline uint64_t
+rte_read64(const volatile void *addr)
+{
+ uint64_t val;
+ val = rte_read64_relaxed(addr);
+ rte_io_rmb();
+ return val;
+}
+
+static __rte_always_inline void
+rte_write8(uint8_t value, volatile void *addr)
+{
+ rte_io_wmb();
+ rte_write8_relaxed(value, addr);
+}
+
+static __rte_always_inline void
+rte_write16(uint16_t value, volatile void *addr)
+{
+ rte_io_wmb();
+ rte_write16_relaxed(value, addr);
+}
+
+static __rte_always_inline void
+rte_write32(uint32_t value, volatile void *addr)
+{
+ rte_io_wmb();
+ rte_write32_relaxed(value, addr);
+}
+
+static __rte_always_inline void
+rte_write64(uint64_t value, volatile void *addr)
+{
+ rte_io_wmb();
+ rte_write64_relaxed(value, addr);
+}
+
+#endif /* RTE_OVERRIDE_IO_H */
+
+#endif /* _RTE_IO_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/generic/rte_mcslock.h b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_mcslock.h
new file mode 100644
index 000000000..2bef28351
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_mcslock.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_MCSLOCK_H_
+#define _RTE_MCSLOCK_H_
+
+/**
+ * @file
+ *
+ * RTE MCS lock
+ *
+ * This file defines the main data structure and APIs for MCS queued lock.
+ *
+ * The MCS lock (proposed by John M. Mellor-Crummey and Michael L. Scott)
+ * provides scalability by spinning on a CPU/thread local variable which
+ * avoids expensive cache bouncings. It provides fairness by maintaining
+ * a list of acquirers and passing the lock to each CPU/thread in the order
+ * they acquired the lock.
+ */
+
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_pause.h>
+
+/**
+ * The rte_mcslock_t type.
+ */
+typedef struct rte_mcslock {
+ struct rte_mcslock *next;
+ int locked; /* 1 if the queue locked, 0 otherwise */
+} rte_mcslock_t;
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: This API may change without prior notice
+ *
+ * Take the MCS lock.
+ *
+ * @param msl
+ * A pointer to the pointer of a MCS lock.
+ * When the lock is initialized or declared, the msl pointer should be
+ * set to NULL.
+ * @param me
+ * A pointer to a new node of MCS lock. Each CPU/thread acquiring the
+ * lock should use its 'own node'.
+ */
+__rte_experimental
+static inline void
+rte_mcslock_lock(rte_mcslock_t **msl, rte_mcslock_t *me)
+{
+ rte_mcslock_t *prev;
+
+ /* Init me node */
+ __atomic_store_n(&me->locked, 1, __ATOMIC_RELAXED);
+ __atomic_store_n(&me->next, NULL, __ATOMIC_RELAXED);
+
+ /* If the queue is empty, the exchange operation is enough to acquire
+ * the lock. Hence, the exchange operation requires acquire semantics.
+ * The store to me->next above should complete before the node is
+ * visible to other CPUs/threads. Hence, the exchange operation requires
+ * release semantics as well.
+ */
+ prev = __atomic_exchange_n(msl, me, __ATOMIC_ACQ_REL);
+ if (likely(prev == NULL)) {
+ /* Queue was empty, no further action required,
+ * proceed with lock taken.
+ */
+ return;
+ }
+ __atomic_store_n(&prev->next, me, __ATOMIC_RELAXED);
+
+ /* The while-load of me->locked should not move above the previous
+ * store to prev->next. Otherwise it will cause a deadlock. Need a
+ * store-load barrier.
+ */
+ __atomic_thread_fence(__ATOMIC_ACQ_REL);
+ /* If the lock has already been acquired, it first atomically
+ * places the node at the end of the queue and then proceeds
+ * to spin on me->locked until the previous lock holder resets
+ * the me->locked using mcslock_unlock().
+ */
+ while (__atomic_load_n(&me->locked, __ATOMIC_ACQUIRE))
+ rte_pause();
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: This API may change without prior notice
+ *
+ * Release the MCS lock.
+ *
+ * @param msl
+ * A pointer to the pointer of a MCS lock.
+ * @param me
+ * A pointer to the node of MCS lock passed in rte_mcslock_lock.
+ */
+__rte_experimental
+static inline void
+rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me)
+{
+ /* Check if there are more nodes in the queue. */
+ if (likely(__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)) {
+ /* No, last member in the queue. */
+ rte_mcslock_t *save_me = __atomic_load_n(&me, __ATOMIC_RELAXED);
+
+ /* Release the lock by setting it to NULL */
+ if (likely(__atomic_compare_exchange_n(msl, &save_me, NULL, 0,
+ __ATOMIC_RELEASE, __ATOMIC_RELAXED)))
+ return;
+
+ /* Speculative execution would be allowed to read in the
+ * while-loop first. This has the potential to cause a
+ * deadlock. Need a load barrier.
+ */
+ __atomic_thread_fence(__ATOMIC_ACQUIRE);
+ /* More nodes added to the queue by other CPUs.
+ * Wait until the next pointer is set.
+ */
+ while (__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL)
+ rte_pause();
+ }
+
+ /* Pass lock to next waiter. */
+ __atomic_store_n(&me->next->locked, 0, __ATOMIC_RELEASE);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: This API may change without prior notice
+ *
+ * Try to take the lock.
+ *
+ * @param msl
+ * A pointer to the pointer of a MCS lock.
+ * @param me
+ * A pointer to a new node of MCS lock.
+ * @return
+ * 1 if the lock is successfully taken; 0 otherwise.
+ */
+__rte_experimental
+static inline int
+rte_mcslock_trylock(rte_mcslock_t **msl, rte_mcslock_t *me)
+{
+ /* Init me node */
+ __atomic_store_n(&me->next, NULL, __ATOMIC_RELAXED);
+
+ /* Try to lock */
+ rte_mcslock_t *expected = NULL;
+
+ /* The lock can be taken only when the queue is empty. Hence,
+ * the compare-exchange operation requires acquire semantics.
+ * The store to me->next above should complete before the node
+ * is visible to other CPUs/threads. Hence, the compare-exchange
+ * operation requires release semantics as well.
+ */
+ return __atomic_compare_exchange_n(msl, &expected, me, 0,
+ __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: This API may change without prior notice
+ *
+ * Test if the lock is taken.
+ *
+ * @param msl
+ * A pointer to a MCS lock node.
+ * @return
+ * 1 if the lock is currently taken; 0 otherwise.
+ */
+__rte_experimental
+static inline int
+rte_mcslock_is_locked(rte_mcslock_t *msl)
+{
+ return (__atomic_load_n(&msl, __ATOMIC_RELAXED) != NULL);
+}
+
+#endif /* _RTE_MCSLOCK_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/generic/rte_memcpy.h b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_memcpy.h
new file mode 100644
index 000000000..701e550c3
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_memcpy.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_MEMCPY_H_
+#define _RTE_MEMCPY_H_
+
+/**
+ * @file
+ *
+ * Functions for vectorised implementation of memcpy().
+ */
+
+/**
+ * Copy 16 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src);
+
+/**
+ * Copy 32 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src);
+
+#ifdef __DOXYGEN__
+
+/**
+ * Copy 48 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src);
+
+#endif /* __DOXYGEN__ */
+
+/**
+ * Copy 64 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src);
+
+/**
+ * Copy 128 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src);
+
+/**
+ * Copy 256 bytes from one location to another using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ */
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src);
+
+#ifdef __DOXYGEN__
+
+/**
+ * Copy bytes from one location to another. The locations must not overlap.
+ *
+ * @note This is implemented as a macro, so it's address should not be taken
+ * and care is needed as parameter expressions may be evaluated multiple times.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ * @param n
+ * Number of bytes to copy.
+ * @return
+ * Pointer to the destination data.
+ */
+static void *
+rte_memcpy(void *dst, const void *src, size_t n);
+
+#endif /* __DOXYGEN__ */
+
+#endif /* _RTE_MEMCPY_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/generic/rte_pause.h b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_pause.h
new file mode 100644
index 000000000..7422785f1
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_pause.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Cavium, Inc
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_PAUSE_H_
+#define _RTE_PAUSE_H_
+
+/**
+ * @file
+ *
+ * CPU pause operation.
+ *
+ */
+
+#include <stdint.h>
+#include <assert.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+#include <rte_compat.h>
+
+/**
+ * Pause CPU execution for a short while
+ *
+ * This call is intended for tight loops which poll a shared resource or wait
+ * for an event. A short pause within the loop may reduce the power consumption.
+ */
+static inline void rte_pause(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Wait for *addr to be updated with a 16-bit expected value, with a relaxed
+ * memory ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ * A pointer to the memory location.
+ * @param expected
+ * A 16-bit expected value to be in the memory location.
+ * @param memorder
+ * Two different memory orders that can be specified:
+ * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ * C++11 memory orders with the same names, see the C++11 standard or
+ * the GCC wiki on atomic synchronization for detailed definition.
+ */
+__rte_experimental
+static __rte_always_inline void
+rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
+ int memorder);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Wait for *addr to be updated with a 32-bit expected value, with a relaxed
+ * memory ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ * A pointer to the memory location.
+ * @param expected
+ * A 32-bit expected value to be in the memory location.
+ * @param memorder
+ * Two different memory orders that can be specified:
+ * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ * C++11 memory orders with the same names, see the C++11 standard or
+ * the GCC wiki on atomic synchronization for detailed definition.
+ */
+__rte_experimental
+static __rte_always_inline void
+rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
+ int memorder);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Wait for *addr to be updated with a 64-bit expected value, with a relaxed
+ * memory ordering model meaning the loads around this API can be reordered.
+ *
+ * @param addr
+ * A pointer to the memory location.
+ * @param expected
+ * A 64-bit expected value to be in the memory location.
+ * @param memorder
+ * Two different memory orders that can be specified:
+ * __ATOMIC_ACQUIRE and __ATOMIC_RELAXED. These map to
+ * C++11 memory orders with the same names, see the C++11 standard or
+ * the GCC wiki on atomic synchronization for detailed definition.
+ */
+__rte_experimental
+static __rte_always_inline void
+rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
+ int memorder);
+
+#ifndef RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED
+static __rte_always_inline void
+rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
+ int memorder)
+{
+ assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+ while (__atomic_load_n(addr, memorder) != expected)
+ rte_pause();
+}
+
+static __rte_always_inline void
+rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
+ int memorder)
+{
+ assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+ while (__atomic_load_n(addr, memorder) != expected)
+ rte_pause();
+}
+
+static __rte_always_inline void
+rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
+ int memorder)
+{
+ assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED);
+
+ while (__atomic_load_n(addr, memorder) != expected)
+ rte_pause();
+}
+#endif
+
+#endif /* _RTE_PAUSE_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/generic/rte_prefetch.h b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_prefetch.h
new file mode 100644
index 000000000..6e47bdfba
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_prefetch.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2015 Intel Corporation
+ */
+
+#ifndef _RTE_PREFETCH_H_
+#define _RTE_PREFETCH_H_
+
+/**
+ * @file
+ *
+ * Prefetch operations.
+ *
+ * This file defines an API for prefetch macros / inline-functions,
+ * which are architecture-dependent. Prefetching occurs when a
+ * processor requests an instruction or data from memory to cache
+ * before it is actually needed, potentially speeding up the execution of the
+ * program.
+ */
+
+/**
+ * Prefetch a cache line into all cache levels.
+ * @param p
+ * Address to prefetch
+ */
+static inline void rte_prefetch0(const volatile void *p);
+
+/**
+ * Prefetch a cache line into all cache levels except the 0th cache level.
+ * @param p
+ * Address to prefetch
+ */
+static inline void rte_prefetch1(const volatile void *p);
+
+/**
+ * Prefetch a cache line into all cache levels except the 0th and 1th cache
+ * levels.
+ * @param p
+ * Address to prefetch
+ */
+static inline void rte_prefetch2(const volatile void *p);
+
+/**
+ * Prefetch a cache line into all cache levels (non-temporal/transient version)
+ *
+ * The non-temporal prefetch is intended as a prefetch hint that processor will
+ * use the prefetched data only once or short period, unlike the
+ * rte_prefetch0() function which imply that prefetched data to use repeatedly.
+ *
+ * @param p
+ * Address to prefetch
+ */
+static inline void rte_prefetch_non_temporal(const volatile void *p);
+
+#endif /* _RTE_PREFETCH_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/generic/rte_rwlock.h b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_rwlock.h
new file mode 100644
index 000000000..da9bc3e9c
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_rwlock.h
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_RWLOCK_H_
+#define _RTE_RWLOCK_H_
+
+/**
+ * @file
+ *
+ * RTE Read-Write Locks
+ *
+ * This file defines an API for read-write locks. The lock is used to
+ * protect data that allows multiple readers in parallel, but only
+ * one writer. All readers are blocked until the writer is finished
+ * writing.
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include <rte_atomic.h>
+#include <rte_pause.h>
+
+/**
+ * The rte_rwlock_t type.
+ *
+ * cnt is -1 when write lock is held, and > 0 when read locks are held.
+ */
+typedef struct {
+ volatile int32_t cnt; /**< -1 when W lock held, > 0 when R locks held. */
+} rte_rwlock_t;
+
+/**
+ * A static rwlock initializer.
+ */
+#define RTE_RWLOCK_INITIALIZER { 0 }
+
+/**
+ * Initialize the rwlock to an unlocked state.
+ *
+ * @param rwl
+ * A pointer to the rwlock structure.
+ */
+static inline void
+rte_rwlock_init(rte_rwlock_t *rwl)
+{
+ rwl->cnt = 0;
+}
+
+/**
+ * Take a read lock. Loop until the lock is held.
+ *
+ * @param rwl
+ * A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_read_lock(rte_rwlock_t *rwl)
+{
+ int32_t x;
+ int success = 0;
+
+ while (success == 0) {
+ x = __atomic_load_n(&rwl->cnt, __ATOMIC_RELAXED);
+ /* write lock is held */
+ if (x < 0) {
+ rte_pause();
+ continue;
+ }
+ success = __atomic_compare_exchange_n(&rwl->cnt, &x, x + 1, 1,
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
+ }
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * try to take a read lock.
+ *
+ * @param rwl
+ * A pointer to a rwlock structure.
+ * @return
+ * - zero if the lock is successfully taken
+ * - -EBUSY if lock could not be acquired for reading because a
+ * writer holds the lock
+ */
+__rte_experimental
+static inline int
+rte_rwlock_read_trylock(rte_rwlock_t *rwl)
+{
+ int32_t x;
+ int success = 0;
+
+ while (success == 0) {
+ x = __atomic_load_n(&rwl->cnt, __ATOMIC_RELAXED);
+ /* write lock is held */
+ if (x < 0)
+ return -EBUSY;
+ success = __atomic_compare_exchange_n(&rwl->cnt, &x, x + 1, 1,
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
+ }
+
+ return 0;
+}
+
+/**
+ * Release a read lock.
+ *
+ * @param rwl
+ * A pointer to the rwlock structure.
+ */
+static inline void
+rte_rwlock_read_unlock(rte_rwlock_t *rwl)
+{
+ __atomic_fetch_sub(&rwl->cnt, 1, __ATOMIC_RELEASE);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * try to take a write lock.
+ *
+ * @param rwl
+ * A pointer to a rwlock structure.
+ * @return
+ * - zero if the lock is successfully taken
+ * - -EBUSY if lock could not be acquired for writing because
+ * it was already locked for reading or writing
+ */
+__rte_experimental
+static inline int
+rte_rwlock_write_trylock(rte_rwlock_t *rwl)
+{
+ int32_t x;
+
+ x = __atomic_load_n(&rwl->cnt, __ATOMIC_RELAXED);
+ if (x != 0 || __atomic_compare_exchange_n(&rwl->cnt, &x, -1, 1,
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) == 0)
+ return -EBUSY;
+
+ return 0;
+}
+
+/**
+ * Take a write lock. Loop until the lock is held.
+ *
+ * @param rwl
+ * A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_write_lock(rte_rwlock_t *rwl)
+{
+ int32_t x;
+ int success = 0;
+
+ while (success == 0) {
+ x = __atomic_load_n(&rwl->cnt, __ATOMIC_RELAXED);
+ /* a lock is held */
+ if (x != 0) {
+ rte_pause();
+ continue;
+ }
+ success = __atomic_compare_exchange_n(&rwl->cnt, &x, -1, 1,
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
+ }
+}
+
+/**
+ * Release a write lock.
+ *
+ * @param rwl
+ * A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_write_unlock(rte_rwlock_t *rwl)
+{
+ __atomic_store_n(&rwl->cnt, 0, __ATOMIC_RELEASE);
+}
+
+/**
+ * Try to execute critical section in a hardware memory transaction, if it
+ * fails or not available take a read lock
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param rwl
+ * A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_read_lock_tm(rte_rwlock_t *rwl);
+
+/**
+ * Commit hardware memory transaction or release the read lock if the lock is used as a fall-back
+ *
+ * @param rwl
+ * A pointer to the rwlock structure.
+ */
+static inline void
+rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl);
+
+/**
+ * Try to execute critical section in a hardware memory transaction, if it
+ * fails or not available take a write lock
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param rwl
+ * A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_write_lock_tm(rte_rwlock_t *rwl);
+
+/**
+ * Commit hardware memory transaction or release the write lock if the lock is used as a fall-back
+ *
+ * @param rwl
+ * A pointer to a rwlock structure.
+ */
+static inline void
+rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RWLOCK_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/generic/rte_spinlock.h b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_spinlock.h
new file mode 100644
index 000000000..87ae7a4f1
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_spinlock.h
@@ -0,0 +1,305 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_SPINLOCK_H_
+#define _RTE_SPINLOCK_H_
+
+/**
+ * @file
+ *
+ * RTE Spinlocks
+ *
+ * This file defines an API for read-write locks, which are implemented
+ * in an architecture-specific way. This kind of lock simply waits in
+ * a loop repeatedly checking until the lock becomes available.
+ *
+ * All locks must be initialised before use, and only initialised once.
+ *
+ */
+
+#include <rte_lcore.h>
+#ifdef RTE_FORCE_INTRINSICS
+#include <rte_common.h>
+#endif
+#include <rte_pause.h>
+
+/**
+ * The rte_spinlock_t type.
+ */
+typedef struct {
+ volatile int locked; /**< lock status 0 = unlocked, 1 = locked */
+} rte_spinlock_t;
+
+/**
+ * A static spinlock initializer.
+ */
+#define RTE_SPINLOCK_INITIALIZER { 0 }
+
+/**
+ * Initialize the spinlock to an unlocked state.
+ *
+ * @param sl
+ * A pointer to the spinlock.
+ */
+static inline void
+rte_spinlock_init(rte_spinlock_t *sl)
+{
+ sl->locked = 0;
+}
+
+/**
+ * Take the spinlock.
+ *
+ * @param sl
+ * A pointer to the spinlock.
+ */
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl)
+{
+ int exp = 0;
+
+ while (!__atomic_compare_exchange_n(&sl->locked, &exp, 1, 0,
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
+ while (__atomic_load_n(&sl->locked, __ATOMIC_RELAXED))
+ rte_pause();
+ exp = 0;
+ }
+}
+#endif
+
+/**
+ * Release the spinlock.
+ *
+ * @param sl
+ * A pointer to the spinlock.
+ */
+static inline void
+rte_spinlock_unlock (rte_spinlock_t *sl);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline void
+rte_spinlock_unlock (rte_spinlock_t *sl)
+{
+ __atomic_store_n(&sl->locked, 0, __ATOMIC_RELEASE);
+}
+#endif
+
+/**
+ * Try to take the lock.
+ *
+ * @param sl
+ * A pointer to the spinlock.
+ * @return
+ * 1 if the lock is successfully taken; 0 otherwise.
+ */
+static inline int
+rte_spinlock_trylock (rte_spinlock_t *sl);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline int
+rte_spinlock_trylock (rte_spinlock_t *sl)
+{
+ int exp = 0;
+ return __atomic_compare_exchange_n(&sl->locked, &exp, 1,
+ 0, /* disallow spurious failure */
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
+}
+#endif
+
+/**
+ * Test if the lock is taken.
+ *
+ * @param sl
+ * A pointer to the spinlock.
+ * @return
+ * 1 if the lock is currently taken; 0 otherwise.
+ */
+static inline int rte_spinlock_is_locked (rte_spinlock_t *sl)
+{
+ return __atomic_load_n(&sl->locked, __ATOMIC_ACQUIRE);
+}
+
+/**
+ * Test if hardware transactional memory (lock elision) is supported
+ *
+ * @return
+ * 1 if the hardware transactional memory is supported; 0 otherwise.
+ */
+static inline int rte_tm_supported(void);
+
+/**
+ * Try to execute critical section in a hardware memory transaction,
+ * if it fails or not available take the spinlock.
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param sl
+ * A pointer to the spinlock.
+ */
+static inline void
+rte_spinlock_lock_tm(rte_spinlock_t *sl);
+
+/**
+ * Commit hardware memory transaction or release the spinlock if
+ * the spinlock is used as a fall-back
+ *
+ * @param sl
+ * A pointer to the spinlock.
+ */
+static inline void
+rte_spinlock_unlock_tm(rte_spinlock_t *sl);
+
+/**
+ * Try to execute critical section in a hardware memory transaction,
+ * if it fails or not available try to take the lock.
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param sl
+ * A pointer to the spinlock.
+ * @return
+ * 1 if the hardware memory transaction is successfully started
+ * or lock is successfully taken; 0 otherwise.
+ */
+static inline int
+rte_spinlock_trylock_tm(rte_spinlock_t *sl);
+
+/**
+ * The rte_spinlock_recursive_t type.
+ */
+typedef struct {
+ rte_spinlock_t sl; /**< the actual spinlock */
+ volatile int user; /**< core id using lock, -1 for unused */
+ volatile int count; /**< count of time this lock has been called */
+} rte_spinlock_recursive_t;
+
+/**
+ * A static recursive spinlock initializer.
+ */
+#define RTE_SPINLOCK_RECURSIVE_INITIALIZER {RTE_SPINLOCK_INITIALIZER, -1, 0}
+
+/**
+ * Initialize the recursive spinlock to an unlocked state.
+ *
+ * @param slr
+ * A pointer to the recursive spinlock.
+ */
+static inline void rte_spinlock_recursive_init(rte_spinlock_recursive_t *slr)
+{
+ rte_spinlock_init(&slr->sl);
+ slr->user = -1;
+ slr->count = 0;
+}
+
+/**
+ * Take the recursive spinlock.
+ *
+ * @param slr
+ * A pointer to the recursive spinlock.
+ */
+static inline void rte_spinlock_recursive_lock(rte_spinlock_recursive_t *slr)
+{
+ int id = rte_gettid();
+
+ if (slr->user != id) {
+ rte_spinlock_lock(&slr->sl);
+ slr->user = id;
+ }
+ slr->count++;
+}
+/**
+ * Release the recursive spinlock.
+ *
+ * @param slr
+ * A pointer to the recursive spinlock.
+ */
+static inline void rte_spinlock_recursive_unlock(rte_spinlock_recursive_t *slr)
+{
+ if (--(slr->count) == 0) {
+ slr->user = -1;
+ rte_spinlock_unlock(&slr->sl);
+ }
+
+}
+
+/**
+ * Try to take the recursive lock.
+ *
+ * @param slr
+ * A pointer to the recursive spinlock.
+ * @return
+ * 1 if the lock is successfully taken; 0 otherwise.
+ */
+static inline int rte_spinlock_recursive_trylock(rte_spinlock_recursive_t *slr)
+{
+ int id = rte_gettid();
+
+ if (slr->user != id) {
+ if (rte_spinlock_trylock(&slr->sl) == 0)
+ return 0;
+ slr->user = id;
+ }
+ slr->count++;
+ return 1;
+}
+
+
+/**
+ * Try to execute critical section in a hardware memory transaction,
+ * if it fails or not available take the recursive spinlocks
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param slr
+ * A pointer to the recursive spinlock.
+ */
+static inline void rte_spinlock_recursive_lock_tm(
+ rte_spinlock_recursive_t *slr);
+
+/**
+ * Commit hardware memory transaction or release the recursive spinlock
+ * if the recursive spinlock is used as a fall-back
+ *
+ * @param slr
+ * A pointer to the recursive spinlock.
+ */
+static inline void rte_spinlock_recursive_unlock_tm(
+ rte_spinlock_recursive_t *slr);
+
+/**
+ * Try to execute critical section in a hardware memory transaction,
+ * if it fails or not available try to take the recursive lock
+ *
+ * NOTE: An attempt to perform a HW I/O operation inside a hardware memory
+ * transaction always aborts the transaction since the CPU is not able to
+ * roll-back should the transaction fail. Therefore, hardware transactional
+ * locks are not advised to be used around rte_eth_rx_burst() and
+ * rte_eth_tx_burst() calls.
+ *
+ * @param slr
+ * A pointer to the recursive spinlock.
+ * @return
+ * 1 if the hardware memory transaction is successfully started
+ * or lock is successfully taken; 0 otherwise.
+ */
+static inline int rte_spinlock_recursive_trylock_tm(
+ rte_spinlock_recursive_t *slr);
+
+#endif /* _RTE_SPINLOCK_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/generic/rte_ticketlock.h b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_ticketlock.h
new file mode 100644
index 000000000..c295ae7f7
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_ticketlock.h
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_TICKETLOCK_H_
+#define _RTE_TICKETLOCK_H_
+
+/**
+ * @file
+ *
+ * RTE ticket locks
+ *
+ * This file defines an API for ticket locks, which give each waiting
+ * thread a ticket and take the lock one by one, first come, first
+ * serviced.
+ *
+ * All locks must be initialised before use, and only initialised once.
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include <rte_lcore.h>
+#include <rte_pause.h>
+
+/**
+ * The rte_ticketlock_t type.
+ */
+typedef union {
+ uint32_t tickets;
+ struct {
+ uint16_t current;
+ uint16_t next;
+ } s;
+} rte_ticketlock_t;
+
+/**
+ * A static ticketlock initializer.
+ */
+#define RTE_TICKETLOCK_INITIALIZER { 0 }
+
+/**
+ * Initialize the ticketlock to an unlocked state.
+ *
+ * @param tl
+ * A pointer to the ticketlock.
+ */
+__rte_experimental
+static inline void
+rte_ticketlock_init(rte_ticketlock_t *tl)
+{
+ __atomic_store_n(&tl->tickets, 0, __ATOMIC_RELAXED);
+}
+
+/**
+ * Take the ticketlock.
+ *
+ * @param tl
+ * A pointer to the ticketlock.
+ */
+__rte_experimental
+static inline void
+rte_ticketlock_lock(rte_ticketlock_t *tl)
+{
+ uint16_t me = __atomic_fetch_add(&tl->s.next, 1, __ATOMIC_RELAXED);
+ rte_wait_until_equal_16(&tl->s.current, me, __ATOMIC_ACQUIRE);
+}
+
+/**
+ * Release the ticketlock.
+ *
+ * @param tl
+ * A pointer to the ticketlock.
+ */
+__rte_experimental
+static inline void
+rte_ticketlock_unlock(rte_ticketlock_t *tl)
+{
+ uint16_t i = __atomic_load_n(&tl->s.current, __ATOMIC_RELAXED);
+ __atomic_store_n(&tl->s.current, i + 1, __ATOMIC_RELEASE);
+}
+
+/**
+ * Try to take the lock.
+ *
+ * @param tl
+ * A pointer to the ticketlock.
+ * @return
+ * 1 if the lock is successfully taken; 0 otherwise.
+ */
+__rte_experimental
+static inline int
+rte_ticketlock_trylock(rte_ticketlock_t *tl)
+{
+ rte_ticketlock_t old, new;
+ old.tickets = __atomic_load_n(&tl->tickets, __ATOMIC_RELAXED);
+ new.tickets = old.tickets;
+ new.s.next++;
+ if (old.s.next == old.s.current) {
+ if (__atomic_compare_exchange_n(&tl->tickets, &old.tickets,
+ new.tickets, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * Test if the lock is taken.
+ *
+ * @param tl
+ * A pointer to the ticketlock.
+ * @return
+ * 1 if the lock is currently taken; 0 otherwise.
+ */
+__rte_experimental
+static inline int
+rte_ticketlock_is_locked(rte_ticketlock_t *tl)
+{
+ rte_ticketlock_t tic;
+ tic.tickets = __atomic_load_n(&tl->tickets, __ATOMIC_ACQUIRE);
+ return (tic.s.current != tic.s.next);
+}
+
+/**
+ * The rte_ticketlock_recursive_t type.
+ */
+#define TICKET_LOCK_INVALID_ID -1
+
+typedef struct {
+ rte_ticketlock_t tl; /**< the actual ticketlock */
+ int user; /**< core id using lock, TICKET_LOCK_INVALID_ID for unused */
+ unsigned int count; /**< count of time this lock has been called */
+} rte_ticketlock_recursive_t;
+
+/**
+ * A static recursive ticketlock initializer.
+ */
+#define RTE_TICKETLOCK_RECURSIVE_INITIALIZER {RTE_TICKETLOCK_INITIALIZER, \
+ TICKET_LOCK_INVALID_ID, 0}
+
+/**
+ * Initialize the recursive ticketlock to an unlocked state.
+ *
+ * @param tlr
+ * A pointer to the recursive ticketlock.
+ */
+__rte_experimental
+static inline void
+rte_ticketlock_recursive_init(rte_ticketlock_recursive_t *tlr)
+{
+ rte_ticketlock_init(&tlr->tl);
+ __atomic_store_n(&tlr->user, TICKET_LOCK_INVALID_ID, __ATOMIC_RELAXED);
+ tlr->count = 0;
+}
+
+/**
+ * Take the recursive ticketlock.
+ *
+ * @param tlr
+ * A pointer to the recursive ticketlock.
+ */
+__rte_experimental
+static inline void
+rte_ticketlock_recursive_lock(rte_ticketlock_recursive_t *tlr)
+{
+ int id = rte_gettid();
+
+ if (__atomic_load_n(&tlr->user, __ATOMIC_RELAXED) != id) {
+ rte_ticketlock_lock(&tlr->tl);
+ __atomic_store_n(&tlr->user, id, __ATOMIC_RELAXED);
+ }
+ tlr->count++;
+}
+
+/**
+ * Release the recursive ticketlock.
+ *
+ * @param tlr
+ * A pointer to the recursive ticketlock.
+ */
+__rte_experimental
+static inline void
+rte_ticketlock_recursive_unlock(rte_ticketlock_recursive_t *tlr)
+{
+ if (--(tlr->count) == 0) {
+ __atomic_store_n(&tlr->user, TICKET_LOCK_INVALID_ID,
+ __ATOMIC_RELAXED);
+ rte_ticketlock_unlock(&tlr->tl);
+ }
+}
+
+/**
+ * Try to take the recursive lock.
+ *
+ * @param tlr
+ * A pointer to the recursive ticketlock.
+ * @return
+ * 1 if the lock is successfully taken; 0 otherwise.
+ */
+__rte_experimental
+static inline int
+rte_ticketlock_recursive_trylock(rte_ticketlock_recursive_t *tlr)
+{
+ int id = rte_gettid();
+
+ if (__atomic_load_n(&tlr->user, __ATOMIC_RELAXED) != id) {
+ if (rte_ticketlock_trylock(&tlr->tl) == 0)
+ return 0;
+ __atomic_store_n(&tlr->user, id, __ATOMIC_RELAXED);
+ }
+ tlr->count++;
+ return 1;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_TICKETLOCK_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/generic/rte_vect.h b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_vect.h
new file mode 100644
index 000000000..3fc47979f
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/generic/rte_vect.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2016 6WIND S.A.
+ */
+
+#ifndef _RTE_VECT_H_
+#define _RTE_VECT_H_
+
+/**
+ * @file
+ * SIMD vector types
+ *
+ * This file defines types to use vector instructions with generic C code.
+ */
+
+#include <stdint.h>
+
+/* Unsigned vector types */
+
+/**
+ * 64 bits vector size to use with unsigned 8 bits elements.
+ *
+ * a = (rte_v64u8_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
+ */
+typedef uint8_t rte_v64u8_t __attribute__((vector_size(8), aligned(8)));
+
+/**
+ * 64 bits vector size to use with unsigned 16 bits elements.
+ *
+ * a = (rte_v64u16_t){ a0, a1, a2, a3 }
+ */
+typedef uint16_t rte_v64u16_t __attribute__((vector_size(8), aligned(8)));
+
+/**
+ * 64 bits vector size to use with unsigned 32 bits elements.
+ *
+ * a = (rte_v64u32_t){ a0, a1 }
+ */
+typedef uint32_t rte_v64u32_t __attribute__((vector_size(8), aligned(8)));
+
+/**
+ * 128 bits vector size to use with unsigned 8 bits elements.
+ *
+ * a = (rte_v128u8_t){ a00, a01, a02, a03, a04, a05, a06, a07,
+ * a08, a09, a10, a11, a12, a13, a14, a15 }
+ */
+typedef uint8_t rte_v128u8_t __attribute__((vector_size(16), aligned(16)));
+
+/**
+ * 128 bits vector size to use with unsigned 16 bits elements.
+ *
+ * a = (rte_v128u16_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
+ */
+typedef uint16_t rte_v128u16_t __attribute__((vector_size(16), aligned(16)));
+
+/**
+ * 128 bits vector size to use with unsigned 32 bits elements.
+ *
+ * a = (rte_v128u32_t){ a0, a1, a2, a3 }
+ */
+typedef uint32_t rte_v128u32_t __attribute__((vector_size(16), aligned(16)));
+
+/**
+ * 128 bits vector size to use with unsigned 64 bits elements.
+ *
+ * a = (rte_v128u64_t){ a0, a1 }
+ */
+typedef uint64_t rte_v128u64_t __attribute__((vector_size(16), aligned(16)));
+
+/**
+ * 256 bits vector size to use with unsigned 8 bits elements.
+ *
+ * a = (rte_v256u8_t){ a00, a01, a02, a03, a04, a05, a06, a07,
+ * a08, a09, a10, a11, a12, a13, a14, a15,
+ * a16, a17, a18, a19, a20, a21, a22, a23,
+ * a24, a25, a26, a27, a28, a29, a30, a31 }
+ */
+typedef uint8_t rte_v256u8_t __attribute__((vector_size(32), aligned(32)));
+
+/**
+ * 256 bits vector size to use with unsigned 16 bits elements.
+ *
+ * a = (rte_v256u16_t){ a00, a01, a02, a03, a04, a05, a06, a07,
+ * a08, a09, a10, a11, a12, a13, a14, a15 }
+ */
+typedef uint16_t rte_v256u16_t __attribute__((vector_size(32), aligned(32)));
+
+/**
+ * 256 bits vector size to use with unsigned 32 bits elements.
+ *
+ * a = (rte_v256u32_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
+ */
+typedef uint32_t rte_v256u32_t __attribute__((vector_size(32), aligned(32)));
+
+/**
+ * 256 bits vector size to use with unsigned 64 bits elements.
+ *
+ * a = (rte_v256u64_t){ a0, a1, a2, a3 }
+ */
+typedef uint64_t rte_v256u64_t __attribute__((vector_size(32), aligned(32)));
+
+
+/* Signed vector types */
+
+/**
+ * 64 bits vector size to use with 8 bits elements.
+ *
+ * a = (rte_v64s8_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
+ */
+typedef int8_t rte_v64s8_t __attribute__((vector_size(8), aligned(8)));
+
+/**
+ * 64 bits vector size to use with 16 bits elements.
+ *
+ * a = (rte_v64s16_t){ a0, a1, a2, a3 }
+ */
+typedef int16_t rte_v64s16_t __attribute__((vector_size(8), aligned(8)));
+
+/**
+ * 64 bits vector size to use with 32 bits elements.
+ *
+ * a = (rte_v64s32_t){ a0, a1 }
+ */
+typedef int32_t rte_v64s32_t __attribute__((vector_size(8), aligned(8)));
+
+/**
+ * 128 bits vector size to use with 8 bits elements.
+ *
+ * a = (rte_v128s8_t){ a00, a01, a02, a03, a04, a05, a06, a07,
+ * a08, a09, a10, a11, a12, a13, a14, a15 }
+ */
+typedef int8_t rte_v128s8_t __attribute__((vector_size(16), aligned(16)));
+
+/**
+ * 128 bits vector size to use with 16 bits elements.
+ *
+ * a = (rte_v128s16_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
+ */
+typedef int16_t rte_v128s16_t __attribute__((vector_size(16), aligned(16)));
+
+/**
+ * 128 bits vector size to use with 32 bits elements.
+ *
+ * a = (rte_v128s32_t){ a0, a1, a2, a3 }
+ */
+typedef int32_t rte_v128s32_t __attribute__((vector_size(16), aligned(16)));
+
+/**
+ * 128 bits vector size to use with 64 bits elements.
+ *
+ * a = (rte_v128s64_t){ a1, a2 }
+ */
+typedef int64_t rte_v128s64_t __attribute__((vector_size(16), aligned(16)));
+
+/**
+ * 256 bits vector size to use with 8 bits elements.
+ *
+ * a = (rte_v256s8_t){ a00, a01, a02, a03, a04, a05, a06, a07,
+ * a08, a09, a10, a11, a12, a13, a14, a15,
+ * a16, a17, a18, a19, a20, a21, a22, a23,
+ * a24, a25, a26, a27, a28, a29, a30, a31 }
+ */
+typedef int8_t rte_v256s8_t __attribute__((vector_size(32), aligned(32)));
+
+/**
+ * 256 bits vector size to use with 16 bits elements.
+ *
+ * a = (rte_v256s16_t){ a00, a01, a02, a03, a04, a05, a06, a07,
+ * a08, a09, a10, a11, a12, a13, a14, a15 }
+ */
+typedef int16_t rte_v256s16_t __attribute__((vector_size(32), aligned(32)));
+
+/**
+ * 256 bits vector size to use with 32 bits elements.
+ *
+ * a = (rte_v256s32_t){ a0, a1, a2, a3, a4, a5, a6, a7 }
+ */
+typedef int32_t rte_v256s32_t __attribute__((vector_size(32), aligned(32)));
+
+/**
+ * 256 bits vector size to use with 64 bits elements.
+ *
+ * a = (rte_v256s64_t){ a0, a1, a2, a3 }
+ */
+typedef int64_t rte_v256s64_t __attribute__((vector_size(32), aligned(32)));
+
+#endif /* _RTE_VECT_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/meson.build b/src/spdk/dpdk/lib/librte_eal/include/meson.build
new file mode 100644
index 000000000..bc73ec2c5
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/meson.build
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+
+includes += include_directories('.')
+
+headers += files(
+ 'rte_alarm.h',
+ 'rte_bitmap.h',
+ 'rte_branch_prediction.h',
+ 'rte_bus.h',
+ 'rte_class.h',
+ 'rte_common.h',
+ 'rte_compat.h',
+ 'rte_debug.h',
+ 'rte_dev.h',
+ 'rte_devargs.h',
+ 'rte_eal.h',
+ 'rte_eal_interrupts.h',
+ 'rte_eal_memconfig.h',
+ 'rte_eal_trace.h',
+ 'rte_errno.h',
+ 'rte_fbarray.h',
+ 'rte_hexdump.h',
+ 'rte_hypervisor.h',
+ 'rte_interrupts.h',
+ 'rte_keepalive.h',
+ 'rte_launch.h',
+ 'rte_lcore.h',
+ 'rte_log.h',
+ 'rte_malloc.h',
+ 'rte_memory.h',
+ 'rte_memzone.h',
+ 'rte_pci_dev_feature_defs.h',
+ 'rte_pci_dev_features.h',
+ 'rte_per_lcore.h',
+ 'rte_random.h',
+ 'rte_reciprocal.h',
+ 'rte_service.h',
+ 'rte_service_component.h',
+ 'rte_string_fns.h',
+ 'rte_tailq.h',
+ 'rte_time.h',
+ 'rte_trace.h',
+ 'rte_trace_point.h',
+ 'rte_trace_point_register.h',
+ 'rte_uuid.h',
+ 'rte_version.h',
+ 'rte_vfio.h',
+)
+
+# special case install the generic headers, since they go in a subdir
+generic_headers = files(
+ 'generic/rte_atomic.h',
+ 'generic/rte_byteorder.h',
+ 'generic/rte_cpuflags.h',
+ 'generic/rte_cycles.h',
+ 'generic/rte_io.h',
+ 'generic/rte_mcslock.h',
+ 'generic/rte_memcpy.h',
+ 'generic/rte_pause.h',
+ 'generic/rte_prefetch.h',
+ 'generic/rte_rwlock.h',
+ 'generic/rte_spinlock.h',
+ 'generic/rte_ticketlock.h',
+ 'generic/rte_vect.h',
+)
+install_headers(generic_headers, subdir: 'generic')
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_alarm.h b/src/spdk/dpdk/lib/librte_eal/include/rte_alarm.h
new file mode 100644
index 000000000..7e4d0b240
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_alarm.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_ALARM_H_
+#define _RTE_ALARM_H_
+
+/**
+ * @file
+ *
+ * Alarm functions
+ *
+ * Simple alarm-clock functionality supplied by eal.
+ * Does not require hpet support.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+/**
+ * Signature of callback back function called when an alarm goes off.
+ */
+typedef void (*rte_eal_alarm_callback)(void *arg);
+
+/**
+ * Function to set a callback to be triggered when us microseconds
+ * have expired. Accuracy of timing to the microsecond is not guaranteed. The
+ * alarm function will not be called *before* the requested time, but may
+ * be called a short period of time afterwards.
+ * The alarm handler will be called only once. There is no need to call
+ * "rte_eal_alarm_cancel" from within the callback function.
+ *
+ * @param us
+ * The time in microseconds before the callback is called
+ * @param cb
+ * The function to be called when the alarm expires
+ * @param cb_arg
+ * Pointer parameter to be passed to the callback function
+ *
+ * @return
+ * On success, zero.
+ * On failure, a negative error number
+ */
+int rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb, void *cb_arg);
+
+/**
+ * Function to cancel an alarm callback which has been registered before. If
+ * used outside alarm callback it wait for all callbacks to finish execution.
+ *
+ * @param cb_fn
+ * alarm callback
+ * @param cb_arg
+ * Pointer parameter to be passed to the callback function. To remove all
+ * copies of a given callback function, irrespective of parameter, (void *)-1
+ * can be used here.
+ *
+ * @return
+ * - value greater than 0 and rte_errno not changed - returned value is
+ * the number of canceled alarm callback functions
+ * - value greater or equal 0 and rte_errno set to EINPROGRESS, at least one
+ * alarm could not be canceled because cancellation was requested from alarm
+ * callback context. Returned value is the number of successfully canceled
+ * alarm callbacks
+ * - 0 and rte_errno set to ENOENT - no alarm found
+ * - -1 and rte_errno set to EINVAL - invalid parameter (NULL callback)
+ */
+int rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _RTE_ALARM_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_bitmap.h b/src/spdk/dpdk/lib/librte_eal/include/rte_bitmap.h
new file mode 100644
index 000000000..7c90ef333
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_bitmap.h
@@ -0,0 +1,575 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef __INCLUDE_RTE_BITMAP_H__
+#define __INCLUDE_RTE_BITMAP_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Bitmap
+ *
+ * The bitmap component provides a mechanism to manage large arrays of bits
+ * through bit get/set/clear and bit array scan operations.
+ *
+ * The bitmap scan operation is optimized for 64-bit CPUs using 64/128 byte cache
+ * lines. The bitmap is hierarchically organized using two arrays (array1 and
+ * array2), with each bit in array1 being associated with a full cache line
+ * (512/1024 bits) of bitmap bits, which are stored in array2: the bit in array1
+ * is set only when there is at least one bit set within its associated array2
+ * bits, otherwise the bit in array1 is cleared. The read and write operations
+ * for array1 and array2 are always done in slabs of 64 bits.
+ *
+ * This bitmap is not thread safe. For lock free operation on a specific bitmap
+ * instance, a single writer thread performing bit set/clear operations is
+ * allowed, only the writer thread can do bitmap scan operations, while there
+ * can be several reader threads performing bit get operations in parallel with
+ * the writer thread. When the use of locking primitives is acceptable, the
+ * serialization of the bit set/clear and bitmap scan operations needs to be
+ * enforced by the caller, while the bit get operation does not require locking
+ * the bitmap.
+ *
+ ***/
+
+#include <string.h>
+#include <rte_common.h>
+#include <rte_config.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_branch_prediction.h>
+#include <rte_prefetch.h>
+
+/* Slab */
+#define RTE_BITMAP_SLAB_BIT_SIZE 64
+#define RTE_BITMAP_SLAB_BIT_SIZE_LOG2 6
+#define RTE_BITMAP_SLAB_BIT_MASK (RTE_BITMAP_SLAB_BIT_SIZE - 1)
+
+/* Cache line (CL) */
+#define RTE_BITMAP_CL_BIT_SIZE (RTE_CACHE_LINE_SIZE * 8)
+#define RTE_BITMAP_CL_BIT_SIZE_LOG2 (RTE_CACHE_LINE_SIZE_LOG2 + 3)
+#define RTE_BITMAP_CL_BIT_MASK (RTE_BITMAP_CL_BIT_SIZE - 1)
+
+#define RTE_BITMAP_CL_SLAB_SIZE (RTE_BITMAP_CL_BIT_SIZE / RTE_BITMAP_SLAB_BIT_SIZE)
+#define RTE_BITMAP_CL_SLAB_SIZE_LOG2 (RTE_BITMAP_CL_BIT_SIZE_LOG2 - RTE_BITMAP_SLAB_BIT_SIZE_LOG2)
+#define RTE_BITMAP_CL_SLAB_MASK (RTE_BITMAP_CL_SLAB_SIZE - 1)
+
+/** Bitmap data structure */
+struct rte_bitmap {
+ /* Context for array1 and array2 */
+ uint64_t *array1; /**< Bitmap array1 */
+ uint64_t *array2; /**< Bitmap array2 */
+ uint32_t array1_size; /**< Number of 64-bit slabs in array1 that are actually used */
+ uint32_t array2_size; /**< Number of 64-bit slabs in array2 */
+
+ /* Context for the "scan next" operation */
+ uint32_t index1; /**< Bitmap scan: Index of current array1 slab */
+ uint32_t offset1; /**< Bitmap scan: Offset of current bit within current array1 slab */
+ uint32_t index2; /**< Bitmap scan: Index of current array2 slab */
+ uint32_t go2; /**< Bitmap scan: Go/stop condition for current array2 cache line */
+
+ /* Storage space for array1 and array2 */
+ uint8_t memory[];
+};
+
+static inline void
+__rte_bitmap_index1_inc(struct rte_bitmap *bmp)
+{
+ bmp->index1 = (bmp->index1 + 1) & (bmp->array1_size - 1);
+}
+
+static inline uint64_t
+__rte_bitmap_mask1_get(struct rte_bitmap *bmp)
+{
+ return (~1llu) << bmp->offset1;
+}
+
+static inline void
+__rte_bitmap_index2_set(struct rte_bitmap *bmp)
+{
+ bmp->index2 = (((bmp->index1 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2) + bmp->offset1) << RTE_BITMAP_CL_SLAB_SIZE_LOG2);
+}
+
+static inline uint32_t
+__rte_bitmap_get_memory_footprint(uint32_t n_bits,
+ uint32_t *array1_byte_offset, uint32_t *array1_slabs,
+ uint32_t *array2_byte_offset, uint32_t *array2_slabs)
+{
+ uint32_t n_slabs_context, n_slabs_array1, n_cache_lines_context_and_array1;
+ uint32_t n_cache_lines_array2;
+ uint32_t n_bytes_total;
+
+ n_cache_lines_array2 = (n_bits + RTE_BITMAP_CL_BIT_SIZE - 1) / RTE_BITMAP_CL_BIT_SIZE;
+ n_slabs_array1 = (n_cache_lines_array2 + RTE_BITMAP_SLAB_BIT_SIZE - 1) / RTE_BITMAP_SLAB_BIT_SIZE;
+ n_slabs_array1 = rte_align32pow2(n_slabs_array1);
+ n_slabs_context = (sizeof(struct rte_bitmap) + (RTE_BITMAP_SLAB_BIT_SIZE / 8) - 1) / (RTE_BITMAP_SLAB_BIT_SIZE / 8);
+ n_cache_lines_context_and_array1 = (n_slabs_context + n_slabs_array1 + RTE_BITMAP_CL_SLAB_SIZE - 1) / RTE_BITMAP_CL_SLAB_SIZE;
+ n_bytes_total = (n_cache_lines_context_and_array1 + n_cache_lines_array2) * RTE_CACHE_LINE_SIZE;
+
+ if (array1_byte_offset) {
+ *array1_byte_offset = n_slabs_context * (RTE_BITMAP_SLAB_BIT_SIZE / 8);
+ }
+ if (array1_slabs) {
+ *array1_slabs = n_slabs_array1;
+ }
+ if (array2_byte_offset) {
+ *array2_byte_offset = n_cache_lines_context_and_array1 * RTE_CACHE_LINE_SIZE;
+ }
+ if (array2_slabs) {
+ *array2_slabs = n_cache_lines_array2 * RTE_BITMAP_CL_SLAB_SIZE;
+ }
+
+ return n_bytes_total;
+}
+
+static inline void
+__rte_bitmap_scan_init(struct rte_bitmap *bmp)
+{
+ bmp->index1 = bmp->array1_size - 1;
+ bmp->offset1 = RTE_BITMAP_SLAB_BIT_SIZE - 1;
+ __rte_bitmap_index2_set(bmp);
+ bmp->index2 += RTE_BITMAP_CL_SLAB_SIZE;
+
+ bmp->go2 = 0;
+}
+
+/**
+ * Bitmap memory footprint calculation
+ *
+ * @param n_bits
+ * Number of bits in the bitmap
+ * @return
+ * Bitmap memory footprint measured in bytes on success, 0 on error
+ */
+static inline uint32_t
+rte_bitmap_get_memory_footprint(uint32_t n_bits) {
+ /* Check input arguments */
+ if (n_bits == 0) {
+ return 0;
+ }
+
+ return __rte_bitmap_get_memory_footprint(n_bits, NULL, NULL, NULL, NULL);
+}
+
+/**
+ * Bitmap initialization
+ *
+ * @param n_bits
+ * Number of pre-allocated bits in array2.
+ * @param mem
+ * Base address of array1 and array2.
+ * @param mem_size
+ * Minimum expected size of bitmap.
+ * @return
+ * Handle to bitmap instance.
+ */
+static inline struct rte_bitmap *
+rte_bitmap_init(uint32_t n_bits, uint8_t *mem, uint32_t mem_size)
+{
+ struct rte_bitmap *bmp;
+ uint32_t array1_byte_offset, array1_slabs, array2_byte_offset, array2_slabs;
+ uint32_t size;
+
+ /* Check input arguments */
+ if (n_bits == 0) {
+ return NULL;
+ }
+
+ if ((mem == NULL) || (((uintptr_t) mem) & RTE_CACHE_LINE_MASK)) {
+ return NULL;
+ }
+
+ size = __rte_bitmap_get_memory_footprint(n_bits,
+ &array1_byte_offset, &array1_slabs,
+ &array2_byte_offset, &array2_slabs);
+ if (size < mem_size) {
+ return NULL;
+ }
+
+ /* Setup bitmap */
+ memset(mem, 0, size);
+ bmp = (struct rte_bitmap *) mem;
+
+ bmp->array1 = (uint64_t *) &mem[array1_byte_offset];
+ bmp->array1_size = array1_slabs;
+ bmp->array2 = (uint64_t *) &mem[array2_byte_offset];
+ bmp->array2_size = array2_slabs;
+
+ __rte_bitmap_scan_init(bmp);
+
+ return bmp;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Bitmap clear slab overhead bits.
+ *
+ * @param slabs
+ * Slab array.
+ * @param slab_size
+ * Number of 64-bit slabs in the slabs array.
+ * @param pos
+ * The start bit position in the slabs to be cleared.
+ */
+__rte_experimental
+static inline void
+__rte_bitmap_clear_slab_overhead_bits(uint64_t *slabs, uint32_t slab_size,
+ uint32_t pos)
+{
+ uint32_t i;
+ uint32_t index = pos / RTE_BITMAP_SLAB_BIT_SIZE;
+ uint32_t offset = pos & RTE_BITMAP_SLAB_BIT_MASK;
+
+ if (offset) {
+ for (i = offset; i < RTE_BITMAP_SLAB_BIT_SIZE; i++)
+ slabs[index] &= ~(1llu << i);
+ index++;
+ }
+ if (index < slab_size)
+ memset(&slabs[index], 0, sizeof(slabs[0]) *
+ (slab_size - index));
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Bitmap initialization with all bits set
+ *
+ * @param n_bits
+ * Number of pre-allocated bits in array2.
+ * @param mem
+ * Base address of array1 and array2.
+ * @param mem_size
+ * Minimum expected size of bitmap.
+ * @return
+ * Handle to bitmap instance.
+ */
+__rte_experimental
+static inline struct rte_bitmap *
+rte_bitmap_init_with_all_set(uint32_t n_bits, uint8_t *mem, uint32_t mem_size)
+{
+ struct rte_bitmap *bmp;
+ uint32_t array1_byte_offset, array1_slabs;
+ uint32_t array2_byte_offset, array2_slabs;
+ uint32_t size;
+
+ /* Check input arguments */
+ if (!n_bits || !mem || (((uintptr_t) mem) & RTE_CACHE_LINE_MASK))
+ return NULL;
+
+ size = __rte_bitmap_get_memory_footprint(n_bits,
+ &array1_byte_offset, &array1_slabs,
+ &array2_byte_offset, &array2_slabs);
+ if (size < mem_size)
+ return NULL;
+
+ /* Setup bitmap */
+ bmp = (struct rte_bitmap *) mem;
+ bmp->array1 = (uint64_t *) &mem[array1_byte_offset];
+ bmp->array1_size = array1_slabs;
+ bmp->array2 = (uint64_t *) &mem[array2_byte_offset];
+ bmp->array2_size = array2_slabs;
+
+ __rte_bitmap_scan_init(bmp);
+
+ memset(bmp->array1, 0xff, bmp->array1_size * sizeof(bmp->array1[0]));
+ memset(bmp->array2, 0xff, bmp->array2_size * sizeof(bmp->array2[0]));
+ /* Clear overhead bits. */
+ __rte_bitmap_clear_slab_overhead_bits(bmp->array1, bmp->array1_size,
+ bmp->array2_size >> RTE_BITMAP_CL_SLAB_SIZE_LOG2);
+ __rte_bitmap_clear_slab_overhead_bits(bmp->array2, bmp->array2_size,
+ n_bits);
+ return bmp;
+}
+
+/**
+ * Bitmap free
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @return
+ * 0 upon success, error code otherwise
+ */
+static inline int
+rte_bitmap_free(struct rte_bitmap *bmp)
+{
+ /* Check input arguments */
+ if (bmp == NULL) {
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * Bitmap reset
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ */
+static inline void
+rte_bitmap_reset(struct rte_bitmap *bmp)
+{
+ memset(bmp->array1, 0, bmp->array1_size * sizeof(uint64_t));
+ memset(bmp->array2, 0, bmp->array2_size * sizeof(uint64_t));
+ __rte_bitmap_scan_init(bmp);
+}
+
+/**
+ * Bitmap location prefetch into CPU L1 cache
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * Bit position
+ * @return
+ * 0 upon success, error code otherwise
+ */
+static inline void
+rte_bitmap_prefetch0(struct rte_bitmap *bmp, uint32_t pos)
+{
+ uint64_t *slab2;
+ uint32_t index2;
+
+ index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ slab2 = bmp->array2 + index2;
+ rte_prefetch0((void *) slab2);
+}
+
+/**
+ * Bitmap bit get
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * Bit position
+ * @return
+ * 0 when bit is cleared, non-zero when bit is set
+ */
+static inline uint64_t
+rte_bitmap_get(struct rte_bitmap *bmp, uint32_t pos)
+{
+ uint64_t *slab2;
+ uint32_t index2, offset2;
+
+ index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+ slab2 = bmp->array2 + index2;
+ return (*slab2) & (1llu << offset2);
+}
+
+/**
+ * Bitmap bit set
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * Bit position
+ */
+static inline void
+rte_bitmap_set(struct rte_bitmap *bmp, uint32_t pos)
+{
+ uint64_t *slab1, *slab2;
+ uint32_t index1, index2, offset1, offset2;
+
+ /* Set bit in array2 slab and set bit in array1 slab */
+ index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+ index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+ offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+ slab2 = bmp->array2 + index2;
+ slab1 = bmp->array1 + index1;
+
+ *slab2 |= 1llu << offset2;
+ *slab1 |= 1llu << offset1;
+}
+
+/**
+ * Bitmap slab set
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * Bit position identifying the array2 slab
+ * @param slab
+ * Value to be assigned to the 64-bit slab in array2
+ */
+static inline void
+rte_bitmap_set_slab(struct rte_bitmap *bmp, uint32_t pos, uint64_t slab)
+{
+ uint64_t *slab1, *slab2;
+ uint32_t index1, index2, offset1;
+
+ /* Set bits in array2 slab and set bit in array1 slab */
+ index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+ offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+ slab2 = bmp->array2 + index2;
+ slab1 = bmp->array1 + index1;
+
+ *slab2 |= slab;
+ *slab1 |= 1llu << offset1;
+}
+
+static inline uint64_t
+__rte_bitmap_line_not_empty(uint64_t *slab2)
+{
+ uint64_t v1, v2, v3, v4;
+
+ v1 = slab2[0] | slab2[1];
+ v2 = slab2[2] | slab2[3];
+ v3 = slab2[4] | slab2[5];
+ v4 = slab2[6] | slab2[7];
+ v1 |= v2;
+ v3 |= v4;
+
+ return v1 | v3;
+}
+
+/**
+ * Bitmap bit clear
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * Bit position
+ */
+static inline void
+rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos)
+{
+ uint64_t *slab1, *slab2;
+ uint32_t index1, index2, offset1, offset2;
+
+ /* Clear bit in array2 slab */
+ index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+ slab2 = bmp->array2 + index2;
+
+ /* Return if array2 slab is not all-zeros */
+ *slab2 &= ~(1llu << offset2);
+ if (*slab2){
+ return;
+ }
+
+ /* Check the entire cache line of array2 for all-zeros */
+ index2 &= ~ RTE_BITMAP_CL_SLAB_MASK;
+ slab2 = bmp->array2 + index2;
+ if (__rte_bitmap_line_not_empty(slab2)) {
+ return;
+ }
+
+ /* The array2 cache line is all-zeros, so clear bit in array1 slab */
+ index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+ offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+ slab1 = bmp->array1 + index1;
+ *slab1 &= ~(1llu << offset1);
+
+ return;
+}
+
+static inline int
+__rte_bitmap_scan_search(struct rte_bitmap *bmp)
+{
+ uint64_t value1;
+ uint32_t i;
+
+ /* Check current array1 slab */
+ value1 = bmp->array1[bmp->index1];
+ value1 &= __rte_bitmap_mask1_get(bmp);
+
+ if (rte_bsf64_safe(value1, &bmp->offset1))
+ return 1;
+
+ __rte_bitmap_index1_inc(bmp);
+ bmp->offset1 = 0;
+
+ /* Look for another array1 slab */
+ for (i = 0; i < bmp->array1_size; i ++, __rte_bitmap_index1_inc(bmp)) {
+ value1 = bmp->array1[bmp->index1];
+
+ if (rte_bsf64_safe(value1, &bmp->offset1))
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline void
+__rte_bitmap_scan_read_init(struct rte_bitmap *bmp)
+{
+ __rte_bitmap_index2_set(bmp);
+ bmp->go2 = 1;
+ rte_prefetch1((void *)(bmp->array2 + bmp->index2 + 8));
+}
+
+static inline int
+__rte_bitmap_scan_read(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab)
+{
+ uint64_t *slab2;
+
+ slab2 = bmp->array2 + bmp->index2;
+ for ( ; bmp->go2 ; bmp->index2 ++, slab2 ++, bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK) {
+ if (*slab2) {
+ *pos = bmp->index2 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ *slab = *slab2;
+
+ bmp->index2 ++;
+ slab2 ++;
+ bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Bitmap scan (with automatic wrap-around)
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * When function call returns 1, pos contains the position of the next set
+ * bit, otherwise not modified
+ * @param slab
+ * When function call returns 1, slab contains the value of the entire 64-bit
+ * slab where the bit indicated by pos is located. Slabs are always 64-bit
+ * aligned, so the position of the first bit of the slab (this bit is not
+ * necessarily set) is pos / 64. Once a slab has been returned by the bitmap
+ * scan operation, the internal pointers of the bitmap are updated to point
+ * after this slab, so the same slab will not be returned again if it
+ * contains more than one bit which is set. When function call returns 0,
+ * slab is not modified.
+ * @return
+ * 0 if there is no bit set in the bitmap, 1 otherwise
+ */
+static inline int
+rte_bitmap_scan(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab)
+{
+ /* Return data from current array2 line if available */
+ if (__rte_bitmap_scan_read(bmp, pos, slab)) {
+ return 1;
+ }
+
+ /* Look for non-empty array2 line */
+ if (__rte_bitmap_scan_search(bmp)) {
+ __rte_bitmap_scan_read_init(bmp);
+ __rte_bitmap_scan_read(bmp, pos, slab);
+ return 1;
+ }
+
+ /* Empty bitmap */
+ return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_BITMAP_H__ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_branch_prediction.h b/src/spdk/dpdk/lib/librte_eal/include/rte_branch_prediction.h
new file mode 100644
index 000000000..854ef9e5d
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_branch_prediction.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+/**
+ * @file
+ * Branch Prediction Helpers in RTE
+ */
+
+#ifndef _RTE_BRANCH_PREDICTION_H_
+#define _RTE_BRANCH_PREDICTION_H_
+
+/**
+ * Check if a branch is likely to be taken.
+ *
+ * This compiler builtin allows the developer to indicate if a branch is
+ * likely to be taken. Example:
+ *
+ * if (likely(x > 1))
+ * do_stuff();
+ *
+ */
+#ifndef likely
+#define likely(x) __builtin_expect(!!(x), 1)
+#endif /* likely */
+
+/**
+ * Check if a branch is unlikely to be taken.
+ *
+ * This compiler builtin allows the developer to indicate if a branch is
+ * unlikely to be taken. Example:
+ *
+ * if (unlikely(x < 1))
+ * do_stuff();
+ *
+ */
+#ifndef unlikely
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif /* unlikely */
+
+#endif /* _RTE_BRANCH_PREDICTION_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_bus.h b/src/spdk/dpdk/lib/librte_eal/include/rte_bus.h
new file mode 100644
index 000000000..d3034d0ed
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_bus.h
@@ -0,0 +1,389 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2016 NXP
+ */
+
+#ifndef _RTE_BUS_H_
+#define _RTE_BUS_H_
+
+/**
+ * @file
+ *
+ * DPDK device bus interface
+ *
+ * This file exposes API and interfaces for bus abstraction
+ * over the devices and drivers in EAL.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_dev.h>
+
+/** Double linked list of buses */
+TAILQ_HEAD(rte_bus_list, rte_bus);
+
+
+/**
+ * IOVA mapping mode.
+ *
+ * IOVA mapping mode is iommu programming mode of a device.
+ * That device (for example: IOMMU backed DMA device) based
+ * on rte_iova_mode will generate physical or virtual address.
+ *
+ */
+enum rte_iova_mode {
+ RTE_IOVA_DC = 0, /* Don't care mode */
+ RTE_IOVA_PA = (1 << 0), /* DMA using physical address */
+ RTE_IOVA_VA = (1 << 1) /* DMA using virtual address */
+};
+
+/**
+ * Bus specific scan for devices attached on the bus.
+ * For each bus object, the scan would be responsible for finding devices and
+ * adding them to its private device list.
+ *
+ * A bus should mandatorily implement this method.
+ *
+ * @return
+ * 0 for successful scan
+ * <0 for unsuccessful scan with error value
+ */
+typedef int (*rte_bus_scan_t)(void);
+
+/**
+ * Implementation specific probe function which is responsible for linking
+ * devices on that bus with applicable drivers.
+ *
+ * This is called while iterating over each registered bus.
+ *
+ * @return
+ * 0 for successful probe
+ * !0 for any error while probing
+ */
+typedef int (*rte_bus_probe_t)(void);
+
+/**
+ * Device iterator to find a device on a bus.
+ *
+ * This function returns an rte_device if one of those held by the bus
+ * matches the data passed as parameter.
+ *
+ * If the comparison function returns zero this function should stop iterating
+ * over any more devices. To continue a search the device of a previous search
+ * can be passed via the start parameter.
+ *
+ * @param cmp
+ * Comparison function.
+ *
+ * @param data
+ * Data to compare each device against.
+ *
+ * @param start
+ * starting point for the iteration
+ *
+ * @return
+ * The first device matching the data, NULL if none exists.
+ */
+typedef struct rte_device *
+(*rte_bus_find_device_t)(const struct rte_device *start, rte_dev_cmp_t cmp,
+ const void *data);
+
+/**
+ * Implementation specific probe function which is responsible for linking
+ * devices on that bus with applicable drivers.
+ *
+ * @param dev
+ * Device pointer that was returned by a previous call to find_device.
+ *
+ * @return
+ * 0 on success.
+ * !0 on error.
+ */
+typedef int (*rte_bus_plug_t)(struct rte_device *dev);
+
+/**
+ * Implementation specific remove function which is responsible for unlinking
+ * devices on that bus from assigned driver.
+ *
+ * @param dev
+ * Device pointer that was returned by a previous call to find_device.
+ *
+ * @return
+ * 0 on success.
+ * !0 on error.
+ */
+typedef int (*rte_bus_unplug_t)(struct rte_device *dev);
+
+/**
+ * Bus specific parsing function.
+ * Validates the syntax used in the textual representation of a device,
+ * If the syntax is valid and ``addr`` is not NULL, writes the bus-specific
+ * device representation to ``addr``.
+ *
+ * @param[in] name
+ * device textual description
+ *
+ * @param[out] addr
+ * device information location address, into which parsed info
+ * should be written. If NULL, nothing should be written, which
+ * is not an error.
+ *
+ * @return
+ * 0 if parsing was successful.
+ * !0 for any error.
+ */
+typedef int (*rte_bus_parse_t)(const char *name, void *addr);
+
+/**
+ * Device level DMA map function.
+ * After a successful call, the memory segment will be mapped to the
+ * given device.
+ *
+ * @param dev
+ * Device pointer.
+ * @param addr
+ * Virtual address to map.
+ * @param iova
+ * IOVA address to map.
+ * @param len
+ * Length of the memory segment being mapped.
+ *
+ * @return
+ * 0 if mapping was successful.
+ * Negative value and rte_errno is set otherwise.
+ */
+typedef int (*rte_dev_dma_map_t)(struct rte_device *dev, void *addr,
+ uint64_t iova, size_t len);
+
+/**
+ * Device level DMA unmap function.
+ * After a successful call, the memory segment will no longer be
+ * accessible by the given device.
+ *
+ * @param dev
+ * Device pointer.
+ * @param addr
+ * Virtual address to unmap.
+ * @param iova
+ * IOVA address to unmap.
+ * @param len
+ * Length of the memory segment being mapped.
+ *
+ * @return
+ * 0 if un-mapping was successful.
+ * Negative value and rte_errno is set otherwise.
+ */
+typedef int (*rte_dev_dma_unmap_t)(struct rte_device *dev, void *addr,
+ uint64_t iova, size_t len);
+
+/**
+ * Implement a specific hot-unplug handler, which is responsible for
+ * handle the failure when device be hot-unplugged. When the event of
+ * hot-unplug be detected, it could call this function to handle
+ * the hot-unplug failure and avoid app crash.
+ * @param dev
+ * Pointer of the device structure.
+ *
+ * @return
+ * 0 on success.
+ * !0 on error.
+ */
+typedef int (*rte_bus_hot_unplug_handler_t)(struct rte_device *dev);
+
+/**
+ * Implement a specific sigbus handler, which is responsible for handling
+ * the sigbus error which is either original memory error, or specific memory
+ * error that caused of device be hot-unplugged. When sigbus error be captured,
+ * it could call this function to handle sigbus error.
+ * @param failure_addr
+ * Pointer of the fault address of the sigbus error.
+ *
+ * @return
+ * 0 for success handle the sigbus for hot-unplug.
+ * 1 for not process it, because it is a generic sigbus error.
+ * -1 for failed to handle the sigbus for hot-unplug.
+ */
+typedef int (*rte_bus_sigbus_handler_t)(const void *failure_addr);
+
+/**
+ * Bus scan policies
+ */
+enum rte_bus_scan_mode {
+ RTE_BUS_SCAN_UNDEFINED,
+ RTE_BUS_SCAN_WHITELIST,
+ RTE_BUS_SCAN_BLACKLIST,
+};
+
+/**
+ * A structure used to configure bus operations.
+ */
+struct rte_bus_conf {
+ enum rte_bus_scan_mode scan_mode; /**< Scan policy. */
+};
+
+
+/**
+ * Get common iommu class of the all the devices on the bus. The bus may
+ * check that those devices are attached to iommu driver.
+ * If no devices are attached to the bus. The bus may return with don't care
+ * (_DC) value.
+ * Otherwise, The bus will return appropriate _pa or _va iova mode.
+ *
+ * @return
+ * enum rte_iova_mode value.
+ */
+typedef enum rte_iova_mode (*rte_bus_get_iommu_class_t)(void);
+
+
+/**
+ * A structure describing a generic bus.
+ */
+struct rte_bus {
+ TAILQ_ENTRY(rte_bus) next; /**< Next bus object in linked list */
+ const char *name; /**< Name of the bus */
+ rte_bus_scan_t scan; /**< Scan for devices attached to bus */
+ rte_bus_probe_t probe; /**< Probe devices on bus */
+ rte_bus_find_device_t find_device; /**< Find a device on the bus */
+ rte_bus_plug_t plug; /**< Probe single device for drivers */
+ rte_bus_unplug_t unplug; /**< Remove single device from driver */
+ rte_bus_parse_t parse; /**< Parse a device name */
+ rte_dev_dma_map_t dma_map; /**< DMA map for device in the bus */
+ rte_dev_dma_unmap_t dma_unmap; /**< DMA unmap for device in the bus */
+ struct rte_bus_conf conf; /**< Bus configuration */
+ rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */
+ rte_dev_iterate_t dev_iterate; /**< Device iterator. */
+ rte_bus_hot_unplug_handler_t hot_unplug_handler;
+ /**< handle hot-unplug failure on the bus */
+ rte_bus_sigbus_handler_t sigbus_handler;
+ /**< handle sigbus error on the bus */
+
+};
+
+/**
+ * Register a Bus handler.
+ *
+ * @param bus
+ * A pointer to a rte_bus structure describing the bus
+ * to be registered.
+ */
+void rte_bus_register(struct rte_bus *bus);
+
+/**
+ * Unregister a Bus handler.
+ *
+ * @param bus
+ * A pointer to a rte_bus structure describing the bus
+ * to be unregistered.
+ */
+void rte_bus_unregister(struct rte_bus *bus);
+
+/**
+ * Scan all the buses.
+ *
+ * @return
+ * 0 in case of success in scanning all buses
+ * !0 in case of failure to scan
+ */
+int rte_bus_scan(void);
+
+/**
+ * For each device on the buses, perform a driver 'match' and call the
+ * driver-specific probe for device initialization.
+ *
+ * @return
+ * 0 for successful match/probe
+ * !0 otherwise
+ */
+int rte_bus_probe(void);
+
+/**
+ * Dump information of all the buses registered with EAL.
+ *
+ * @param f
+ * A valid and open output stream handle
+ */
+void rte_bus_dump(FILE *f);
+
+/**
+ * Bus comparison function.
+ *
+ * @param bus
+ * Bus under test.
+ *
+ * @param data
+ * Data to compare against.
+ *
+ * @return
+ * 0 if the bus matches the data.
+ * !0 if the bus does not match.
+ * <0 if ordering is possible and the bus is lower than the data.
+ * >0 if ordering is possible and the bus is greater than the data.
+ */
+typedef int (*rte_bus_cmp_t)(const struct rte_bus *bus, const void *data);
+
+/**
+ * Bus iterator to find a particular bus.
+ *
+ * This function compares each registered bus to find one that matches
+ * the data passed as parameter.
+ *
+ * If the comparison function returns zero this function will stop iterating
+ * over any more buses. To continue a search the bus of a previous search can
+ * be passed via the start parameter.
+ *
+ * @param start
+ * Starting point for the iteration.
+ *
+ * @param cmp
+ * Comparison function.
+ *
+ * @param data
+ * Data to pass to comparison function.
+ *
+ * @return
+ * A pointer to a rte_bus structure or NULL in case no bus matches
+ */
+struct rte_bus *rte_bus_find(const struct rte_bus *start, rte_bus_cmp_t cmp,
+ const void *data);
+
+/**
+ * Find the registered bus for a particular device.
+ */
+struct rte_bus *rte_bus_find_by_device(const struct rte_device *dev);
+
+/**
+ * Find the registered bus for a given name.
+ */
+struct rte_bus *rte_bus_find_by_name(const char *busname);
+
+
+/**
+ * Get the common iommu class of devices bound on to buses available in the
+ * system. RTE_IOVA_DC means that no preference has been expressed.
+ *
+ * @return
+ * enum rte_iova_mode value.
+ */
+enum rte_iova_mode rte_bus_get_iommu_class(void);
+
+/**
+ * Helper for Bus registration.
+ * The constructor has higher priority than PMD constructors.
+ */
+#define RTE_REGISTER_BUS(nm, bus) \
+RTE_INIT_PRIO(businitfn_ ##nm, BUS) \
+{\
+ (bus).name = RTE_STR(nm);\
+ rte_bus_register(&bus); \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BUS_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_class.h b/src/spdk/dpdk/lib/librte_eal/include/rte_class.h
new file mode 100644
index 000000000..856d09b22
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_class.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaëtan Rivet
+ */
+
+#ifndef _RTE_CLASS_H_
+#define _RTE_CLASS_H_
+
+/**
+ * @file
+ *
+ * DPDK device class interface.
+ *
+ * This file describes the interface of the device class
+ * abstraction layer.
+ *
+ * A device class defines the type of function a device
+ * will be used for e.g.: Ethernet adapter (eth),
+ * cryptographic co-processor (crypto), etc.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/queue.h>
+
+#include <rte_dev.h>
+
+/** Double linked list of classes */
+TAILQ_HEAD(rte_class_list, rte_class);
+
+/**
+ * A structure describing a generic device class.
+ */
+struct rte_class {
+ TAILQ_ENTRY(rte_class) next; /**< Next device class in linked list */
+ const char *name; /**< Name of the class */
+ rte_dev_iterate_t dev_iterate; /**< Device iterator. */
+};
+
+/**
+ * Class comparison function.
+ *
+ * @param cls
+ * Class under test.
+ *
+ * @param data
+ * Data to compare against.
+ *
+ * @return
+ * 0 if the class matches the data.
+ * !0 if the class does not match.
+ * <0 if ordering is possible and the class is lower than the data.
+ * >0 if ordering is possible and the class is greater than the data.
+ */
+typedef int (*rte_class_cmp_t)(const struct rte_class *cls, const void *data);
+
+/**
+ * Class iterator to find a particular class.
+ *
+ * This function compares each registered class to find one that matches
+ * the data passed as parameter.
+ *
+ * If the comparison function returns zero this function will stop iterating
+ * over any more classes. To continue a search the class of a previous search
+ * can be passed via the start parameter.
+ *
+ * @param start
+ * Starting point for the iteration.
+ *
+ * @param cmp
+ * Comparison function.
+ *
+ * @param data
+ * Data to pass to comparison function.
+ *
+ * @return
+ * A pointer to a rte_class structure or NULL in case no class matches
+ */
+__rte_experimental
+struct rte_class *
+rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp,
+ const void *data);
+
+/**
+ * Find the registered class for a given name.
+ */
+__rte_experimental
+struct rte_class *
+rte_class_find_by_name(const char *name);
+
+/**
+ * Register a Class handle.
+ *
+ * @param cls
+ * A pointer to a rte_class structure describing the class
+ * to be registered.
+ */
+__rte_experimental
+void rte_class_register(struct rte_class *cls);
+
+/**
+ * Unregister a Class handle.
+ *
+ * @param cls
+ * A pointer to a rte_class structure describing the class
+ * to be unregistered.
+ */
+__rte_experimental
+void rte_class_unregister(struct rte_class *cls);
+
+/**
+ * Helper for Class registration.
+ * The constructor has lower priority than Bus constructors.
+ * The constructor has higher priority than PMD constructors.
+ */
+#define RTE_REGISTER_CLASS(nm, cls) \
+RTE_INIT_PRIO(classinitfn_ ##nm, CLASS) \
+{\
+ (cls).name = RTE_STR(nm); \
+ rte_class_register(&cls); \
+}
+
+#define RTE_UNREGISTER_CLASS(nm, cls) \
+RTE_FINI_PRIO(classfinifn_ ##nm, CLASS) \
+{ \
+ rte_class_unregister(&cls); \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CLASS_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_common.h b/src/spdk/dpdk/lib/librte_eal/include/rte_common.h
new file mode 100644
index 000000000..0843ce69e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_common.h
@@ -0,0 +1,842 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2019 Intel Corporation
+ */
+
+#ifndef _RTE_COMMON_H_
+#define _RTE_COMMON_H_
+
+/**
+ * @file
+ *
+ * Generic, commonly-used macro and inline function definitions
+ * for DPDK.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+
+#include <rte_config.h>
+
+/* OS specific include */
+#include <rte_os.h>
+
+#ifndef typeof
+#define typeof __typeof__
+#endif
+
+#ifndef asm
+#define asm __asm__
+#endif
+
+/** C extension macro for environments lacking C11 features. */
+#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L
+#define RTE_STD_C11 __extension__
+#else
+#define RTE_STD_C11
+#endif
+
+/*
+ * RTE_TOOLCHAIN_GCC is defined if the target is built with GCC,
+ * while a host application (like pmdinfogen) may have another compiler.
+ * RTE_CC_IS_GNU is true if the file is compiled with GCC,
+ * no matter it is a target or host application.
+ */
+#define RTE_CC_IS_GNU 0
+#if defined __clang__
+#define RTE_CC_CLANG
+#elif defined __INTEL_COMPILER
+#define RTE_CC_ICC
+#elif defined __GNUC__
+#define RTE_CC_GCC
+#undef RTE_CC_IS_GNU
+#define RTE_CC_IS_GNU 1
+#endif
+#if RTE_CC_IS_GNU
+#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + \
+ __GNUC_PATCHLEVEL__)
+#endif
+
+/**
+ * Force alignment
+ */
+#define __rte_aligned(a) __attribute__((__aligned__(a)))
+
+#ifdef RTE_ARCH_STRICT_ALIGN
+typedef uint64_t unaligned_uint64_t __rte_aligned(1);
+typedef uint32_t unaligned_uint32_t __rte_aligned(1);
+typedef uint16_t unaligned_uint16_t __rte_aligned(1);
+#else
+typedef uint64_t unaligned_uint64_t;
+typedef uint32_t unaligned_uint32_t;
+typedef uint16_t unaligned_uint16_t;
+#endif
+
+/**
+ * Force a structure to be packed
+ */
+#define __rte_packed __attribute__((__packed__))
+
+/******* Macro to mark functions and fields scheduled for removal *****/
+#define __rte_deprecated __attribute__((__deprecated__))
+
+/**
+ * Mark a function or variable to a weak reference.
+ */
+#define __rte_weak __attribute__((__weak__))
+
+/**
+ * Force symbol to be generated even if it appears to be unused.
+ */
+#define __rte_used __attribute__((used))
+
+/*********** Macros to eliminate unused variable warnings ********/
+
+/**
+ * short definition to mark a function parameter unused
+ */
+#define __rte_unused __attribute__((__unused__))
+
+/**
+ * definition to mark a variable or function parameter as used so
+ * as to avoid a compiler warning
+ */
+#define RTE_SET_USED(x) (void)(x)
+
+/**
+ * Check format string and its arguments at compile-time.
+ *
+ * GCC on Windows assumes MS-specific format string by default,
+ * even if the underlying stdio implementation is ANSI-compliant,
+ * so this must be overridden.
+ */
+#if RTE_CC_IS_GNU
+#define __rte_format_printf(format_index, first_arg) \
+ __attribute__((format(gnu_printf, format_index, first_arg)))
+#else
+#define __rte_format_printf(format_index, first_arg) \
+ __attribute__((format(printf, format_index, first_arg)))
+#endif
+
+#define RTE_PRIORITY_LOG 101
+#define RTE_PRIORITY_BUS 110
+#define RTE_PRIORITY_CLASS 120
+#define RTE_PRIORITY_LAST 65535
+
+#define RTE_PRIO(prio) \
+ RTE_PRIORITY_ ## prio
+
+/**
+ * Run function before main() with high priority.
+ *
+ * @param func
+ * Constructor function.
+ * @param prio
+ * Priority number must be above 100.
+ * Lowest number is the first to run.
+ */
+#ifndef RTE_INIT_PRIO /* Allow to override from EAL */
+#define RTE_INIT_PRIO(func, prio) \
+static void __attribute__((constructor(RTE_PRIO(prio)), used)) func(void)
+#endif
+
+/**
+ * Run function before main() with low priority.
+ *
+ * The constructor will be run after prioritized constructors.
+ *
+ * @param func
+ * Constructor function.
+ */
+#define RTE_INIT(func) \
+ RTE_INIT_PRIO(func, LAST)
+
+/**
+ * Run after main() with low priority.
+ *
+ * @param func
+ * Destructor function name.
+ * @param prio
+ * Priority number must be above 100.
+ * Lowest number is the last to run.
+ */
+#ifndef RTE_FINI_PRIO /* Allow to override from EAL */
+#define RTE_FINI_PRIO(func, prio) \
+static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void)
+#endif
+
+/**
+ * Run after main() with high priority.
+ *
+ * The destructor will be run *before* prioritized destructors.
+ *
+ * @param func
+ * Destructor function name.
+ */
+#define RTE_FINI(func) \
+ RTE_FINI_PRIO(func, LAST)
+
+/**
+ * Hint never returning function
+ */
+#define __rte_noreturn __attribute__((noreturn))
+
+/**
+ * Force a function to be inlined
+ */
+#define __rte_always_inline inline __attribute__((always_inline))
+
+/**
+ * Force a function to be noinlined
+ */
+#define __rte_noinline __attribute__((noinline))
+
+/**
+ * Hint function in the hot path
+ */
+#define __rte_hot __attribute__((hot))
+
+/**
+ * Hint function in the cold path
+ */
+#define __rte_cold __attribute__((cold))
+
+/*********** Macros for pointer arithmetic ********/
+
+/**
+ * add a byte-value offset to a pointer
+ */
+#define RTE_PTR_ADD(ptr, x) ((void*)((uintptr_t)(ptr) + (x)))
+
+/**
+ * subtract a byte-value offset from a pointer
+ */
+#define RTE_PTR_SUB(ptr, x) ((void*)((uintptr_t)ptr - (x)))
+
+/**
+ * get the difference between two pointer values, i.e. how far apart
+ * in bytes are the locations they point two. It is assumed that
+ * ptr1 is greater than ptr2.
+ */
+#define RTE_PTR_DIFF(ptr1, ptr2) ((uintptr_t)(ptr1) - (uintptr_t)(ptr2))
+
+/**
+ * Workaround to cast a const field of a structure to non-const type.
+ */
+#define RTE_CAST_FIELD(var, field, type) \
+ (*(type *)((uintptr_t)(var) + offsetof(typeof(*(var)), field)))
+
+/*********** Macros/static functions for doing alignment ********/
+
+
+/**
+ * Macro to align a pointer to a given power-of-two. The resultant
+ * pointer will be a pointer of the same type as the first parameter, and
+ * point to an address no higher than the first parameter. Second parameter
+ * must be a power-of-two value.
+ */
+#define RTE_PTR_ALIGN_FLOOR(ptr, align) \
+ ((typeof(ptr))RTE_ALIGN_FLOOR((uintptr_t)ptr, align))
+
+/**
+ * Macro to align a value to a given power-of-two. The resultant value
+ * will be of the same type as the first parameter, and will be no
+ * bigger than the first parameter. Second parameter must be a
+ * power-of-two value.
+ */
+#define RTE_ALIGN_FLOOR(val, align) \
+ (typeof(val))((val) & (~((typeof(val))((align) - 1))))
+
+/**
+ * Macro to align a pointer to a given power-of-two. The resultant
+ * pointer will be a pointer of the same type as the first parameter, and
+ * point to an address no lower than the first parameter. Second parameter
+ * must be a power-of-two value.
+ */
+#define RTE_PTR_ALIGN_CEIL(ptr, align) \
+ RTE_PTR_ALIGN_FLOOR((typeof(ptr))RTE_PTR_ADD(ptr, (align) - 1), align)
+
+/**
+ * Macro to align a value to a given power-of-two. The resultant value
+ * will be of the same type as the first parameter, and will be no lower
+ * than the first parameter. Second parameter must be a power-of-two
+ * value.
+ */
+#define RTE_ALIGN_CEIL(val, align) \
+ RTE_ALIGN_FLOOR(((val) + ((typeof(val)) (align) - 1)), align)
+
+/**
+ * Macro to align a pointer to a given power-of-two. The resultant
+ * pointer will be a pointer of the same type as the first parameter, and
+ * point to an address no lower than the first parameter. Second parameter
+ * must be a power-of-two value.
+ * This function is the same as RTE_PTR_ALIGN_CEIL
+ */
+#define RTE_PTR_ALIGN(ptr, align) RTE_PTR_ALIGN_CEIL(ptr, align)
+
+/**
+ * Macro to align a value to a given power-of-two. The resultant
+ * value will be of the same type as the first parameter, and
+ * will be no lower than the first parameter. Second parameter
+ * must be a power-of-two value.
+ * This function is the same as RTE_ALIGN_CEIL
+ */
+#define RTE_ALIGN(val, align) RTE_ALIGN_CEIL(val, align)
+
+/**
+ * Macro to align a value to the multiple of given value. The resultant
+ * value will be of the same type as the first parameter and will be no lower
+ * than the first parameter.
+ */
+#define RTE_ALIGN_MUL_CEIL(v, mul) \
+ (((v + (typeof(v))(mul) - 1) / ((typeof(v))(mul))) * (typeof(v))(mul))
+
+/**
+ * Macro to align a value to the multiple of given value. The resultant
+ * value will be of the same type as the first parameter and will be no higher
+ * than the first parameter.
+ */
+#define RTE_ALIGN_MUL_FLOOR(v, mul) \
+ ((v / ((typeof(v))(mul))) * (typeof(v))(mul))
+
+/**
+ * Macro to align value to the nearest multiple of the given value.
+ * The resultant value might be greater than or less than the first parameter
+ * whichever difference is the lowest.
+ */
+#define RTE_ALIGN_MUL_NEAR(v, mul) \
+ ({ \
+ typeof(v) ceil = RTE_ALIGN_MUL_CEIL(v, mul); \
+ typeof(v) floor = RTE_ALIGN_MUL_FLOOR(v, mul); \
+ (ceil - v) > (v - floor) ? floor : ceil; \
+ })
+
+/**
+ * Checks if a pointer is aligned to a given power-of-two value
+ *
+ * @param ptr
+ * The pointer whose alignment is to be checked
+ * @param align
+ * The power-of-two value to which the ptr should be aligned
+ *
+ * @return
+ * True(1) where the pointer is correctly aligned, false(0) otherwise
+ */
+static inline int
+rte_is_aligned(void *ptr, unsigned align)
+{
+ return RTE_PTR_ALIGN(ptr, align) == ptr;
+}
+
+/*********** Macros for compile type checks ********/
+
+/**
+ * Triggers an error at compilation time if the condition is true.
+ */
+#define RTE_BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+
+/*********** Cache line related macros ********/
+
+/** Cache line mask. */
+#define RTE_CACHE_LINE_MASK (RTE_CACHE_LINE_SIZE-1)
+
+/** Return the first cache-aligned value greater or equal to size. */
+#define RTE_CACHE_LINE_ROUNDUP(size) \
+ (RTE_CACHE_LINE_SIZE * ((size + RTE_CACHE_LINE_SIZE - 1) / \
+ RTE_CACHE_LINE_SIZE))
+
+/** Cache line size in terms of log2 */
+#if RTE_CACHE_LINE_SIZE == 64
+#define RTE_CACHE_LINE_SIZE_LOG2 6
+#elif RTE_CACHE_LINE_SIZE == 128
+#define RTE_CACHE_LINE_SIZE_LOG2 7
+#else
+#error "Unsupported cache line size"
+#endif
+
+/** Minimum Cache line size. */
+#define RTE_CACHE_LINE_MIN_SIZE 64
+
+/** Force alignment to cache line. */
+#define __rte_cache_aligned __rte_aligned(RTE_CACHE_LINE_SIZE)
+
+/** Force minimum cache line alignment. */
+#define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE)
+
+/*********** PA/IOVA type definitions ********/
+
+/** Physical address */
+typedef uint64_t phys_addr_t;
+#define RTE_BAD_PHYS_ADDR ((phys_addr_t)-1)
+
+/**
+ * IO virtual address type.
+ * When the physical addressing mode (IOVA as PA) is in use,
+ * the translation from an IO virtual address (IOVA) to a physical address
+ * is a direct mapping, i.e. the same value.
+ * Otherwise, in virtual mode (IOVA as VA), an IOMMU may do the translation.
+ */
+typedef uint64_t rte_iova_t;
+#define RTE_BAD_IOVA ((rte_iova_t)-1)
+
+/*********** Structure alignment markers ********/
+
+/** Generic marker for any place in a structure. */
+__extension__ typedef void *RTE_MARKER[0];
+/** Marker for 1B alignment in a structure. */
+__extension__ typedef uint8_t RTE_MARKER8[0];
+/** Marker for 2B alignment in a structure. */
+__extension__ typedef uint16_t RTE_MARKER16[0];
+/** Marker for 4B alignment in a structure. */
+__extension__ typedef uint32_t RTE_MARKER32[0];
+/** Marker for 8B alignment in a structure. */
+__extension__ typedef uint64_t RTE_MARKER64[0];
+
+/**
+ * Combines 32b inputs most significant set bits into the least
+ * significant bits to construct a value with the same MSBs as x
+ * but all 1's under it.
+ *
+ * @param x
+ * The integer whose MSBs need to be combined with its LSBs
+ * @return
+ * The combined value.
+ */
+static inline uint32_t
+rte_combine32ms1b(uint32_t x)
+{
+ x |= x >> 1;
+ x |= x >> 2;
+ x |= x >> 4;
+ x |= x >> 8;
+ x |= x >> 16;
+
+ return x;
+}
+
+/**
+ * Combines 64b inputs most significant set bits into the least
+ * significant bits to construct a value with the same MSBs as x
+ * but all 1's under it.
+ *
+ * @param v
+ * The integer whose MSBs need to be combined with its LSBs
+ * @return
+ * The combined value.
+ */
+static inline uint64_t
+rte_combine64ms1b(uint64_t v)
+{
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ v |= v >> 32;
+
+ return v;
+}
+
+/*********** Macros to work with powers of 2 ********/
+
+/**
+ * Macro to return 1 if n is a power of 2, 0 otherwise
+ */
+#define RTE_IS_POWER_OF_2(n) ((n) && !(((n) - 1) & (n)))
+
+/**
+ * Returns true if n is a power of 2
+ * @param n
+ * Number to check
+ * @return 1 if true, 0 otherwise
+ */
+static inline int
+rte_is_power_of_2(uint32_t n)
+{
+ return n && !(n & (n - 1));
+}
+
+/**
+ * Aligns input parameter to the next power of 2
+ *
+ * @param x
+ * The integer value to align
+ *
+ * @return
+ * Input parameter aligned to the next power of 2
+ */
+static inline uint32_t
+rte_align32pow2(uint32_t x)
+{
+ x--;
+ x = rte_combine32ms1b(x);
+
+ return x + 1;
+}
+
+/**
+ * Aligns input parameter to the previous power of 2
+ *
+ * @param x
+ * The integer value to align
+ *
+ * @return
+ * Input parameter aligned to the previous power of 2
+ */
+static inline uint32_t
+rte_align32prevpow2(uint32_t x)
+{
+ x = rte_combine32ms1b(x);
+
+ return x - (x >> 1);
+}
+
+/**
+ * Aligns 64b input parameter to the next power of 2
+ *
+ * @param v
+ * The 64b value to align
+ *
+ * @return
+ * Input parameter aligned to the next power of 2
+ */
+static inline uint64_t
+rte_align64pow2(uint64_t v)
+{
+ v--;
+ v = rte_combine64ms1b(v);
+
+ return v + 1;
+}
+
+/**
+ * Aligns 64b input parameter to the previous power of 2
+ *
+ * @param v
+ * The 64b value to align
+ *
+ * @return
+ * Input parameter aligned to the previous power of 2
+ */
+static inline uint64_t
+rte_align64prevpow2(uint64_t v)
+{
+ v = rte_combine64ms1b(v);
+
+ return v - (v >> 1);
+}
+
+/*********** Macros for calculating min and max **********/
+
+/**
+ * Macro to return the minimum of two numbers
+ */
+#define RTE_MIN(a, b) \
+ __extension__ ({ \
+ typeof (a) _a = (a); \
+ typeof (b) _b = (b); \
+ _a < _b ? _a : _b; \
+ })
+
+/**
+ * Macro to return the maximum of two numbers
+ */
+#define RTE_MAX(a, b) \
+ __extension__ ({ \
+ typeof (a) _a = (a); \
+ typeof (b) _b = (b); \
+ _a > _b ? _a : _b; \
+ })
+
+/*********** Other general functions / macros ********/
+
+/**
+ * Searches the input parameter for the least significant set bit
+ * (starting from zero).
+ * If a least significant 1 bit is found, its bit index is returned.
+ * If the content of the input parameter is zero, then the content of the return
+ * value is undefined.
+ * @param v
+ * input parameter, should not be zero.
+ * @return
+ * least significant set bit in the input parameter.
+ */
+static inline uint32_t
+rte_bsf32(uint32_t v)
+{
+ return (uint32_t)__builtin_ctz(v);
+}
+
+/**
+ * Searches the input parameter for the least significant set bit
+ * (starting from zero). Safe version (checks for input parameter being zero).
+ *
+ * @warning ``pos`` must be a valid pointer. It is not checked!
+ *
+ * @param v
+ * The input parameter.
+ * @param pos
+ * If ``v`` was not 0, this value will contain position of least significant
+ * bit within the input parameter.
+ * @return
+ * Returns 0 if ``v`` was 0, otherwise returns 1.
+ */
+static inline int
+rte_bsf32_safe(uint64_t v, uint32_t *pos)
+{
+ if (v == 0)
+ return 0;
+
+ *pos = rte_bsf32(v);
+ return 1;
+}
+
+/**
+ * Return the rounded-up log2 of a integer.
+ *
+ * @note Contrary to the logarithm mathematical operation,
+ * rte_log2_u32(0) == 0 and not -inf.
+ *
+ * @param v
+ * The input parameter.
+ * @return
+ * The rounded-up log2 of the input, or 0 if the input is 0.
+ */
+static inline uint32_t
+rte_log2_u32(uint32_t v)
+{
+ if (v == 0)
+ return 0;
+ v = rte_align32pow2(v);
+ return rte_bsf32(v);
+}
+
+
+/**
+ * Return the last (most-significant) bit set.
+ *
+ * @note The last (most significant) bit is at position 32.
+ * @note rte_fls_u32(0) = 0, rte_fls_u32(1) = 1, rte_fls_u32(0x80000000) = 32
+ *
+ * @param x
+ * The input parameter.
+ * @return
+ * The last (most-significant) bit set, or 0 if the input is 0.
+ */
+static inline int
+rte_fls_u32(uint32_t x)
+{
+ return (x == 0) ? 0 : 32 - __builtin_clz(x);
+}
+
+/**
+ * Searches the input parameter for the least significant set bit
+ * (starting from zero).
+ * If a least significant 1 bit is found, its bit index is returned.
+ * If the content of the input parameter is zero, then the content of the return
+ * value is undefined.
+ * @param v
+ * input parameter, should not be zero.
+ * @return
+ * least significant set bit in the input parameter.
+ */
+static inline int
+rte_bsf64(uint64_t v)
+{
+ return (uint32_t)__builtin_ctzll(v);
+}
+
+/**
+ * Searches the input parameter for the least significant set bit
+ * (starting from zero). Safe version (checks for input parameter being zero).
+ *
+ * @warning ``pos`` must be a valid pointer. It is not checked!
+ *
+ * @param v
+ * The input parameter.
+ * @param pos
+ * If ``v`` was not 0, this value will contain position of least significant
+ * bit within the input parameter.
+ * @return
+ * Returns 0 if ``v`` was 0, otherwise returns 1.
+ */
+static inline int
+rte_bsf64_safe(uint64_t v, uint32_t *pos)
+{
+ if (v == 0)
+ return 0;
+
+ *pos = rte_bsf64(v);
+ return 1;
+}
+
+/**
+ * Return the last (most-significant) bit set.
+ *
+ * @note The last (most significant) bit is at position 64.
+ * @note rte_fls_u64(0) = 0, rte_fls_u64(1) = 1,
+ * rte_fls_u64(0x8000000000000000) = 64
+ *
+ * @param x
+ * The input parameter.
+ * @return
+ * The last (most-significant) bit set, or 0 if the input is 0.
+ */
+static inline int
+rte_fls_u64(uint64_t x)
+{
+ return (x == 0) ? 0 : 64 - __builtin_clzll(x);
+}
+
+/**
+ * Return the rounded-up log2 of a 64-bit integer.
+ *
+ * @note Contrary to the logarithm mathematical operation,
+ * rte_log2_u64(0) == 0 and not -inf.
+ *
+ * @param v
+ * The input parameter.
+ * @return
+ * The rounded-up log2 of the input, or 0 if the input is 0.
+ */
+static inline uint32_t
+rte_log2_u64(uint64_t v)
+{
+ if (v == 0)
+ return 0;
+ v = rte_align64pow2(v);
+ /* we checked for v being 0 already, so no undefined behavior */
+ return rte_bsf64(v);
+}
+
+#ifndef offsetof
+/** Return the offset of a field in a structure. */
+#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
+#endif
+
+/**
+ * Return pointer to the wrapping struct instance.
+ *
+ * Example:
+ *
+ * struct wrapper {
+ * ...
+ * struct child c;
+ * ...
+ * };
+ *
+ * struct child *x = obtain(...);
+ * struct wrapper *w = container_of(x, struct wrapper, c);
+ */
+#ifndef container_of
+#define container_of(ptr, type, member) __extension__ ({ \
+ const typeof(((type *)0)->member) *_ptr = (ptr); \
+ __rte_unused type *_target_ptr = \
+ (type *)(ptr); \
+ (type *)(((uintptr_t)_ptr) - offsetof(type, member)); \
+ })
+#endif
+
+/**
+ * Get the size of a field in a structure.
+ *
+ * @param type
+ * The type of the structure.
+ * @param field
+ * The field in the structure.
+ * @return
+ * The size of the field in the structure, in bytes.
+ */
+#define RTE_SIZEOF_FIELD(type, field) (sizeof(((type *)0)->field))
+
+#define _RTE_STR(x) #x
+/** Take a macro value and get a string version of it */
+#define RTE_STR(x) _RTE_STR(x)
+
+/**
+ * ISO C helpers to modify format strings using variadic macros.
+ * This is a replacement for the ", ## __VA_ARGS__" GNU extension.
+ * An empty %s argument is appended to avoid a dangling comma.
+ */
+#define RTE_FMT(fmt, ...) fmt "%.0s", __VA_ARGS__ ""
+#define RTE_FMT_HEAD(fmt, ...) fmt
+#define RTE_FMT_TAIL(fmt, ...) __VA_ARGS__
+
+/** Mask value of type "tp" for the first "ln" bit set. */
+#define RTE_LEN2MASK(ln, tp) \
+ ((tp)((uint64_t)-1 >> (sizeof(uint64_t) * CHAR_BIT - (ln))))
+
+/** Number of elements in the array. */
+#define RTE_DIM(a) (sizeof (a) / sizeof ((a)[0]))
+
+/**
+ * Converts a numeric string to the equivalent uint64_t value.
+ * As well as straight number conversion, also recognises the suffixes
+ * k, m and g for kilobytes, megabytes and gigabytes respectively.
+ *
+ * If a negative number is passed in i.e. a string with the first non-black
+ * character being "-", zero is returned. Zero is also returned in the case of
+ * an error with the strtoull call in the function.
+ *
+ * @param str
+ * String containing number to convert.
+ * @return
+ * Number.
+ */
+static inline uint64_t
+rte_str_to_size(const char *str)
+{
+ char *endptr;
+ unsigned long long size;
+
+ while (isspace((int)*str))
+ str++;
+ if (*str == '-')
+ return 0;
+
+ errno = 0;
+ size = strtoull(str, &endptr, 0);
+ if (errno)
+ return 0;
+
+ if (*endptr == ' ')
+ endptr++; /* allow 1 space gap */
+
+ switch (*endptr){
+ case 'G': case 'g': size *= 1024; /* fall-through */
+ case 'M': case 'm': size *= 1024; /* fall-through */
+ case 'K': case 'k': size *= 1024; /* fall-through */
+ default:
+ break;
+ }
+ return size;
+}
+
+/**
+ * Function to terminate the application immediately, printing an error
+ * message and returning the exit_code back to the shell.
+ *
+ * This function never returns
+ *
+ * @param exit_code
+ * The exit code to be returned by the application
+ * @param format
+ * The format string to be used for printing the message. This can include
+ * printf format characters which will be expanded using any further parameters
+ * to the function.
+ */
+__rte_noreturn void
+rte_exit(int exit_code, const char *format, ...)
+ __rte_format_printf(2, 3);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_compat.h b/src/spdk/dpdk/lib/librte_eal/include/rte_compat.h
new file mode 100644
index 000000000..4cd8f68d6
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_compat.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 Neil Horman <nhorman@tuxdriver.com>.
+ * All rights reserved.
+ */
+
+#ifndef _RTE_COMPAT_H_
+#define _RTE_COMPAT_H_
+
+#ifndef ALLOW_EXPERIMENTAL_API
+
+#define __rte_experimental \
+__attribute__((deprecated("Symbol is not yet part of stable ABI"), \
+section(".text.experimental")))
+
+#else
+
+#define __rte_experimental \
+__attribute__((section(".text.experimental")))
+
+#endif
+
+#ifndef ALLOW_INTERNAL_API
+
+#define __rte_internal \
+__attribute__((error("Symbol is not public ABI"), \
+section(".text.internal")))
+
+#else
+
+#define __rte_internal \
+__attribute__((section(".text.internal")))
+
+#endif
+
+#endif /* _RTE_COMPAT_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_debug.h b/src/spdk/dpdk/lib/librte_eal/include/rte_debug.h
new file mode 100644
index 000000000..50052c5a9
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_debug.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_DEBUG_H_
+#define _RTE_DEBUG_H_
+
+/**
+ * @file
+ *
+ * Debug Functions in RTE
+ *
+ * This file defines a generic API for debug operations. Part of
+ * the implementation is architecture-specific.
+ */
+
+#include "rte_log.h"
+#include "rte_branch_prediction.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Dump the stack of the calling core to the console.
+ */
+void rte_dump_stack(void);
+
+/**
+ * Dump the registers of the calling core to the console.
+ *
+ * Note: Not implemented in a userapp environment; use gdb instead.
+ */
+void rte_dump_registers(void);
+
+/**
+ * Provide notification of a critical non-recoverable error and terminate
+ * execution abnormally.
+ *
+ * Display the format string and its expanded arguments (printf-like).
+ *
+ * In a linux environment, this function dumps the stack and calls
+ * abort() resulting in a core dump if enabled.
+ *
+ * The function never returns.
+ *
+ * @param ...
+ * The format string, followed by the variable list of arguments.
+ */
+#define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy")
+#define rte_panic_(func, format, ...) __rte_panic(func, format "%.0s", __VA_ARGS__)
+
+#ifdef RTE_ENABLE_ASSERT
+#define RTE_ASSERT(exp) RTE_VERIFY(exp)
+#else
+#define RTE_ASSERT(exp) do {} while (0)
+#endif
+#define RTE_VERIFY(exp) do { \
+ if (unlikely(!(exp))) \
+ rte_panic("line %d\tassert \"%s\" failed\n", __LINE__, #exp); \
+} while (0)
+
+/*
+ * Provide notification of a critical non-recoverable error and stop.
+ *
+ * This function should not be called directly. Refer to rte_panic() macro
+ * documentation.
+ */
+void __rte_panic(const char *funcname , const char *format, ...)
+#ifdef __GNUC__
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2))
+ __rte_cold
+#endif
+#endif
+ __rte_noreturn
+ __rte_format_printf(2, 3);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_DEBUG_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_dev.h b/src/spdk/dpdk/lib/librte_eal/include/rte_dev.h
new file mode 100644
index 000000000..c8d985fb5
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_dev.h
@@ -0,0 +1,518 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2014 6WIND S.A.
+ */
+
+#ifndef _RTE_DEV_H_
+#define _RTE_DEV_H_
+
+/**
+ * @file
+ *
+ * RTE PMD Driver Registration Interface
+ *
+ * This file manages the list of device drivers.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <sys/queue.h>
+
+#include <rte_config.h>
+#include <rte_compat.h>
+#include <rte_log.h>
+
+/**
+ * The device event type.
+ */
+enum rte_dev_event_type {
+ RTE_DEV_EVENT_ADD, /**< device being added */
+ RTE_DEV_EVENT_REMOVE, /**< device being removed */
+ RTE_DEV_EVENT_MAX /**< max value of this enum */
+};
+
+struct rte_dev_event {
+ enum rte_dev_event_type type; /**< device event type */
+ int subsystem; /**< subsystem id */
+ char *devname; /**< device name */
+};
+
+typedef void (*rte_dev_event_cb_fn)(const char *device_name,
+ enum rte_dev_event_type event,
+ void *cb_arg);
+
+/* Macros to check for invalid function pointers */
+#define RTE_FUNC_PTR_OR_ERR_RET(func, retval) do { \
+ if ((func) == NULL) \
+ return retval; \
+} while (0)
+
+#define RTE_FUNC_PTR_OR_RET(func) do { \
+ if ((func) == NULL) \
+ return; \
+} while (0)
+
+/**
+ * Device driver.
+ */
+enum rte_kernel_driver {
+ RTE_KDRV_UNKNOWN = 0,
+ RTE_KDRV_IGB_UIO,
+ RTE_KDRV_VFIO,
+ RTE_KDRV_UIO_GENERIC,
+ RTE_KDRV_NIC_UIO,
+ RTE_KDRV_NONE,
+};
+
+/**
+ * Device policies.
+ */
+enum rte_dev_policy {
+ RTE_DEV_WHITELISTED,
+ RTE_DEV_BLACKLISTED,
+};
+
+/**
+ * A generic memory resource representation.
+ */
+struct rte_mem_resource {
+ uint64_t phys_addr; /**< Physical address, 0 if not resource. */
+ uint64_t len; /**< Length of the resource. */
+ void *addr; /**< Virtual address, NULL when not mapped. */
+};
+
+/**
+ * A structure describing a device driver.
+ */
+struct rte_driver {
+ TAILQ_ENTRY(rte_driver) next; /**< Next in list. */
+ const char *name; /**< Driver name. */
+ const char *alias; /**< Driver alias. */
+};
+
+/*
+ * Internal identifier length
+ * Sufficiently large to allow for UUID or PCI address
+ */
+#define RTE_DEV_NAME_MAX_LEN 64
+
+/**
+ * A structure describing a generic device.
+ */
+struct rte_device {
+ TAILQ_ENTRY(rte_device) next; /**< Next device */
+ const char *name; /**< Device name */
+ const struct rte_driver *driver; /**< Driver assigned after probing */
+ const struct rte_bus *bus; /**< Bus handle assigned on scan */
+ int numa_node; /**< NUMA node connection */
+ struct rte_devargs *devargs; /**< Arguments for latest probing */
+};
+
+/**
+ * Query status of a device.
+ *
+ * @param dev
+ * Generic device pointer.
+ * @return
+ * (int)true if already probed successfully, 0 otherwise.
+ */
+int rte_dev_is_probed(const struct rte_device *dev);
+
+/**
+ * Hotplug add a given device to a specific bus.
+ *
+ * In multi-process, it will request other processes to add the same device.
+ * A failure, in any process, will rollback the action
+ *
+ * @param busname
+ * The bus name the device is added to.
+ * @param devname
+ * The device name. Based on this device name, eal will identify a driver
+ * capable of handling it and pass it to the driver probing function.
+ * @param drvargs
+ * Device arguments to be passed to the driver.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_eal_hotplug_add(const char *busname, const char *devname,
+ const char *drvargs);
+
+/**
+ * Add matching devices.
+ *
+ * In multi-process, it will request other processes to add the same device.
+ * A failure, in any process, will rollback the action
+ *
+ * @param devargs
+ * Device arguments including bus, class and driver properties.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_dev_probe(const char *devargs);
+
+/**
+ * Hotplug remove a given device from a specific bus.
+ *
+ * In multi-process, it will request other processes to remove the same device.
+ * A failure, in any process, will rollback the action
+ *
+ * @param busname
+ * The bus name the device is removed from.
+ * @param devname
+ * The device name being removed.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_eal_hotplug_remove(const char *busname, const char *devname);
+
+/**
+ * Remove one device.
+ *
+ * In multi-process, it will request other processes to remove the same device.
+ * A failure, in any process, will rollback the action
+ *
+ * @param dev
+ * Data structure of the device to remove.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_dev_remove(struct rte_device *dev);
+
+/**
+ * Device comparison function.
+ *
+ * This type of function is used to compare an rte_device with arbitrary
+ * data.
+ *
+ * @param dev
+ * Device handle.
+ *
+ * @param data
+ * Data to compare against. The type of this parameter is determined by
+ * the kind of comparison performed by the function.
+ *
+ * @return
+ * 0 if the device matches the data.
+ * !0 if the device does not match.
+ * <0 if ordering is possible and the device is lower than the data.
+ * >0 if ordering is possible and the device is greater than the data.
+ */
+typedef int (*rte_dev_cmp_t)(const struct rte_device *dev, const void *data);
+
+#define RTE_PMD_EXPORT_NAME_ARRAY(n, idx) n##idx[]
+
+#define RTE_PMD_EXPORT_NAME(name, idx) \
+static const char RTE_PMD_EXPORT_NAME_ARRAY(this_pmd_name, idx) \
+__rte_used = RTE_STR(name)
+
+#define DRV_EXP_TAG(name, tag) __##name##_##tag
+
+#define RTE_PMD_REGISTER_PCI_TABLE(name, table) \
+static const char DRV_EXP_TAG(name, pci_tbl_export)[] __rte_used = \
+RTE_STR(table)
+
+#define RTE_PMD_REGISTER_PARAM_STRING(name, str) \
+static const char DRV_EXP_TAG(name, param_string_export)[] \
+__rte_used = str
+
+/**
+ * Advertise the list of kernel modules required to run this driver
+ *
+ * This string lists the kernel modules required for the devices
+ * associated to a PMD. The format of each line of the string is:
+ * "<device-pattern> <kmod-expression>".
+ *
+ * The possible formats for the device pattern are:
+ * "*" all devices supported by this driver
+ * "pci:*" all PCI devices supported by this driver
+ * "pci:v8086:d*:sv*:sd*" all PCI devices supported by this driver
+ * whose vendor id is 0x8086.
+ *
+ * The format of the kernel modules list is a parenthesized expression
+ * containing logical-and (&) and logical-or (|).
+ *
+ * The device pattern and the kmod expression are separated by a space.
+ *
+ * Example:
+ * - "* igb_uio | uio_pci_generic | vfio"
+ */
+#define RTE_PMD_REGISTER_KMOD_DEP(name, str) \
+static const char DRV_EXP_TAG(name, kmod_dep_export)[] \
+__rte_used = str
+
+/**
+ * Iteration context.
+ *
+ * This context carries over the current iteration state.
+ */
+struct rte_dev_iterator {
+ const char *dev_str; /**< device string. */
+ const char *bus_str; /**< bus-related part of device string. */
+ const char *cls_str; /**< class-related part of device string. */
+ struct rte_bus *bus; /**< bus handle. */
+ struct rte_class *cls; /**< class handle. */
+ struct rte_device *device; /**< current position. */
+ void *class_device; /**< additional specialized context. */
+};
+
+/**
+ * Device iteration function.
+ *
+ * Find the next device matching properties passed in parameters.
+ * The function takes an additional ``start`` parameter, that is
+ * used as starting context when relevant.
+ *
+ * The function returns the current element in the iteration.
+ * This return value will potentially be used as a start parameter
+ * in subsequent calls to the function.
+ *
+ * The additional iterator parameter is only there if a specific
+ * implementation needs additional context. It must not be modified by
+ * the iteration function itself.
+ *
+ * @param start
+ * Starting iteration context.
+ *
+ * @param devstr
+ * Device description string.
+ *
+ * @param it
+ * Device iterator.
+ *
+ * @return
+ * The address of the current element matching the device description
+ * string.
+ */
+typedef void *(*rte_dev_iterate_t)(const void *start,
+ const char *devstr,
+ const struct rte_dev_iterator *it);
+
+/**
+ * Initializes a device iterator.
+ *
+ * This iterator allows accessing a list of devices matching a criteria.
+ * The device matching is made among all buses and classes currently registered,
+ * filtered by the device description given as parameter.
+ *
+ * This function will not allocate any memory. It is safe to stop the
+ * iteration at any moment and let the iterator go out of context.
+ *
+ * @param it
+ * Device iterator handle.
+ *
+ * @param str
+ * Device description string.
+ *
+ * @return
+ * 0 on successful initialization.
+ * <0 on error.
+ */
+__rte_experimental
+int
+rte_dev_iterator_init(struct rte_dev_iterator *it, const char *str);
+
+/**
+ * Iterates on a device iterator.
+ *
+ * Generates a new rte_device handle corresponding to the next element
+ * in the list described in comprehension by the iterator.
+ *
+ * The next object is returned, and the iterator is updated.
+ *
+ * @param it
+ * Device iterator handle.
+ *
+ * @return
+ * An rte_device handle if found.
+ * NULL if an error occurred (rte_errno is set).
+ * NULL if no device could be found (rte_errno is not set).
+ */
+__rte_experimental
+struct rte_device *
+rte_dev_iterator_next(struct rte_dev_iterator *it);
+
+#define RTE_DEV_FOREACH(dev, devstr, it) \
+ for (rte_dev_iterator_init(it, devstr), \
+ dev = rte_dev_iterator_next(it); \
+ dev != NULL; \
+ dev = rte_dev_iterator_next(it))
+
+#ifdef __cplusplus
+}
+#endif
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * It registers the callback for the specific device.
+ * Multiple callbacks can be registered at the same time.
+ *
+ * @param device_name
+ * The device name, that is the param name of the struct rte_device,
+ * null value means for all devices.
+ * @param cb_fn
+ * callback address.
+ * @param cb_arg
+ * address of parameter for callback.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+__rte_experimental
+int
+rte_dev_event_callback_register(const char *device_name,
+ rte_dev_event_cb_fn cb_fn,
+ void *cb_arg);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * It unregisters the callback according to the specified device.
+ *
+ * @param device_name
+ * The device name, that is the param name of the struct rte_device,
+ * null value means for all devices and their callbacks.
+ * @param cb_fn
+ * callback address.
+ * @param cb_arg
+ * address of parameter for callback, (void *)-1 means to remove all
+ * registered which has the same callback address.
+ *
+ * @return
+ * - On success, return the number of callback entities removed.
+ * - On failure, a negative value.
+ */
+__rte_experimental
+int
+rte_dev_event_callback_unregister(const char *device_name,
+ rte_dev_event_cb_fn cb_fn,
+ void *cb_arg);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Executes all the user application registered callbacks for
+ * the specific device.
+ *
+ * @param device_name
+ * The device name.
+ * @param event
+ * the device event type.
+ */
+__rte_experimental
+void
+rte_dev_event_callback_process(const char *device_name,
+ enum rte_dev_event_type event);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Start the device event monitoring.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+__rte_experimental
+int
+rte_dev_event_monitor_start(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Stop the device event monitoring.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+__rte_experimental
+int
+rte_dev_event_monitor_stop(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Enable hotplug handling for devices.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+__rte_experimental
+int
+rte_dev_hotplug_handle_enable(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Disable hotplug handling for devices.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+__rte_experimental
+int
+rte_dev_hotplug_handle_disable(void);
+
+/**
+ * Device level DMA map function.
+ * After a successful call, the memory segment will be mapped to the
+ * given device.
+ *
+ * @note: Memory must be registered in advance using rte_extmem_* APIs.
+ *
+ * @param dev
+ * Device pointer.
+ * @param addr
+ * Virtual address to map.
+ * @param iova
+ * IOVA address to map.
+ * @param len
+ * Length of the memory segment being mapped.
+ *
+ * @return
+ * 0 if mapping was successful.
+ * Negative value and rte_errno is set otherwise.
+ */
+__rte_experimental
+int
+rte_dev_dma_map(struct rte_device *dev, void *addr, uint64_t iova, size_t len);
+
+/**
+ * Device level DMA unmap function.
+ * After a successful call, the memory segment will no longer be
+ * accessible by the given device.
+ *
+ * @note: Memory must be registered in advance using rte_extmem_* APIs.
+ *
+ * @param dev
+ * Device pointer.
+ * @param addr
+ * Virtual address to unmap.
+ * @param iova
+ * IOVA address to unmap.
+ * @param len
+ * Length of the memory segment being mapped.
+ *
+ * @return
+ * 0 if un-mapping was successful.
+ * Negative value and rte_errno is set otherwise.
+ */
+__rte_experimental
+int
+rte_dev_dma_unmap(struct rte_device *dev, void *addr, uint64_t iova,
+ size_t len);
+
+#endif /* _RTE_DEV_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_devargs.h b/src/spdk/dpdk/lib/librte_eal/include/rte_devargs.h
new file mode 100644
index 000000000..898efa0d6
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_devargs.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2014 6WIND S.A.
+ */
+
+#ifndef _RTE_DEVARGS_H_
+#define _RTE_DEVARGS_H_
+
+/**
+ * @file
+ *
+ * RTE devargs: list of devices and their user arguments
+ *
+ * This file stores a list of devices and their arguments given by
+ * the user when a DPDK application is started. These devices can be PCI
+ * devices or virtual devices. These devices are stored at startup in a
+ * list of rte_devargs structures.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <sys/queue.h>
+#include <rte_compat.h>
+#include <rte_bus.h>
+
+/**
+ * Type of generic device
+ */
+enum rte_devtype {
+ RTE_DEVTYPE_WHITELISTED_PCI,
+ RTE_DEVTYPE_BLACKLISTED_PCI,
+ RTE_DEVTYPE_VIRTUAL,
+};
+
+/**
+ * Structure that stores a device given by the user with its arguments
+ *
+ * A user device is a physical or a virtual device given by the user to
+ * the DPDK application at startup through command line arguments.
+ *
+ * The structure stores the configuration of the device, its PCI
+ * identifier if it's a PCI device or the driver name if it's a virtual
+ * device.
+ */
+struct rte_devargs {
+ /** Next in list. */
+ TAILQ_ENTRY(rte_devargs) next;
+ /** Type of device. */
+ enum rte_devtype type;
+ /** Device policy. */
+ enum rte_dev_policy policy;
+ /** Name of the device. */
+ char name[RTE_DEV_NAME_MAX_LEN];
+ RTE_STD_C11
+ union {
+ /** Arguments string as given by user or "" for no argument. */
+ char *args;
+ const char *drv_str;
+ };
+ struct rte_bus *bus; /**< bus handle. */
+ struct rte_class *cls; /**< class handle. */
+ const char *bus_str; /**< bus-related part of device string. */
+ const char *cls_str; /**< class-related part of device string. */
+ const char *data; /**< Device string storage. */
+};
+
+/**
+ * Parse a device string.
+ *
+ * Verify that a bus is capable of handling the device passed
+ * in argument. Store which bus will handle the device, its name
+ * and the eventual device parameters.
+ *
+ * The syntax is:
+ *
+ * bus:device_identifier,arg1=val1,arg2=val2
+ *
+ * where "bus:" is the bus name followed by any character separator.
+ * The bus name is optional. If no bus name is specified, each bus
+ * will attempt to recognize the device identifier. The first one
+ * to succeed will be used.
+ *
+ * Examples:
+ *
+ * pci:0000:05.00.0,arg=val
+ * 05.00.0,arg=val
+ * vdev:net_ring0
+ *
+ * @param da
+ * The devargs structure holding the device information.
+ *
+ * @param dev
+ * String describing a device.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative errno on error.
+ */
+int
+rte_devargs_parse(struct rte_devargs *da, const char *dev);
+
+/**
+ * Parse a device string.
+ *
+ * Verify that a bus is capable of handling the device passed
+ * in argument. Store which bus will handle the device, its name
+ * and the eventual device parameters.
+ *
+ * The device string is built with a printf-like syntax.
+ *
+ * The syntax is:
+ *
+ * bus:device_identifier,arg1=val1,arg2=val2
+ *
+ * where "bus:" is the bus name followed by any character separator.
+ * The bus name is optional. If no bus name is specified, each bus
+ * will attempt to recognize the device identifier. The first one
+ * to succeed will be used.
+ *
+ * Examples:
+ *
+ * pci:0000:05.00.0,arg=val
+ * 05.00.0,arg=val
+ * vdev:net_ring0
+ *
+ * @param da
+ * The devargs structure holding the device information.
+ * @param format
+ * Format string describing a device.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative errno on error.
+ */
+int
+rte_devargs_parsef(struct rte_devargs *da,
+ const char *format, ...)
+__rte_format_printf(2, 0);
+
+/**
+ * Insert an rte_devargs in the global list.
+ *
+ * @param da
+ * The devargs structure to insert.
+ * If a devargs for the same device is already inserted,
+ * it will be updated and returned. It means *da pointer can change.
+ *
+ * @return
+ * - 0 on success
+ * - Negative on error.
+ */
+int
+rte_devargs_insert(struct rte_devargs **da);
+
+/**
+ * Add a device to the user device list
+ * See rte_devargs_parse() for details.
+ *
+ * @param devtype
+ * The type of the device.
+ * @param devargs_str
+ * The arguments as given by the user.
+ *
+ * @return
+ * - 0 on success
+ * - A negative value on error
+ */
+int rte_devargs_add(enum rte_devtype devtype, const char *devargs_str);
+
+/**
+ * Remove a device from the user device list.
+ * Its resources are freed.
+ * If the devargs cannot be found, nothing happens.
+ *
+ * @param devargs
+ * The instance or a copy of devargs to remove.
+ *
+ * @return
+ * 0 on success.
+ * <0 on error.
+ * >0 if the devargs was not within the user device list.
+ */
+int rte_devargs_remove(struct rte_devargs *devargs);
+
+/**
+ * Count the number of user devices of a specified type
+ *
+ * @param devtype
+ * The type of the devices to counted.
+ *
+ * @return
+ * The number of devices.
+ */
+unsigned int
+rte_devargs_type_count(enum rte_devtype devtype);
+
+/**
+ * This function dumps the list of user device and their arguments.
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+void rte_devargs_dump(FILE *f);
+
+/**
+ * Find next rte_devargs matching the provided bus name.
+ *
+ * @param busname
+ * Limit the iteration to devargs related to buses
+ * matching this name.
+ * Will return any next rte_devargs if NULL.
+ *
+ * @param start
+ * Starting iteration point. The iteration will start at
+ * the first rte_devargs if NULL.
+ *
+ * @return
+ * Next rte_devargs entry matching the requested bus,
+ * NULL if there is none.
+ */
+struct rte_devargs *
+rte_devargs_next(const char *busname, const struct rte_devargs *start);
+
+/**
+ * Iterate over all rte_devargs for a specific bus.
+ */
+#define RTE_EAL_DEVARGS_FOREACH(busname, da) \
+ for (da = rte_devargs_next(busname, NULL); \
+ da != NULL; \
+ da = rte_devargs_next(busname, da)) \
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_DEVARGS_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_eal.h b/src/spdk/dpdk/lib/librte_eal/include/rte_eal.h
new file mode 100644
index 000000000..2f9ed298d
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_eal.h
@@ -0,0 +1,495 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#ifndef _RTE_EAL_H_
+#define _RTE_EAL_H_
+
+/**
+ * @file
+ *
+ * EAL Configuration API
+ */
+
+#include <stdint.h>
+#include <sched.h>
+#include <time.h>
+
+#include <rte_config.h>
+#include <rte_compat.h>
+#include <rte_per_lcore.h>
+#include <rte_bus.h>
+
+#include <rte_pci_dev_feature_defs.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_MAGIC 19820526 /**< Magic number written by the main partition when ready. */
+
+/* Maximum thread_name length. */
+#define RTE_MAX_THREAD_NAME_LEN 16
+
+/**
+ * The lcore role (used in RTE or not).
+ */
+enum rte_lcore_role_t {
+ ROLE_RTE,
+ ROLE_OFF,
+ ROLE_SERVICE,
+};
+
+/**
+ * The type of process in a linux, multi-process setup
+ */
+enum rte_proc_type_t {
+ RTE_PROC_AUTO = -1, /* allow auto-detection of primary/secondary */
+ RTE_PROC_PRIMARY = 0, /* set to zero, so primary is the default */
+ RTE_PROC_SECONDARY,
+
+ RTE_PROC_INVALID
+};
+
+/**
+ * Get the process type in a multi-process setup
+ *
+ * @return
+ * The process type
+ */
+enum rte_proc_type_t rte_eal_process_type(void);
+
+/**
+ * Request iopl privilege for all RPL.
+ *
+ * This function should be called by pmds which need access to ioports.
+
+ * @return
+ * - On success, returns 0.
+ * - On failure, returns -1.
+ */
+int rte_eal_iopl_init(void);
+
+/**
+ * Initialize the Environment Abstraction Layer (EAL).
+ *
+ * This function is to be executed on the MASTER lcore only, as soon
+ * as possible in the application's main() function.
+ *
+ * The function finishes the initialization process before main() is called.
+ * It puts the SLAVE lcores in the WAIT state.
+ *
+ * When the multi-partition feature is supported, depending on the
+ * configuration (if CONFIG_RTE_EAL_MAIN_PARTITION is disabled), this
+ * function waits to ensure that the magic number is set before
+ * returning. See also the rte_eal_get_configuration() function. Note:
+ * This behavior may change in the future.
+ *
+ * @param argc
+ * A non-negative value. If it is greater than 0, the array members
+ * for argv[0] through argv[argc] (non-inclusive) shall contain pointers
+ * to strings.
+ * @param argv
+ * An array of strings. The contents of the array, as well as the strings
+ * which are pointed to by the array, may be modified by this function.
+ * @return
+ * - On success, the number of parsed arguments, which is greater or
+ * equal to zero. After the call to rte_eal_init(),
+ * all arguments argv[x] with x < ret may have been modified by this
+ * function call and should not be further interpreted by the
+ * application. The EAL does not take any ownership of the memory used
+ * for either the argv array, or its members.
+ * - On failure, -1 and rte_errno is set to a value indicating the cause
+ * for failure. In some instances, the application will need to be
+ * restarted as part of clearing the issue.
+ *
+ * Error codes returned via rte_errno:
+ * EACCES indicates a permissions issue.
+ *
+ * EAGAIN indicates either a bus or system resource was not available,
+ * setup may be attempted again.
+ *
+ * EALREADY indicates that the rte_eal_init function has already been
+ * called, and cannot be called again.
+ *
+ * EFAULT indicates the tailq configuration name was not found in
+ * memory configuration.
+ *
+ * EINVAL indicates invalid parameters were passed as argv/argc.
+ *
+ * ENOMEM indicates failure likely caused by an out-of-memory condition.
+ *
+ * ENODEV indicates memory setup issues.
+ *
+ * ENOTSUP indicates that the EAL cannot initialize on this system.
+ *
+ * EPROTO indicates that the PCI bus is either not present, or is not
+ * readable by the eal.
+ *
+ * ENOEXEC indicates that a service core failed to launch successfully.
+ */
+int rte_eal_init(int argc, char **argv);
+
+/**
+ * Clean up the Environment Abstraction Layer (EAL)
+ *
+ * This function must be called to release any internal resources that EAL has
+ * allocated during rte_eal_init(). After this call, no DPDK function calls may
+ * be made. It is expected that common usage of this function is to call it
+ * just before terminating the process.
+ *
+ * @return 0 Successfully released all internal EAL resources
+ * @return -EFAULT There was an error in releasing all resources.
+ */
+int rte_eal_cleanup(void);
+
+/**
+ * Check if a primary process is currently alive
+ *
+ * This function returns true when a primary process is currently
+ * active.
+ *
+ * @param config_file_path
+ * The config_file_path argument provided should point at the location
+ * that the primary process will create its config file. If NULL, the default
+ * config file path is used.
+ *
+ * @return
+ * - If alive, returns 1.
+ * - If dead, returns 0.
+ */
+int rte_eal_primary_proc_alive(const char *config_file_path);
+
+#define RTE_MP_MAX_FD_NUM 8 /* The max amount of fds */
+#define RTE_MP_MAX_NAME_LEN 64 /* The max length of action name */
+#define RTE_MP_MAX_PARAM_LEN 256 /* The max length of param */
+struct rte_mp_msg {
+ char name[RTE_MP_MAX_NAME_LEN];
+ int len_param;
+ int num_fds;
+ uint8_t param[RTE_MP_MAX_PARAM_LEN];
+ int fds[RTE_MP_MAX_FD_NUM];
+};
+
+struct rte_mp_reply {
+ int nb_sent;
+ int nb_received;
+ struct rte_mp_msg *msgs; /* caller to free */
+};
+
+/**
+ * Action function typedef used by other components.
+ *
+ * As we create socket channel for primary/secondary communication, use
+ * this function typedef to register action for coming messages.
+ *
+ * @note When handling IPC request callbacks, the reply must be sent even in
+ * cases of error handling. Simply returning success or failure will *not*
+ * send a response to the requestor.
+ * Implementation of error signalling mechanism is up to the application.
+ *
+ * @note No memory allocations should take place inside the callback.
+ */
+typedef int (*rte_mp_t)(const struct rte_mp_msg *msg, const void *peer);
+
+/**
+ * Asynchronous reply function typedef used by other components.
+ *
+ * As we create socket channel for primary/secondary communication, use
+ * this function typedef to register action for coming responses to asynchronous
+ * requests.
+ *
+ * @note When handling IPC request callbacks, the reply must be sent even in
+ * cases of error handling. Simply returning success or failure will *not*
+ * send a response to the requestor.
+ * Implementation of error signalling mechanism is up to the application.
+ *
+ * @note No memory allocations should take place inside the callback.
+ */
+typedef int (*rte_mp_async_reply_t)(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Register an action function for primary/secondary communication.
+ *
+ * Call this function to register an action, if the calling component wants
+ * to response the messages from the corresponding component in its primary
+ * process or secondary processes.
+ *
+ * @note IPC may be unsupported in certain circumstances, so caller should check
+ * for ENOTSUP error.
+ *
+ * @param name
+ * The name argument plays as the nonredundant key to find the action.
+ *
+ * @param action
+ * The action argument is the function pointer to the action function.
+ *
+ * @return
+ * - 0 on success.
+ * - (<0) on failure.
+ */
+__rte_experimental
+int
+rte_mp_action_register(const char *name, rte_mp_t action);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Unregister an action function for primary/secondary communication.
+ *
+ * Call this function to unregister an action if the calling component does
+ * not want to response the messages from the corresponding component in its
+ * primary process or secondary processes.
+ *
+ * @note IPC may be unsupported in certain circumstances, so caller should check
+ * for ENOTSUP error.
+ *
+ * @param name
+ * The name argument plays as the nonredundant key to find the action.
+ *
+ */
+__rte_experimental
+void
+rte_mp_action_unregister(const char *name);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Send a message to the peer process.
+ *
+ * This function will send a message which will be responded by the action
+ * identified by name in the peer process.
+ *
+ * @param msg
+ * The msg argument contains the customized message.
+ *
+ * @return
+ * - On success, return 0.
+ * - On failure, return -1, and the reason will be stored in rte_errno.
+ */
+__rte_experimental
+int
+rte_mp_sendmsg(struct rte_mp_msg *msg);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Send a request to the peer process and expect a reply.
+ *
+ * This function sends a request message to the peer process, and will
+ * block until receiving reply message from the peer process.
+ *
+ * @note The caller is responsible to free reply->replies.
+ *
+ * @note This API must not be used inside memory-related or IPC callbacks, and
+ * no memory allocations should take place inside such callback.
+ *
+ * @note IPC may be unsupported in certain circumstances, so caller should check
+ * for ENOTSUP error.
+ *
+ * @param req
+ * The req argument contains the customized request message.
+ *
+ * @param reply
+ * The reply argument will be for storing all the replied messages;
+ * the caller is responsible for free reply->msgs.
+ *
+ * @param ts
+ * The ts argument specifies how long we can wait for the peer(s) to reply.
+ *
+ * @return
+ * - On success, return 0.
+ * - On failure, return -1, and the reason will be stored in rte_errno.
+ */
+__rte_experimental
+int
+rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
+ const struct timespec *ts);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Send a request to the peer process and expect a reply in a separate callback.
+ *
+ * This function sends a request message to the peer process, and will not
+ * block. Instead, reply will be received in a separate callback.
+ *
+ * @note IPC may be unsupported in certain circumstances, so caller should check
+ * for ENOTSUP error.
+ *
+ * @param req
+ * The req argument contains the customized request message.
+ *
+ * @param ts
+ * The ts argument specifies how long we can wait for the peer(s) to reply.
+ *
+ * @param clb
+ * The callback to trigger when all responses for this request have arrived.
+ *
+ * @return
+ * - On success, return 0.
+ * - On failure, return -1, and the reason will be stored in rte_errno.
+ */
+__rte_experimental
+int
+rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
+ rte_mp_async_reply_t clb);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Send a reply to the peer process.
+ *
+ * This function will send a reply message in response to a request message
+ * received previously.
+ *
+ * @note When handling IPC request callbacks, the reply must be sent even in
+ * cases of error handling. Simply returning success or failure will *not*
+ * send a response to the requestor.
+ * Implementation of error signalling mechanism is up to the application.
+ *
+ * @param msg
+ * The msg argument contains the customized message.
+ *
+ * @param peer
+ * The peer argument is the pointer to the peer socket path.
+ *
+ * @return
+ * - On success, return 0.
+ * - On failure, return -1, and the reason will be stored in rte_errno.
+ */
+__rte_experimental
+int
+rte_mp_reply(struct rte_mp_msg *msg, const char *peer);
+
+/**
+ * Usage function typedef used by the application usage function.
+ *
+ * Use this function typedef to define and call rte_set_application_usage_hook()
+ * routine.
+ */
+typedef void (*rte_usage_hook_t)(const char * prgname);
+
+/**
+ * Add application usage routine callout from the eal_usage() routine.
+ *
+ * This function allows the application to include its usage message
+ * in the EAL system usage message. The routine rte_set_application_usage_hook()
+ * needs to be called before the rte_eal_init() routine in the application.
+ *
+ * This routine is optional for the application and will behave as if the set
+ * routine was never called as the default behavior.
+ *
+ * @param usage_func
+ * The func argument is a function pointer to the application usage routine.
+ * Called function is defined using rte_usage_hook_t typedef, which is of
+ * the form void rte_usage_func(const char * prgname).
+ *
+ * Calling this routine with a NULL value will reset the usage hook routine and
+ * return the current value, which could be NULL.
+ * @return
+ * - Returns the current value of the rte_application_usage pointer to allow
+ * the caller to daisy chain the usage routines if needing more then one.
+ */
+rte_usage_hook_t
+rte_set_application_usage_hook(rte_usage_hook_t usage_func);
+
+/**
+ * Whether EAL is using huge pages (disabled by --no-huge option).
+ * The no-huge mode is not compatible with all drivers or features.
+ *
+ * @return
+ * Nonzero if hugepages are enabled.
+ */
+int rte_eal_has_hugepages(void);
+
+/**
+ * Whether EAL is using PCI bus.
+ * Disabled by --no-pci option.
+ *
+ * @return
+ * Nonzero if the PCI bus is enabled.
+ */
+int rte_eal_has_pci(void);
+
+/**
+ * Whether the EAL was asked to create UIO device.
+ *
+ * @return
+ * Nonzero if true.
+ */
+int rte_eal_create_uio_dev(void);
+
+/**
+ * The user-configured vfio interrupt mode.
+ *
+ * @return
+ * Interrupt mode configured with the command line,
+ * RTE_INTR_MODE_NONE by default.
+ */
+enum rte_intr_mode rte_eal_vfio_intr_mode(void);
+
+/**
+ * A wrap API for syscall gettid.
+ *
+ * @return
+ * On success, returns the thread ID of calling process.
+ * It is always successful.
+ */
+int rte_sys_gettid(void);
+
+/**
+ * Get system unique thread id.
+ *
+ * @return
+ * On success, returns the thread ID of calling process.
+ * It is always successful.
+ */
+static inline int rte_gettid(void)
+{
+ static RTE_DEFINE_PER_LCORE(int, _thread_id) = -1;
+ if (RTE_PER_LCORE(_thread_id) == -1)
+ RTE_PER_LCORE(_thread_id) = rte_sys_gettid();
+ return RTE_PER_LCORE(_thread_id);
+}
+
+/**
+ * Get the iova mode
+ *
+ * @return
+ * enum rte_iova_mode value.
+ */
+enum rte_iova_mode rte_eal_iova_mode(void);
+
+/**
+ * Get user provided pool ops name for mbuf
+ *
+ * @return
+ * returns user provided pool ops name.
+ */
+const char *
+rte_eal_mbuf_user_pool_ops(void);
+
+/**
+ * Get the runtime directory of DPDK
+ *
+ * @return
+ * The runtime directory path of DPDK
+ */
+const char *
+rte_eal_get_runtime_dir(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_EAL_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_eal_interrupts.h b/src/spdk/dpdk/lib/librte_eal/include/rte_eal_interrupts.h
new file mode 100644
index 000000000..773a34a42
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_eal_interrupts.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_INTERRUPTS_H_
+#error "don't include this file directly, please include generic <rte_interrupts.h>"
+#endif
+
+/**
+ * @file rte_eal_interrupts.h
+ * @internal
+ *
+ * Contains function prototypes exposed by the EAL for interrupt handling by
+ * drivers and other DPDK internal consumers.
+ */
+
+#ifndef _RTE_EAL_INTERRUPTS_H_
+#define _RTE_EAL_INTERRUPTS_H_
+
+#define RTE_MAX_RXTX_INTR_VEC_ID 512
+#define RTE_INTR_VEC_ZERO_OFFSET 0
+#define RTE_INTR_VEC_RXTX_OFFSET 1
+
+/**
+ * The interrupt source type, e.g. UIO, VFIO, ALARM etc.
+ */
+enum rte_intr_handle_type {
+ RTE_INTR_HANDLE_UNKNOWN = 0, /**< generic unknown handle */
+ RTE_INTR_HANDLE_UIO, /**< uio device handle */
+ RTE_INTR_HANDLE_UIO_INTX, /**< uio generic handle */
+ RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */
+ RTE_INTR_HANDLE_VFIO_MSI, /**< vfio device handle (MSI) */
+ RTE_INTR_HANDLE_VFIO_MSIX, /**< vfio device handle (MSIX) */
+ RTE_INTR_HANDLE_ALARM, /**< alarm handle */
+ RTE_INTR_HANDLE_EXT, /**< external handler */
+ RTE_INTR_HANDLE_VDEV, /**< virtual device */
+ RTE_INTR_HANDLE_DEV_EVENT, /**< device event handle */
+ RTE_INTR_HANDLE_VFIO_REQ, /**< VFIO request handle */
+ RTE_INTR_HANDLE_MAX /**< count of elements */
+};
+
+#define RTE_INTR_EVENT_ADD 1UL
+#define RTE_INTR_EVENT_DEL 2UL
+
+typedef void (*rte_intr_event_cb_t)(int fd, void *arg);
+
+struct rte_epoll_data {
+ uint32_t event; /**< event type */
+ void *data; /**< User data */
+ rte_intr_event_cb_t cb_fun; /**< IN: callback fun */
+ void *cb_arg; /**< IN: callback arg */
+};
+
+enum {
+ RTE_EPOLL_INVALID = 0,
+ RTE_EPOLL_VALID,
+ RTE_EPOLL_EXEC,
+};
+
+/** interrupt epoll event obj, taken by epoll_event.ptr */
+struct rte_epoll_event {
+ volatile uint32_t status; /**< OUT: event status */
+ int fd; /**< OUT: event fd */
+ int epfd; /**< OUT: epoll instance the ev associated with */
+ struct rte_epoll_data epdata;
+};
+
+/** Handle for interrupts. */
+struct rte_intr_handle {
+ RTE_STD_C11
+ union {
+ int vfio_dev_fd; /**< VFIO device file descriptor */
+ int uio_cfg_fd; /**< UIO cfg file desc for uio_pci_generic */
+ };
+ int fd; /**< interrupt event file descriptor */
+ enum rte_intr_handle_type type; /**< handle type */
+ uint32_t max_intr; /**< max interrupt requested */
+ uint32_t nb_efd; /**< number of available efd(event fd) */
+ uint8_t efd_counter_size; /**< size of efd counter, used for vdev */
+ int efds[RTE_MAX_RXTX_INTR_VEC_ID]; /**< intr vectors/efds mapping */
+ struct rte_epoll_event elist[RTE_MAX_RXTX_INTR_VEC_ID];
+ /**< intr vector epoll event */
+ int *intr_vec; /**< intr vector number array */
+};
+
+#define RTE_EPOLL_PER_THREAD -1 /**< to hint using per thread epfd */
+
+/**
+ * It waits for events on the epoll instance.
+ *
+ * @param epfd
+ * Epoll instance fd on which the caller wait for events.
+ * @param events
+ * Memory area contains the events that will be available for the caller.
+ * @param maxevents
+ * Up to maxevents are returned, must greater than zero.
+ * @param timeout
+ * Specifying a timeout of -1 causes a block indefinitely.
+ * Specifying a timeout equal to zero cause to return immediately.
+ * @return
+ * - On success, returns the number of available event.
+ * - On failure, a negative value.
+ */
+int
+rte_epoll_wait(int epfd, struct rte_epoll_event *events,
+ int maxevents, int timeout);
+
+/**
+ * It performs control operations on epoll instance referred by the epfd.
+ * It requests that the operation op be performed for the target fd.
+ *
+ * @param epfd
+ * Epoll instance fd on which the caller perform control operations.
+ * @param op
+ * The operation be performed for the target fd.
+ * @param fd
+ * The target fd on which the control ops perform.
+ * @param event
+ * Describes the object linked to the fd.
+ * Note: The caller must take care the object deletion after CTL_DEL.
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+rte_epoll_ctl(int epfd, int op, int fd,
+ struct rte_epoll_event *event);
+
+/**
+ * The function returns the per thread epoll instance.
+ *
+ * @return
+ * epfd the epoll instance referred to.
+ */
+int
+rte_intr_tls_epfd(void);
+
+/**
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ * @param epfd
+ * Epoll instance fd which the intr vector associated to.
+ * @param op
+ * The operation be performed for the vector.
+ * Operation type of {ADD, DEL}.
+ * @param vec
+ * RX intr vector number added to the epoll instance wait list.
+ * @param data
+ * User raw data.
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
+ int epfd, int op, unsigned int vec, void *data);
+
+/**
+ * It deletes registered eventfds.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+void
+rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle);
+
+/**
+ * It enables the packet I/O interrupt event if it's necessary.
+ * It creates event fd for each interrupt vector when MSIX is used,
+ * otherwise it multiplexes a single event fd.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ * @param nb_efd
+ * Number of interrupt vector trying to enable.
+ * The value 0 is not allowed.
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd);
+
+/**
+ * It disables the packet I/O interrupt event.
+ * It deletes registered eventfds and closes the open fds.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+void
+rte_intr_efd_disable(struct rte_intr_handle *intr_handle);
+
+/**
+ * The packet I/O interrupt on datapath is enabled or not.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+int
+rte_intr_dp_is_en(struct rte_intr_handle *intr_handle);
+
+/**
+ * The interrupt handle instance allows other causes or not.
+ * Other causes stand for any none packet I/O interrupts.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+int
+rte_intr_allow_others(struct rte_intr_handle *intr_handle);
+
+/**
+ * The multiple interrupt vector capability of interrupt handle instance.
+ * It returns zero if no multiple interrupt vector support.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+int
+rte_intr_cap_multiple(struct rte_intr_handle *intr_handle);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * @internal
+ * Check if currently executing in interrupt context
+ *
+ * @return
+ * - non zero in case of interrupt context
+ * - zero in case of process context
+ */
+__rte_experimental
+int
+rte_thread_is_intr(void);
+
+#endif /* _RTE_EAL_INTERRUPTS_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_eal_memconfig.h b/src/spdk/dpdk/lib/librte_eal/include/rte_eal_memconfig.h
new file mode 100644
index 000000000..dede2ee32
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_eal_memconfig.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_EAL_MEMCONFIG_H_
+#define _RTE_EAL_MEMCONFIG_H_
+
+#include <stdbool.h>
+
+#include <rte_compat.h>
+
+/**
+ * @file
+ *
+ * This API allows access to EAL shared memory configuration through an API.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Lock the internal EAL shared memory configuration for shared access.
+ */
+void
+rte_mcfg_mem_read_lock(void);
+
+/**
+ * Unlock the internal EAL shared memory configuration for shared access.
+ */
+void
+rte_mcfg_mem_read_unlock(void);
+
+/**
+ * Lock the internal EAL shared memory configuration for exclusive access.
+ */
+void
+rte_mcfg_mem_write_lock(void);
+
+/**
+ * Unlock the internal EAL shared memory configuration for exclusive access.
+ */
+void
+rte_mcfg_mem_write_unlock(void);
+
+/**
+ * Lock the internal EAL TAILQ list for shared access.
+ */
+void
+rte_mcfg_tailq_read_lock(void);
+
+/**
+ * Unlock the internal EAL TAILQ list for shared access.
+ */
+void
+rte_mcfg_tailq_read_unlock(void);
+
+/**
+ * Lock the internal EAL TAILQ list for exclusive access.
+ */
+void
+rte_mcfg_tailq_write_lock(void);
+
+/**
+ * Unlock the internal EAL TAILQ list for exclusive access.
+ */
+void
+rte_mcfg_tailq_write_unlock(void);
+
+/**
+ * Lock the internal EAL Mempool list for shared access.
+ */
+void
+rte_mcfg_mempool_read_lock(void);
+
+/**
+ * Unlock the internal EAL Mempool list for shared access.
+ */
+void
+rte_mcfg_mempool_read_unlock(void);
+
+/**
+ * Lock the internal EAL Mempool list for exclusive access.
+ */
+void
+rte_mcfg_mempool_write_lock(void);
+
+/**
+ * Unlock the internal EAL Mempool list for exclusive access.
+ */
+void
+rte_mcfg_mempool_write_unlock(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Lock the internal EAL Timer Library lock for exclusive access.
+ */
+__rte_experimental
+void
+rte_mcfg_timer_lock(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Unlock the internal EAL Timer Library lock for exclusive access.
+ */
+__rte_experimental
+void
+rte_mcfg_timer_unlock(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * If true, pages are put in single files (per memseg list),
+ * as opposed to creating a file per page.
+ */
+__rte_experimental
+bool
+rte_mcfg_get_single_file_segments(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*__RTE_EAL_MEMCONFIG_H_*/
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_eal_trace.h b/src/spdk/dpdk/lib/librte_eal/include/rte_eal_trace.h
new file mode 100644
index 000000000..1ebb2905a
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_eal_trace.h
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#ifndef _RTE_EAL_TRACE_H_
+#define _RTE_EAL_TRACE_H_
+
+/**
+ * @file
+ *
+ * API for EAL trace support
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_alarm.h>
+#include <rte_interrupts.h>
+#include <rte_trace_point.h>
+
+/* Generic */
+RTE_TRACE_POINT(
+ rte_eal_trace_generic_void,
+ RTE_TRACE_POINT_ARGS(void),
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_generic_u64,
+ RTE_TRACE_POINT_ARGS(uint64_t in),
+ rte_trace_point_emit_u64(in);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_generic_u32,
+ RTE_TRACE_POINT_ARGS(uint32_t in),
+ rte_trace_point_emit_u32(in);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_generic_u16,
+ RTE_TRACE_POINT_ARGS(uint16_t in),
+ rte_trace_point_emit_u16(in);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_generic_u8,
+ RTE_TRACE_POINT_ARGS(uint8_t in),
+ rte_trace_point_emit_u8(in);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_generic_i64,
+ RTE_TRACE_POINT_ARGS(int64_t in),
+ rte_trace_point_emit_i64(in);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_generic_i32,
+ RTE_TRACE_POINT_ARGS(int32_t in),
+ rte_trace_point_emit_i32(in);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_generic_i16,
+ RTE_TRACE_POINT_ARGS(int16_t in),
+ rte_trace_point_emit_i16(in);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_generic_i8,
+ RTE_TRACE_POINT_ARGS(int8_t in),
+ rte_trace_point_emit_i8(in);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_generic_int,
+ RTE_TRACE_POINT_ARGS(int in),
+ rte_trace_point_emit_int(in);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_generic_long,
+ RTE_TRACE_POINT_ARGS(long in),
+ rte_trace_point_emit_long(in);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_generic_float,
+ RTE_TRACE_POINT_ARGS(float in),
+ rte_trace_point_emit_float(in);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_generic_double,
+ RTE_TRACE_POINT_ARGS(double in),
+ rte_trace_point_emit_double(in);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_generic_ptr,
+ RTE_TRACE_POINT_ARGS(const void *ptr),
+ rte_trace_point_emit_ptr(ptr);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_generic_str,
+ RTE_TRACE_POINT_ARGS(const char *str),
+ rte_trace_point_emit_string(str);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_generic_func,
+ RTE_TRACE_POINT_ARGS(const char *func),
+ rte_trace_point_emit_string(func);
+)
+
+#define RTE_EAL_TRACE_GENERIC_FUNC rte_eal_trace_generic_func(__func__)
+
+/* Alarm */
+RTE_TRACE_POINT(
+ rte_eal_trace_alarm_set,
+ RTE_TRACE_POINT_ARGS(uint64_t us, rte_eal_alarm_callback cb_fn,
+ void *cb_arg, int rc),
+ rte_trace_point_emit_u64(us);
+ rte_trace_point_emit_ptr(cb_fn);
+ rte_trace_point_emit_ptr(cb_arg);
+ rte_trace_point_emit_int(rc);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_alarm_cancel,
+ RTE_TRACE_POINT_ARGS(rte_eal_alarm_callback cb_fn, void *cb_arg,
+ int count),
+ rte_trace_point_emit_ptr(cb_fn);
+ rte_trace_point_emit_ptr(cb_arg);
+ rte_trace_point_emit_int(count);
+)
+
+/* Memory */
+RTE_TRACE_POINT(
+ rte_eal_trace_mem_zmalloc,
+ RTE_TRACE_POINT_ARGS(const char *type, size_t size, unsigned int align,
+ int socket, void *ptr),
+ rte_trace_point_emit_string(type);
+ rte_trace_point_emit_long(size);
+ rte_trace_point_emit_u32(align);
+ rte_trace_point_emit_int(socket);
+ rte_trace_point_emit_ptr(ptr);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_mem_malloc,
+ RTE_TRACE_POINT_ARGS(const char *type, size_t size, unsigned int align,
+ int socket, void *ptr),
+ rte_trace_point_emit_string(type);
+ rte_trace_point_emit_long(size);
+ rte_trace_point_emit_u32(align);
+ rte_trace_point_emit_int(socket);
+ rte_trace_point_emit_ptr(ptr);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_mem_realloc,
+ RTE_TRACE_POINT_ARGS(size_t size, unsigned int align, int socket,
+ void *ptr),
+ rte_trace_point_emit_long(size);
+ rte_trace_point_emit_u32(align);
+ rte_trace_point_emit_int(socket);
+ rte_trace_point_emit_ptr(ptr);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_mem_free,
+ RTE_TRACE_POINT_ARGS(void *ptr),
+ rte_trace_point_emit_ptr(ptr);
+)
+
+/* Memzone */
+RTE_TRACE_POINT(
+ rte_eal_trace_memzone_reserve,
+ RTE_TRACE_POINT_ARGS(const char *name, size_t len, int socket_id,
+ unsigned int flags, unsigned int align, unsigned int bound,
+ const void *mz),
+ rte_trace_point_emit_string(name);
+ rte_trace_point_emit_long(len);
+ rte_trace_point_emit_int(socket_id);
+ rte_trace_point_emit_u32(flags);
+ rte_trace_point_emit_u32(align);
+ rte_trace_point_emit_u32(bound);
+ rte_trace_point_emit_ptr(mz);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_memzone_lookup,
+ RTE_TRACE_POINT_ARGS(const char *name, const void *memzone),
+ rte_trace_point_emit_string(name);
+ rte_trace_point_emit_ptr(memzone);
+)
+
+RTE_TRACE_POINT(
+ rte_eal_trace_memzone_free,
+ RTE_TRACE_POINT_ARGS(const char *name, void *addr, int rc),
+ rte_trace_point_emit_string(name);
+ rte_trace_point_emit_ptr(addr);
+ rte_trace_point_emit_int(rc);
+)
+
+/* Thread */
+RTE_TRACE_POINT(
+ rte_eal_trace_thread_remote_launch,
+ RTE_TRACE_POINT_ARGS(int (*f)(void *), void *arg,
+ unsigned int slave_id, int rc),
+ rte_trace_point_emit_ptr(f);
+ rte_trace_point_emit_ptr(arg);
+ rte_trace_point_emit_u32(slave_id);
+ rte_trace_point_emit_int(rc);
+)
+RTE_TRACE_POINT(
+ rte_eal_trace_thread_lcore_ready,
+ RTE_TRACE_POINT_ARGS(unsigned int lcore_id, const char *cpuset),
+ rte_trace_point_emit_u32(lcore_id);
+ rte_trace_point_emit_string(cpuset);
+)
+
+/* Interrupt */
+RTE_TRACE_POINT(
+ rte_eal_trace_intr_callback_register,
+ RTE_TRACE_POINT_ARGS(const struct rte_intr_handle *handle,
+ rte_intr_callback_fn cb, void *cb_arg, int rc),
+ rte_trace_point_emit_int(rc);
+ rte_trace_point_emit_int(handle->vfio_dev_fd);
+ rte_trace_point_emit_int(handle->fd);
+ rte_trace_point_emit_int(handle->type);
+ rte_trace_point_emit_u32(handle->max_intr);
+ rte_trace_point_emit_u32(handle->nb_efd);
+ rte_trace_point_emit_ptr(cb);
+ rte_trace_point_emit_ptr(cb_arg);
+)
+RTE_TRACE_POINT(
+ rte_eal_trace_intr_callback_unregister,
+ RTE_TRACE_POINT_ARGS(const struct rte_intr_handle *handle,
+ rte_intr_callback_fn cb, void *cb_arg, int rc),
+ rte_trace_point_emit_int(rc);
+ rte_trace_point_emit_int(handle->vfio_dev_fd);
+ rte_trace_point_emit_int(handle->fd);
+ rte_trace_point_emit_int(handle->type);
+ rte_trace_point_emit_u32(handle->max_intr);
+ rte_trace_point_emit_u32(handle->nb_efd);
+ rte_trace_point_emit_ptr(cb);
+ rte_trace_point_emit_ptr(cb_arg);
+)
+RTE_TRACE_POINT(
+ rte_eal_trace_intr_enable,
+ RTE_TRACE_POINT_ARGS(const struct rte_intr_handle *handle, int rc),
+ rte_trace_point_emit_int(rc);
+ rte_trace_point_emit_int(handle->vfio_dev_fd);
+ rte_trace_point_emit_int(handle->fd);
+ rte_trace_point_emit_int(handle->type);
+ rte_trace_point_emit_u32(handle->max_intr);
+ rte_trace_point_emit_u32(handle->nb_efd);
+)
+RTE_TRACE_POINT(
+ rte_eal_trace_intr_disable,
+ RTE_TRACE_POINT_ARGS(const struct rte_intr_handle *handle, int rc),
+ rte_trace_point_emit_int(rc);
+ rte_trace_point_emit_int(handle->vfio_dev_fd);
+ rte_trace_point_emit_int(handle->fd);
+ rte_trace_point_emit_int(handle->type);
+ rte_trace_point_emit_u32(handle->max_intr);
+ rte_trace_point_emit_u32(handle->nb_efd);
+)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_EAL_TRACE_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_errno.h b/src/spdk/dpdk/lib/librte_eal/include/rte_errno.h
new file mode 100644
index 000000000..ba45591d2
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_errno.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+/**
+ * @file
+ *
+ * API for error cause tracking
+ */
+
+#ifndef _RTE_ERRNO_H_
+#define _RTE_ERRNO_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_per_lcore.h>
+
+RTE_DECLARE_PER_LCORE(int, _rte_errno); /**< Per core error number. */
+
+/**
+ * Error number value, stored per-thread, which can be queried after
+ * calls to certain functions to determine why those functions failed.
+ *
+ * Uses standard values from errno.h wherever possible, with a small number
+ * of additional possible values for RTE-specific conditions.
+ */
+#define rte_errno RTE_PER_LCORE(_rte_errno)
+
+/**
+ * Function which returns a printable string describing a particular
+ * error code. For non-RTE-specific error codes, this function returns
+ * the value from the libc strerror function.
+ *
+ * @param errnum
+ * The error number to be looked up - generally the value of rte_errno
+ * @return
+ * A pointer to a thread-local string containing the text describing
+ * the error.
+ */
+const char *rte_strerror(int errnum);
+
+#ifndef __ELASTERROR
+/**
+ * Check if we have a defined value for the max system-defined errno values.
+ * if no max defined, start from 1000 to prevent overlap with standard values
+ */
+#define __ELASTERROR 1000
+#endif
+
+/** Error types */
+enum {
+ RTE_MIN_ERRNO = __ELASTERROR, /**< Start numbering above std errno vals */
+
+ E_RTE_SECONDARY, /**< Operation not allowed in secondary processes */
+ E_RTE_NO_CONFIG, /**< Missing rte_config */
+
+ RTE_MAX_ERRNO /**< Max RTE error number */
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ERRNO_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_fbarray.h b/src/spdk/dpdk/lib/librte_eal/include/rte_fbarray.h
new file mode 100644
index 000000000..6dccdbec9
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_fbarray.h
@@ -0,0 +1,565 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#ifndef RTE_FBARRAY_H
+#define RTE_FBARRAY_H
+
+/**
+ * @file
+ *
+ * File-backed shared indexed array for DPDK.
+ *
+ * Basic workflow is expected to be the following:
+ * 1) Allocate array either using ``rte_fbarray_init()`` or
+ * ``rte_fbarray_attach()`` (depending on whether it's shared between
+ * multiple DPDK processes)
+ * 2) find free spots using ``rte_fbarray_find_next_free()``
+ * 3) get pointer to data in the free spot using ``rte_fbarray_get()``, and
+ * copy data into the pointer (element size is fixed)
+ * 4) mark entry as used using ``rte_fbarray_set_used()``
+ *
+ * Calls to ``rte_fbarray_init()`` and ``rte_fbarray_destroy()`` will have
+ * consequences for all processes, while calls to ``rte_fbarray_attach()`` and
+ * ``rte_fbarray_detach()`` will only have consequences within a single process.
+ * Therefore, it is safe to call ``rte_fbarray_attach()`` or
+ * ``rte_fbarray_detach()`` while another process is using ``rte_fbarray``,
+ * provided no other thread within the same process will try to use
+ * ``rte_fbarray`` before attaching or after detaching. It is not safe to call
+ * ``rte_fbarray_init()`` or ``rte_fbarray_destroy()`` while another thread or
+ * another process is using ``rte_fbarray``.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+
+#include <rte_compat.h>
+#include <rte_rwlock.h>
+
+#define RTE_FBARRAY_NAME_LEN 64
+
+struct rte_fbarray {
+ char name[RTE_FBARRAY_NAME_LEN]; /**< name associated with an array */
+ unsigned int count; /**< number of entries stored */
+ unsigned int len; /**< current length of the array */
+ unsigned int elt_sz; /**< size of each element */
+ void *data; /**< data pointer */
+ rte_rwlock_t rwlock; /**< multiprocess lock */
+};
+
+/**
+ * Set up ``rte_fbarray`` structure and allocate underlying resources.
+ *
+ * Call this function to correctly set up ``rte_fbarray`` and allocate
+ * underlying files that will be backing the data in the current process. Note
+ * that in order to use and share ``rte_fbarray`` between multiple processes,
+ * data pointed to by ``arr`` pointer must itself be allocated in shared memory.
+ *
+ * @param arr
+ * Valid pointer to allocated ``rte_fbarray`` structure.
+ *
+ * @param name
+ * Unique name to be assigned to this array.
+ *
+ * @param len
+ * Number of elements initially available in the array.
+ *
+ * @param elt_sz
+ * Size of each element.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
+ unsigned int elt_sz);
+
+
+/**
+ * Attach to a file backing an already allocated and correctly set up
+ * ``rte_fbarray`` structure.
+ *
+ * Call this function to attach to file that will be backing the data in the
+ * current process. The structure must have been previously correctly set up
+ * with a call to ``rte_fbarray_init()``. Calls to ``rte_fbarray_attach()`` are
+ * usually meant to be performed in a multiprocessing scenario, with data
+ * pointed to by ``arr`` pointer allocated in shared memory.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up rte_fbarray structure.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_attach(struct rte_fbarray *arr);
+
+
+/**
+ * Deallocate resources for an already allocated and correctly set up
+ * ``rte_fbarray`` structure, and remove the underlying file.
+ *
+ * Call this function to deallocate all resources associated with an
+ * ``rte_fbarray`` structure within the current process. This will also
+ * zero-fill data pointed to by ``arr`` pointer and remove the underlying file
+ * backing the data, so it is expected that by the time this function is called,
+ * all other processes have detached from this ``rte_fbarray``.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_destroy(struct rte_fbarray *arr);
+
+
+/**
+ * Deallocate resources for an already allocated and correctly set up
+ * ``rte_fbarray`` structure.
+ *
+ * Call this function to deallocate all resources associated with an
+ * ``rte_fbarray`` structure within current process.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_detach(struct rte_fbarray *arr);
+
+
+/**
+ * Get pointer to element residing at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param idx
+ * Index of an element to get a pointer to.
+ *
+ * @return
+ * - non-NULL pointer on success.
+ * - NULL on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+void *
+rte_fbarray_get(const struct rte_fbarray *arr, unsigned int idx);
+
+
+/**
+ * Find index of a specified element within the array.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param elt
+ * Pointer to element to find index to.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_find_idx(const struct rte_fbarray *arr, const void *elt);
+
+
+/**
+ * Mark specified element as used.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param idx
+ * Element index to mark as used.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_set_used(struct rte_fbarray *arr, unsigned int idx);
+
+
+/**
+ * Mark specified element as free.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param idx
+ * Element index to mark as free.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_set_free(struct rte_fbarray *arr, unsigned int idx);
+
+
+/**
+ * Check whether element at specified index is marked as used.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param idx
+ * Element index to check as used.
+ *
+ * @return
+ * - 1 if element is used.
+ * - 0 if element is unused.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx);
+
+
+/**
+ * Find index of next free element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find index of next used element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find index of next chunk of ``n`` free elements, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of free elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find index of next chunk of ``n`` used elements, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of used elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find how many more free entries there are, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_find_contig_free(struct rte_fbarray *arr,
+ unsigned int start);
+
+
+/**
+ * Find how many more used entries there are, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start);
+
+/**
+ * Find index of previous free element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find index of previous used element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find lowest start index of chunk of ``n`` free elements, down from specified
+ * index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of free elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find lowest start index of chunk of ``n`` used elements, down from specified
+ * index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of used elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find how many more free entries there are before specified index (like
+ * ``rte_fbarray_find_contig_free`` but going in reverse).
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr,
+ unsigned int start);
+
+
+/**
+ * Find how many more used entries there are before specified index (like
+ * ``rte_fbarray_find_contig_used`` but going in reverse).
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find index of biggest chunk of free elements, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_find_biggest_free(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find index of biggest chunk of used elements, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_find_biggest_used(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find index of biggest chunk of free elements before a specified index (like
+ * ``rte_fbarray_find_biggest_free``, but going in reverse).
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_find_rev_biggest_free(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find index of biggest chunk of used elements before a specified index (like
+ * ``rte_fbarray_find_biggest_used``, but going in reverse).
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+__rte_experimental
+int
+rte_fbarray_find_rev_biggest_used(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Dump ``rte_fbarray`` metadata.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param f
+ * File object to dump information into.
+ */
+__rte_experimental
+void
+rte_fbarray_dump_metadata(struct rte_fbarray *arr, FILE *f);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_FBARRAY_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_function_versioning.h b/src/spdk/dpdk/lib/librte_eal/include/rte_function_versioning.h
new file mode 100644
index 000000000..f588f2643
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_function_versioning.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 Neil Horman <nhorman@tuxdriver.com>.
+ * All rights reserved.
+ */
+
+#ifndef _RTE_FUNCTION_VERSIONING_H_
+#define _RTE_FUNCTION_VERSIONING_H_
+#include <rte_common.h>
+
+#ifndef RTE_USE_FUNCTION_VERSIONING
+#error Use of function versioning disabled, is "use_function_versioning=true" in meson.build?
+#endif
+
+#ifdef RTE_BUILD_SHARED_LIB
+
+/*
+ * Provides backwards compatibility when updating exported functions.
+ * When a symol is exported from a library to provide an API, it also provides a
+ * calling convention (ABI) that is embodied in its name, return type,
+ * arguments, etc. On occasion that function may need to change to accommodate
+ * new functionality, behavior, etc. When that occurs, it is desirable to
+ * allow for backwards compatibility for a time with older binaries that are
+ * dynamically linked to the dpdk. To support that, the __vsym and
+ * VERSION_SYMBOL macros are created. They, in conjunction with the
+ * <library>_version.map file for a given library allow for multiple versions of
+ * a symbol to exist in a shared library so that older binaries need not be
+ * immediately recompiled.
+ *
+ * Refer to the guidelines document in the docs subdirectory for details on the
+ * use of these macros
+ */
+
+/*
+ * Macro Parameters:
+ * b - function base name
+ * e - function version extension, to be concatenated with base name
+ * n - function symbol version string to be applied
+ * f - function prototype
+ * p - full function symbol name
+ */
+
+/*
+ * VERSION_SYMBOL
+ * Creates a symbol version table entry binding symbol <b>@DPDK_<n> to the internal
+ * function name <b><e>
+ */
+#define VERSION_SYMBOL(b, e, n) __asm__(".symver " RTE_STR(b) RTE_STR(e) ", " RTE_STR(b) "@DPDK_" RTE_STR(n))
+
+/*
+ * VERSION_SYMBOL_EXPERIMENTAL
+ * Creates a symbol version table entry binding the symbol <b>@EXPERIMENTAL to the internal
+ * function name <b><e>. The macro is used when a symbol matures to become part of the stable ABI,
+ * to provide an alias to experimental for some time.
+ */
+#define VERSION_SYMBOL_EXPERIMENTAL(b, e) __asm__(".symver " RTE_STR(b) RTE_STR(e) ", " RTE_STR(b) "@EXPERIMENTAL")
+
+/*
+ * BIND_DEFAULT_SYMBOL
+ * Creates a symbol version entry instructing the linker to bind references to
+ * symbol <b> to the internal symbol <b><e>
+ */
+#define BIND_DEFAULT_SYMBOL(b, e, n) __asm__(".symver " RTE_STR(b) RTE_STR(e) ", " RTE_STR(b) "@@DPDK_" RTE_STR(n))
+
+/*
+ * __vsym
+ * Annotation to be used in declaration of the internal symbol <b><e> to signal
+ * that it is being used as an implementation of a particular version of symbol
+ * <b>.
+ */
+#define __vsym __rte_used
+
+/*
+ * MAP_STATIC_SYMBOL
+ * If a function has been bifurcated into multiple versions, none of which
+ * are defined as the exported symbol name in the map file, this macro can be
+ * used to alias a specific version of the symbol to its exported name. For
+ * example, if you have 2 versions of a function foo_v1 and foo_v2, where the
+ * former is mapped to foo@DPDK_1 and the latter is mapped to foo@DPDK_2 when
+ * building a shared library, this macro can be used to map either foo_v1 or
+ * foo_v2 to the symbol foo when building a static library, e.g.:
+ * MAP_STATIC_SYMBOL(void foo(), foo_v2);
+ */
+#define MAP_STATIC_SYMBOL(f, p)
+
+#else
+/*
+ * No symbol versioning in use
+ */
+#define VERSION_SYMBOL(b, e, n)
+#define VERSION_SYMBOL_EXPERIMENTAL(b, e)
+#define __vsym
+#define BIND_DEFAULT_SYMBOL(b, e, n)
+#define MAP_STATIC_SYMBOL(f, p) f __attribute__((alias(RTE_STR(p))))
+/*
+ * RTE_BUILD_SHARED_LIB=n
+ */
+#endif
+
+#endif /* _RTE_FUNCTION_VERSIONING_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_hexdump.h b/src/spdk/dpdk/lib/librte_eal/include/rte_hexdump.h
new file mode 100644
index 000000000..2d03c089c
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_hexdump.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_HEXDUMP_H_
+#define _RTE_HEXDUMP_H_
+
+/**
+ * @file
+ * Simple API to dump out memory in a special hex format.
+ */
+
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+* Dump out memory in a special hex dump format.
+*
+* @param f
+* A pointer to a file for output
+* @param title
+* If not NULL this string is printed as a header to the output.
+* @param buf
+* This is the buffer address to print out.
+* @param len
+* The number of bytes to dump out
+* @return
+* None.
+*/
+
+extern void
+rte_hexdump(FILE *f, const char * title, const void * buf, unsigned int len);
+
+/**
+* Dump out memory in a hex format with colons between bytes.
+*
+* @param f
+* A pointer to a file for output
+* @param title
+* If not NULL this string is printed as a header to the output.
+* @param buf
+* This is the buffer address to print out.
+* @param len
+* The number of bytes to dump out
+* @return
+* None.
+*/
+
+void
+rte_memdump(FILE *f, const char * title, const void * buf, unsigned int len);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_HEXDUMP_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_hypervisor.h b/src/spdk/dpdk/lib/librte_eal/include/rte_hypervisor.h
new file mode 100644
index 000000000..5fe719c1d
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_hypervisor.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 Mellanox Technologies, Ltd
+ */
+
+#ifndef RTE_HYPERVISOR_H
+#define RTE_HYPERVISOR_H
+
+/**
+ * @file
+ * Hypervisor awareness.
+ */
+
+enum rte_hypervisor {
+ RTE_HYPERVISOR_NONE,
+ RTE_HYPERVISOR_KVM,
+ RTE_HYPERVISOR_HYPERV,
+ RTE_HYPERVISOR_VMWARE,
+ RTE_HYPERVISOR_UNKNOWN
+};
+
+/**
+ * Get the id of hypervisor it is running on.
+ */
+enum rte_hypervisor
+rte_hypervisor_get(void);
+
+/**
+ * Get the name of a given hypervisor id.
+ */
+const char *
+rte_hypervisor_get_name(enum rte_hypervisor id);
+
+#endif /* RTE_HYPERVISOR_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_interrupts.h b/src/spdk/dpdk/lib/librte_eal/include/rte_interrupts.h
new file mode 100644
index 000000000..e3b406abc
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_interrupts.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_INTERRUPTS_H_
+#define _RTE_INTERRUPTS_H_
+
+#include <rte_common.h>
+#include <rte_compat.h>
+
+/**
+ * @file
+ *
+ * The RTE interrupt interface provides functions to register/unregister
+ * callbacks for a specific interrupt.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Interrupt handle */
+struct rte_intr_handle;
+
+/** Function to be registered for the specific interrupt */
+typedef void (*rte_intr_callback_fn)(void *cb_arg);
+
+/**
+ * Function to call after a callback is unregistered.
+ * Can be used to close fd and free cb_arg.
+ */
+typedef void (*rte_intr_unregister_callback_fn)(struct rte_intr_handle *intr_handle,
+ void *cb_arg);
+
+#include "rte_eal_interrupts.h"
+
+/**
+ * It registers the callback for the specific interrupt. Multiple
+ * callbacks can be registered at the same time.
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ * @param cb
+ * callback address.
+ * @param cb_arg
+ * address of parameter for callback.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
+ rte_intr_callback_fn cb, void *cb_arg);
+
+/**
+ * It unregisters the callback according to the specified interrupt handle.
+ *
+ * @param intr_handle
+ * pointer to the interrupt handle.
+ * @param cb
+ * callback address.
+ * @param cb_arg
+ * address of parameter for callback, (void *)-1 means to remove all
+ * registered which has the same callback address.
+ *
+ * @return
+ * - On success, return the number of callback entities removed.
+ * - On failure, a negative value.
+ */
+int rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
+ rte_intr_callback_fn cb, void *cb_arg);
+
+/**
+ * Unregister the callback according to the specified interrupt handle,
+ * after it's no longer active. Fail if source is not active.
+ *
+ * @param intr_handle
+ * pointer to the interrupt handle.
+ * @param cb_fn
+ * callback address.
+ * @param cb_arg
+ * address of parameter for callback, (void *)-1 means to remove all
+ * registered which has the same callback address.
+ * @param ucb_fn
+ * callback to call before cb is unregistered (optional).
+ * can be used to close fd and free cb_arg.
+ *
+ * @return
+ * - On success, return the number of callback entities marked for remove.
+ * - On failure, a negative value.
+ */
+__rte_experimental
+int
+rte_intr_callback_unregister_pending(const struct rte_intr_handle *intr_handle,
+ rte_intr_callback_fn cb_fn, void *cb_arg,
+ rte_intr_unregister_callback_fn ucb_fn);
+
+/**
+ * It enables the interrupt for the specified handle.
+ *
+ * @param intr_handle
+ * pointer to the interrupt handle.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int rte_intr_enable(const struct rte_intr_handle *intr_handle);
+
+/**
+ * It disables the interrupt for the specified handle.
+ *
+ * @param intr_handle
+ * pointer to the interrupt handle.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int rte_intr_disable(const struct rte_intr_handle *intr_handle);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * It acknowledges an interrupt raised for the specified handle.
+ *
+ * This function should be called at the end of each interrupt handler either
+ * from application or driver, so that currently raised interrupt is acked and
+ * further new interrupts are raised.
+ *
+ * @param intr_handle
+ * pointer to the interrupt handle.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+__rte_experimental
+int rte_intr_ack(const struct rte_intr_handle *intr_handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_keepalive.h b/src/spdk/dpdk/lib/librte_eal/include/rte_keepalive.h
new file mode 100644
index 000000000..4bda7ca56
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_keepalive.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2015-2016 Intel Corporation.
+ */
+
+/**
+ * @file rte_keepalive.h
+ * DPDK RTE LCore Keepalive Monitor.
+ *
+ **/
+
+#ifndef _KEEPALIVE_H_
+#define _KEEPALIVE_H_
+
+#include <rte_config.h>
+#include <rte_memory.h>
+
+#ifndef RTE_KEEPALIVE_MAXCORES
+/**
+ * Number of cores to track.
+ * @note Must be larger than the highest core id. */
+#define RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE
+#endif
+
+enum rte_keepalive_state {
+ RTE_KA_STATE_UNUSED = 0,
+ RTE_KA_STATE_ALIVE = 1,
+ RTE_KA_STATE_MISSING = 4,
+ RTE_KA_STATE_DEAD = 2,
+ RTE_KA_STATE_GONE = 3,
+ RTE_KA_STATE_DOZING = 5,
+ RTE_KA_STATE_SLEEP = 6
+};
+
+/**
+ * Keepalive failure callback.
+ *
+ * Receives a data pointer passed to rte_keepalive_create() and the id of the
+ * failed core.
+ * @param data Data pointer passed to rte_keepalive_create()
+ * @param id_core ID of the core that has failed
+ */
+typedef void (*rte_keepalive_failure_callback_t)(
+ void *data,
+ const int id_core);
+
+/**
+ * Keepalive relay callback.
+ *
+ * Receives a data pointer passed to rte_keepalive_register_relay_callback(),
+ * the id of the core for which state is to be forwarded, and details of the
+ * current core state.
+ * @param data Data pointer passed to rte_keepalive_register_relay_callback()
+ * @param id_core ID of the core for which state is being reported
+ * @param core_state The current state of the core
+ * @param Timestamp of when core was last seen alive
+ */
+typedef void (*rte_keepalive_relay_callback_t)(
+ void *data,
+ const int id_core,
+ enum rte_keepalive_state core_state,
+ uint64_t last_seen
+ );
+
+/**
+ * Keepalive state structure.
+ * @internal
+ */
+struct rte_keepalive;
+
+/**
+ * Initialise keepalive sub-system.
+ * @param callback
+ * Function called upon detection of a dead core.
+ * @param data
+ * Data pointer to be passed to function callback.
+ * @return
+ * Keepalive structure success, NULL on failure.
+ */
+struct rte_keepalive *rte_keepalive_create(
+ rte_keepalive_failure_callback_t callback,
+ void *data);
+
+/**
+ * Checks & handles keepalive state of monitored cores.
+ * @param *ptr_timer Triggering timer (unused)
+ * @param *ptr_data Data pointer (keepalive structure)
+ */
+void rte_keepalive_dispatch_pings(void *ptr_timer, void *ptr_data);
+
+/**
+ * Registers a core for keepalive checks.
+ * @param *keepcfg
+ * Keepalive structure pointer
+ * @param id_core
+ * ID number of core to register.
+ */
+void rte_keepalive_register_core(struct rte_keepalive *keepcfg,
+ const int id_core);
+
+/**
+ * Per-core keepalive check.
+ * @param *keepcfg
+ * Keepalive structure pointer
+ *
+ * This function needs to be called from within the main process loop of
+ * the LCore to be checked.
+ */
+void
+rte_keepalive_mark_alive(struct rte_keepalive *keepcfg);
+
+/**
+ * Per-core sleep-time indication.
+ * @param *keepcfg
+ * Keepalive structure pointer
+ *
+ * If CPU idling is enabled, this function needs to be called from within
+ * the main process loop of the LCore going to sleep, in order to avoid
+ * the LCore being mis-detected as dead.
+ */
+void
+rte_keepalive_mark_sleep(struct rte_keepalive *keepcfg);
+
+/**
+ * Registers a 'live core' callback.
+ *
+ * The complement of the 'dead core' callback. This is called when a
+ * core is known to be alive, and is intended for cases when an app
+ * needs to know 'liveness' beyond just knowing when a core has died.
+ *
+ * @param *keepcfg
+ * Keepalive structure pointer
+ * @param callback
+ * Function called upon detection of a dead core.
+ * @param data
+ * Data pointer to be passed to function callback.
+ */
+void
+rte_keepalive_register_relay_callback(struct rte_keepalive *keepcfg,
+ rte_keepalive_relay_callback_t callback,
+ void *data);
+
+#endif /* _KEEPALIVE_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_launch.h b/src/spdk/dpdk/lib/librte_eal/include/rte_launch.h
new file mode 100644
index 000000000..06a671752
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_launch.h
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_LAUNCH_H_
+#define _RTE_LAUNCH_H_
+
+/**
+ * @file
+ *
+ * Launch tasks on other lcores
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * State of an lcore.
+ */
+enum rte_lcore_state_t {
+ WAIT, /**< waiting a new command */
+ RUNNING, /**< executing command */
+ FINISHED, /**< command executed */
+};
+
+/**
+ * Definition of a remote launch function.
+ */
+typedef int (lcore_function_t)(void *);
+
+/**
+ * Launch a function on another lcore.
+ *
+ * To be executed on the MASTER lcore only.
+ *
+ * Sends a message to a slave lcore (identified by the slave_id) that
+ * is in the WAIT state (this is true after the first call to
+ * rte_eal_init()). This can be checked by first calling
+ * rte_eal_wait_lcore(slave_id).
+ *
+ * When the remote lcore receives the message, it switches to
+ * the RUNNING state, then calls the function f with argument arg. Once the
+ * execution is done, the remote lcore switches to a FINISHED state and
+ * the return value of f is stored in a local variable to be read using
+ * rte_eal_wait_lcore().
+ *
+ * The MASTER lcore returns as soon as the message is sent and knows
+ * nothing about the completion of f.
+ *
+ * Note: This function is not designed to offer optimum
+ * performance. It is just a practical way to launch a function on
+ * another lcore at initialization time.
+ *
+ * @param f
+ * The function to be called.
+ * @param arg
+ * The argument for the function.
+ * @param slave_id
+ * The identifier of the lcore on which the function should be executed.
+ * @return
+ * - 0: Success. Execution of function f started on the remote lcore.
+ * - (-EBUSY): The remote lcore is not in a WAIT state.
+ */
+int rte_eal_remote_launch(lcore_function_t *f, void *arg, unsigned slave_id);
+
+/**
+ * This enum indicates whether the master core must execute the handler
+ * launched on all logical cores.
+ */
+enum rte_rmt_call_master_t {
+ SKIP_MASTER = 0, /**< lcore handler not executed by master core. */
+ CALL_MASTER, /**< lcore handler executed by master core. */
+};
+
+/**
+ * Launch a function on all lcores.
+ *
+ * Check that each SLAVE lcore is in a WAIT state, then call
+ * rte_eal_remote_launch() for each lcore.
+ *
+ * @param f
+ * The function to be called.
+ * @param arg
+ * The argument for the function.
+ * @param call_master
+ * If call_master set to SKIP_MASTER, the MASTER lcore does not call
+ * the function. If call_master is set to CALL_MASTER, the function
+ * is also called on master before returning. In any case, the master
+ * lcore returns as soon as it finished its job and knows nothing
+ * about the completion of f on the other lcores.
+ * @return
+ * - 0: Success. Execution of function f started on all remote lcores.
+ * - (-EBUSY): At least one remote lcore is not in a WAIT state. In this
+ * case, no message is sent to any of the lcores.
+ */
+int rte_eal_mp_remote_launch(lcore_function_t *f, void *arg,
+ enum rte_rmt_call_master_t call_master);
+
+/**
+ * Get the state of the lcore identified by slave_id.
+ *
+ * To be executed on the MASTER lcore only.
+ *
+ * @param slave_id
+ * The identifier of the lcore.
+ * @return
+ * The state of the lcore.
+ */
+enum rte_lcore_state_t rte_eal_get_lcore_state(unsigned slave_id);
+
+/**
+ * Wait until an lcore finishes its job.
+ *
+ * To be executed on the MASTER lcore only.
+ *
+ * If the slave lcore identified by the slave_id is in a FINISHED state,
+ * switch to the WAIT state. If the lcore is in RUNNING state, wait until
+ * the lcore finishes its job and moves to the FINISHED state.
+ *
+ * @param slave_id
+ * The identifier of the lcore.
+ * @return
+ * - 0: If the lcore identified by the slave_id is in a WAIT state.
+ * - The value that was returned by the previous remote launch
+ * function call if the lcore identified by the slave_id was in a
+ * FINISHED or RUNNING state. In this case, it changes the state
+ * of the lcore to WAIT.
+ */
+int rte_eal_wait_lcore(unsigned slave_id);
+
+/**
+ * Wait until all lcores finish their jobs.
+ *
+ * To be executed on the MASTER lcore only. Issue an
+ * rte_eal_wait_lcore() for every lcore. The return values are
+ * ignored.
+ *
+ * After a call to rte_eal_mp_wait_lcore(), the caller can assume
+ * that all slave lcores are in a WAIT state.
+ */
+void rte_eal_mp_wait_lcore(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LAUNCH_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_lcore.h b/src/spdk/dpdk/lib/librte_eal/include/rte_lcore.h
new file mode 100644
index 000000000..339046bc8
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_lcore.h
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_LCORE_H_
+#define _RTE_LCORE_H_
+
+/**
+ * @file
+ *
+ * API for lcore and socket manipulation
+ *
+ */
+#include <rte_config.h>
+#include <rte_per_lcore.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LCORE_ID_ANY UINT32_MAX /**< Any lcore. */
+
+RTE_DECLARE_PER_LCORE(unsigned, _lcore_id); /**< Per thread "lcore id". */
+RTE_DECLARE_PER_LCORE(rte_cpuset_t, _cpuset); /**< Per thread "cpuset". */
+
+/**
+ * Get a lcore's role.
+ *
+ * @param lcore_id
+ * The identifier of the lcore, which MUST be between 0 and RTE_MAX_LCORE-1.
+ * @return
+ * The role of the lcore.
+ */
+enum rte_lcore_role_t rte_eal_lcore_role(unsigned int lcore_id);
+
+/**
+ * Return the Application thread ID of the execution unit.
+ *
+ * Note: in most cases the lcore id returned here will also correspond
+ * to the processor id of the CPU on which the thread is pinned, this
+ * will not be the case if the user has explicitly changed the thread to
+ * core affinities using --lcores EAL argument e.g. --lcores '(0-3)@10'
+ * to run threads with lcore IDs 0, 1, 2 and 3 on physical core 10..
+ *
+ * @return
+ * Logical core ID (in EAL thread) or LCORE_ID_ANY (in non-EAL thread)
+ */
+static inline unsigned
+rte_lcore_id(void)
+{
+ return RTE_PER_LCORE(_lcore_id);
+}
+
+/**
+ * Get the id of the master lcore
+ *
+ * @return
+ * the id of the master lcore
+ */
+unsigned int rte_get_master_lcore(void);
+
+/**
+ * Return the number of execution units (lcores) on the system.
+ *
+ * @return
+ * the number of execution units (lcores) on the system.
+ */
+unsigned int rte_lcore_count(void);
+
+/**
+ * Return the index of the lcore starting from zero.
+ *
+ * When option -c or -l is given, the index corresponds
+ * to the order in the list.
+ * For example:
+ * -c 0x30, lcore 4 has index 0, and 5 has index 1.
+ * -l 22,18 lcore 22 has index 0, and 18 has index 1.
+ *
+ * @param lcore_id
+ * The targeted lcore, or -1 for the current one.
+ * @return
+ * The relative index, or -1 if not enabled.
+ */
+int rte_lcore_index(int lcore_id);
+
+/**
+ * Return the ID of the physical socket of the logical core we are
+ * running on.
+ * @return
+ * the ID of current lcoreid's physical socket
+ */
+unsigned int rte_socket_id(void);
+
+/**
+ * Return number of physical sockets detected on the system.
+ *
+ * Note that number of nodes may not be correspondent to their physical id's:
+ * for example, a system may report two socket id's, but the actual socket id's
+ * may be 0 and 8.
+ *
+ * @return
+ * the number of physical sockets as recognized by EAL
+ */
+unsigned int
+rte_socket_count(void);
+
+/**
+ * Return socket id with a particular index.
+ *
+ * This will return socket id at a particular position in list of all detected
+ * physical socket id's. For example, on a machine with sockets [0, 8], passing
+ * 1 as a parameter will return 8.
+ *
+ * @param idx
+ * index of physical socket id to return
+ *
+ * @return
+ * - physical socket id as recognized by EAL
+ * - -1 on error, with errno set to EINVAL
+ */
+int
+rte_socket_id_by_idx(unsigned int idx);
+
+/**
+ * Get the ID of the physical socket of the specified lcore
+ *
+ * @param lcore_id
+ * the targeted lcore, which MUST be between 0 and RTE_MAX_LCORE-1.
+ * @return
+ * the ID of lcoreid's physical socket
+ */
+unsigned int
+rte_lcore_to_socket_id(unsigned int lcore_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Return the id of the lcore on a socket starting from zero.
+ *
+ * @param lcore_id
+ * The targeted lcore, or -1 for the current one.
+ * @return
+ * The relative index, or -1 if not enabled.
+ */
+__rte_experimental
+int
+rte_lcore_to_cpu_id(int lcore_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Return the cpuset for a given lcore.
+ * @param lcore_id
+ * the targeted lcore, which MUST be between 0 and RTE_MAX_LCORE-1.
+ * @return
+ * The cpuset of that lcore
+ */
+__rte_experimental
+rte_cpuset_t
+rte_lcore_cpuset(unsigned int lcore_id);
+
+/**
+ * Test if an lcore is enabled.
+ *
+ * @param lcore_id
+ * The identifier of the lcore, which MUST be between 0 and
+ * RTE_MAX_LCORE-1.
+ * @return
+ * True if the given lcore is enabled; false otherwise.
+ */
+int rte_lcore_is_enabled(unsigned int lcore_id);
+
+/**
+ * Get the next enabled lcore ID.
+ *
+ * @param i
+ * The current lcore (reference).
+ * @param skip_master
+ * If true, do not return the ID of the master lcore.
+ * @param wrap
+ * If true, go back to 0 when RTE_MAX_LCORE is reached; otherwise,
+ * return RTE_MAX_LCORE.
+ * @return
+ * The next lcore_id or RTE_MAX_LCORE if not found.
+ */
+unsigned int rte_get_next_lcore(unsigned int i, int skip_master, int wrap);
+
+/**
+ * Macro to browse all running lcores.
+ */
+#define RTE_LCORE_FOREACH(i) \
+ for (i = rte_get_next_lcore(-1, 0, 0); \
+ i<RTE_MAX_LCORE; \
+ i = rte_get_next_lcore(i, 0, 0))
+
+/**
+ * Macro to browse all running lcores except the master lcore.
+ */
+#define RTE_LCORE_FOREACH_SLAVE(i) \
+ for (i = rte_get_next_lcore(-1, 1, 0); \
+ i<RTE_MAX_LCORE; \
+ i = rte_get_next_lcore(i, 1, 0))
+
+/**
+ * Set core affinity of the current thread.
+ * Support both EAL and non-EAL thread and update TLS.
+ *
+ * @param cpusetp
+ * Point to cpu_set_t for setting current thread affinity.
+ * @return
+ * On success, return 0; otherwise return -1;
+ */
+int rte_thread_set_affinity(rte_cpuset_t *cpusetp);
+
+/**
+ * Get core affinity of the current thread.
+ *
+ * @param cpusetp
+ * Point to cpu_set_t for getting current thread cpu affinity.
+ * It presumes input is not NULL, otherwise it causes panic.
+ *
+ */
+void rte_thread_get_affinity(rte_cpuset_t *cpusetp);
+
+/**
+ * Set thread names.
+ *
+ * @note It fails with glibc < 2.12.
+ *
+ * @param id
+ * Thread id.
+ * @param name
+ * Thread name to set.
+ * @return
+ * On success, return 0; otherwise return a negative value.
+ */
+int rte_thread_setname(pthread_t id, const char *name);
+
+/**
+ * Get thread name.
+ *
+ * @note It fails with glibc < 2.12.
+ *
+ * @param id
+ * Thread id.
+ * @param name
+ * Thread name to set.
+ * @param len
+ * Thread name buffer length.
+ * @return
+ * On success, return 0; otherwise return a negative value.
+ */
+__rte_experimental
+int rte_thread_getname(pthread_t id, char *name, size_t len);
+
+/**
+ * Create a control thread.
+ *
+ * Wrapper to pthread_create(), pthread_setname_np() and
+ * pthread_setaffinity_np(). The affinity of the new thread is based
+ * on the CPU affinity retrieved at the time rte_eal_init() was called,
+ * the dataplane and service lcores are then excluded.
+ *
+ * @param thread
+ * Filled with the thread id of the new created thread.
+ * @param name
+ * The name of the control thread (max 16 characters including '\0').
+ * @param attr
+ * Attributes for the new thread.
+ * @param start_routine
+ * Function to be executed by the new thread.
+ * @param arg
+ * Argument passed to start_routine.
+ * @return
+ * On success, returns 0; on error, it returns a negative value
+ * corresponding to the error number.
+ */
+int
+rte_ctrl_thread_create(pthread_t *thread, const char *name,
+ const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg);
+
+/**
+ * Test if the core supplied has a specific role
+ *
+ * @param lcore_id
+ * The identifier of the lcore, which MUST be between 0 and
+ * RTE_MAX_LCORE-1.
+ * @param role
+ * The role to be checked against.
+ * @return
+ * Boolean value: positive if test is true; otherwise returns 0.
+ */
+int
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _RTE_LCORE_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_log.h b/src/spdk/dpdk/lib/librte_eal/include/rte_log.h
new file mode 100644
index 000000000..1789ede56
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_log.h
@@ -0,0 +1,383 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2017 Intel Corporation
+ */
+
+#ifndef _RTE_LOG_H_
+#define _RTE_LOG_H_
+
+/**
+ * @file
+ *
+ * RTE Logs API
+ *
+ * This file provides a log API to RTE applications.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_config.h>
+#include <rte_compat.h>
+
+struct rte_log_dynamic_type;
+
+/** The rte_log structure. */
+struct rte_logs {
+ uint32_t type; /**< Bitfield with enabled logs. */
+ uint32_t level; /**< Log level. */
+ FILE *file; /**< Output file set by rte_openlog_stream, or NULL. */
+ size_t dynamic_types_len;
+ struct rte_log_dynamic_type *dynamic_types;
+};
+
+/** Global log information */
+extern struct rte_logs rte_logs;
+
+/* SDK log type */
+#define RTE_LOGTYPE_EAL 0 /**< Log related to eal. */
+#define RTE_LOGTYPE_MALLOC 1 /**< Log related to malloc. */
+#define RTE_LOGTYPE_RING 2 /**< Log related to ring. */
+#define RTE_LOGTYPE_MEMPOOL 3 /**< Log related to mempool. */
+#define RTE_LOGTYPE_TIMER 4 /**< Log related to timers. */
+#define RTE_LOGTYPE_PMD 5 /**< Log related to poll mode driver. */
+#define RTE_LOGTYPE_HASH 6 /**< Log related to hash table. */
+#define RTE_LOGTYPE_LPM 7 /**< Log related to LPM. */
+#define RTE_LOGTYPE_KNI 8 /**< Log related to KNI. */
+#define RTE_LOGTYPE_ACL 9 /**< Log related to ACL. */
+#define RTE_LOGTYPE_POWER 10 /**< Log related to power. */
+#define RTE_LOGTYPE_METER 11 /**< Log related to QoS meter. */
+#define RTE_LOGTYPE_SCHED 12 /**< Log related to QoS port scheduler. */
+#define RTE_LOGTYPE_PORT 13 /**< Log related to port. */
+#define RTE_LOGTYPE_TABLE 14 /**< Log related to table. */
+#define RTE_LOGTYPE_PIPELINE 15 /**< Log related to pipeline. */
+#define RTE_LOGTYPE_MBUF 16 /**< Log related to mbuf. */
+#define RTE_LOGTYPE_CRYPTODEV 17 /**< Log related to cryptodev. */
+#define RTE_LOGTYPE_EFD 18 /**< Log related to EFD. */
+#define RTE_LOGTYPE_EVENTDEV 19 /**< Log related to eventdev. */
+#define RTE_LOGTYPE_GSO 20 /**< Log related to GSO. */
+
+/* these log types can be used in an application */
+#define RTE_LOGTYPE_USER1 24 /**< User-defined log type 1. */
+#define RTE_LOGTYPE_USER2 25 /**< User-defined log type 2. */
+#define RTE_LOGTYPE_USER3 26 /**< User-defined log type 3. */
+#define RTE_LOGTYPE_USER4 27 /**< User-defined log type 4. */
+#define RTE_LOGTYPE_USER5 28 /**< User-defined log type 5. */
+#define RTE_LOGTYPE_USER6 29 /**< User-defined log type 6. */
+#define RTE_LOGTYPE_USER7 30 /**< User-defined log type 7. */
+#define RTE_LOGTYPE_USER8 31 /**< User-defined log type 8. */
+
+/** First identifier for extended logs */
+#define RTE_LOGTYPE_FIRST_EXT_ID 32
+
+/* Can't use 0, as it gives compiler warnings */
+#define RTE_LOG_EMERG 1U /**< System is unusable. */
+#define RTE_LOG_ALERT 2U /**< Action must be taken immediately. */
+#define RTE_LOG_CRIT 3U /**< Critical conditions. */
+#define RTE_LOG_ERR 4U /**< Error conditions. */
+#define RTE_LOG_WARNING 5U /**< Warning conditions. */
+#define RTE_LOG_NOTICE 6U /**< Normal but significant condition. */
+#define RTE_LOG_INFO 7U /**< Informational. */
+#define RTE_LOG_DEBUG 8U /**< Debug-level messages. */
+
+/**
+ * Change the stream that will be used by the logging system.
+ *
+ * This can be done at any time. The f argument represents the stream
+ * to be used to send the logs. If f is NULL, the default output is
+ * used (stderr).
+ *
+ * @param f
+ * Pointer to the stream.
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int rte_openlog_stream(FILE *f);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve the stream used by the logging system (see rte_openlog_stream()
+ * to change it).
+ *
+ * @return
+ * Pointer to the stream.
+ */
+__rte_experimental
+FILE *rte_log_get_stream(void);
+
+/**
+ * Set the global log level.
+ *
+ * After this call, logs with a level lower or equal than the level
+ * passed as argument will be displayed.
+ *
+ * @param level
+ * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8).
+ */
+void rte_log_set_global_level(uint32_t level);
+
+/**
+ * Get the global log level.
+ *
+ * @return
+ * The current global log level.
+ */
+uint32_t rte_log_get_global_level(void);
+
+/**
+ * Get the log level for a given type.
+ *
+ * @param logtype
+ * The log type identifier.
+ * @return
+ * 0 on success, a negative value if logtype is invalid.
+ */
+int rte_log_get_level(uint32_t logtype);
+
+/**
+ * For a given `logtype`, check if a log with `loglevel` can be printed.
+ *
+ * @param logtype
+ * The log type identifier
+ * @param loglevel
+ * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8).
+ * @return
+ * Returns 'true' if log can be printed and 'false' if it can't.
+ */
+__rte_experimental
+bool rte_log_can_log(uint32_t logtype, uint32_t loglevel);
+
+/**
+ * Set the log level for a given type based on globbing pattern.
+ *
+ * @param pattern
+ * The globbing pattern identifying the log type.
+ * @param level
+ * The level to be set.
+ * @return
+ * 0 on success, a negative value if level is invalid.
+ */
+int rte_log_set_level_pattern(const char *pattern, uint32_t level);
+
+/**
+ * Set the log level for a given type based on regular expression.
+ *
+ * @param regex
+ * The regular expression identifying the log type.
+ * @param level
+ * The level to be set.
+ * @return
+ * 0 on success, a negative value if level is invalid.
+ */
+int rte_log_set_level_regexp(const char *regex, uint32_t level);
+
+/**
+ * Set the log level for a given type.
+ *
+ * @param logtype
+ * The log type identifier.
+ * @param level
+ * The level to be set.
+ * @return
+ * 0 on success, a negative value if logtype or level is invalid.
+ */
+int rte_log_set_level(uint32_t logtype, uint32_t level);
+
+/**
+ * Get the current loglevel for the message being processed.
+ *
+ * Before calling the user-defined stream for logging, the log
+ * subsystem sets a per-lcore variable containing the loglevel and the
+ * logtype of the message being processed. This information can be
+ * accessed by the user-defined log output function through this
+ * function.
+ *
+ * @return
+ * The loglevel of the message being processed.
+ */
+int rte_log_cur_msg_loglevel(void);
+
+/**
+ * Get the current logtype for the message being processed.
+ *
+ * Before calling the user-defined stream for logging, the log
+ * subsystem sets a per-lcore variable containing the loglevel and the
+ * logtype of the message being processed. This information can be
+ * accessed by the user-defined log output function through this
+ * function.
+ *
+ * @return
+ * The logtype of the message being processed.
+ */
+int rte_log_cur_msg_logtype(void);
+
+/**
+ * Register a dynamic log type
+ *
+ * If a log is already registered with the same type, the returned value
+ * is the same than the previous one.
+ *
+ * @param name
+ * The string identifying the log type.
+ * @return
+ * - >0: success, the returned value is the log type identifier.
+ * - (-ENOMEM): cannot allocate memory.
+ */
+int rte_log_register(const char *name);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Register a dynamic log type and try to pick its level from EAL options
+ *
+ * rte_log_register() is called inside. If successful, the function tries
+ * to search for matching regexp in the list of EAL log level options and
+ * pick the level from the last matching entry. If nothing can be applied
+ * from the list, the level will be set to the user-defined default value.
+ *
+ * @param name
+ * Name for the log type to be registered
+ * @param level_def
+ * Fallback level to be set if the global list has no matching options
+ * @return
+ * - >=0: the newly registered log type
+ * - <0: rte_log_register() error value
+ */
+__rte_experimental
+int rte_log_register_type_and_pick_level(const char *name, uint32_t level_def);
+
+/**
+ * Dump log information.
+ *
+ * Dump the global level and the registered log types.
+ *
+ * @param f
+ * The output stream where the dump should be sent.
+ */
+void rte_log_dump(FILE *f);
+
+/**
+ * Generates a log message.
+ *
+ * The message will be sent in the stream defined by the previous call
+ * to rte_openlog_stream().
+ *
+ * The level argument determines if the log should be displayed or
+ * not, depending on the global rte_logs variable.
+ *
+ * The preferred alternative is the RTE_LOG() because it adds the
+ * level and type in the logged string.
+ *
+ * @param level
+ * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8).
+ * @param logtype
+ * The log type, for example, RTE_LOGTYPE_EAL.
+ * @param format
+ * The format string, as in printf(3), followed by the variable arguments
+ * required by the format.
+ * @return
+ * - 0: Success.
+ * - Negative on error.
+ */
+int rte_log(uint32_t level, uint32_t logtype, const char *format, ...)
+#ifdef __GNUC__
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2))
+ __rte_cold
+#endif
+#endif
+ __rte_format_printf(3, 4);
+
+/**
+ * Generates a log message.
+ *
+ * The message will be sent in the stream defined by the previous call
+ * to rte_openlog_stream().
+ *
+ * The level argument determines if the log should be displayed or
+ * not, depending on the global rte_logs variable. A trailing
+ * newline may be added if needed.
+ *
+ * The preferred alternative is the RTE_LOG() because it adds the
+ * level and type in the logged string.
+ *
+ * @param level
+ * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8).
+ * @param logtype
+ * The log type, for example, RTE_LOGTYPE_EAL.
+ * @param format
+ * The format string, as in printf(3), followed by the variable arguments
+ * required by the format.
+ * @param ap
+ * The va_list of the variable arguments required by the format.
+ * @return
+ * - 0: Success.
+ * - Negative on error.
+ */
+int rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap)
+ __rte_format_printf(3, 0);
+
+/**
+ * Generates a log message.
+ *
+ * The RTE_LOG() is a helper that prefixes the string with the log level
+ * and type, and call rte_log().
+ *
+ * @param l
+ * Log level. A value between EMERG (1) and DEBUG (8). The short name is
+ * expanded by the macro, so it cannot be an integer value.
+ * @param t
+ * The log type, for example, EAL. The short name is expanded by the
+ * macro, so it cannot be an integer value.
+ * @param ...
+ * The fmt string, as in printf(3), followed by the variable arguments
+ * required by the format.
+ * @return
+ * - 0: Success.
+ * - Negative on error.
+ */
+#define RTE_LOG(l, t, ...) \
+ rte_log(RTE_LOG_ ## l, \
+ RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__)
+
+/**
+ * Generates a log message for data path.
+ *
+ * Similar to RTE_LOG(), except that it is removed at compilation time
+ * if the RTE_LOG_DP_LEVEL configuration option is lower than the log
+ * level argument.
+ *
+ * @param l
+ * Log level. A value between EMERG (1) and DEBUG (8). The short name is
+ * expanded by the macro, so it cannot be an integer value.
+ * @param t
+ * The log type, for example, EAL. The short name is expanded by the
+ * macro, so it cannot be an integer value.
+ * @param ...
+ * The fmt string, as in printf(3), followed by the variable arguments
+ * required by the format.
+ * @return
+ * - 0: Success.
+ * - Negative on error.
+ */
+#define RTE_LOG_DP(l, t, ...) \
+ (void)((RTE_LOG_ ## l <= RTE_LOG_DP_LEVEL) ? \
+ rte_log(RTE_LOG_ ## l, \
+ RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__) : \
+ 0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LOG_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_malloc.h b/src/spdk/dpdk/lib/librte_eal/include/rte_malloc.h
new file mode 100644
index 000000000..42ca05182
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_malloc.h
@@ -0,0 +1,560 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2019 Intel Corporation
+ */
+
+#ifndef _RTE_MALLOC_H_
+#define _RTE_MALLOC_H_
+
+/**
+ * @file
+ * RTE Malloc. This library provides methods for dynamically allocating memory
+ * from hugepages.
+ */
+
+#include <stdio.h>
+#include <stddef.h>
+#include <rte_compat.h>
+#include <rte_memory.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Structure to hold heap statistics obtained from rte_malloc_get_socket_stats function.
+ */
+struct rte_malloc_socket_stats {
+ size_t heap_totalsz_bytes; /**< Total bytes on heap */
+ size_t heap_freesz_bytes; /**< Total free bytes on heap */
+ size_t greatest_free_size; /**< Size in bytes of largest free block */
+ unsigned free_count; /**< Number of free elements on heap */
+ unsigned alloc_count; /**< Number of allocated elements on heap */
+ size_t heap_allocsz_bytes; /**< Total allocated bytes on heap */
+};
+
+/**
+ * This function allocates memory from the huge-page area of memory. The memory
+ * is not cleared. In NUMA systems, the memory allocated resides on the same
+ * NUMA socket as the core that calls this function.
+ *
+ * @param type
+ * A string identifying the type of allocated objects (useful for debug
+ * purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param size
+ * Size (in bytes) to be allocated.
+ * @param align
+ * If 0, the return is a pointer that is suitably aligned for any kind of
+ * variable (in the same manner as malloc()).
+ * Otherwise, the return is a pointer that is a multiple of *align*. In
+ * this case, it must be a power of two. (Minimum alignment is the
+ * cacheline size, i.e. 64-bytes)
+ * @return
+ * - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ * align is not a power of two).
+ * - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_malloc(const char *type, size_t size, unsigned align);
+
+/**
+ * Allocate zero'ed memory from the heap.
+ *
+ * Equivalent to rte_malloc() except that the memory zone is
+ * initialised with zeros. In NUMA systems, the memory allocated resides on the
+ * same NUMA socket as the core that calls this function.
+ *
+ * @param type
+ * A string identifying the type of allocated objects (useful for debug
+ * purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param size
+ * Size (in bytes) to be allocated.
+ * @param align
+ * If 0, the return is a pointer that is suitably aligned for any kind of
+ * variable (in the same manner as malloc()).
+ * Otherwise, the return is a pointer that is a multiple of *align*. In
+ * this case, it must obviously be a power of two. (Minimum alignment is the
+ * cacheline size, i.e. 64-bytes)
+ * @return
+ * - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ * align is not a power of two).
+ * - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_zmalloc(const char *type, size_t size, unsigned align);
+
+/**
+ * Replacement function for calloc(), using huge-page memory. Memory area is
+ * initialised with zeros. In NUMA systems, the memory allocated resides on the
+ * same NUMA socket as the core that calls this function.
+ *
+ * @param type
+ * A string identifying the type of allocated objects (useful for debug
+ * purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param num
+ * Number of elements to be allocated.
+ * @param size
+ * Size (in bytes) of a single element.
+ * @param align
+ * If 0, the return is a pointer that is suitably aligned for any kind of
+ * variable (in the same manner as malloc()).
+ * Otherwise, the return is a pointer that is a multiple of *align*. In
+ * this case, it must obviously be a power of two. (Minimum alignment is the
+ * cacheline size, i.e. 64-bytes)
+ * @return
+ * - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ * align is not a power of two).
+ * - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_calloc(const char *type, size_t num, size_t size, unsigned align);
+
+/**
+ * Replacement function for realloc(), using huge-page memory. Reserved area
+ * memory is resized, preserving contents. In NUMA systems, the new area
+ * may not reside on the same NUMA node as the old one.
+ *
+ * @param ptr
+ * Pointer to already allocated memory
+ * @param size
+ * Size (in bytes) of new area. If this is 0, memory is freed.
+ * @param align
+ * If 0, the return is a pointer that is suitably aligned for any kind of
+ * variable (in the same manner as malloc()).
+ * Otherwise, the return is a pointer that is a multiple of *align*. In
+ * this case, it must obviously be a power of two. (Minimum alignment is the
+ * cacheline size, i.e. 64-bytes)
+ * @return
+ * - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ * align is not a power of two).
+ * - Otherwise, the pointer to the reallocated memory.
+ */
+void *
+rte_realloc(void *ptr, size_t size, unsigned int align);
+
+/**
+ * Replacement function for realloc(), using huge-page memory. Reserved area
+ * memory is resized, preserving contents. In NUMA systems, the new area
+ * resides on requested NUMA socket.
+ *
+ * @param ptr
+ * Pointer to already allocated memory
+ * @param size
+ * Size (in bytes) of new area. If this is 0, memory is freed.
+ * @param align
+ * If 0, the return is a pointer that is suitably aligned for any kind of
+ * variable (in the same manner as malloc()).
+ * Otherwise, the return is a pointer that is a multiple of *align*. In
+ * this case, it must obviously be a power of two. (Minimum alignment is the
+ * cacheline size, i.e. 64-bytes)
+ * @param socket
+ * NUMA socket to allocate memory on.
+ * @return
+ * - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ * align is not a power of two).
+ * - Otherwise, the pointer to the reallocated memory.
+ */
+__rte_experimental
+void *
+rte_realloc_socket(void *ptr, size_t size, unsigned int align, int socket);
+
+/**
+ * This function allocates memory from the huge-page area of memory. The memory
+ * is not cleared.
+ *
+ * @param type
+ * A string identifying the type of allocated objects (useful for debug
+ * purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param size
+ * Size (in bytes) to be allocated.
+ * @param align
+ * If 0, the return is a pointer that is suitably aligned for any kind of
+ * variable (in the same manner as malloc()).
+ * Otherwise, the return is a pointer that is a multiple of *align*. In
+ * this case, it must be a power of two. (Minimum alignment is the
+ * cacheline size, i.e. 64-bytes)
+ * @param socket
+ * NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function
+ * will behave the same as rte_malloc().
+ * @return
+ * - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ * align is not a power of two).
+ * - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_malloc_socket(const char *type, size_t size, unsigned align, int socket);
+
+/**
+ * Allocate zero'ed memory from the heap.
+ *
+ * Equivalent to rte_malloc() except that the memory zone is
+ * initialised with zeros.
+ *
+ * @param type
+ * A string identifying the type of allocated objects (useful for debug
+ * purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param size
+ * Size (in bytes) to be allocated.
+ * @param align
+ * If 0, the return is a pointer that is suitably aligned for any kind of
+ * variable (in the same manner as malloc()).
+ * Otherwise, the return is a pointer that is a multiple of *align*. In
+ * this case, it must obviously be a power of two. (Minimum alignment is the
+ * cacheline size, i.e. 64-bytes)
+ * @param socket
+ * NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function
+ * will behave the same as rte_zmalloc().
+ * @return
+ * - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ * align is not a power of two).
+ * - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_zmalloc_socket(const char *type, size_t size, unsigned align, int socket);
+
+/**
+ * Replacement function for calloc(), using huge-page memory. Memory area is
+ * initialised with zeros.
+ *
+ * @param type
+ * A string identifying the type of allocated objects (useful for debug
+ * purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param num
+ * Number of elements to be allocated.
+ * @param size
+ * Size (in bytes) of a single element.
+ * @param align
+ * If 0, the return is a pointer that is suitably aligned for any kind of
+ * variable (in the same manner as malloc()).
+ * Otherwise, the return is a pointer that is a multiple of *align*. In
+ * this case, it must obviously be a power of two. (Minimum alignment is the
+ * cacheline size, i.e. 64-bytes)
+ * @param socket
+ * NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function
+ * will behave the same as rte_calloc().
+ * @return
+ * - NULL on error. Not enough memory, or invalid arguments (size is 0,
+ * align is not a power of two).
+ * - Otherwise, the pointer to the allocated object.
+ */
+void *
+rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int socket);
+
+/**
+ * Frees the memory space pointed to by the provided pointer.
+ *
+ * This pointer must have been returned by a previous call to
+ * rte_malloc(), rte_zmalloc(), rte_calloc() or rte_realloc(). The behaviour of
+ * rte_free() is undefined if the pointer does not match this requirement.
+ *
+ * If the pointer is NULL, the function does nothing.
+ *
+ * @param ptr
+ * The pointer to memory to be freed.
+ */
+void
+rte_free(void *ptr);
+
+/**
+ * If malloc debug is enabled, check a memory block for header
+ * and trailer markers to indicate that all is well with the block.
+ * If size is non-null, also return the size of the block.
+ *
+ * @param ptr
+ * pointer to the start of a data block, must have been returned
+ * by a previous call to rte_malloc(), rte_zmalloc(), rte_calloc()
+ * or rte_realloc()
+ * @param size
+ * if non-null, and memory block pointer is valid, returns the size
+ * of the memory block
+ * @return
+ * -1 on error, invalid pointer passed or header and trailer markers
+ * are missing or corrupted
+ * 0 on success
+ */
+int
+rte_malloc_validate(const void *ptr, size_t *size);
+
+/**
+ * Get heap statistics for the specified heap.
+ *
+ * @note This function is not thread-safe with respect to
+ * ``rte_malloc_heap_create()``/``rte_malloc_heap_destroy()`` functions.
+ *
+ * @param socket
+ * An unsigned integer specifying the socket to get heap statistics for
+ * @param socket_stats
+ * A structure which provides memory to store statistics
+ * @return
+ * Null on error
+ * Pointer to structure storing statistics on success
+ */
+int
+rte_malloc_get_socket_stats(int socket,
+ struct rte_malloc_socket_stats *socket_stats);
+
+/**
+ * Add memory chunk to a heap with specified name.
+ *
+ * @note Multiple memory chunks can be added to the same heap
+ *
+ * @note Before accessing this memory in other processes, it needs to be
+ * attached in each of those processes by calling
+ * ``rte_malloc_heap_memory_attach`` in each other process.
+ *
+ * @note Memory must be previously allocated for DPDK to be able to use it as a
+ * malloc heap. Failing to do so will result in undefined behavior, up to and
+ * including segmentation faults.
+ *
+ * @note Calling this function will erase any contents already present at the
+ * supplied memory address.
+ *
+ * @param heap_name
+ * Name of the heap to add memory chunk to
+ * @param va_addr
+ * Start of virtual area to add to the heap. Must be aligned by ``page_sz``.
+ * @param len
+ * Length of virtual area to add to the heap. Must be aligned by ``page_sz``.
+ * @param iova_addrs
+ * Array of page IOVA addresses corresponding to each page in this memory
+ * area. Can be NULL, in which case page IOVA addresses will be set to
+ * RTE_BAD_IOVA.
+ * @param n_pages
+ * Number of elements in the iova_addrs array. Ignored if ``iova_addrs``
+ * is NULL.
+ * @param page_sz
+ * Page size of the underlying memory
+ *
+ * @return
+ * - 0 on success
+ * - -1 in case of error, with rte_errno set to one of the following:
+ * EINVAL - one of the parameters was invalid
+ * EPERM - attempted to add memory to a reserved heap
+ * ENOSPC - no more space in internal config to store a new memory chunk
+ */
+__rte_experimental
+int
+rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len,
+ rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz);
+
+/**
+ * Remove memory chunk from heap with specified name.
+ *
+ * @note Memory chunk being removed must be the same as one that was added;
+ * partially removing memory chunks is not supported
+ *
+ * @note Memory area must not contain any allocated elements to allow its
+ * removal from the heap
+ *
+ * @note All other processes must detach from the memory chunk prior to it being
+ * removed from the heap.
+ *
+ * @param heap_name
+ * Name of the heap to remove memory from
+ * @param va_addr
+ * Virtual address to remove from the heap
+ * @param len
+ * Length of virtual area to remove from the heap
+ *
+ * @return
+ * - 0 on success
+ * - -1 in case of error, with rte_errno set to one of the following:
+ * EINVAL - one of the parameters was invalid
+ * EPERM - attempted to remove memory from a reserved heap
+ * ENOENT - heap or memory chunk was not found
+ * EBUSY - memory chunk still contains data
+ */
+__rte_experimental
+int
+rte_malloc_heap_memory_remove(const char *heap_name, void *va_addr, size_t len);
+
+/**
+ * Attach to an already existing chunk of external memory in another process.
+ *
+ * @note This function must be called before any attempt is made to use an
+ * already existing external memory chunk. This function does *not* need to
+ * be called if a call to ``rte_malloc_heap_memory_add`` was made in the
+ * current process.
+ *
+ * @param heap_name
+ * Heap name to which this chunk of memory belongs
+ * @param va_addr
+ * Start address of memory chunk to attach to
+ * @param len
+ * Length of memory chunk to attach to
+ * @return
+ * 0 on successful attach
+ * -1 on unsuccessful attach, with rte_errno set to indicate cause for error:
+ * EINVAL - one of the parameters was invalid
+ * EPERM - attempted to attach memory to a reserved heap
+ * ENOENT - heap or memory chunk was not found
+ */
+__rte_experimental
+int
+rte_malloc_heap_memory_attach(const char *heap_name, void *va_addr, size_t len);
+
+/**
+ * Detach from a chunk of external memory in secondary process.
+ *
+ * @note This function must be called in before any attempt is made to remove
+ * external memory from the heap in another process. This function does *not*
+ * need to be called if a call to ``rte_malloc_heap_memory_remove`` will be
+ * called in current process.
+ *
+ * @param heap_name
+ * Heap name to which this chunk of memory belongs
+ * @param va_addr
+ * Start address of memory chunk to attach to
+ * @param len
+ * Length of memory chunk to attach to
+ * @return
+ * 0 on successful detach
+ * -1 on unsuccessful detach, with rte_errno set to indicate cause for error:
+ * EINVAL - one of the parameters was invalid
+ * EPERM - attempted to detach memory from a reserved heap
+ * ENOENT - heap or memory chunk was not found
+ */
+__rte_experimental
+int
+rte_malloc_heap_memory_detach(const char *heap_name, void *va_addr, size_t len);
+
+/**
+ * Creates a new empty malloc heap with a specified name.
+ *
+ * @note Heaps created via this call will automatically get assigned a unique
+ * socket ID, which can be found using ``rte_malloc_heap_get_socket()``
+ *
+ * @param heap_name
+ * Name of the heap to create.
+ *
+ * @return
+ * - 0 on successful creation
+ * - -1 in case of error, with rte_errno set to one of the following:
+ * EINVAL - ``heap_name`` was NULL, empty or too long
+ * EEXIST - heap by name of ``heap_name`` already exists
+ * ENOSPC - no more space in internal config to store a new heap
+ */
+__rte_experimental
+int
+rte_malloc_heap_create(const char *heap_name);
+
+/**
+ * Destroys a previously created malloc heap with specified name.
+ *
+ * @note This function will return a failure result if not all memory allocated
+ * from the heap has been freed back to the heap
+ *
+ * @note This function will return a failure result if not all memory segments
+ * were removed from the heap prior to its destruction
+ *
+ * @param heap_name
+ * Name of the heap to create.
+ *
+ * @return
+ * - 0 on success
+ * - -1 in case of error, with rte_errno set to one of the following:
+ * EINVAL - ``heap_name`` was NULL, empty or too long
+ * ENOENT - heap by the name of ``heap_name`` was not found
+ * EPERM - attempting to destroy reserved heap
+ * EBUSY - heap still contains data
+ */
+__rte_experimental
+int
+rte_malloc_heap_destroy(const char *heap_name);
+
+/**
+ * Find socket ID corresponding to a named heap.
+ *
+ * @param name
+ * Heap name to find socket ID for
+ * @return
+ * Socket ID in case of success (a non-negative number)
+ * -1 in case of error, with rte_errno set to one of the following:
+ * EINVAL - ``name`` was NULL
+ * ENOENT - heap identified by the name ``name`` was not found
+ */
+__rte_experimental
+int
+rte_malloc_heap_get_socket(const char *name);
+
+/**
+ * Check if a given socket ID refers to externally allocated memory.
+ *
+ * @note Passing SOCKET_ID_ANY will return 0.
+ *
+ * @param socket_id
+ * Socket ID to check
+ * @return
+ * 1 if socket ID refers to externally allocated memory
+ * 0 if socket ID refers to internal DPDK memory
+ * -1 if socket ID is invalid
+ */
+__rte_experimental
+int
+rte_malloc_heap_socket_is_external(int socket_id);
+
+/**
+ * Dump statistics.
+ *
+ * Dump for the specified type to a file. If the type argument is
+ * NULL, all memory types will be dumped.
+ *
+ * @note This function is not thread-safe with respect to
+ * ``rte_malloc_heap_create()``/``rte_malloc_heap_destroy()`` functions.
+ *
+ * @param f
+ * A pointer to a file for output
+ * @param type
+ * A string identifying the type of objects to dump, or NULL
+ * to dump all objects.
+ */
+void
+rte_malloc_dump_stats(FILE *f, const char *type);
+
+/**
+ * Dump contents of all malloc heaps to a file.
+ *
+ * @note This function is not thread-safe with respect to
+ * ``rte_malloc_heap_create()``/``rte_malloc_heap_destroy()`` functions.
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+__rte_experimental
+void
+rte_malloc_dump_heaps(FILE *f);
+
+/**
+ * Set the maximum amount of allocated memory for this type.
+ *
+ * This is not yet implemented
+ *
+ * @param type
+ * A string identifying the type of allocated objects.
+ * @param max
+ * The maximum amount of allocated bytes for this type.
+ * @return
+ * - 0: Success.
+ * - (-1): Error.
+ */
+__rte_deprecated
+int
+rte_malloc_set_limit(const char *type, size_t max);
+
+/**
+ * Return the IO address of a virtual address obtained through
+ * rte_malloc
+ *
+ * @param addr
+ * Address obtained from a previous rte_malloc call
+ * @return
+ * RTE_BAD_IOVA on error
+ * otherwise return an address suitable for IO
+ */
+rte_iova_t
+rte_malloc_virt2iova(const void *addr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MALLOC_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_memory.h b/src/spdk/dpdk/lib/librte_eal/include/rte_memory.h
new file mode 100644
index 000000000..3d8d0bd69
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_memory.h
@@ -0,0 +1,784 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_MEMORY_H_
+#define _RTE_MEMORY_H_
+
+/**
+ * @file
+ *
+ * Memory-related RTE API.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include <rte_compat.h>
+#include <rte_config.h>
+#include <rte_fbarray.h>
+
+__extension__
+enum rte_page_sizes {
+ RTE_PGSIZE_4K = 1ULL << 12,
+ RTE_PGSIZE_64K = 1ULL << 16,
+ RTE_PGSIZE_256K = 1ULL << 18,
+ RTE_PGSIZE_2M = 1ULL << 21,
+ RTE_PGSIZE_16M = 1ULL << 24,
+ RTE_PGSIZE_256M = 1ULL << 28,
+ RTE_PGSIZE_512M = 1ULL << 29,
+ RTE_PGSIZE_1G = 1ULL << 30,
+ RTE_PGSIZE_4G = 1ULL << 32,
+ RTE_PGSIZE_16G = 1ULL << 34,
+};
+
+#define SOCKET_ID_ANY -1 /**< Any NUMA socket. */
+
+/**
+ * Physical memory segment descriptor.
+ */
+#define RTE_MEMSEG_FLAG_DO_NOT_FREE (1 << 0)
+/**< Prevent this segment from being freed back to the OS. */
+struct rte_memseg {
+ RTE_STD_C11
+ union {
+ phys_addr_t phys_addr; /**< deprecated - Start physical address. */
+ rte_iova_t iova; /**< Start IO address. */
+ };
+ RTE_STD_C11
+ union {
+ void *addr; /**< Start virtual address. */
+ uint64_t addr_64; /**< Makes sure addr is always 64 bits */
+ };
+ size_t len; /**< Length of the segment. */
+ uint64_t hugepage_sz; /**< The pagesize of underlying memory */
+ int32_t socket_id; /**< NUMA socket ID. */
+ uint32_t nchannel; /**< Number of channels. */
+ uint32_t nrank; /**< Number of ranks. */
+ uint32_t flags; /**< Memseg-specific flags */
+} __rte_packed;
+
+/**
+ * memseg list is a special case as we need to store a bunch of other data
+ * together with the array itself.
+ */
+struct rte_memseg_list {
+ RTE_STD_C11
+ union {
+ void *base_va;
+ /**< Base virtual address for this memseg list. */
+ uint64_t addr_64;
+ /**< Makes sure addr is always 64-bits */
+ };
+ uint64_t page_sz; /**< Page size for all memsegs in this list. */
+ int socket_id; /**< Socket ID for all memsegs in this list. */
+ volatile uint32_t version; /**< version number for multiprocess sync. */
+ size_t len; /**< Length of memory area covered by this memseg list. */
+ unsigned int external; /**< 1 if this list points to external memory */
+ unsigned int heap; /**< 1 if this list points to a heap */
+ struct rte_fbarray memseg_arr;
+};
+
+/**
+ * Lock page in physical memory and prevent from swapping.
+ *
+ * @param virt
+ * The virtual address.
+ * @return
+ * 0 on success, negative on error.
+ */
+int rte_mem_lock_page(const void *virt);
+
+/**
+ * Get physical address of any mapped virtual address in the current process.
+ * It is found by browsing the /proc/self/pagemap special file.
+ * The page must be locked.
+ *
+ * @param virt
+ * The virtual address.
+ * @return
+ * The physical address or RTE_BAD_IOVA on error.
+ */
+phys_addr_t rte_mem_virt2phy(const void *virt);
+
+/**
+ * Get IO virtual address of any mapped virtual address in the current process.
+ *
+ * @note This function will not check internal page table. Instead, in IOVA as
+ * PA mode, it will fall back to getting real physical address (which may
+ * not match the expected IOVA, such as what was specified for external
+ * memory).
+ *
+ * @param virt
+ * The virtual address.
+ * @return
+ * The IO address or RTE_BAD_IOVA on error.
+ */
+rte_iova_t rte_mem_virt2iova(const void *virt);
+
+/**
+ * Get virtual memory address corresponding to iova address.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @param iova
+ * The iova address.
+ * @return
+ * Virtual address corresponding to iova address (or NULL if address does not
+ * exist within DPDK memory map).
+ */
+__rte_experimental
+void *
+rte_mem_iova2virt(rte_iova_t iova);
+
+/**
+ * Get memseg to which a particular virtual address belongs.
+ *
+ * @param virt
+ * The virtual address.
+ * @param msl
+ * The memseg list in which to look up based on ``virt`` address
+ * (can be NULL).
+ * @return
+ * Memseg pointer on success, or NULL on error.
+ */
+__rte_experimental
+struct rte_memseg *
+rte_mem_virt2memseg(const void *virt, const struct rte_memseg_list *msl);
+
+/**
+ * Get memseg list corresponding to virtual memory address.
+ *
+ * @param virt
+ * The virtual address.
+ * @return
+ * Memseg list to which this virtual address belongs to.
+ */
+__rte_experimental
+struct rte_memseg_list *
+rte_mem_virt2memseg_list(const void *virt);
+
+/**
+ * Memseg walk function prototype.
+ *
+ * Returning 0 will continue walk
+ * Returning 1 will stop the walk
+ * Returning -1 will stop the walk and report error
+ */
+typedef int (*rte_memseg_walk_t)(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, void *arg);
+
+/**
+ * Memseg contig walk function prototype. This will trigger a callback on every
+ * VA-contiguous area starting at memseg ``ms``, so total valid VA space at each
+ * callback call will be [``ms->addr``, ``ms->addr + len``).
+ *
+ * Returning 0 will continue walk
+ * Returning 1 will stop the walk
+ * Returning -1 will stop the walk and report error
+ */
+typedef int (*rte_memseg_contig_walk_t)(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, size_t len, void *arg);
+
+/**
+ * Memseg list walk function prototype. This will trigger a callback on every
+ * allocated memseg list.
+ *
+ * Returning 0 will continue walk
+ * Returning 1 will stop the walk
+ * Returning -1 will stop the walk and report error
+ */
+typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl,
+ void *arg);
+
+/**
+ * Walk list of all memsegs.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @note This function will also walk through externally allocated segments. It
+ * is up to the user to decide whether to skip through these segments.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+__rte_experimental
+int
+rte_memseg_walk(rte_memseg_walk_t func, void *arg);
+
+/**
+ * Walk each VA-contiguous area.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @note This function will also walk through externally allocated segments. It
+ * is up to the user to decide whether to skip through these segments.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+__rte_experimental
+int
+rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg);
+
+/**
+ * Walk each allocated memseg list.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @note This function will also walk through externally allocated segments. It
+ * is up to the user to decide whether to skip through these segments.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+__rte_experimental
+int
+rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg);
+
+/**
+ * Walk list of all memsegs without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+__rte_experimental
+int
+rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg);
+
+/**
+ * Walk each VA-contiguous area without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+__rte_experimental
+int
+rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg);
+
+/**
+ * Walk each allocated memseg list without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+__rte_experimental
+int
+rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg);
+
+/**
+ * Return file descriptor associated with a particular memseg (if available).
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @note This returns an internal file descriptor. Performing any operations on
+ * this file descriptor is inherently dangerous, so it should be treated
+ * as read-only for all intents and purposes.
+ *
+ * @param ms
+ * A pointer to memseg for which to get file descriptor.
+ *
+ * @return
+ * Valid file descriptor in case of success.
+ * -1 in case of error, with ``rte_errno`` set to the following values:
+ * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
+ * - ENODEV - ``ms`` fd is not available
+ * - ENOENT - ``ms`` is an unused segment
+ * - ENOTSUP - segment fd's are not supported
+ */
+__rte_experimental
+int
+rte_memseg_get_fd(const struct rte_memseg *ms);
+
+/**
+ * Return file descriptor associated with a particular memseg (if available).
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @note This returns an internal file descriptor. Performing any operations on
+ * this file descriptor is inherently dangerous, so it should be treated
+ * as read-only for all intents and purposes.
+ *
+ * @param ms
+ * A pointer to memseg for which to get file descriptor.
+ *
+ * @return
+ * Valid file descriptor in case of success.
+ * -1 in case of error, with ``rte_errno`` set to the following values:
+ * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
+ * - ENODEV - ``ms`` fd is not available
+ * - ENOENT - ``ms`` is an unused segment
+ * - ENOTSUP - segment fd's are not supported
+ */
+__rte_experimental
+int
+rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms);
+
+/**
+ * Get offset into segment file descriptor associated with a particular memseg
+ * (if available).
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @param ms
+ * A pointer to memseg for which to get file descriptor.
+ * @param offset
+ * A pointer to offset value where the result will be stored.
+ *
+ * @return
+ * Valid file descriptor in case of success.
+ * -1 in case of error, with ``rte_errno`` set to the following values:
+ * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
+ * - EINVAL - ``offset`` pointer was NULL
+ * - ENODEV - ``ms`` fd is not available
+ * - ENOENT - ``ms`` is an unused segment
+ * - ENOTSUP - segment fd's are not supported
+ */
+__rte_experimental
+int
+rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset);
+
+/**
+ * Get offset into segment file descriptor associated with a particular memseg
+ * (if available).
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param ms
+ * A pointer to memseg for which to get file descriptor.
+ * @param offset
+ * A pointer to offset value where the result will be stored.
+ *
+ * @return
+ * Valid file descriptor in case of success.
+ * -1 in case of error, with ``rte_errno`` set to the following values:
+ * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
+ * - EINVAL - ``offset`` pointer was NULL
+ * - ENODEV - ``ms`` fd is not available
+ * - ENOENT - ``ms`` is an unused segment
+ * - ENOTSUP - segment fd's are not supported
+ */
+__rte_experimental
+int
+rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
+ size_t *offset);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Register external memory chunk with DPDK.
+ *
+ * @note Using this API is mutually exclusive with ``rte_malloc`` family of
+ * API's.
+ *
+ * @note This API will not perform any DMA mapping. It is expected that user
+ * will do that themselves.
+ *
+ * @note Before accessing this memory in other processes, it needs to be
+ * attached in each of those processes by calling ``rte_extmem_attach`` in
+ * each other process.
+ *
+ * @param va_addr
+ * Start of virtual area to register. Must be aligned by ``page_sz``.
+ * @param len
+ * Length of virtual area to register. Must be aligned by ``page_sz``.
+ * @param iova_addrs
+ * Array of page IOVA addresses corresponding to each page in this memory
+ * area. Can be NULL, in which case page IOVA addresses will be set to
+ * RTE_BAD_IOVA.
+ * @param n_pages
+ * Number of elements in the iova_addrs array. Ignored if ``iova_addrs``
+ * is NULL.
+ * @param page_sz
+ * Page size of the underlying memory
+ *
+ * @return
+ * - 0 on success
+ * - -1 in case of error, with rte_errno set to one of the following:
+ * EINVAL - one of the parameters was invalid
+ * EEXIST - memory chunk is already registered
+ * ENOSPC - no more space in internal config to store a new memory chunk
+ */
+__rte_experimental
+int
+rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[],
+ unsigned int n_pages, size_t page_sz);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Unregister external memory chunk with DPDK.
+ *
+ * @note Using this API is mutually exclusive with ``rte_malloc`` family of
+ * API's.
+ *
+ * @note This API will not perform any DMA unmapping. It is expected that user
+ * will do that themselves.
+ *
+ * @note Before calling this function, all other processes must call
+ * ``rte_extmem_detach`` to detach from the memory area.
+ *
+ * @param va_addr
+ * Start of virtual area to unregister
+ * @param len
+ * Length of virtual area to unregister
+ *
+ * @return
+ * - 0 on success
+ * - -1 in case of error, with rte_errno set to one of the following:
+ * EINVAL - one of the parameters was invalid
+ * ENOENT - memory chunk was not found
+ */
+__rte_experimental
+int
+rte_extmem_unregister(void *va_addr, size_t len);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Attach to external memory chunk registered in another process.
+ *
+ * @note Using this API is mutually exclusive with ``rte_malloc`` family of
+ * API's.
+ *
+ * @note This API will not perform any DMA mapping. It is expected that user
+ * will do that themselves.
+ *
+ * @param va_addr
+ * Start of virtual area to register
+ * @param len
+ * Length of virtual area to register
+ *
+ * @return
+ * - 0 on success
+ * - -1 in case of error, with rte_errno set to one of the following:
+ * EINVAL - one of the parameters was invalid
+ * ENOENT - memory chunk was not found
+ */
+__rte_experimental
+int
+rte_extmem_attach(void *va_addr, size_t len);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Detach from external memory chunk registered in another process.
+ *
+ * @note Using this API is mutually exclusive with ``rte_malloc`` family of
+ * API's.
+ *
+ * @note This API will not perform any DMA unmapping. It is expected that user
+ * will do that themselves.
+ *
+ * @param va_addr
+ * Start of virtual area to unregister
+ * @param len
+ * Length of virtual area to unregister
+ *
+ * @return
+ * - 0 on success
+ * - -1 in case of error, with rte_errno set to one of the following:
+ * EINVAL - one of the parameters was invalid
+ * ENOENT - memory chunk was not found
+ */
+__rte_experimental
+int
+rte_extmem_detach(void *va_addr, size_t len);
+
+/**
+ * Dump the physical memory layout to a file.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+void rte_dump_physmem_layout(FILE *f);
+
+/**
+ * Get the total amount of available physical memory.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @return
+ * The total amount of available physical memory in bytes.
+ */
+uint64_t rte_eal_get_physmem_size(void);
+
+/**
+ * Get the number of memory channels.
+ *
+ * @return
+ * The number of memory channels on the system. The value is 0 if unknown
+ * or not the same on all devices.
+ */
+unsigned rte_memory_get_nchannel(void);
+
+/**
+ * Get the number of memory ranks.
+ *
+ * @return
+ * The number of memory ranks on the system. The value is 0 if unknown or
+ * not the same on all devices.
+ */
+unsigned rte_memory_get_nrank(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Check if all currently allocated memory segments are compliant with
+ * supplied DMA address width.
+ *
+ * @param maskbits
+ * Address width to check against.
+ */
+__rte_experimental
+int rte_mem_check_dma_mask(uint8_t maskbits);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Check if all currently allocated memory segments are compliant with
+ * supplied DMA address width. This function will use
+ * rte_memseg_walk_thread_unsafe instead of rte_memseg_walk implying
+ * memory_hotplug_lock will not be acquired avoiding deadlock during
+ * memory initialization.
+ *
+ * This function is just for EAL core memory internal use. Drivers should
+ * use the previous rte_mem_check_dma_mask.
+ *
+ * @param maskbits
+ * Address width to check against.
+ */
+__rte_experimental
+int rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Set dma mask to use once memory initialization is done. Previous functions
+ * rte_mem_check_dma_mask and rte_mem_check_dma_mask_thread_unsafe can not be
+ * used safely until memory has been initialized.
+ */
+__rte_experimental
+void rte_mem_set_dma_mask(uint8_t maskbits);
+
+/**
+ * Drivers based on uio will not load unless physical
+ * addresses are obtainable. It is only possible to get
+ * physical addresses when running as a privileged user.
+ *
+ * @return
+ * 1 if the system is able to obtain physical addresses.
+ * 0 if using DMA addresses through an IOMMU.
+ */
+int rte_eal_using_phys_addrs(void);
+
+
+/**
+ * Enum indicating which kind of memory event has happened. Used by callbacks to
+ * distinguish between memory allocations and deallocations.
+ */
+enum rte_mem_event {
+ RTE_MEM_EVENT_ALLOC = 0, /**< Allocation event. */
+ RTE_MEM_EVENT_FREE, /**< Deallocation event. */
+};
+#define RTE_MEM_EVENT_CALLBACK_NAME_LEN 64
+/**< maximum length of callback name */
+
+/**
+ * Function typedef used to register callbacks for memory events.
+ */
+typedef void (*rte_mem_event_callback_t)(enum rte_mem_event event_type,
+ const void *addr, size_t len, void *arg);
+
+/**
+ * Function used to register callbacks for memory events.
+ *
+ * @note callbacks will happen while memory hotplug subsystem is write-locked,
+ * therefore some functions (e.g. `rte_memseg_walk()`) will cause a
+ * deadlock when called from within such callbacks.
+ *
+ * @note mem event callbacks not being supported is an expected error condition,
+ * so user code needs to handle this situation. In these cases, return
+ * value will be -1, and rte_errno will be set to ENOTSUP.
+ *
+ * @param name
+ * Name associated with specified callback to be added to the list.
+ *
+ * @param clb
+ * Callback function pointer.
+ *
+ * @param arg
+ * Argument to pass to the callback.
+ *
+ * @return
+ * 0 on successful callback register
+ * -1 on unsuccessful callback register, with rte_errno value indicating
+ * reason for failure.
+ */
+__rte_experimental
+int
+rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
+ void *arg);
+
+/**
+ * Function used to unregister callbacks for memory events.
+ *
+ * @param name
+ * Name associated with specified callback to be removed from the list.
+ *
+ * @param arg
+ * Argument to look for among callbacks with specified callback name.
+ *
+ * @return
+ * 0 on successful callback unregister
+ * -1 on unsuccessful callback unregister, with rte_errno value indicating
+ * reason for failure.
+ */
+__rte_experimental
+int
+rte_mem_event_callback_unregister(const char *name, void *arg);
+
+
+#define RTE_MEM_ALLOC_VALIDATOR_NAME_LEN 64
+/**< maximum length of alloc validator name */
+/**
+ * Function typedef used to register memory allocation validation callbacks.
+ *
+ * Returning 0 will allow allocation attempt to continue. Returning -1 will
+ * prevent allocation from succeeding.
+ */
+typedef int (*rte_mem_alloc_validator_t)(int socket_id,
+ size_t cur_limit, size_t new_len);
+
+/**
+ * @brief Register validator callback for memory allocations.
+ *
+ * Callbacks registered by this function will be called right before memory
+ * allocator is about to trigger allocation of more pages from the system if
+ * said allocation will bring total memory usage above specified limit on
+ * specified socket. User will be able to cancel pending allocation if callback
+ * returns -1.
+ *
+ * @note callbacks will happen while memory hotplug subsystem is write-locked,
+ * therefore some functions (e.g. `rte_memseg_walk()`) will cause a
+ * deadlock when called from within such callbacks.
+ *
+ * @note validator callbacks not being supported is an expected error condition,
+ * so user code needs to handle this situation. In these cases, return
+ * value will be -1, and rte_errno will be set to ENOTSUP.
+ *
+ * @param name
+ * Name associated with specified callback to be added to the list.
+ *
+ * @param clb
+ * Callback function pointer.
+ *
+ * @param socket_id
+ * Socket ID on which to watch for allocations.
+ *
+ * @param limit
+ * Limit above which to trigger callbacks.
+ *
+ * @return
+ * 0 on successful callback register
+ * -1 on unsuccessful callback register, with rte_errno value indicating
+ * reason for failure.
+ */
+__rte_experimental
+int
+rte_mem_alloc_validator_register(const char *name,
+ rte_mem_alloc_validator_t clb, int socket_id, size_t limit);
+
+/**
+ * @brief Unregister validator callback for memory allocations.
+ *
+ * @param name
+ * Name associated with specified callback to be removed from the list.
+ *
+ * @param socket_id
+ * Socket ID on which to watch for allocations.
+ *
+ * @return
+ * 0 on successful callback unregister
+ * -1 on unsuccessful callback unregister, with rte_errno value indicating
+ * reason for failure.
+ */
+__rte_experimental
+int
+rte_mem_alloc_validator_unregister(const char *name, int socket_id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMORY_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_memzone.h b/src/spdk/dpdk/lib/librte_eal/include/rte_memzone.h
new file mode 100644
index 000000000..091c9522f
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_memzone.h
@@ -0,0 +1,320 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_MEMZONE_H_
+#define _RTE_MEMZONE_H_
+
+/**
+ * @file
+ * RTE Memzone
+ *
+ * The goal of the memzone allocator is to reserve contiguous
+ * portions of physical memory. These zones are identified by a name.
+ *
+ * The memzone descriptors are shared by all partitions and are
+ * located in a known place of physical memory. This zone is accessed
+ * using rte_eal_get_configuration(). The lookup (by name) of a
+ * memory zone can be done in any partition and returns the same
+ * physical address.
+ *
+ * A reserved memory zone cannot be unreserved. The reservation shall
+ * be done at initialization time only.
+ */
+
+#include <stdio.h>
+#include <rte_compat.h>
+#include <rte_memory.h>
+#include <rte_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_MEMZONE_2MB 0x00000001 /**< Use 2MB pages. */
+#define RTE_MEMZONE_1GB 0x00000002 /**< Use 1GB pages. */
+#define RTE_MEMZONE_16MB 0x00000100 /**< Use 16MB pages. */
+#define RTE_MEMZONE_16GB 0x00000200 /**< Use 16GB pages. */
+#define RTE_MEMZONE_256KB 0x00010000 /**< Use 256KB pages. */
+#define RTE_MEMZONE_256MB 0x00020000 /**< Use 256MB pages. */
+#define RTE_MEMZONE_512MB 0x00040000 /**< Use 512MB pages. */
+#define RTE_MEMZONE_4GB 0x00080000 /**< Use 4GB pages. */
+#define RTE_MEMZONE_SIZE_HINT_ONLY 0x00000004 /**< Use available page size */
+#define RTE_MEMZONE_IOVA_CONTIG 0x00100000 /**< Ask for IOVA-contiguous memzone. */
+
+/**
+ * A structure describing a memzone, which is a contiguous portion of
+ * physical memory identified by a name.
+ */
+struct rte_memzone {
+
+#define RTE_MEMZONE_NAMESIZE 32 /**< Maximum length of memory zone name.*/
+ char name[RTE_MEMZONE_NAMESIZE]; /**< Name of the memory zone. */
+
+ RTE_STD_C11
+ union {
+ phys_addr_t phys_addr; /**< deprecated - Start physical address. */
+ rte_iova_t iova; /**< Start IO address. */
+ };
+ RTE_STD_C11
+ union {
+ void *addr; /**< Start virtual address. */
+ uint64_t addr_64; /**< Makes sure addr is always 64-bits */
+ };
+ size_t len; /**< Length of the memzone. */
+
+ uint64_t hugepage_sz; /**< The page size of underlying memory */
+
+ int32_t socket_id; /**< NUMA socket ID. */
+
+ uint32_t flags; /**< Characteristics of this memzone. */
+} __rte_packed;
+
+/**
+ * Reserve a portion of physical memory.
+ *
+ * This function reserves some memory and returns a pointer to a
+ * correctly filled memzone descriptor. If the allocation cannot be
+ * done, return NULL.
+ *
+ * @note Reserving memzones with len set to 0 will only attempt to allocate
+ * memzones from memory that is already available. It will not trigger any
+ * new allocations.
+ *
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather biggest
+ * memzone available on socket id corresponding to an lcore from which
+ * reservation was called.
+ *
+ * @param name
+ * The name of the memzone. If it already exists, the function will
+ * fail and return NULL.
+ * @param len
+ * The size of the memory to be reserved. If it
+ * is 0, the biggest contiguous zone will be reserved.
+ * @param socket_id
+ * The socket identifier in the case of
+ * NUMA. The value can be SOCKET_ID_ANY if there is no NUMA
+ * constraint for the reserved zone.
+ * @param flags
+ * The flags parameter is used to request memzones to be
+ * taken from specifically sized hugepages.
+ * - RTE_MEMZONE_2MB - Reserved from 2MB pages
+ * - RTE_MEMZONE_1GB - Reserved from 1GB pages
+ * - RTE_MEMZONE_16MB - Reserved from 16MB pages
+ * - RTE_MEMZONE_16GB - Reserved from 16GB pages
+ * - RTE_MEMZONE_256KB - Reserved from 256KB pages
+ * - RTE_MEMZONE_256MB - Reserved from 256MB pages
+ * - RTE_MEMZONE_512MB - Reserved from 512MB pages
+ * - RTE_MEMZONE_4GB - Reserved from 4GB pages
+ * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if
+ * the requested page size is unavailable.
+ * If this flag is not set, the function
+ * will return error on an unavailable size
+ * request.
+ * - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous.
+ * This option should be used when allocating
+ * memory intended for hardware rings etc.
+ * @return
+ * A pointer to a correctly-filled read-only memzone descriptor, or NULL
+ * on error.
+ * On error case, rte_errno will be set appropriately:
+ * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ * - E_RTE_SECONDARY - function was called from a secondary process instance
+ * - ENOSPC - the maximum number of memzones has already been allocated
+ * - EEXIST - a memzone with the same name already exists
+ * - ENOMEM - no appropriate memory area found in which to create memzone
+ * - EINVAL - invalid parameters
+ */
+const struct rte_memzone *rte_memzone_reserve(const char *name,
+ size_t len, int socket_id,
+ unsigned flags);
+
+/**
+ * Reserve a portion of physical memory with alignment on a specified
+ * boundary.
+ *
+ * This function reserves some memory with alignment on a specified
+ * boundary, and returns a pointer to a correctly filled memzone
+ * descriptor. If the allocation cannot be done or if the alignment
+ * is not a power of 2, returns NULL.
+ *
+ * @note Reserving memzones with len set to 0 will only attempt to allocate
+ * memzones from memory that is already available. It will not trigger any
+ * new allocations.
+ *
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather biggest
+ * memzone available on socket id corresponding to an lcore from which
+ * reservation was called.
+ *
+ * @param name
+ * The name of the memzone. If it already exists, the function will
+ * fail and return NULL.
+ * @param len
+ * The size of the memory to be reserved. If it
+ * is 0, the biggest contiguous zone will be reserved.
+ * @param socket_id
+ * The socket identifier in the case of
+ * NUMA. The value can be SOCKET_ID_ANY if there is no NUMA
+ * constraint for the reserved zone.
+ * @param flags
+ * The flags parameter is used to request memzones to be
+ * taken from specifically sized hugepages.
+ * - RTE_MEMZONE_2MB - Reserved from 2MB pages
+ * - RTE_MEMZONE_1GB - Reserved from 1GB pages
+ * - RTE_MEMZONE_16MB - Reserved from 16MB pages
+ * - RTE_MEMZONE_16GB - Reserved from 16GB pages
+ * - RTE_MEMZONE_256KB - Reserved from 256KB pages
+ * - RTE_MEMZONE_256MB - Reserved from 256MB pages
+ * - RTE_MEMZONE_512MB - Reserved from 512MB pages
+ * - RTE_MEMZONE_4GB - Reserved from 4GB pages
+ * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if
+ * the requested page size is unavailable.
+ * If this flag is not set, the function
+ * will return error on an unavailable size
+ * request.
+ * - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous.
+ * This option should be used when allocating
+ * memory intended for hardware rings etc.
+ * @param align
+ * Alignment for resulting memzone. Must be a power of 2.
+ * @return
+ * A pointer to a correctly-filled read-only memzone descriptor, or NULL
+ * on error.
+ * On error case, rte_errno will be set appropriately:
+ * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ * - E_RTE_SECONDARY - function was called from a secondary process instance
+ * - ENOSPC - the maximum number of memzones has already been allocated
+ * - EEXIST - a memzone with the same name already exists
+ * - ENOMEM - no appropriate memory area found in which to create memzone
+ * - EINVAL - invalid parameters
+ */
+const struct rte_memzone *rte_memzone_reserve_aligned(const char *name,
+ size_t len, int socket_id,
+ unsigned flags, unsigned align);
+
+/**
+ * Reserve a portion of physical memory with specified alignment and
+ * boundary.
+ *
+ * This function reserves some memory with specified alignment and
+ * boundary, and returns a pointer to a correctly filled memzone
+ * descriptor. If the allocation cannot be done or if the alignment
+ * or boundary are not a power of 2, returns NULL.
+ * Memory buffer is reserved in a way, that it wouldn't cross specified
+ * boundary. That implies that requested length should be less or equal
+ * then boundary.
+ *
+ * @note Reserving memzones with len set to 0 will only attempt to allocate
+ * memzones from memory that is already available. It will not trigger any
+ * new allocations.
+ *
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather biggest
+ * memzone available on socket id corresponding to an lcore from which
+ * reservation was called.
+ *
+ * @param name
+ * The name of the memzone. If it already exists, the function will
+ * fail and return NULL.
+ * @param len
+ * The size of the memory to be reserved. If it
+ * is 0, the biggest contiguous zone will be reserved.
+ * @param socket_id
+ * The socket identifier in the case of
+ * NUMA. The value can be SOCKET_ID_ANY if there is no NUMA
+ * constraint for the reserved zone.
+ * @param flags
+ * The flags parameter is used to request memzones to be
+ * taken from specifically sized hugepages.
+ * - RTE_MEMZONE_2MB - Reserved from 2MB pages
+ * - RTE_MEMZONE_1GB - Reserved from 1GB pages
+ * - RTE_MEMZONE_16MB - Reserved from 16MB pages
+ * - RTE_MEMZONE_16GB - Reserved from 16GB pages
+ * - RTE_MEMZONE_256KB - Reserved from 256KB pages
+ * - RTE_MEMZONE_256MB - Reserved from 256MB pages
+ * - RTE_MEMZONE_512MB - Reserved from 512MB pages
+ * - RTE_MEMZONE_4GB - Reserved from 4GB pages
+ * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if
+ * the requested page size is unavailable.
+ * If this flag is not set, the function
+ * will return error on an unavailable size
+ * request.
+ * - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous.
+ * This option should be used when allocating
+ * memory intended for hardware rings etc.
+ * @param align
+ * Alignment for resulting memzone. Must be a power of 2.
+ * @param bound
+ * Boundary for resulting memzone. Must be a power of 2 or zero.
+ * Zero value implies no boundary condition.
+ * @return
+ * A pointer to a correctly-filled read-only memzone descriptor, or NULL
+ * on error.
+ * On error case, rte_errno will be set appropriately:
+ * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ * - E_RTE_SECONDARY - function was called from a secondary process instance
+ * - ENOSPC - the maximum number of memzones has already been allocated
+ * - EEXIST - a memzone with the same name already exists
+ * - ENOMEM - no appropriate memory area found in which to create memzone
+ * - EINVAL - invalid parameters
+ */
+const struct rte_memzone *rte_memzone_reserve_bounded(const char *name,
+ size_t len, int socket_id,
+ unsigned flags, unsigned align, unsigned bound);
+
+/**
+ * Free a memzone.
+ *
+ * @param mz
+ * A pointer to the memzone
+ * @return
+ * -EINVAL - invalid parameter.
+ * 0 - success
+ */
+int rte_memzone_free(const struct rte_memzone *mz);
+
+/**
+ * Lookup for a memzone.
+ *
+ * Get a pointer to a descriptor of an already reserved memory
+ * zone identified by the name given as an argument.
+ *
+ * @param name
+ * The name of the memzone.
+ * @return
+ * A pointer to a read-only memzone descriptor.
+ */
+const struct rte_memzone *rte_memzone_lookup(const char *name);
+
+/**
+ * Dump all reserved memzones to a file.
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+void rte_memzone_dump(FILE *f);
+
+/**
+ * Walk list of all memzones
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ */
+void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg),
+ void *arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMZONE_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_pci_dev_feature_defs.h b/src/spdk/dpdk/lib/librte_eal/include/rte_pci_dev_feature_defs.h
new file mode 100644
index 000000000..e12c22081
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_pci_dev_feature_defs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_PCI_DEV_DEFS_H_
+#define _RTE_PCI_DEV_DEFS_H_
+
+/* interrupt mode */
+enum rte_intr_mode {
+ RTE_INTR_MODE_NONE = 0,
+ RTE_INTR_MODE_LEGACY,
+ RTE_INTR_MODE_MSI,
+ RTE_INTR_MODE_MSIX
+};
+
+#endif /* _RTE_PCI_DEV_DEFS_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_pci_dev_features.h b/src/spdk/dpdk/lib/librte_eal/include/rte_pci_dev_features.h
new file mode 100644
index 000000000..6104123d2
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_pci_dev_features.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_PCI_DEV_FEATURES_H
+#define _RTE_PCI_DEV_FEATURES_H
+
+#include <rte_pci_dev_feature_defs.h>
+
+#define RTE_INTR_MODE_NONE_NAME "none"
+#define RTE_INTR_MODE_LEGACY_NAME "legacy"
+#define RTE_INTR_MODE_MSI_NAME "msi"
+#define RTE_INTR_MODE_MSIX_NAME "msix"
+
+#endif
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_per_lcore.h b/src/spdk/dpdk/lib/librte_eal/include/rte_per_lcore.h
new file mode 100644
index 000000000..eaedf0cb3
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_per_lcore.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_PER_LCORE_H_
+#define _RTE_PER_LCORE_H_
+
+/**
+ * @file
+ *
+ * Per-lcore variables in RTE
+ *
+ * This file defines an API for instantiating per-lcore "global
+ * variables" that are environment-specific. Note that in all
+ * environments, a "shared variable" is the default when you use a
+ * global variable.
+ *
+ * Parts of this are execution environment specific.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <pthread.h>
+
+/**
+ * Macro to define a per lcore variable "var" of type "type", don't
+ * use keywords like "static" or "volatile" in type, just prefix the
+ * whole macro.
+ */
+#define RTE_DEFINE_PER_LCORE(type, name) \
+ __thread __typeof__(type) per_lcore_##name
+
+/**
+ * Macro to declare an extern per lcore variable "var" of type "type"
+ */
+#define RTE_DECLARE_PER_LCORE(type, name) \
+ extern __thread __typeof__(type) per_lcore_##name
+
+/**
+ * Read/write the per-lcore variable value
+ */
+#define RTE_PER_LCORE(name) (per_lcore_##name)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PER_LCORE_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_random.h b/src/spdk/dpdk/lib/librte_eal/include/rte_random.h
new file mode 100644
index 000000000..2b30ec85c
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_random.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_RANDOM_H_
+#define _RTE_RANDOM_H_
+
+/**
+ * @file
+ *
+ * Pseudo-random Generators in RTE
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#include <rte_compat.h>
+
+/**
+ * Seed the pseudo-random generator.
+ *
+ * The generator is automatically seeded by the EAL init with a timer
+ * value. It may need to be re-seeded by the user with a real random
+ * value.
+ *
+ * This function is not multi-thread safe in regards to other
+ * rte_srand() calls, nor is it in relation to concurrent rte_rand()
+ * calls.
+ *
+ * @param seedval
+ * The value of the seed.
+ */
+void
+rte_srand(uint64_t seedval);
+
+/**
+ * Get a pseudo-random value.
+ *
+ * The generator is not cryptographically secure.
+ *
+ * If called from lcore threads, this function is thread-safe.
+ *
+ * @return
+ * A pseudo-random value between 0 and (1<<64)-1.
+ */
+uint64_t
+rte_rand(void);
+
+/**
+ * Generates a pseudo-random number with an upper bound.
+ *
+ * This function returns an uniformly distributed (unbiased) random
+ * number less than a user-specified maximum value.
+ *
+ * If called from lcore threads, this function is thread-safe.
+ *
+ * @param upper_bound
+ * The upper bound of the generated number.
+ * @return
+ * A pseudo-random value between 0 and (upper_bound-1).
+ */
+__rte_experimental
+uint64_t
+rte_rand_max(uint64_t upper_bound);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _RTE_RANDOM_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_reciprocal.h b/src/spdk/dpdk/lib/librte_eal/include/rte_reciprocal.h
new file mode 100644
index 000000000..63e16fde0
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_reciprocal.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Cavium, Inc
+ */
+/*
+ * Reciprocal divide
+ *
+ * Used with permission from original authors
+ * Hannes Frederic Sowa and Daniel Borkmann
+ *
+ * This algorithm is based on the paper "Division by Invariant
+ * Integers Using Multiplication" by Torbjörn Granlund and Peter
+ * L. Montgomery.
+ *
+ * The assembler implementation from Agner Fog, which this code is
+ * based on, can be found here:
+ * http://www.agner.org/optimize/asmlib.zip
+ *
+ * This optimization for A/B is helpful if the divisor B is mostly
+ * runtime invariant. The reciprocal of B is calculated in the
+ * slow-path with reciprocal_value(). The fast-path can then just use
+ * a much faster multiplication operation with a variable dividend A
+ * to calculate the division A/B.
+ */
+
+#ifndef _RTE_RECIPROCAL_H_
+#define _RTE_RECIPROCAL_H_
+
+#include <stdint.h>
+
+struct rte_reciprocal {
+ uint32_t m;
+ uint8_t sh1, sh2;
+};
+
+struct rte_reciprocal_u64 {
+ uint64_t m;
+ uint8_t sh1, sh2;
+};
+
+static inline uint32_t rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
+{
+ uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);
+
+ return (t + ((a - t) >> R.sh1)) >> R.sh2;
+}
+
+static __rte_always_inline uint64_t
+mullhi_u64(uint64_t x, uint64_t y)
+{
+#ifdef __SIZEOF_INT128__
+ __uint128_t xl = x;
+ __uint128_t rl = xl * y;
+
+ return (rl >> 64);
+#else
+ uint64_t u0, u1, v0, v1, k, t;
+ uint64_t w1, w2;
+ uint64_t whi;
+
+ u1 = x >> 32; u0 = x & 0xFFFFFFFF;
+ v1 = y >> 32; v0 = y & 0xFFFFFFFF;
+
+ t = u0*v0;
+ k = t >> 32;
+
+ t = u1*v0 + k;
+ w1 = t & 0xFFFFFFFF;
+ w2 = t >> 32;
+
+ t = u0*v1 + w1;
+ k = t >> 32;
+
+ whi = u1*v1 + w2 + k;
+
+ return whi;
+#endif
+}
+
+static __rte_always_inline uint64_t
+rte_reciprocal_divide_u64(uint64_t a, const struct rte_reciprocal_u64 *R)
+{
+ uint64_t t = mullhi_u64(a, R->m);
+
+ return (t + ((a - t) >> R->sh1)) >> R->sh2;
+}
+
+struct rte_reciprocal rte_reciprocal_value(uint32_t d);
+struct rte_reciprocal_u64 rte_reciprocal_value_u64(uint64_t d);
+
+#endif /* _RTE_RECIPROCAL_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_service.h b/src/spdk/dpdk/lib/librte_eal/include/rte_service.h
new file mode 100644
index 000000000..3a1c735c5
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_service.h
@@ -0,0 +1,422 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Intel Corporation
+ */
+
+#ifndef _RTE_SERVICE_H_
+#define _RTE_SERVICE_H_
+
+/**
+ * @file
+ *
+ * Service functions
+ *
+ * The service functionality provided by this header allows a DPDK component
+ * to indicate that it requires a function call in order for it to perform
+ * its processing.
+ *
+ * An example usage of this functionality would be a component that registers
+ * a service to perform a particular packet processing duty: for example the
+ * eventdev software PMD. At startup the application requests all services
+ * that have been registered, and the cores in the service-coremask run the
+ * required services. The EAL removes these number of cores from the available
+ * runtime cores, and dedicates them to performing service-core workloads. The
+ * application has access to the remaining lcores as normal.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include<stdio.h>
+#include <stdint.h>
+#include <sys/queue.h>
+
+#include <rte_config.h>
+#include <rte_lcore.h>
+
+#define RTE_SERVICE_NAME_MAX 32
+
+/* Capabilities of a service.
+ *
+ * Use the *rte_service_probe_capability* function to check if a service is
+ * capable of a specific capability.
+ */
+/** When set, the service is capable of having multiple threads run it at the
+ * same time.
+ */
+#define RTE_SERVICE_CAP_MT_SAFE (1 << 0)
+
+/**
+ * Return the number of services registered.
+ *
+ * The number of services registered can be passed to *rte_service_get_by_id*,
+ * enabling the application to retrieve the specification of each service.
+ *
+ * @return The number of services registered.
+ */
+uint32_t rte_service_get_count(void);
+
+/**
+ * Return the id of a service by name.
+ *
+ * This function provides the id of the service using the service name as
+ * lookup key. The service id is to be passed to other functions in the
+ * rte_service_* API.
+ *
+ * Example usage:
+ * @code
+ * uint32_t service_id;
+ * int32_t ret = rte_service_get_by_name("service_X", &service_id);
+ * if (ret) {
+ * // handle error
+ * }
+ * @endcode
+ *
+ * @param name The name of the service to retrieve
+ * @param[out] service_id A pointer to a uint32_t, to be filled in with the id.
+ * @retval 0 Success. The service id is provided in *service_id*.
+ * @retval -EINVAL Null *service_id* pointer provided
+ * @retval -ENODEV No such service registered
+ */
+int32_t rte_service_get_by_name(const char *name, uint32_t *service_id);
+
+/**
+ * Return the name of the service.
+ *
+ * @return A pointer to the name of the service. The returned pointer remains
+ * in ownership of the service, and the application must not free it.
+ */
+const char *rte_service_get_name(uint32_t id);
+
+/**
+ * Check if a service has a specific capability.
+ *
+ * This function returns if *service* has implements *capability*.
+ * See RTE_SERVICE_CAP_* defines for a list of valid capabilities.
+ * @retval 1 Capability supported by this service instance
+ * @retval 0 Capability not supported by this service instance
+ */
+int32_t rte_service_probe_capability(uint32_t id, uint32_t capability);
+
+/**
+ * Map or unmap a lcore to a service.
+ *
+ * Each core can be added or removed from running a specific service. This
+ * function enables or disables *lcore* to run *service_id*.
+ *
+ * If multiple cores are enabled on a service, a lock is used to ensure that
+ * only one core runs the service at a time. The exception to this is when
+ * a service indicates that it is multi-thread safe by setting the capability
+ * called RTE_SERVICE_CAP_MT_SAFE. With the multi-thread safe capability set,
+ * the service function can be run on multiple threads at the same time.
+ *
+ * If the service is known to be mapped to a single lcore, setting the
+ * capability of the service to RTE_SERVICE_CAP_MT_SAFE can achieve
+ * better performance by avoiding the use of lock.
+ *
+ * @param service_id the service to apply the lcore to
+ * @param lcore The lcore that will be mapped to service
+ * @param enable Zero to unmap or disable the core, non-zero to enable
+ *
+ * @retval 0 lcore map updated successfully
+ * @retval -EINVAL An invalid service or lcore was provided.
+ */
+int32_t rte_service_map_lcore_set(uint32_t service_id, uint32_t lcore,
+ uint32_t enable);
+
+/**
+ * Retrieve the mapping of an lcore to a service.
+ *
+ * @param service_id the service to apply the lcore to
+ * @param lcore The lcore that will be mapped to service
+ *
+ * @retval 1 lcore is mapped to service
+ * @retval 0 lcore is not mapped to service
+ * @retval -EINVAL An invalid service or lcore was provided.
+ */
+int32_t rte_service_map_lcore_get(uint32_t service_id, uint32_t lcore);
+
+/**
+ * Set the runstate of the service.
+ *
+ * Each service is either running or stopped. Setting a non-zero runstate
+ * enables the service to run, while setting runstate zero disables it.
+ *
+ * @param id The id of the service
+ * @param runstate The run state to apply to the service
+ *
+ * @retval 0 The service was successfully started
+ * @retval -EINVAL Invalid service id
+ */
+int32_t rte_service_runstate_set(uint32_t id, uint32_t runstate);
+
+/**
+ * Get the runstate for the service with *id*. See *rte_service_runstate_set*
+ * for details of runstates. A service can call this function to ensure that
+ * the application has indicated that it will receive CPU cycles. Either a
+ * service-core is mapped (default case), or the application has explicitly
+ * disabled the check that a service-cores is mapped to the service and takes
+ * responsibility to run the service manually using the available function
+ * *rte_service_run_iter_on_app_lcore* to do so.
+ *
+ * @retval 1 Service is running
+ * @retval 0 Service is stopped
+ * @retval -EINVAL Invalid service id
+ */
+int32_t rte_service_runstate_get(uint32_t id);
+
+/**
+ * This function returns whether the service may be currently executing on
+ * at least one lcore, or definitely is not. This function can be used to
+ * determine if, after setting the service runstate to stopped, the service
+ * is still executing a service lcore.
+ *
+ * Care must be taken if calling this function when the service runstate is
+ * running, since the result of this function may be incorrect by the time the
+ * function returns due to service cores running in parallel.
+ *
+ * @retval 1 Service may be running on one or more lcores
+ * @retval 0 Service is not running on any lcore
+ * @retval -EINVAL Invalid service id
+ */
+int32_t
+rte_service_may_be_active(uint32_t id);
+
+/**
+ * Enable or disable the check for a service-core being mapped to the service.
+ * An application can disable the check when takes the responsibility to run a
+ * service itself using *rte_service_run_iter_on_app_lcore*.
+ *
+ * @param id The id of the service to set the check on
+ * @param enable When zero, the check is disabled. Non-zero enables the check.
+ *
+ * @retval 0 Success
+ * @retval -EINVAL Invalid service ID
+ */
+int32_t rte_service_set_runstate_mapped_check(uint32_t id, int32_t enable);
+
+/**
+ * This function runs a service callback from a non-service lcore.
+ *
+ * This function is designed to enable gradual porting to service cores, and
+ * to enable unit tests to verify a service behaves as expected.
+ *
+ * When called, this function ensures that the service identified by *id* is
+ * safe to run on this lcore. Multi-thread safe services are invoked even if
+ * other cores are simultaneously running them as they are multi-thread safe.
+ *
+ * Multi-thread unsafe services are handled depending on the variable
+ * *serialize_multithread_unsafe*:
+ * - When set, the function will check if a service is already being invoked
+ * on another lcore, refusing to run it and returning -EBUSY.
+ * - When zero, the application takes responsibility to ensure that the service
+ * indicated by *id* is not going to be invoked by another lcore. This setting
+ * avoids atomic operations, so is likely to be more performant.
+ *
+ * @param id The ID of the service to run
+ * @param serialize_multithread_unsafe This parameter indicates to the service
+ * cores library if it is required to use atomics to serialize access
+ * to mult-thread unsafe services. As there is an overhead in using
+ * atomics, applications can choose to enable or disable this feature
+ *
+ * Note that any thread calling this function MUST be a DPDK EAL thread, as
+ * the *rte_lcore_id* function is used to access internal data structures.
+ *
+ * @retval 0 Service was run on the calling thread successfully
+ * @retval -EBUSY Another lcore is executing the service, and it is not a
+ * multi-thread safe service, so the service was not run on this lcore
+ * @retval -ENOEXEC Service is not in a run-able state
+ * @retval -EINVAL Invalid service id
+ */
+int32_t rte_service_run_iter_on_app_lcore(uint32_t id,
+ uint32_t serialize_multithread_unsafe);
+
+/**
+ * Start a service core.
+ *
+ * Starting a core makes the core begin polling. Any services assigned to it
+ * will be run as fast as possible. The application must ensure that the lcore
+ * is in a launchable state: e.g. call *rte_eal_lcore_wait* on the lcore_id
+ * before calling this function.
+ *
+ * @retval 0 Success
+ * @retval -EINVAL Failed to start core. The *lcore_id* passed in is not
+ * currently assigned to be a service core.
+ */
+int32_t rte_service_lcore_start(uint32_t lcore_id);
+
+/**
+ * Stop a service core.
+ *
+ * Stopping a core makes the core become idle, but remains assigned as a
+ * service core.
+ *
+ * @retval 0 Success
+ * @retval -EINVAL Invalid *lcore_id* provided
+ * @retval -EALREADY Already stopped core
+ * @retval -EBUSY Failed to stop core, as it would cause a service to not
+ * be run, as this is the only core currently running the service.
+ * The application must stop the service first, and then stop the
+ * lcore.
+ */
+int32_t rte_service_lcore_stop(uint32_t lcore_id);
+
+/**
+ * Adds lcore to the list of service cores.
+ *
+ * This functions can be used at runtime in order to modify the service core
+ * mask.
+ *
+ * @retval 0 Success
+ * @retval -EBUSY lcore is busy, and not available for service core duty
+ * @retval -EALREADY lcore is already added to the service core list
+ * @retval -EINVAL Invalid lcore provided
+ */
+int32_t rte_service_lcore_add(uint32_t lcore);
+
+/**
+ * Removes lcore from the list of service cores.
+ *
+ * This can fail if the core is not stopped, see *rte_service_core_stop*.
+ *
+ * @retval 0 Success
+ * @retval -EBUSY Lcore is not stopped, stop service core before removing.
+ * @retval -EINVAL failed to add lcore to service core mask.
+ */
+int32_t rte_service_lcore_del(uint32_t lcore);
+
+/**
+ * Retrieve the number of service cores currently available.
+ *
+ * This function returns the integer count of service cores available. The
+ * service core count can be used in mapping logic when creating mappings
+ * from service cores to services.
+ *
+ * See *rte_service_lcore_list* for details on retrieving the lcore_id of each
+ * service core.
+ *
+ * @return The number of service cores currently configured.
+ */
+int32_t rte_service_lcore_count(void);
+
+/**
+ * Resets all service core mappings. This does not remove the service cores
+ * from duty, just unmaps all services / cores, and stops() the service cores.
+ * The runstate of services is not modified.
+ *
+ * @retval 0 Success
+ */
+int32_t rte_service_lcore_reset_all(void);
+
+/**
+ * Enable or disable statistics collection for *service*.
+ *
+ * This function enables per core, per-service cycle count collection.
+ * @param id The service to enable statistics gathering on.
+ * @param enable Zero to disable statistics, non-zero to enable.
+ * @retval 0 Success
+ * @retval -EINVAL Invalid service pointer passed
+ */
+int32_t rte_service_set_stats_enable(uint32_t id, int32_t enable);
+
+/**
+ * Retrieve the list of currently enabled service cores.
+ *
+ * This function fills in an application supplied array, with each element
+ * indicating the lcore_id of a service core.
+ *
+ * Adding and removing service cores can be performed using
+ * *rte_service_lcore_add* and *rte_service_lcore_del*.
+ * @param [out] array An array of at least *rte_service_lcore_count* items.
+ * If statically allocating the buffer, use RTE_MAX_LCORE.
+ * @param [out] n The size of *array*.
+ * @retval >=0 Number of service cores that have been populated in the array
+ * @retval -ENOMEM The provided array is not large enough to fill in the
+ * service core list. No items have been populated, call this function
+ * with a size of at least *rte_service_core_count* items.
+ */
+int32_t rte_service_lcore_list(uint32_t array[], uint32_t n);
+
+/**
+ * Get the number of services running on the supplied lcore.
+ *
+ * @param lcore Id of the service core.
+ * @retval >=0 Number of services registered to this core.
+ * @retval -EINVAL Invalid lcore provided
+ * @retval -ENOTSUP The provided lcore is not a service core.
+ */
+int32_t rte_service_lcore_count_services(uint32_t lcore);
+
+/**
+ * Dumps any information available about the service. When id is UINT32_MAX,
+ * this function dumps info for all services.
+ *
+ * @retval 0 Statistics have been successfully dumped
+ * @retval -EINVAL Invalid service id provided
+ */
+int32_t rte_service_dump(FILE *f, uint32_t id);
+
+/**
+ * Returns the number of cycles that this service has consumed
+ */
+#define RTE_SERVICE_ATTR_CYCLES 0
+
+/**
+ * Returns the count of invocations of this service function
+ */
+#define RTE_SERVICE_ATTR_CALL_COUNT 1
+
+/**
+ * Get an attribute from a service.
+ *
+ * @retval 0 Success, the attribute value has been written to *attr_value*.
+ * -EINVAL Invalid id, attr_id or attr_value was NULL.
+ */
+int32_t rte_service_attr_get(uint32_t id, uint32_t attr_id,
+ uint64_t *attr_value);
+
+/**
+ * Reset all attribute values of a service.
+ *
+ * @param id The service to reset all statistics of
+ * @retval 0 Successfully reset attributes
+ * -EINVAL Invalid service id provided
+ */
+int32_t rte_service_attr_reset_all(uint32_t id);
+
+/**
+ * Returns the number of times the service runner has looped.
+ */
+#define RTE_SERVICE_LCORE_ATTR_LOOPS 0
+
+/**
+ * Get an attribute from a service core.
+ *
+ * @param lcore Id of the service core.
+ * @param attr_id Id of the attribute to be retrieved.
+ * @param [out] attr_value Pointer to storage in which to write retrieved value.
+ * @retval 0 Success, the attribute value has been written to *attr_value*.
+ * -EINVAL Invalid lcore, attr_id or attr_value was NULL.
+ * -ENOTSUP lcore is not a service core.
+ */
+int32_t
+rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
+ uint64_t *attr_value);
+
+/**
+ * Reset all attribute values of a service core.
+ *
+ * @param lcore The service core to reset all the statistics of
+ * @retval 0 Successfully reset attributes
+ * -EINVAL Invalid service id provided
+ * -ENOTSUP lcore is not a service core.
+ */
+int32_t
+rte_service_lcore_attr_reset_all(uint32_t lcore);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _RTE_SERVICE_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_service_component.h b/src/spdk/dpdk/lib/librte_eal/include/rte_service_component.h
new file mode 100644
index 000000000..b75aba11b
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_service_component.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Intel Corporation
+ */
+
+#ifndef _SERVICE_PRIVATE_H_
+#define _SERVICE_PRIVATE_H_
+
+/* This file specifies the internal service specification.
+ * Include this file if you are writing a component that requires CPU cycles to
+ * operate, and you wish to run the component using service cores
+ */
+#include <rte_compat.h>
+#include <rte_service.h>
+
+/**
+ * Signature of callback function to run a service.
+ */
+typedef int32_t (*rte_service_func)(void *args);
+
+/**
+ * The specification of a service.
+ *
+ * This struct contains metadata about the service itself, the callback
+ * function to run one iteration of the service, a userdata pointer, flags etc.
+ */
+struct rte_service_spec {
+ /** The name of the service. This should be used by the application to
+ * understand what purpose this service provides.
+ */
+ char name[RTE_SERVICE_NAME_MAX];
+ /** The callback to invoke to run one iteration of the service. */
+ rte_service_func callback;
+ /** The userdata pointer provided to the service callback. */
+ void *callback_userdata;
+ /** Flags to indicate the capabilities of this service. See defines in
+ * the public header file for values of RTE_SERVICE_CAP_*
+ */
+ uint32_t capabilities;
+ /** NUMA socket ID that this service is affinitized to */
+ int socket_id;
+};
+
+/**
+ * Register a new service.
+ *
+ * A service represents a component that requires CPU time periodically to
+ * achieve its purpose.
+ *
+ * For example the eventdev SW PMD requires CPU cycles to perform its
+ * scheduling. This can be achieved by registering it as a service, and the
+ * application can then assign CPU resources to that service.
+ *
+ * Note that when a service component registers itself, it is not permitted to
+ * add or remove service-core threads, or modify lcore-to-service mappings. The
+ * only API that may be called by the service-component is
+ * *rte_service_component_runstate_set*, which indicates that the service
+ * component is ready to be executed.
+ *
+ * If the service is known to be mapped to a single lcore, setting the
+ * capability of the service to RTE_SERVICE_CAP_MT_SAFE can achieve
+ * better performance.
+ *
+ * @param spec The specification of the service to register
+ * @param[out] service_id A pointer to a uint32_t, which will be filled in
+ * during registration of the service. It is set to the integers
+ * service number given to the service. This parameter may be NULL.
+ * @retval 0 Successfully registered the service.
+ * -EINVAL Attempted to register an invalid service (eg, no callback
+ * set)
+ */
+int32_t rte_service_component_register(const struct rte_service_spec *spec,
+ uint32_t *service_id);
+
+/**
+ * Unregister a service component.
+ *
+ * The service being removed must be stopped before calling this function.
+ *
+ * @retval 0 The service was successfully unregistered.
+ * @retval -EBUSY The service is currently running, stop the service before
+ * calling unregister. No action has been taken.
+ */
+int32_t rte_service_component_unregister(uint32_t id);
+
+/**
+ * Private function to allow EAL to initialized default mappings.
+ *
+ * This function iterates all the services, and maps then to the available
+ * cores. Based on the capabilities of the services, they are set to run on the
+ * available cores in a round-robin manner.
+ *
+ * @retval 0 Success
+ * @retval -ENOTSUP No service lcores in use
+ * @retval -EINVAL Error while iterating over services
+ * @retval -ENODEV Error in enabling service lcore on a service
+ * @retval -ENOEXEC Error when starting services
+ */
+int32_t rte_service_start_with_defaults(void);
+
+/**
+ * Set the backend runstate of a component.
+ *
+ * This function allows services to be registered at startup, but not yet
+ * enabled to run by default. When the service has been configured (via the
+ * usual method; eg rte_eventdev_configure, the service can mark itself as
+ * ready to run. The differentiation between backend runstate and
+ * service_runstate is that the backend runstate is set by the service
+ * component while the service runstate is reserved for application usage.
+ *
+ * @retval 0 Success
+ */
+int32_t rte_service_component_runstate_set(uint32_t id, uint32_t runstate);
+
+/**
+ * Initialize the service library.
+ *
+ * In order to use the service library, it must be initialized. EAL initializes
+ * the library at startup.
+ *
+ * @retval 0 Success
+ * @retval -EALREADY Service library is already initialized
+ */
+int32_t rte_service_init(void);
+
+/**
+ * @internal Free up the memory that has been initialized.
+ * This routine is to be invoked prior to process termination.
+ *
+ * @retval None
+ */
+void rte_service_finalize(void);
+
+#endif /* _SERVICE_PRIVATE_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_string_fns.h b/src/spdk/dpdk/lib/librte_eal/include/rte_string_fns.h
new file mode 100644
index 000000000..8bac8243c
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_string_fns.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2019 Intel Corporation
+ */
+
+/**
+ * @file
+ *
+ * String-related functions as replacement for libc equivalents
+ */
+
+#ifndef _RTE_STRING_FNS_H_
+#define _RTE_STRING_FNS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+#include <rte_common.h>
+
+/**
+ * Takes string "string" parameter and splits it at character "delim"
+ * up to maxtokens-1 times - to give "maxtokens" resulting tokens. Like
+ * strtok or strsep functions, this modifies its input string, by replacing
+ * instances of "delim" with '\\0'. All resultant tokens are returned in the
+ * "tokens" array which must have enough entries to hold "maxtokens".
+ *
+ * @param string
+ * The input string to be split into tokens
+ *
+ * @param stringlen
+ * The max length of the input buffer
+ *
+ * @param tokens
+ * The array to hold the pointers to the tokens in the string
+ *
+ * @param maxtokens
+ * The number of elements in the tokens array. At most, maxtokens-1 splits
+ * of the string will be done.
+ *
+ * @param delim
+ * The character on which the split of the data will be done
+ *
+ * @return
+ * The number of tokens in the tokens array.
+ */
+int
+rte_strsplit(char *string, int stringlen,
+ char **tokens, int maxtokens, char delim);
+
+/**
+ * @internal
+ * DPDK-specific version of strlcpy for systems without
+ * libc or libbsd copies of the function
+ */
+static inline size_t
+rte_strlcpy(char *dst, const char *src, size_t size)
+{
+ return (size_t)snprintf(dst, size, "%s", src);
+}
+
+/**
+ * @internal
+ * DPDK-specific version of strlcat for systems without
+ * libc or libbsd copies of the function
+ */
+static inline size_t
+rte_strlcat(char *dst, const char *src, size_t size)
+{
+ size_t l = strnlen(dst, size);
+ if (l < size)
+ return l + rte_strlcpy(&dst[l], src, size - l);
+ return l + strlen(src);
+}
+
+/* pull in a strlcpy function */
+#ifdef RTE_EXEC_ENV_FREEBSD
+#ifndef __BSD_VISIBLE /* non-standard functions are hidden */
+#define strlcpy(dst, src, size) rte_strlcpy(dst, src, size)
+#define strlcat(dst, src, size) rte_strlcat(dst, src, size)
+#endif
+
+#else /* non-BSD platforms */
+#ifdef RTE_USE_LIBBSD
+#include <bsd/string.h>
+
+#else /* no BSD header files, create own */
+#define strlcpy(dst, src, size) rte_strlcpy(dst, src, size)
+#define strlcat(dst, src, size) rte_strlcat(dst, src, size)
+
+#endif /* RTE_USE_LIBBSD */
+#endif /* FREEBSD */
+
+/**
+ * Copy string src to buffer dst of size dsize.
+ * At most dsize-1 chars will be copied.
+ * Always NUL-terminates, unless (dsize == 0).
+ * Returns number of bytes copied (terminating NUL-byte excluded) on success ;
+ * negative errno on error.
+ *
+ * @param dst
+ * The destination string.
+ *
+ * @param src
+ * The input string to be copied.
+ *
+ * @param dsize
+ * Length in bytes of the destination buffer.
+ *
+ * @return
+ * The number of bytes copied on success
+ * -E2BIG if the destination buffer is too small.
+ */
+ssize_t
+rte_strscpy(char *dst, const char *src, size_t dsize);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_STRING_FNS_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_tailq.h b/src/spdk/dpdk/lib/librte_eal/include/rte_tailq.h
new file mode 100644
index 000000000..b6fe4e5f7
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_tailq.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_TAILQ_H_
+#define _RTE_TAILQ_H_
+
+/**
+ * @file
+ * Here defines rte_tailq APIs for only internal use
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/queue.h>
+#include <stdio.h>
+#include <rte_debug.h>
+
+/** dummy structure type used by the rte_tailq APIs */
+struct rte_tailq_entry {
+ TAILQ_ENTRY(rte_tailq_entry) next; /**< Pointer entries for a tailq list */
+ void *data; /**< Pointer to the data referenced by this tailq entry */
+};
+/** dummy */
+TAILQ_HEAD(rte_tailq_entry_head, rte_tailq_entry);
+
+#define RTE_TAILQ_NAMESIZE 32
+
+/**
+ * The structure defining a tailq header entry for storing
+ * in the rte_config structure in shared memory. Each tailq
+ * is identified by name.
+ * Any library storing a set of objects e.g. rings, mempools, hash-tables,
+ * is recommended to use an entry here, so as to make it easy for
+ * a multi-process app to find already-created elements in shared memory.
+ */
+struct rte_tailq_head {
+ struct rte_tailq_entry_head tailq_head; /**< NOTE: must be first element */
+ char name[RTE_TAILQ_NAMESIZE];
+};
+
+struct rte_tailq_elem {
+ /**
+ * Reference to head in shared mem, updated at init time by
+ * rte_eal_tailqs_init()
+ */
+ struct rte_tailq_head *head;
+ TAILQ_ENTRY(rte_tailq_elem) next;
+ const char name[RTE_TAILQ_NAMESIZE];
+};
+
+/**
+ * Return the first tailq entry cast to the right struct.
+ */
+#define RTE_TAILQ_CAST(tailq_entry, struct_name) \
+ (struct struct_name *)&(tailq_entry)->tailq_head
+
+/**
+ * Utility macro to make looking up a tailqueue for a particular struct easier.
+ *
+ * @param name
+ * The name of tailq
+ *
+ * @param struct_name
+ * The name of the list type we are using. (Generally this is the same as the
+ * first parameter passed to TAILQ_HEAD macro)
+ *
+ * @return
+ * The return value from rte_eal_tailq_lookup, typecast to the appropriate
+ * structure pointer type.
+ * NULL on error, since the tailq_head is the first
+ * element in the rte_tailq_head structure.
+ */
+#define RTE_TAILQ_LOOKUP(name, struct_name) \
+ RTE_TAILQ_CAST(rte_eal_tailq_lookup(name), struct_name)
+
+/**
+ * Dump tail queues to a file.
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+void rte_dump_tailq(FILE *f);
+
+/**
+ * Lookup for a tail queue.
+ *
+ * Get a pointer to a tail queue header of a tail
+ * queue identified by the name given as an argument.
+ * Note: this function is not multi-thread safe, and should only be called from
+ * a single thread at a time
+ *
+ * @param name
+ * The name of the queue.
+ * @return
+ * A pointer to the tail queue head structure.
+ */
+struct rte_tailq_head *rte_eal_tailq_lookup(const char *name);
+
+/**
+ * Register a tail queue.
+ *
+ * Register a tail queue from shared memory.
+ * This function is mainly used by EAL_REGISTER_TAILQ macro which is used to
+ * register tailq from the different dpdk libraries. Since this macro is a
+ * constructor, the function has no access to dpdk shared memory, so the
+ * registered tailq can not be used before call to rte_eal_init() which calls
+ * rte_eal_tailqs_init().
+ *
+ * @param t
+ * The tailq element which contains the name of the tailq you want to
+ * create (/retrieve when in secondary process).
+ * @return
+ * 0 on success or -1 in case of an error.
+ */
+int rte_eal_tailq_register(struct rte_tailq_elem *t);
+
+#define EAL_REGISTER_TAILQ(t) \
+RTE_INIT(tailqinitfn_ ##t) \
+{ \
+ if (rte_eal_tailq_register(&t) < 0) \
+ rte_panic("Cannot initialize tailq: %s\n", t.name); \
+}
+
+/* This macro permits both remove and free var within the loop safely.*/
+#ifndef TAILQ_FOREACH_SAFE
+#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = TAILQ_FIRST((head)); \
+ (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_TAILQ_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_test.h b/src/spdk/dpdk/lib/librte_eal/include/rte_test.h
new file mode 100644
index 000000000..89e47f47a
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_test.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 Cavium, Inc
+ */
+
+#ifndef _RTE_TEST_H_
+#define _RTE_TEST_H_
+
+#include <rte_log.h>
+
+/* Before including rte_test.h file you can define
+ * RTE_TEST_TRACE_FAILURE(_file, _line, _func) macro to better trace/debug test
+ * failures. Mostly useful in development phase.
+ */
+#ifndef RTE_TEST_TRACE_FAILURE
+#define RTE_TEST_TRACE_FAILURE(_file, _line, _func)
+#endif
+
+
+#define RTE_TEST_ASSERT(cond, msg, ...) do { \
+ if (!(cond)) { \
+ RTE_LOG(DEBUG, EAL, "Test assert %s line %d failed: " \
+ msg "\n", __func__, __LINE__, ##__VA_ARGS__); \
+ RTE_TEST_TRACE_FAILURE(__FILE__, __LINE__, __func__); \
+ return -1; \
+ } \
+} while (0)
+
+#define RTE_TEST_ASSERT_EQUAL(a, b, msg, ...) \
+ RTE_TEST_ASSERT(a == b, msg, ##__VA_ARGS__)
+
+#define RTE_TEST_ASSERT_NOT_EQUAL(a, b, msg, ...) \
+ RTE_TEST_ASSERT(a != b, msg, ##__VA_ARGS__)
+
+#define RTE_TEST_ASSERT_SUCCESS(val, msg, ...) \
+ RTE_TEST_ASSERT(val == 0, msg, ##__VA_ARGS__)
+
+#define RTE_TEST_ASSERT_FAIL(val, msg, ...) \
+ RTE_TEST_ASSERT(val != 0, msg, ##__VA_ARGS__)
+
+#define RTE_TEST_ASSERT_NULL(val, msg, ...) \
+ RTE_TEST_ASSERT(val == NULL, msg, ##__VA_ARGS__)
+
+#define RTE_TEST_ASSERT_NOT_NULL(val, msg, ...) \
+ RTE_TEST_ASSERT(val != NULL, msg, ##__VA_ARGS__)
+
+#endif /* _RTE_TEST_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_time.h b/src/spdk/dpdk/lib/librte_eal/include/rte_time.h
new file mode 100644
index 000000000..5ad7c8841
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_time.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 Intel Corporation
+ */
+
+#ifndef _RTE_TIME_H_
+#define _RTE_TIME_H_
+
+#include <stdint.h>
+#include <time.h>
+
+#define NSEC_PER_SEC 1000000000L
+
+/**
+ * Structure to hold the parameters of a running cycle counter to assist
+ * in converting cycles to nanoseconds.
+ */
+struct rte_timecounter {
+ /** Last cycle counter value read. */
+ uint64_t cycle_last;
+ /** Nanoseconds count. */
+ uint64_t nsec;
+ /** Bitmask separating nanosecond and sub-nanoseconds. */
+ uint64_t nsec_mask;
+ /** Sub-nanoseconds count. */
+ uint64_t nsec_frac;
+ /** Bitmask for two's complement subtraction of non-64 bit counters. */
+ uint64_t cc_mask;
+ /** Cycle to nanosecond divisor (power of two). */
+ uint32_t cc_shift;
+};
+
+/**
+ * Converts cyclecounter cycles to nanoseconds.
+ */
+static inline uint64_t
+rte_cyclecounter_cycles_to_ns(struct rte_timecounter *tc, uint64_t cycles)
+{
+ uint64_t ns;
+
+ /* Add fractional nanoseconds. */
+ ns = cycles + tc->nsec_frac;
+ tc->nsec_frac = ns & tc->nsec_mask;
+
+ /* Shift to get only nanoseconds. */
+ return ns >> tc->cc_shift;
+}
+
+/**
+ * Update the internal nanosecond count in the structure.
+ */
+static inline uint64_t
+rte_timecounter_update(struct rte_timecounter *tc, uint64_t cycle_now)
+{
+ uint64_t cycle_delta, ns_offset;
+
+ /* Calculate the delta since the last call. */
+ if (tc->cycle_last <= cycle_now)
+ cycle_delta = (cycle_now - tc->cycle_last) & tc->cc_mask;
+ else
+ /* Handle cycle counts that have wrapped around . */
+ cycle_delta = (~(tc->cycle_last - cycle_now) & tc->cc_mask) + 1;
+
+ /* Convert to nanoseconds. */
+ ns_offset = rte_cyclecounter_cycles_to_ns(tc, cycle_delta);
+
+ /* Store current cycle counter for next call. */
+ tc->cycle_last = cycle_now;
+
+ /* Update the nanosecond count. */
+ tc->nsec += ns_offset;
+
+ return tc->nsec;
+}
+
+/**
+ * Convert from timespec structure into nanosecond units.
+ */
+static inline uint64_t
+rte_timespec_to_ns(const struct timespec *ts)
+{
+ return ((uint64_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
+}
+
+/**
+ * Convert from nanosecond units into timespec structure.
+ */
+static inline struct timespec
+rte_ns_to_timespec(uint64_t nsec)
+{
+ struct timespec ts = {0, 0};
+
+ if (nsec == 0)
+ return ts;
+
+ ts.tv_sec = nsec / NSEC_PER_SEC;
+ ts.tv_nsec = nsec % NSEC_PER_SEC;
+
+ return ts;
+}
+
+#endif /* _RTE_TIME_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_trace.h b/src/spdk/dpdk/lib/librte_eal/include/rte_trace.h
new file mode 100644
index 000000000..a6e991fad
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_trace.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#ifndef _RTE_TRACE_H_
+#define _RTE_TRACE_H_
+
+/**
+ * @file
+ *
+ * RTE Trace API
+ *
+ * This file provides the trace API to RTE applications.
+ *
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_compat.h>
+
+/**
+ * Test if trace is enabled.
+ *
+ * @return
+ * true if trace is enabled, false otherwise.
+ */
+__rte_experimental
+bool rte_trace_is_enabled(void);
+
+/**
+ * Enumerate trace mode operation.
+ */
+enum rte_trace_mode {
+ /**
+ * In this mode, when no space is left in the trace buffer, the
+ * subsequent events overwrite the old events.
+ */
+ RTE_TRACE_MODE_OVERWRITE,
+ /**
+ * In this mode, when no space is left in the trace buffer, the
+ * subsequent events shall not be recorded.
+ */
+ RTE_TRACE_MODE_DISCARD,
+};
+
+/**
+ * Set the trace mode.
+ *
+ * @param mode
+ * Trace mode.
+ */
+__rte_experimental
+void rte_trace_mode_set(enum rte_trace_mode mode);
+
+/**
+ * Get the trace mode.
+ *
+ * @return
+ * The current trace mode.
+ */
+__rte_experimental
+enum rte_trace_mode rte_trace_mode_get(void);
+
+/**
+ * Enable/Disable a set of tracepoints based on globbing pattern.
+ *
+ * @param pattern
+ * The globbing pattern identifying the tracepoint.
+ * @param enable
+ * true to enable tracepoint, false to disable the tracepoint, upon match.
+ * @return
+ * - 0: Success and no pattern match.
+ * - 1: Success and found pattern match.
+ * - (-ERANGE): Tracepoint object is not registered.
+ */
+__rte_experimental
+int rte_trace_pattern(const char *pattern, bool enable);
+
+/**
+ * Enable/Disable a set of tracepoints based on regular expression.
+ *
+ * @param regex
+ * A regular expression identifying the tracepoint.
+ * @param enable
+ * true to enable tracepoint, false to disable the tracepoint, upon match.
+ * @return
+ * - 0: Success and no pattern match.
+ * - 1: Success and found pattern match.
+ * - (-ERANGE): Tracepoint object is not registered.
+ * - (-EINVAL): Invalid regular expression rule.
+ */
+__rte_experimental
+int rte_trace_regexp(const char *regex, bool enable);
+
+/**
+ * Save the trace buffer to the trace directory.
+ *
+ * By default, trace directory will be created at $HOME directory and this can
+ * be overridden by --trace-dir EAL parameter.
+ *
+ * @return
+ * - 0: Success.
+ * - <0 : Failure.
+ */
+__rte_experimental
+int rte_trace_save(void);
+
+/**
+ * Dump the trace metadata to a file.
+ *
+ * @param f
+ * A pointer to a file for output
+ * @return
+ * - 0: Success.
+ * - <0 : Failure.
+ */
+__rte_experimental
+int rte_trace_metadata_dump(FILE *f);
+
+/**
+ * Dump the trace subsystem status to a file.
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+__rte_experimental
+void rte_trace_dump(FILE *f);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_TRACE_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_trace_point.h b/src/spdk/dpdk/lib/librte_eal/include/rte_trace_point.h
new file mode 100644
index 000000000..b45171275
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_trace_point.h
@@ -0,0 +1,408 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#ifndef _RTE_TRACE_POINT_H_
+#define _RTE_TRACE_POINT_H_
+
+/**
+ * @file
+ *
+ * RTE Tracepoint API
+ *
+ * This file provides the tracepoint API to RTE applications.
+ *
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_compat.h>
+#include <rte_cycles.h>
+#include <rte_per_lcore.h>
+#include <rte_string_fns.h>
+#include <rte_uuid.h>
+
+/** The tracepoint object. */
+typedef uint64_t rte_trace_point_t;
+
+/** Macro to define the tracepoint. */
+#define RTE_TRACE_POINT_DEFINE(tp) \
+rte_trace_point_t __attribute__((section("__rte_trace_point"))) __##tp
+
+/**
+ * Macro to define the tracepoint arguments in RTE_TRACE_POINT macro.
+
+ * @see RTE_TRACE_POINT, RTE_TRACE_POINT_FP
+ */
+#define RTE_TRACE_POINT_ARGS
+
+/** @internal Helper macro to support RTE_TRACE_POINT and RTE_TRACE_POINT_FP */
+#define __RTE_TRACE_POINT(_mode, _tp, _args, ...) \
+extern rte_trace_point_t __##_tp; \
+static __rte_always_inline void \
+_tp _args \
+{ \
+ __rte_trace_point_emit_header_##_mode(&__##_tp); \
+ __VA_ARGS__ \
+}
+
+/**
+ * Create a tracepoint.
+ *
+ * A tracepoint is defined by specifying:
+ * - its input arguments: they are the C function style parameters to define
+ * the arguments of tracepoint function. These input arguments are embedded
+ * using the RTE_TRACE_POINT_ARGS macro.
+ * - its output event fields: they are the sources of event fields that form
+ * the payload of any event that the execution of the tracepoint macro emits
+ * for this particular tracepoint. The application uses
+ * rte_trace_point_emit_* macros to emit the output event fields.
+ *
+ * @param tp
+ * Tracepoint object. Before using the tracepoint, an application needs to
+ * define the tracepoint using RTE_TRACE_POINT_DEFINE macro.
+ * @param args
+ * C function style input arguments to define the arguments to tracepoint
+ * function.
+ * @param ...
+ * Define the payload of trace function. The payload will be formed using
+ * rte_trace_point_emit_* macros. Use ";" delimiter between two payloads.
+ *
+ * @see RTE_TRACE_POINT_ARGS, RTE_TRACE_POINT_DEFINE, rte_trace_point_emit_*
+ */
+#define RTE_TRACE_POINT(tp, args, ...) \
+ __RTE_TRACE_POINT(generic, tp, args, __VA_ARGS__)
+
+/**
+ * Create a tracepoint for fast path.
+ *
+ * Similar to RTE_TRACE_POINT, except that it is removed at compilation time
+ * unless the RTE_ENABLE_TRACE_FP configuration parameter is set.
+ *
+ * @param tp
+ * Tracepoint object. Before using the tracepoint, an application needs to
+ * define the tracepoint using RTE_TRACE_POINT_DEFINE macro.
+ * @param args
+ * C function style input arguments to define the arguments to tracepoint.
+ * function.
+ * @param ...
+ * Define the payload of trace function. The payload will be formed using
+ * rte_trace_point_emit_* macros, Use ";" delimiter between two payloads.
+ *
+ * @see RTE_TRACE_POINT
+ */
+#define RTE_TRACE_POINT_FP(tp, args, ...) \
+ __RTE_TRACE_POINT(fp, tp, args, __VA_ARGS__)
+
+#ifdef __DOXYGEN__
+
+/**
+ * Register a tracepoint.
+ *
+ * @param trace
+ * The tracepoint object created using RTE_TRACE_POINT_DEFINE.
+ * @param name
+ * The name of the tracepoint object.
+ * @return
+ * - 0: Successfully registered the tracepoint.
+ * - <0: Failure to register the tracepoint.
+ */
+#define RTE_TRACE_POINT_REGISTER(trace, name)
+
+/** Tracepoint function payload for uint64_t datatype */
+#define rte_trace_point_emit_u64(val)
+/** Tracepoint function payload for int64_t datatype */
+#define rte_trace_point_emit_i64(val)
+/** Tracepoint function payload for uint32_t datatype */
+#define rte_trace_point_emit_u32(val)
+/** Tracepoint function payload for int32_t datatype */
+#define rte_trace_point_emit_i32(val)
+/** Tracepoint function payload for uint16_t datatype */
+#define rte_trace_point_emit_u16(val)
+/** Tracepoint function payload for int16_t datatype */
+#define rte_trace_point_emit_i16(val)
+/** Tracepoint function payload for uint8_t datatype */
+#define rte_trace_point_emit_u8(val)
+/** Tracepoint function payload for int8_t datatype */
+#define rte_trace_point_emit_i8(val)
+/** Tracepoint function payload for int datatype */
+#define rte_trace_point_emit_int(val)
+/** Tracepoint function payload for long datatype */
+#define rte_trace_point_emit_long(val)
+/** Tracepoint function payload for float datatype */
+#define rte_trace_point_emit_float(val)
+/** Tracepoint function payload for double datatype */
+#define rte_trace_point_emit_double(val)
+/** Tracepoint function payload for pointer datatype */
+#define rte_trace_point_emit_ptr(val)
+/** Tracepoint function payload for string datatype */
+#define rte_trace_point_emit_string(val)
+
+#endif /* __DOXYGEN__ */
+
+/** @internal Macro to define maximum emit length of string datatype. */
+#define __RTE_TRACE_EMIT_STRING_LEN_MAX 32
+/** @internal Macro to define event header size. */
+#define __RTE_TRACE_EVENT_HEADER_SZ sizeof(uint64_t)
+
+/**
+ * Enable recording events of the given tracepoint in the trace buffer.
+ *
+ * @param tp
+ * The tracepoint object to enable.
+ * @return
+ * - 0: Success.
+ * - (-ERANGE): Trace object is not registered.
+ */
+__rte_experimental
+int rte_trace_point_enable(rte_trace_point_t *tp);
+
+/**
+ * Disable recording events of the given tracepoint in the trace buffer.
+ *
+ * @param tp
+ * The tracepoint object to disable.
+ * @return
+ * - 0: Success.
+ * - (-ERANGE): Trace object is not registered.
+ */
+__rte_experimental
+int rte_trace_point_disable(rte_trace_point_t *tp);
+
+/**
+ * Test if recording events from the given tracepoint is enabled.
+ *
+ * @param tp
+ * The tracepoint object.
+ * @return
+ * true if tracepoint is enabled, false otherwise.
+ */
+__rte_experimental
+bool rte_trace_point_is_enabled(rte_trace_point_t *tp);
+
+/**
+ * Lookup a tracepoint object from its name.
+ *
+ * @param name
+ * The name of the tracepoint.
+ * @return
+ * The tracepoint object or NULL if not found.
+ */
+__rte_experimental
+rte_trace_point_t *rte_trace_point_lookup(const char *name);
+
+/**
+ * @internal
+ *
+ * Test if the tracepoint fast path compile-time option is enabled.
+ *
+ * @return
+ * true if tracepoint fast path enabled, false otherwise.
+ */
+__rte_experimental
+static __rte_always_inline bool
+__rte_trace_point_fp_is_enabled(void)
+{
+#ifdef RTE_ENABLE_TRACE_FP
+ return true;
+#else
+ return false;
+#endif
+}
+
+/**
+ * @internal
+ *
+ * Allocate trace memory buffer per thread.
+ *
+ */
+__rte_experimental
+void __rte_trace_mem_per_thread_alloc(void);
+
+/**
+ * @internal
+ *
+ * Helper function to emit field.
+ *
+ * @param sz
+ * The tracepoint size.
+ * @param field
+ * The name of the trace event.
+ * @param type
+ * The datatype of the trace event as string.
+ * @return
+ * - 0: Success.
+ * - <0: Failure.
+ */
+__rte_experimental
+void __rte_trace_point_emit_field(size_t sz, const char *field,
+ const char *type);
+
+/**
+ * @internal
+ *
+ * Helper function to register a dynamic tracepoint.
+ * Use RTE_TRACE_POINT_REGISTER macro for tracepoint registration.
+ *
+ * @param trace
+ * The tracepoint object created using RTE_TRACE_POINT_DEFINE.
+ * @param name
+ * The name of the tracepoint object.
+ * @param register_fn
+ * Trace registration function.
+ * @return
+ * - 0: Successfully registered the tracepoint.
+ * - <0: Failure to register the tracepoint.
+ */
+__rte_experimental
+int __rte_trace_point_register(rte_trace_point_t *trace, const char *name,
+ void (*register_fn)(void));
+
+#ifndef __DOXYGEN__
+
+#ifndef _RTE_TRACE_POINT_REGISTER_H_
+#ifdef ALLOW_EXPERIMENTAL_API
+
+#define __RTE_TRACE_EVENT_HEADER_ID_SHIFT (48)
+
+#define __RTE_TRACE_FIELD_SIZE_SHIFT 0
+#define __RTE_TRACE_FIELD_SIZE_MASK (0xffffULL << __RTE_TRACE_FIELD_SIZE_SHIFT)
+#define __RTE_TRACE_FIELD_ID_SHIFT (16)
+#define __RTE_TRACE_FIELD_ID_MASK (0xffffULL << __RTE_TRACE_FIELD_ID_SHIFT)
+#define __RTE_TRACE_FIELD_ENABLE_MASK (1ULL << 63)
+#define __RTE_TRACE_FIELD_ENABLE_DISCARD (1ULL << 62)
+
+struct __rte_trace_stream_header {
+ uint32_t magic;
+ rte_uuid_t uuid;
+ uint32_t lcore_id;
+ char thread_name[__RTE_TRACE_EMIT_STRING_LEN_MAX];
+} __rte_packed;
+
+struct __rte_trace_header {
+ uint32_t offset;
+ uint32_t len;
+ struct __rte_trace_stream_header stream_header;
+ uint8_t mem[];
+};
+
+RTE_DECLARE_PER_LCORE(void *, trace_mem);
+
+static __rte_always_inline void *
+__rte_trace_mem_get(uint64_t in)
+{
+ struct __rte_trace_header *trace = RTE_PER_LCORE(trace_mem);
+ const uint16_t sz = in & __RTE_TRACE_FIELD_SIZE_MASK;
+
+ /* Trace memory is not initialized for this thread */
+ if (unlikely(trace == NULL)) {
+ __rte_trace_mem_per_thread_alloc();
+ trace = RTE_PER_LCORE(trace_mem);
+ if (unlikely(trace == NULL))
+ return NULL;
+ }
+ /* Check the wrap around case */
+ uint32_t offset = trace->offset;
+ if (unlikely((offset + sz) >= trace->len)) {
+ /* Disable the trace event if it in DISCARD mode */
+ if (unlikely(in & __RTE_TRACE_FIELD_ENABLE_DISCARD))
+ return NULL;
+
+ offset = 0;
+ }
+ /* Align to event header size */
+ offset = RTE_ALIGN_CEIL(offset, __RTE_TRACE_EVENT_HEADER_SZ);
+ void *mem = RTE_PTR_ADD(&trace->mem[0], offset);
+ offset += sz;
+ trace->offset = offset;
+
+ return mem;
+}
+
+static __rte_always_inline void *
+__rte_trace_point_emit_ev_header(void *mem, uint64_t in)
+{
+ uint64_t val;
+
+ /* Event header [63:0] = id [63:48] | timestamp [47:0] */
+ val = rte_get_tsc_cycles() &
+ ~(0xffffULL << __RTE_TRACE_EVENT_HEADER_ID_SHIFT);
+ val |= ((in & __RTE_TRACE_FIELD_ID_MASK) <<
+ (__RTE_TRACE_EVENT_HEADER_ID_SHIFT -
+ __RTE_TRACE_FIELD_ID_SHIFT));
+
+ *(uint64_t *)mem = val;
+ return RTE_PTR_ADD(mem, __RTE_TRACE_EVENT_HEADER_SZ);
+}
+
+#define __rte_trace_point_emit_header_generic(t) \
+void *mem; \
+do { \
+ const uint64_t val = __atomic_load_n(t, __ATOMIC_ACQUIRE); \
+ if (likely(!(val & __RTE_TRACE_FIELD_ENABLE_MASK))) \
+ return; \
+ mem = __rte_trace_mem_get(val); \
+ if (unlikely(mem == NULL)) \
+ return; \
+ mem = __rte_trace_point_emit_ev_header(mem, val); \
+} while (0)
+
+#define __rte_trace_point_emit_header_fp(t) \
+ if (!__rte_trace_point_fp_is_enabled()) \
+ return; \
+ __rte_trace_point_emit_header_generic(t)
+
+#define __rte_trace_point_emit(in, type) \
+do { \
+ memcpy(mem, &(in), sizeof(in)); \
+ mem = RTE_PTR_ADD(mem, sizeof(in)); \
+} while (0)
+
+#define rte_trace_point_emit_string(in) \
+do { \
+ if (unlikely(in == NULL)) \
+ return; \
+ rte_strscpy(mem, in, __RTE_TRACE_EMIT_STRING_LEN_MAX); \
+ mem = RTE_PTR_ADD(mem, __RTE_TRACE_EMIT_STRING_LEN_MAX); \
+} while (0)
+
+#else
+
+#define __rte_trace_point_emit_header_generic(t) RTE_SET_USED(t)
+#define __rte_trace_point_emit_header_fp(t) RTE_SET_USED(t)
+#define __rte_trace_point_emit(in, type) RTE_SET_USED(in)
+#define rte_trace_point_emit_string(in) RTE_SET_USED(in)
+
+#endif /* ALLOW_EXPERIMENTAL_API */
+#endif /* _RTE_TRACE_POINT_REGISTER_H_ */
+
+#define rte_trace_point_emit_u64(in) __rte_trace_point_emit(in, uint64_t)
+#define rte_trace_point_emit_i64(in) __rte_trace_point_emit(in, int64_t)
+#define rte_trace_point_emit_u32(in) __rte_trace_point_emit(in, uint32_t)
+#define rte_trace_point_emit_i32(in) __rte_trace_point_emit(in, int32_t)
+#define rte_trace_point_emit_u16(in) __rte_trace_point_emit(in, uint16_t)
+#define rte_trace_point_emit_i16(in) __rte_trace_point_emit(in, int16_t)
+#define rte_trace_point_emit_u8(in) __rte_trace_point_emit(in, uint8_t)
+#define rte_trace_point_emit_i8(in) __rte_trace_point_emit(in, int8_t)
+#define rte_trace_point_emit_int(in) __rte_trace_point_emit(in, int32_t)
+#define rte_trace_point_emit_long(in) __rte_trace_point_emit(in, long)
+#define rte_trace_point_emit_float(in) __rte_trace_point_emit(in, float)
+#define rte_trace_point_emit_double(in) __rte_trace_point_emit(in, double)
+#define rte_trace_point_emit_ptr(in) __rte_trace_point_emit(in, uintptr_t)
+
+#endif /* __DOXYGEN__ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_TRACE_POINT_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_trace_point_register.h b/src/spdk/dpdk/lib/librte_eal/include/rte_trace_point_register.h
new file mode 100644
index 000000000..4e7c25ba1
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_trace_point_register.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#ifndef _RTE_TRACE_POINT_REGISTER_H_
+#define _RTE_TRACE_POINT_REGISTER_H_
+
+#ifdef _RTE_TRACE_POINT_H_
+#error for registration, include this file first before <rte_trace_point.h>
+#endif
+
+#include <rte_per_lcore.h>
+#include <rte_trace_point.h>
+
+RTE_DECLARE_PER_LCORE(volatile int, trace_point_sz);
+
+#define RTE_TRACE_POINT_REGISTER(trace, name) \
+ __rte_trace_point_register(&__##trace, RTE_STR(name), \
+ (void (*)(void)) trace)
+
+#define __rte_trace_point_emit_header_generic(t) \
+ RTE_PER_LCORE(trace_point_sz) = __RTE_TRACE_EVENT_HEADER_SZ
+
+#define __rte_trace_point_emit_header_fp(t) \
+ __rte_trace_point_emit_header_generic(t)
+
+#define __rte_trace_point_emit(in, type) \
+do { \
+ RTE_BUILD_BUG_ON(sizeof(type) != sizeof(typeof(in))); \
+ __rte_trace_point_emit_field(sizeof(type), RTE_STR(in), \
+ RTE_STR(type)); \
+} while (0)
+
+#define rte_trace_point_emit_string(in) \
+do { \
+ RTE_SET_USED(in); \
+ __rte_trace_point_emit_field(__RTE_TRACE_EMIT_STRING_LEN_MAX, \
+ RTE_STR(in)"[32]", "string_bounded_t"); \
+} while (0)
+
+#endif /* _RTE_TRACE_POINT_REGISTER_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_uuid.h b/src/spdk/dpdk/lib/librte_eal/include/rte_uuid.h
new file mode 100644
index 000000000..044afbdfa
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_uuid.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) 1996, 1997, 1998 Theodore Ts'o.
+ */
+/**
+ * @file
+ *
+ * UUID related functions originally from libuuid
+ */
+
+#ifndef _RTE_UUID_H_
+#define _RTE_UUID_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+
+/**
+ * Struct describing a Universal Unique Identifier
+ */
+typedef unsigned char rte_uuid_t[16];
+
+/**
+ * Helper for defining UUID values for id tables.
+ */
+#define RTE_UUID_INIT(a, b, c, d, e) { \
+ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, \
+ ((a) >> 8) & 0xff, (a) & 0xff, \
+ ((b) >> 8) & 0xff, (b) & 0xff, \
+ ((c) >> 8) & 0xff, (c) & 0xff, \
+ ((d) >> 8) & 0xff, (d) & 0xff, \
+ ((e) >> 40) & 0xff, ((e) >> 32) & 0xff, \
+ ((e) >> 24) & 0xff, ((e) >> 16) & 0xff, \
+ ((e) >> 8) & 0xff, (e) & 0xff \
+}
+
+/**
+ * Test if UUID is all zeros.
+ *
+ * @param uu
+ * The uuid to check.
+ * @return
+ * true if uuid is NULL value, false otherwise
+ */
+bool rte_uuid_is_null(const rte_uuid_t uu);
+
+/**
+ * Copy uuid.
+ *
+ * @param dst
+ * Destination uuid
+ * @param src
+ * Source uuid
+ */
+static inline void rte_uuid_copy(rte_uuid_t dst, const rte_uuid_t src)
+{
+ memcpy(dst, src, sizeof(rte_uuid_t));
+}
+
+/**
+ * Compare two UUID's
+ *
+ * @param a
+ * A UUID to compare
+ * @param b
+ * A UUID to compare
+ * @return
+ * returns an integer less than, equal to, or greater than zero if UUID a is
+ * is less than, equal, or greater than UUID b.
+ */
+int rte_uuid_compare(const rte_uuid_t a, const rte_uuid_t b);
+
+/**
+ * Extract UUID from string
+ *
+ * @param in
+ * Pointer to string of characters to convert
+ * @param uu
+ * Destination UUID
+ * @return
+ * Returns 0 on success, and -1 if string is not a valid UUID.
+ */
+int rte_uuid_parse(const char *in, rte_uuid_t uu);
+
+/**
+ * Convert UUID to string
+ *
+ * @param uu
+ * UUID to format
+ * @param out
+ * Resulting string buffer
+ * @param len
+ * Sizeof the available string buffer
+ */
+#define RTE_UUID_STRLEN (36 + 1)
+void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_UUID_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_version.h b/src/spdk/dpdk/lib/librte_eal/include/rte_version.h
new file mode 100644
index 000000000..f7a3a1ebc
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_version.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+/**
+ * @file
+ * Definitions of DPDK version numbers
+ */
+
+#ifndef _RTE_VERSION_H_
+#define _RTE_VERSION_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <rte_common.h>
+
+/**
+ * Macro to compute a version number usable for comparisons
+ */
+#define RTE_VERSION_NUM(a,b,c,d) ((a) << 24 | (b) << 16 | (c) << 8 | (d))
+
+/**
+ * All version numbers in one to compare with RTE_VERSION_NUM()
+ */
+#define RTE_VERSION RTE_VERSION_NUM( \
+ RTE_VER_YEAR, \
+ RTE_VER_MONTH, \
+ RTE_VER_MINOR, \
+ RTE_VER_RELEASE)
+
+/**
+ * Function returning version string
+ * @return
+ * string
+ */
+static inline const char *
+rte_version(void)
+{
+ static char version[32];
+ if (version[0] != 0)
+ return version;
+ if (strlen(RTE_VER_SUFFIX) == 0)
+ snprintf(version, sizeof(version), "%s %d.%02d.%d",
+ RTE_VER_PREFIX,
+ RTE_VER_YEAR,
+ RTE_VER_MONTH,
+ RTE_VER_MINOR);
+ else
+ snprintf(version, sizeof(version), "%s %d.%02d.%d%s%d",
+ RTE_VER_PREFIX,
+ RTE_VER_YEAR,
+ RTE_VER_MONTH,
+ RTE_VER_MINOR,
+ RTE_VER_SUFFIX,
+ RTE_VER_RELEASE);
+ return version;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_VERSION_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/include/rte_vfio.h b/src/spdk/dpdk/lib/librte_eal/include/rte_vfio.h
new file mode 100644
index 000000000..20ed8c45a
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/include/rte_vfio.h
@@ -0,0 +1,360 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 6WIND S.A.
+ */
+
+#ifndef _RTE_VFIO_H_
+#define _RTE_VFIO_H_
+
+/**
+ * @file
+ * RTE VFIO. This library provides various VFIO related utility functions.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+/*
+ * determine if VFIO is present on the system
+ */
+#if !defined(VFIO_PRESENT) && defined(RTE_EAL_VFIO)
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
+#define VFIO_PRESENT
+#endif /* kernel version >= 3.6.0 */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
+#define HAVE_VFIO_DEV_REQ_INTERFACE
+#endif /* kernel version >= 4.0.0 */
+#endif /* RTE_EAL_VFIO */
+
+#ifdef VFIO_PRESENT
+
+#include <linux/vfio.h>
+
+#define VFIO_DIR "/dev/vfio"
+#define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
+#define VFIO_GROUP_FMT "/dev/vfio/%u"
+#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
+#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
+#define VFIO_GET_REGION_IDX(x) (x >> 40)
+#define VFIO_NOIOMMU_MODE \
+ "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
+
+/* NOIOMMU is defined from kernel version 4.5 onwards */
+#ifdef VFIO_NOIOMMU_IOMMU
+#define RTE_VFIO_NOIOMMU VFIO_NOIOMMU_IOMMU
+#else
+#define RTE_VFIO_NOIOMMU 8
+#endif
+
+/*
+ * capabilities are only supported on kernel 4.6+. there were also some API
+ * changes as well, so add a macro to get cap offset.
+ */
+#ifdef VFIO_REGION_INFO_FLAG_CAPS
+#define RTE_VFIO_INFO_FLAG_CAPS VFIO_REGION_INFO_FLAG_CAPS
+#define VFIO_CAP_OFFSET(x) (x->cap_offset)
+#else
+#define RTE_VFIO_INFO_FLAG_CAPS (1 << 3)
+#define VFIO_CAP_OFFSET(x) (x->resv)
+struct vfio_info_cap_header {
+ uint16_t id;
+ uint16_t version;
+ uint32_t next;
+};
+#endif
+
+/* kernels 4.16+ can map BAR containing MSI-X table */
+#ifdef VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
+#define RTE_VFIO_CAP_MSIX_MAPPABLE VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
+#else
+#define RTE_VFIO_CAP_MSIX_MAPPABLE 3
+#endif
+
+#else /* not VFIO_PRESENT */
+
+/* we don't need an actual definition, only pointer is used */
+struct vfio_device_info;
+
+#endif /* VFIO_PRESENT */
+
+#define RTE_VFIO_DEFAULT_CONTAINER_FD (-1)
+
+/**
+ * Setup vfio_cfg for the device identified by its address.
+ * It discovers the configured I/O MMU groups or sets a new one for the device.
+ * If a new groups is assigned, the DMA mapping is performed.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param sysfs_base
+ * sysfs path prefix.
+ *
+ * @param dev_addr
+ * device location.
+ *
+ * @param vfio_dev_fd
+ * VFIO fd.
+ *
+ * @param device_info
+ * Device information.
+ *
+ * @return
+ * 0 on success.
+ * <0 on failure.
+ * >1 if the device cannot be managed this way.
+ */
+int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
+ int *vfio_dev_fd, struct vfio_device_info *device_info);
+
+/**
+ * Release a device mapped to a VFIO-managed I/O MMU group.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param sysfs_base
+ * sysfs path prefix.
+ *
+ * @param dev_addr
+ * device location.
+ *
+ * @param fd
+ * VFIO fd.
+ *
+ * @return
+ * 0 on success.
+ * <0 on failure.
+ */
+int rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd);
+
+/**
+ * Enable a VFIO-related kmod.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param modname
+ * kernel module name.
+ *
+ * @return
+ * 0 on success.
+ * <0 on failure.
+ */
+int rte_vfio_enable(const char *modname);
+
+/**
+ * Check whether a VFIO-related kmod is enabled.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param modname
+ * kernel module name.
+ *
+ * @return
+ * !0 if true.
+ * 0 otherwise.
+ */
+int rte_vfio_is_enabled(const char *modname);
+
+/**
+ * Whether VFIO NOIOMMU mode is enabled.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @return
+ * !0 if true.
+ * 0 otherwise.
+ */
+int rte_vfio_noiommu_is_enabled(void);
+
+/**
+ * Remove group fd from internal VFIO group fd array/
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param vfio_group_fd
+ * VFIO Group FD.
+ *
+ * @return
+ * 0 on success.
+ * <0 on failure.
+ */
+int
+rte_vfio_clear_group(int vfio_group_fd);
+
+/**
+ * Parse IOMMU group number for a device
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param sysfs_base
+ * sysfs path prefix.
+ *
+ * @param dev_addr
+ * device location.
+ *
+ * @param iommu_group_num
+ * iommu group number
+ *
+ * @return
+ * >0 on success
+ * 0 for non-existent group or VFIO
+ * <0 for errors
+ */
+int
+rte_vfio_get_group_num(const char *sysfs_base,
+ const char *dev_addr, int *iommu_group_num);
+
+/**
+ * Open a new VFIO container fd
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @return
+ * > 0 container fd
+ * < 0 for errors
+ */
+int
+rte_vfio_get_container_fd(void);
+
+/**
+ * Open VFIO group fd or get an existing one
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param iommu_group_num
+ * iommu group number
+ *
+ * @return
+ * > 0 group fd
+ * < 0 for errors
+ */
+int
+rte_vfio_get_group_fd(int iommu_group_num);
+
+/**
+ * Create a new container for device binding.
+ *
+ * @note Any newly allocated DPDK memory will not be mapped into these
+ * containers by default, user needs to manage DMA mappings for
+ * any container created by this API.
+ *
+ * @note When creating containers using this API, the container will only be
+ * available in the process that has created it. Sharing containers and
+ * devices between multiple processes is not supported.
+ *
+ * @return
+ * the container fd if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_create(void);
+
+/**
+ * Destroy the container, unbind all vfio groups within it.
+ *
+ * @param container_fd
+ * the container fd to destroy
+ *
+ * @return
+ * 0 if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_destroy(int container_fd);
+
+/**
+ * Bind a IOMMU group to a container.
+ *
+ * @param container_fd
+ * the container's fd
+ *
+ * @param iommu_group_num
+ * the iommu group number to bind to container
+ *
+ * @return
+ * group fd if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_group_bind(int container_fd, int iommu_group_num);
+
+/**
+ * Unbind a IOMMU group from a container.
+ *
+ * @param container_fd
+ * the container fd of container
+ *
+ * @param iommu_group_num
+ * the iommu group number to delete from container
+ *
+ * @return
+ * 0 if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_group_unbind(int container_fd, int iommu_group_num);
+
+/**
+ * Perform DMA mapping for devices in a container.
+ *
+ * @param container_fd
+ * the specified container fd. Use RTE_VFIO_DEFAULT_CONTAINER_FD to
+ * use the default container.
+ *
+ * @param vaddr
+ * Starting virtual address of memory to be mapped.
+ *
+ * @param iova
+ * Starting IOVA address of memory to be mapped.
+ *
+ * @param len
+ * Length of memory segment being mapped.
+ *
+ * @return
+ * 0 if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_dma_map(int container_fd, uint64_t vaddr,
+ uint64_t iova, uint64_t len);
+
+/**
+ * Perform DMA unmapping for devices in a container.
+ *
+ * @param container_fd
+ * the specified container fd. Use RTE_VFIO_DEFAULT_CONTAINER_FD to
+ * use the default container.
+ *
+ * @param vaddr
+ * Starting virtual address of memory to be unmapped.
+ *
+ * @param iova
+ * Starting IOVA address of memory to be unmapped.
+ *
+ * @param len
+ * Length of memory segment being unmapped.
+ *
+ * @return
+ * 0 if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr,
+ uint64_t iova, uint64_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_VFIO_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/Makefile b/src/spdk/dpdk/lib/librte_eal/linux/Makefile
new file mode 100644
index 000000000..48cc34844
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/Makefile
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2019 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+LIB = librte_eal.a
+
+ARCH_DIR ?= $(RTE_ARCH)
+VPATH += $(RTE_SDK)/lib/librte_eal/$(ARCH_DIR)
+VPATH += $(RTE_SDK)/lib/librte_eal/common
+
+CFLAGS += -I$(SRCDIR)/include
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/include
+CFLAGS += $(WERROR_FLAGS) -O3
+
+LDLIBS += -ldl
+LDLIBS += -lpthread
+LDLIBS += -lgcc_s
+LDLIBS += -lrt
+LDLIBS += -lrte_kvargs
+LDLIBS += -lrte_telemetry
+ifeq ($(CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES),y)
+LDLIBS += -lnuma
+endif
+
+EXPORT_MAP := ../rte_eal_version.map
+
+# specific to linux exec-env
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) := eal.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_cpuflags.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_hugepage_info.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_memory.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_thread.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_log.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_vfio.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_vfio_mp_sync.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_memalloc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_debug.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_lcore.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_timer.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_interrupts.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_alarm.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_dev.c
+
+# from common dir
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_lcore.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_timer.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_memzone.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_log.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_launch.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_mcfg.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_memalloc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_memory.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_tailqs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_errno.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_cpuflags.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_hypervisor.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_string_fns.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_hexdump.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_devargs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_class.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_bus.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_dev.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_options.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_thread.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_proc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_fbarray.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_uuid.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_trace.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_trace_ctf.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_trace_points.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_trace_utils.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += rte_malloc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += hotplug_mp.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += malloc_elem.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += malloc_heap.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += malloc_mp.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += rte_keepalive.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += rte_service.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += rte_random.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += rte_reciprocal.c
+
+# from arch dir
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += rte_cpuflags.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += rte_hypervisor.c
+SRCS-$(CONFIG_RTE_ARCH_X86) += rte_spinlock.c
+SRCS-y += rte_cycles.c
+
+CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_eal_thread.o += -Wno-return-type
+endif
+
+INC := rte_kni_common.h
+INC += rte_os.h
+
+SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUX)-include := $(addprefix include/,$(INC))
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/eal.c b/src/spdk/dpdk/lib/librte_eal/linux/eal.c
new file mode 100644
index 000000000..f162124a3
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/eal.c
@@ -0,0 +1,1405 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation.
+ * Copyright(c) 2012-2014 6WIND S.A.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <syslog.h>
+#include <getopt.h>
+#include <sys/file.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <stddef.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+#include <sys/stat.h>
+#if defined(RTE_ARCH_X86)
+#include <sys/io.h>
+#endif
+#include <linux/version.h>
+
+#include <rte_compat.h>
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_errno.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_service_component.h>
+#include <rte_log.h>
+#include <rte_random.h>
+#include <rte_cycles.h>
+#include <rte_string_fns.h>
+#include <rte_cpuflags.h>
+#include <rte_interrupts.h>
+#include <rte_bus.h>
+#include <rte_dev.h>
+#include <rte_devargs.h>
+#include <rte_version.h>
+#include <rte_atomic.h>
+#include <malloc_heap.h>
+#include <rte_vfio.h>
+#include <rte_telemetry.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+#include "eal_hugepages.h"
+#include "eal_memcfg.h"
+#include "eal_trace.h"
+#include "eal_options.h"
+#include "eal_vfio.h"
+#include "hotplug_mp.h"
+
+#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)
+
+#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10)
+
+#define KERNEL_IOMMU_GROUPS_PATH "/sys/kernel/iommu_groups"
+
+/* Allow the application to print its usage message too if set */
+static rte_usage_hook_t rte_application_usage_hook = NULL;
+
+/* early configuration structure, when memory config is not mmapped */
+static struct rte_mem_config early_mem_config;
+
+/* define fd variable here, because file needs to be kept open for the
+ * duration of the program, as we hold a write lock on it in the primary proc */
+static int mem_cfg_fd = -1;
+
+static struct flock wr_lock = {
+ .l_type = F_WRLCK,
+ .l_whence = SEEK_SET,
+ .l_start = offsetof(struct rte_mem_config, memsegs),
+ .l_len = sizeof(early_mem_config.memsegs),
+};
+
+/* Address of global and public configuration */
+static struct rte_config rte_config = {
+ .mem_config = &early_mem_config,
+};
+
+/* internal configuration (per-core) */
+struct lcore_config lcore_config[RTE_MAX_LCORE];
+
+/* internal configuration */
+struct internal_config internal_config;
+
+/* used by rte_rdtsc() */
+int rte_cycles_vmware_tsc_map;
+
+/* platform-specific runtime dir */
+static char runtime_dir[PATH_MAX];
+
+static const char *default_runtime_dir = "/var/run";
+
+int
+eal_create_runtime_dir(void)
+{
+ const char *directory = default_runtime_dir;
+ const char *xdg_runtime_dir = getenv("XDG_RUNTIME_DIR");
+ const char *fallback = "/tmp";
+ char tmp[PATH_MAX];
+ int ret;
+
+ if (getuid() != 0) {
+ /* try XDG path first, fall back to /tmp */
+ if (xdg_runtime_dir != NULL)
+ directory = xdg_runtime_dir;
+ else
+ directory = fallback;
+ }
+ /* create DPDK subdirectory under runtime dir */
+ ret = snprintf(tmp, sizeof(tmp), "%s/dpdk", directory);
+ if (ret < 0 || ret == sizeof(tmp)) {
+ RTE_LOG(ERR, EAL, "Error creating DPDK runtime path name\n");
+ return -1;
+ }
+
+ /* create prefix-specific subdirectory under DPDK runtime dir */
+ ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s",
+ tmp, eal_get_hugefile_prefix());
+ if (ret < 0 || ret == sizeof(runtime_dir)) {
+ RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n");
+ return -1;
+ }
+
+ /* create the path if it doesn't exist. no "mkdir -p" here, so do it
+ * step by step.
+ */
+ ret = mkdir(tmp, 0700);
+ if (ret < 0 && errno != EEXIST) {
+ RTE_LOG(ERR, EAL, "Error creating '%s': %s\n",
+ tmp, strerror(errno));
+ return -1;
+ }
+
+ ret = mkdir(runtime_dir, 0700);
+ if (ret < 0 && errno != EEXIST) {
+ RTE_LOG(ERR, EAL, "Error creating '%s': %s\n",
+ runtime_dir, strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+eal_clean_runtime_dir(void)
+{
+ DIR *dir;
+ struct dirent *dirent;
+ int dir_fd, fd, lck_result;
+ static const char * const filters[] = {
+ "fbarray_*",
+ "mp_socket_*"
+ };
+
+ /* open directory */
+ dir = opendir(runtime_dir);
+ if (!dir) {
+ RTE_LOG(ERR, EAL, "Unable to open runtime directory %s\n",
+ runtime_dir);
+ goto error;
+ }
+ dir_fd = dirfd(dir);
+
+ /* lock the directory before doing anything, to avoid races */
+ if (flock(dir_fd, LOCK_EX) < 0) {
+ RTE_LOG(ERR, EAL, "Unable to lock runtime directory %s\n",
+ runtime_dir);
+ goto error;
+ }
+
+ dirent = readdir(dir);
+ if (!dirent) {
+ RTE_LOG(ERR, EAL, "Unable to read runtime directory %s\n",
+ runtime_dir);
+ goto error;
+ }
+
+ while (dirent != NULL) {
+ unsigned int f_idx;
+ bool skip = true;
+
+ /* skip files that don't match the patterns */
+ for (f_idx = 0; f_idx < RTE_DIM(filters); f_idx++) {
+ const char *filter = filters[f_idx];
+
+ if (fnmatch(filter, dirent->d_name, 0) == 0) {
+ skip = false;
+ break;
+ }
+ }
+ if (skip) {
+ dirent = readdir(dir);
+ continue;
+ }
+
+ /* try and lock the file */
+ fd = openat(dir_fd, dirent->d_name, O_RDONLY);
+
+ /* skip to next file */
+ if (fd == -1) {
+ dirent = readdir(dir);
+ continue;
+ }
+
+ /* non-blocking lock */
+ lck_result = flock(fd, LOCK_EX | LOCK_NB);
+
+ /* if lock succeeds, remove the file */
+ if (lck_result != -1)
+ unlinkat(dir_fd, dirent->d_name, 0);
+ close(fd);
+ dirent = readdir(dir);
+ }
+
+ /* closedir closes dir_fd and drops the lock */
+ closedir(dir);
+ return 0;
+
+error:
+ if (dir)
+ closedir(dir);
+
+ RTE_LOG(ERR, EAL, "Error while clearing runtime dir: %s\n",
+ strerror(errno));
+
+ return -1;
+}
+
+const char *
+rte_eal_get_runtime_dir(void)
+{
+ return runtime_dir;
+}
+
+/* Return user provided mbuf pool ops name */
+const char *
+rte_eal_mbuf_user_pool_ops(void)
+{
+ return internal_config.user_mbuf_pool_ops_name;
+}
+
+/* Return a pointer to the configuration structure */
+struct rte_config *
+rte_eal_get_configuration(void)
+{
+ return &rte_config;
+}
+
+enum rte_iova_mode
+rte_eal_iova_mode(void)
+{
+ return rte_eal_get_configuration()->iova_mode;
+}
+
+/* parse a sysfs (or other) file containing one integer value */
+int
+eal_parse_sysfs_value(const char *filename, unsigned long *val)
+{
+ FILE *f;
+ char buf[BUFSIZ];
+ char *end = NULL;
+
+ if ((f = fopen(filename, "r")) == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+ __func__, filename);
+ return -1;
+ }
+
+ if (fgets(buf, sizeof(buf), f) == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+ __func__, filename);
+ fclose(f);
+ return -1;
+ }
+ *val = strtoul(buf, &end, 0);
+ if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
+ RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
+ __func__, filename);
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+ return 0;
+}
+
+
+/* create memory configuration in shared/mmap memory. Take out
+ * a write lock on the memsegs, so we can auto-detect primary/secondary.
+ * This means we never close the file while running (auto-close on exit).
+ * We also don't lock the whole file, so that in future we can use read-locks
+ * on other parts, e.g. memzones, to detect if there are running secondary
+ * processes. */
+static int
+rte_eal_config_create(void)
+{
+ size_t page_sz = sysconf(_SC_PAGE_SIZE);
+ size_t cfg_len = sizeof(*rte_config.mem_config);
+ size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz);
+ void *rte_mem_cfg_addr, *mapped_mem_cfg_addr;
+ int retval;
+
+ const char *pathname = eal_runtime_config_path();
+
+ if (internal_config.no_shconf)
+ return 0;
+
+ /* map the config before hugepage address so that we don't waste a page */
+ if (internal_config.base_virtaddr != 0)
+ rte_mem_cfg_addr = (void *)
+ RTE_ALIGN_FLOOR(internal_config.base_virtaddr -
+ sizeof(struct rte_mem_config), page_sz);
+ else
+ rte_mem_cfg_addr = NULL;
+
+ if (mem_cfg_fd < 0){
+ mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0600);
+ if (mem_cfg_fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n",
+ pathname);
+ return -1;
+ }
+ }
+
+ retval = ftruncate(mem_cfg_fd, cfg_len);
+ if (retval < 0){
+ close(mem_cfg_fd);
+ mem_cfg_fd = -1;
+ RTE_LOG(ERR, EAL, "Cannot resize '%s' for rte_mem_config\n",
+ pathname);
+ return -1;
+ }
+
+ retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
+ if (retval < 0){
+ close(mem_cfg_fd);
+ mem_cfg_fd = -1;
+ RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another primary "
+ "process running?\n", pathname);
+ return -1;
+ }
+
+ /* reserve space for config */
+ rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr,
+ &cfg_len_aligned, page_sz, 0, 0);
+ if (rte_mem_cfg_addr == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n");
+ close(mem_cfg_fd);
+ mem_cfg_fd = -1;
+ return -1;
+ }
+
+ /* remap the actual file into the space we've just reserved */
+ mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr,
+ cfg_len_aligned, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0);
+ if (mapped_mem_cfg_addr == MAP_FAILED) {
+ munmap(rte_mem_cfg_addr, cfg_len);
+ close(mem_cfg_fd);
+ mem_cfg_fd = -1;
+ RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n");
+ return -1;
+ }
+
+ memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
+ rte_config.mem_config = rte_mem_cfg_addr;
+
+ /* store address of the config in the config itself so that secondary
+ * processes could later map the config into this exact location */
+ rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
+
+ rte_config.mem_config->dma_maskbits = 0;
+
+ return 0;
+}
+
+/* attach to an existing shared memory config */
+static int
+rte_eal_config_attach(void)
+{
+ struct rte_mem_config *mem_config;
+
+ const char *pathname = eal_runtime_config_path();
+
+ if (internal_config.no_shconf)
+ return 0;
+
+ if (mem_cfg_fd < 0){
+ mem_cfg_fd = open(pathname, O_RDWR);
+ if (mem_cfg_fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open '%s' for rte_mem_config\n",
+ pathname);
+ return -1;
+ }
+ }
+
+ /* map it as read-only first */
+ mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config),
+ PROT_READ, MAP_SHARED, mem_cfg_fd, 0);
+ if (mem_config == MAP_FAILED) {
+ close(mem_cfg_fd);
+ mem_cfg_fd = -1;
+ RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+
+ rte_config.mem_config = mem_config;
+
+ return 0;
+}
+
+/* reattach the shared config at exact memory location primary process has it */
+static int
+rte_eal_config_reattach(void)
+{
+ struct rte_mem_config *mem_config;
+ void *rte_mem_cfg_addr;
+
+ if (internal_config.no_shconf)
+ return 0;
+
+ /* save the address primary process has mapped shared config to */
+ rte_mem_cfg_addr = (void *) (uintptr_t) rte_config.mem_config->mem_cfg_addr;
+
+ /* unmap original config */
+ munmap(rte_config.mem_config, sizeof(struct rte_mem_config));
+
+ /* remap the config at proper address */
+ mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr,
+ sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED,
+ mem_cfg_fd, 0);
+
+ close(mem_cfg_fd);
+ mem_cfg_fd = -1;
+
+ if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) {
+ if (mem_config != MAP_FAILED) {
+ /* errno is stale, don't use */
+ RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config at [%p], got [%p]"
+ " - please use '--" OPT_BASE_VIRTADDR
+ "' option\n", rte_mem_cfg_addr, mem_config);
+ munmap(mem_config, sizeof(struct rte_mem_config));
+ return -1;
+ }
+ RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+
+ rte_config.mem_config = mem_config;
+
+ return 0;
+}
+
+/* Detect if we are a primary or a secondary process */
+enum rte_proc_type_t
+eal_proc_type_detect(void)
+{
+ enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
+ const char *pathname = eal_runtime_config_path();
+
+ /* if there no shared config, there can be no secondary processes */
+ if (!internal_config.no_shconf) {
+ /* if we can open the file but not get a write-lock we are a
+ * secondary process. NOTE: if we get a file handle back, we
+ * keep that open and don't close it to prevent a race condition
+ * between multiple opens.
+ */
+ if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+ (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
+ ptype = RTE_PROC_SECONDARY;
+ }
+
+ RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
+ ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
+
+ return ptype;
+}
+
+/* Sets up rte_config structure with the pointer to shared memory config.*/
+static int
+rte_config_init(void)
+{
+ rte_config.process_type = internal_config.process_type;
+
+ switch (rte_config.process_type){
+ case RTE_PROC_PRIMARY:
+ if (rte_eal_config_create() < 0)
+ return -1;
+ eal_mcfg_update_from_internal();
+ break;
+ case RTE_PROC_SECONDARY:
+ if (rte_eal_config_attach() < 0)
+ return -1;
+ eal_mcfg_wait_complete();
+ if (eal_mcfg_check_version() < 0) {
+ RTE_LOG(ERR, EAL, "Primary and secondary process DPDK version mismatch\n");
+ return -1;
+ }
+ if (rte_eal_config_reattach() < 0)
+ return -1;
+ eal_mcfg_update_internal();
+ break;
+ case RTE_PROC_AUTO:
+ case RTE_PROC_INVALID:
+ RTE_LOG(ERR, EAL, "Invalid process type %d\n",
+ rte_config.process_type);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
+static void
+eal_hugedirs_unlock(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
+ {
+ /* skip uninitialized */
+ if (internal_config.hugepage_info[i].lock_descriptor < 0)
+ continue;
+ /* unlock hugepage file */
+ flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN);
+ close(internal_config.hugepage_info[i].lock_descriptor);
+ /* reset the field */
+ internal_config.hugepage_info[i].lock_descriptor = -1;
+ }
+}
+
+/* display usage */
+static void
+eal_usage(const char *prgname)
+{
+ printf("\nUsage: %s ", prgname);
+ eal_common_usage();
+ printf("EAL Linux options:\n"
+ " --"OPT_SOCKET_MEM" Memory to allocate on sockets (comma separated values)\n"
+ " --"OPT_SOCKET_LIMIT" Limit memory allocation on sockets (comma separated values)\n"
+ " --"OPT_HUGE_DIR" Directory where hugetlbfs is mounted\n"
+ " --"OPT_FILE_PREFIX" Prefix for hugepage filenames\n"
+ " --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n"
+ " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n"
+ " --"OPT_LEGACY_MEM" Legacy memory mode (no dynamic allocation, contiguous segments)\n"
+ " --"OPT_SINGLE_FILE_SEGMENTS" Put all hugepage memory in single files\n"
+ " --"OPT_MATCH_ALLOCATIONS" Free hugepages exactly as allocated\n"
+ "\n");
+ /* Allow the application to print its usage message too if hook is set */
+ if ( rte_application_usage_hook ) {
+ printf("===== Application Usage =====\n\n");
+ rte_application_usage_hook(prgname);
+ }
+}
+
+/* Set a per-application usage message */
+rte_usage_hook_t
+rte_set_application_usage_hook( rte_usage_hook_t usage_func )
+{
+ rte_usage_hook_t old_func;
+
+ /* Will be NULL on the first call to denote the last usage routine. */
+ old_func = rte_application_usage_hook;
+ rte_application_usage_hook = usage_func;
+
+ return old_func;
+}
+
+static int
+eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg)
+{
+ char * arg[RTE_MAX_NUMA_NODES];
+ char *end;
+ int arg_num, i, len;
+ uint64_t total_mem = 0;
+
+ len = strnlen(strval, SOCKET_MEM_STRLEN);
+ if (len == SOCKET_MEM_STRLEN) {
+ RTE_LOG(ERR, EAL, "--socket-mem is too long\n");
+ return -1;
+ }
+
+ /* all other error cases will be caught later */
+ if (!isdigit(strval[len-1]))
+ return -1;
+
+ /* split the optarg into separate socket values */
+ arg_num = rte_strsplit(strval, len,
+ arg, RTE_MAX_NUMA_NODES, ',');
+
+ /* if split failed, or 0 arguments */
+ if (arg_num <= 0)
+ return -1;
+
+ /* parse each defined socket option */
+ errno = 0;
+ for (i = 0; i < arg_num; i++) {
+ uint64_t val;
+ end = NULL;
+ val = strtoull(arg[i], &end, 10);
+
+ /* check for invalid input */
+ if ((errno != 0) ||
+ (arg[i][0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+ val <<= 20;
+ total_mem += val;
+ socket_arg[i] = val;
+ }
+
+ return 0;
+}
+
+static int
+eal_parse_vfio_intr(const char *mode)
+{
+ unsigned i;
+ static struct {
+ const char *name;
+ enum rte_intr_mode value;
+ } map[] = {
+ { "legacy", RTE_INTR_MODE_LEGACY },
+ { "msi", RTE_INTR_MODE_MSI },
+ { "msix", RTE_INTR_MODE_MSIX },
+ };
+
+ for (i = 0; i < RTE_DIM(map); i++) {
+ if (!strcmp(mode, map[i].name)) {
+ internal_config.vfio_intr_mode = map[i].value;
+ return 0;
+ }
+ }
+ return -1;
+}
+
+/* Parse the arguments for --log-level only */
+static void
+eal_log_level_parse(int argc, char **argv)
+{
+ int opt;
+ char **argvopt;
+ int option_index;
+ const int old_optind = optind;
+ const int old_optopt = optopt;
+ char * const old_optarg = optarg;
+
+ argvopt = argv;
+ optind = 1;
+
+ while ((opt = getopt_long(argc, argvopt, eal_short_options,
+ eal_long_options, &option_index)) != EOF) {
+
+ int ret;
+
+ /* getopt is not happy, stop right now */
+ if (opt == '?')
+ break;
+
+ ret = (opt == OPT_LOG_LEVEL_NUM) ?
+ eal_parse_common_option(opt, optarg, &internal_config) : 0;
+
+ /* common parser is not happy */
+ if (ret < 0)
+ break;
+ }
+
+ /* restore getopt lib */
+ optind = old_optind;
+ optopt = old_optopt;
+ optarg = old_optarg;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+eal_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ const int old_optind = optind;
+ const int old_optopt = optopt;
+ char * const old_optarg = optarg;
+
+ argvopt = argv;
+ optind = 1;
+
+ while ((opt = getopt_long(argc, argvopt, eal_short_options,
+ eal_long_options, &option_index)) != EOF) {
+
+ /* getopt didn't recognise the option */
+ if (opt == '?') {
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+
+ ret = eal_parse_common_option(opt, optarg, &internal_config);
+ /* common parser is not happy */
+ if (ret < 0) {
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ /* common parser handled this option */
+ if (ret == 0)
+ continue;
+
+ switch (opt) {
+ case 'h':
+ eal_usage(prgname);
+ exit(EXIT_SUCCESS);
+
+ case OPT_HUGE_DIR_NUM:
+ {
+ char *hdir = strdup(optarg);
+ if (hdir == NULL)
+ RTE_LOG(ERR, EAL, "Could not store hugepage directory\n");
+ else {
+ /* free old hugepage dir */
+ if (internal_config.hugepage_dir != NULL)
+ free(internal_config.hugepage_dir);
+ internal_config.hugepage_dir = hdir;
+ }
+ break;
+ }
+ case OPT_FILE_PREFIX_NUM:
+ {
+ char *prefix = strdup(optarg);
+ if (prefix == NULL)
+ RTE_LOG(ERR, EAL, "Could not store file prefix\n");
+ else {
+ /* free old prefix */
+ if (internal_config.hugefile_prefix != NULL)
+ free(internal_config.hugefile_prefix);
+ internal_config.hugefile_prefix = prefix;
+ }
+ break;
+ }
+ case OPT_SOCKET_MEM_NUM:
+ if (eal_parse_socket_arg(optarg,
+ internal_config.socket_mem) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_SOCKET_MEM "\n");
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ internal_config.force_sockets = 1;
+ break;
+
+ case OPT_SOCKET_LIMIT_NUM:
+ if (eal_parse_socket_arg(optarg,
+ internal_config.socket_limit) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_SOCKET_LIMIT "\n");
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ internal_config.force_socket_limits = 1;
+ break;
+
+ case OPT_VFIO_INTR_NUM:
+ if (eal_parse_vfio_intr(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_VFIO_INTR "\n");
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ break;
+
+ case OPT_CREATE_UIO_DEV_NUM:
+ internal_config.create_uio_dev = 1;
+ break;
+
+ case OPT_MBUF_POOL_OPS_NAME_NUM:
+ {
+ char *ops_name = strdup(optarg);
+ if (ops_name == NULL)
+ RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n");
+ else {
+ /* free old ops name */
+ if (internal_config.user_mbuf_pool_ops_name !=
+ NULL)
+ free(internal_config.user_mbuf_pool_ops_name);
+
+ internal_config.user_mbuf_pool_ops_name =
+ ops_name;
+ }
+ break;
+ }
+ case OPT_MATCH_ALLOCATIONS_NUM:
+ internal_config.match_allocations = 1;
+ break;
+
+ default:
+ if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
+ RTE_LOG(ERR, EAL, "Option %c is not supported "
+ "on Linux\n", opt);
+ } else if (opt >= OPT_LONG_MIN_NUM &&
+ opt < OPT_LONG_MAX_NUM) {
+ RTE_LOG(ERR, EAL, "Option %s is not supported "
+ "on Linux\n",
+ eal_long_options[option_index].name);
+ } else {
+ RTE_LOG(ERR, EAL, "Option %d is not supported "
+ "on Linux\n", opt);
+ }
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* create runtime data directory */
+ if (internal_config.no_shconf == 0 &&
+ eal_create_runtime_dir() < 0) {
+ RTE_LOG(ERR, EAL, "Cannot create runtime directory\n");
+ ret = -1;
+ goto out;
+ }
+
+ if (eal_adjust_config(&internal_config) != 0) {
+ ret = -1;
+ goto out;
+ }
+
+ /* sanity checks */
+ if (eal_check_common_options(&internal_config) != 0) {
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+ ret = optind-1;
+
+out:
+ /* restore getopt lib */
+ optind = old_optind;
+ optopt = old_optopt;
+ optarg = old_optarg;
+
+ return ret;
+}
+
+static int
+check_socket(const struct rte_memseg_list *msl, void *arg)
+{
+ int *socket_id = arg;
+
+ if (msl->external)
+ return 0;
+
+ return *socket_id == msl->socket_id;
+}
+
+static void
+eal_check_mem_on_local_socket(void)
+{
+ int socket_id;
+
+ socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);
+
+ if (rte_memseg_list_walk(check_socket, &socket_id) == 0)
+ RTE_LOG(WARNING, EAL, "WARNING: Master core has no memory on local socket!\n");
+}
+
+static int
+sync_func(__rte_unused void *arg)
+{
+ return 0;
+}
+
+/*
+ * Request iopl privilege for all RPL, returns 0 on success
+ * iopl() call is mostly for the i386 architecture. For other architectures,
+ * return -1 to indicate IO privilege can't be changed in this way.
+ */
+int
+rte_eal_iopl_init(void)
+{
+#if defined(RTE_ARCH_X86)
+ if (iopl(3) != 0)
+ return -1;
+#endif
+ return 0;
+}
+
+#ifdef VFIO_PRESENT
+static int rte_eal_vfio_setup(void)
+{
+ if (rte_vfio_enable("vfio"))
+ return -1;
+
+ return 0;
+}
+#endif
+
+static void rte_eal_init_alert(const char *msg)
+{
+ fprintf(stderr, "EAL: FATAL: %s\n", msg);
+ RTE_LOG(ERR, EAL, "%s\n", msg);
+}
+
+/*
+ * On Linux 3.6+, even if VFIO is not loaded, whenever IOMMU is enabled in the
+ * BIOS and in the kernel, /sys/kernel/iommu_groups path will contain kernel
+ * IOMMU groups. If IOMMU is not enabled, that path would be empty.
+ * Therefore, checking if the path is empty will tell us if IOMMU is enabled.
+ */
+static bool
+is_iommu_enabled(void)
+{
+ DIR *dir = opendir(KERNEL_IOMMU_GROUPS_PATH);
+ struct dirent *d;
+ int n = 0;
+
+ /* if directory doesn't exist, assume IOMMU is not enabled */
+ if (dir == NULL)
+ return false;
+
+ while ((d = readdir(dir)) != NULL) {
+ /* skip dot and dot-dot */
+ if (++n > 2)
+ break;
+ }
+ closedir(dir);
+
+ return n > 2;
+}
+
+/* Launch threads, called at application init(). */
+int
+rte_eal_init(int argc, char **argv)
+{
+ int i, fctret, ret;
+ pthread_t thread_id;
+ static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
+ const char *p;
+ static char logid[PATH_MAX];
+ char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+ char thread_name[RTE_MAX_THREAD_NAME_LEN];
+ bool phys_addrs;
+
+ /* checks if the machine is adequate */
+ if (!rte_cpu_is_supported()) {
+ rte_eal_init_alert("unsupported cpu type.");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ if (!rte_atomic32_test_and_set(&run_once)) {
+ rte_eal_init_alert("already called initialization.");
+ rte_errno = EALREADY;
+ return -1;
+ }
+
+ p = strrchr(argv[0], '/');
+ strlcpy(logid, p ? p + 1 : argv[0], sizeof(logid));
+ thread_id = pthread_self();
+
+ eal_reset_internal_config(&internal_config);
+
+ /* set log level as early as possible */
+ eal_log_level_parse(argc, argv);
+
+ /* clone argv to report out later in telemetry */
+ eal_save_args(argc, argv);
+
+ if (rte_eal_cpu_init() < 0) {
+ rte_eal_init_alert("Cannot detect lcores.");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ fctret = eal_parse_args(argc, argv);
+ if (fctret < 0) {
+ rte_eal_init_alert("Invalid 'command line' arguments.");
+ rte_errno = EINVAL;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ if (eal_plugins_init() < 0) {
+ rte_eal_init_alert("Cannot init plugins");
+ rte_errno = EINVAL;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ if (eal_trace_init() < 0) {
+ rte_eal_init_alert("Cannot init trace");
+ rte_errno = EFAULT;
+ return -1;
+ }
+
+ if (eal_option_device_parse()) {
+ rte_errno = ENODEV;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ if (rte_config_init() < 0) {
+ rte_eal_init_alert("Cannot init config");
+ return -1;
+ }
+
+ if (rte_eal_intr_init() < 0) {
+ rte_eal_init_alert("Cannot init interrupt-handling thread");
+ return -1;
+ }
+
+ if (rte_eal_alarm_init() < 0) {
+ rte_eal_init_alert("Cannot init alarm");
+ /* rte_eal_alarm_init sets rte_errno on failure. */
+ return -1;
+ }
+
+ /* Put mp channel init before bus scan so that we can init the vdev
+ * bus through mp channel in the secondary process before the bus scan.
+ */
+ if (rte_mp_channel_init() < 0 && rte_errno != ENOTSUP) {
+ rte_eal_init_alert("failed to init mp channel");
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ rte_errno = EFAULT;
+ return -1;
+ }
+ }
+
+ /* register multi-process action callbacks for hotplug */
+ if (eal_mp_dev_hotplug_init() < 0) {
+ rte_eal_init_alert("failed to register mp callback for hotplug");
+ return -1;
+ }
+
+ if (rte_bus_scan()) {
+ rte_eal_init_alert("Cannot scan the buses for devices");
+ rte_errno = ENODEV;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ phys_addrs = rte_eal_using_phys_addrs() != 0;
+
+ /* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */
+ if (internal_config.iova_mode == RTE_IOVA_DC) {
+ /* autodetect the IOVA mapping mode */
+ enum rte_iova_mode iova_mode = rte_bus_get_iommu_class();
+
+ if (iova_mode == RTE_IOVA_DC) {
+ RTE_LOG(DEBUG, EAL, "Buses did not request a specific IOVA mode.\n");
+
+ if (!phys_addrs) {
+ /* if we have no access to physical addresses,
+ * pick IOVA as VA mode.
+ */
+ iova_mode = RTE_IOVA_VA;
+ RTE_LOG(DEBUG, EAL, "Physical addresses are unavailable, selecting IOVA as VA mode.\n");
+#if defined(RTE_LIBRTE_KNI) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+ } else if (rte_eal_check_module("rte_kni") == 1) {
+ iova_mode = RTE_IOVA_PA;
+ RTE_LOG(DEBUG, EAL, "KNI is loaded, selecting IOVA as PA mode for better KNI performance.\n");
+#endif
+ } else if (is_iommu_enabled()) {
+ /* we have an IOMMU, pick IOVA as VA mode */
+ iova_mode = RTE_IOVA_VA;
+ RTE_LOG(DEBUG, EAL, "IOMMU is available, selecting IOVA as VA mode.\n");
+ } else {
+ /* physical addresses available, and no IOMMU
+ * found, so pick IOVA as PA.
+ */
+ iova_mode = RTE_IOVA_PA;
+ RTE_LOG(DEBUG, EAL, "IOMMU is not available, selecting IOVA as PA mode.\n");
+ }
+ }
+#if defined(RTE_LIBRTE_KNI) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+ /* Workaround for KNI which requires physical address to work
+ * in kernels < 4.10
+ */
+ if (iova_mode == RTE_IOVA_VA &&
+ rte_eal_check_module("rte_kni") == 1) {
+ if (phys_addrs) {
+ iova_mode = RTE_IOVA_PA;
+ RTE_LOG(WARNING, EAL, "Forcing IOVA as 'PA' because KNI module is loaded\n");
+ } else {
+ RTE_LOG(DEBUG, EAL, "KNI can not work since physical addresses are unavailable\n");
+ }
+ }
+#endif
+ rte_eal_get_configuration()->iova_mode = iova_mode;
+ } else {
+ rte_eal_get_configuration()->iova_mode =
+ internal_config.iova_mode;
+ }
+
+ if (rte_eal_iova_mode() == RTE_IOVA_PA && !phys_addrs) {
+ rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available");
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
+ rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");
+
+ if (internal_config.no_hugetlbfs == 0) {
+ /* rte_config isn't initialized yet */
+ ret = internal_config.process_type == RTE_PROC_PRIMARY ?
+ eal_hugepage_info_init() :
+ eal_hugepage_info_read();
+ if (ret < 0) {
+ rte_eal_init_alert("Cannot get hugepage information.");
+ rte_errno = EACCES;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+ }
+
+ if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
+ if (internal_config.no_hugetlbfs)
+ internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
+ }
+
+ if (internal_config.vmware_tsc_map == 1) {
+#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
+ rte_cycles_vmware_tsc_map = 1;
+ RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, "
+ "you must have monitor_control.pseudo_perfctr = TRUE\n");
+#else
+ RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because "
+ "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n");
+#endif
+ }
+
+ if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) {
+ rte_eal_init_alert("Cannot init logging.");
+ rte_errno = ENOMEM;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+#ifdef VFIO_PRESENT
+ if (rte_eal_vfio_setup() < 0) {
+ rte_eal_init_alert("Cannot init VFIO");
+ rte_errno = EAGAIN;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+#endif
+ /* in secondary processes, memory init may allocate additional fbarrays
+ * not present in primary processes, so to avoid any potential issues,
+ * initialize memzones first.
+ */
+ if (rte_eal_memzone_init() < 0) {
+ rte_eal_init_alert("Cannot init memzone");
+ rte_errno = ENODEV;
+ return -1;
+ }
+
+ if (rte_eal_memory_init() < 0) {
+ rte_eal_init_alert("Cannot init memory");
+ rte_errno = ENOMEM;
+ return -1;
+ }
+
+ /* the directories are locked during eal_hugepage_info_init */
+ eal_hugedirs_unlock();
+
+ if (rte_eal_malloc_heap_init() < 0) {
+ rte_eal_init_alert("Cannot init malloc heap");
+ rte_errno = ENODEV;
+ return -1;
+ }
+
+ if (rte_eal_tailqs_init() < 0) {
+ rte_eal_init_alert("Cannot init tail queues for objects");
+ rte_errno = EFAULT;
+ return -1;
+ }
+
+ if (rte_eal_timer_init() < 0) {
+ rte_eal_init_alert("Cannot init HPET or TSC timers");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ eal_check_mem_on_local_socket();
+
+ eal_thread_init_master(rte_config.master_lcore);
+
+ ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+
+ RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
+ rte_config.master_lcore, (uintptr_t)thread_id, cpuset,
+ ret == 0 ? "" : "...");
+
+ RTE_LCORE_FOREACH_SLAVE(i) {
+
+ /*
+ * create communication pipes between master thread
+ * and children
+ */
+ if (pipe(lcore_config[i].pipe_master2slave) < 0)
+ rte_panic("Cannot create pipe\n");
+ if (pipe(lcore_config[i].pipe_slave2master) < 0)
+ rte_panic("Cannot create pipe\n");
+
+ lcore_config[i].state = WAIT;
+
+ /* create a thread for each lcore */
+ ret = pthread_create(&lcore_config[i].thread_id, NULL,
+ eal_thread_loop, NULL);
+ if (ret != 0)
+ rte_panic("Cannot create thread\n");
+
+ /* Set thread_name for aid in debugging. */
+ snprintf(thread_name, sizeof(thread_name),
+ "lcore-slave-%d", i);
+ ret = rte_thread_setname(lcore_config[i].thread_id,
+ thread_name);
+ if (ret != 0)
+ RTE_LOG(DEBUG, EAL,
+ "Cannot set name for lcore thread\n");
+ }
+
+ /*
+ * Launch a dummy function on all slave lcores, so that master lcore
+ * knows they are all ready when this function returns.
+ */
+ rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
+ rte_eal_mp_wait_lcore();
+
+ /* initialize services so vdevs register service during bus_probe. */
+ ret = rte_service_init();
+ if (ret) {
+ rte_eal_init_alert("rte_service_init() failed");
+ rte_errno = ENOEXEC;
+ return -1;
+ }
+
+ /* Probe all the buses and devices/drivers on them */
+ if (rte_bus_probe()) {
+ rte_eal_init_alert("Cannot probe devices");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+#ifdef VFIO_PRESENT
+ /* Register mp action after probe() so that we got enough info */
+ if (rte_vfio_is_enabled("vfio") && vfio_mp_sync_setup() < 0)
+ return -1;
+#endif
+
+ /* initialize default service/lcore mappings and start running. Ignore
+ * -ENOTSUP, as it indicates no service coremask passed to EAL.
+ */
+ ret = rte_service_start_with_defaults();
+ if (ret < 0 && ret != -ENOTSUP) {
+ rte_errno = ENOEXEC;
+ return -1;
+ }
+
+ /*
+ * Clean up unused files in runtime directory. We do this at the end of
+ * init and not at the beginning because we want to clean stuff up
+ * whether we are primary or secondary process, but we cannot remove
+ * primary process' files because secondary should be able to run even
+ * if primary process is dead.
+ *
+ * In no_shconf mode, no runtime directory is created in the first
+ * place, so no cleanup needed.
+ */
+ if (!internal_config.no_shconf && eal_clean_runtime_dir() < 0) {
+ rte_eal_init_alert("Cannot clear runtime directory\n");
+ return -1;
+ }
+ if (!internal_config.no_telemetry) {
+ const char *error_str = NULL;
+ if (rte_telemetry_init(rte_eal_get_runtime_dir(),
+ &internal_config.ctrl_cpuset, &error_str)
+ != 0) {
+ rte_eal_init_alert(error_str);
+ return -1;
+ }
+ if (error_str != NULL)
+ RTE_LOG(NOTICE, EAL, "%s\n", error_str);
+ }
+
+ eal_mcfg_complete();
+
+ return fctret;
+}
+
+static int
+mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ void *arg __rte_unused)
+{
+ /* ms is const, so find this memseg */
+ struct rte_memseg *found;
+
+ if (msl->external)
+ return 0;
+
+ found = rte_mem_virt2memseg(ms->addr, msl);
+
+ found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE;
+
+ return 0;
+}
+
+int
+rte_eal_cleanup(void)
+{
+ /* if we're in a primary process, we need to mark hugepages as freeable
+ * so that finalization can release them back to the system.
+ */
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ rte_memseg_walk(mark_freeable, NULL);
+ rte_service_finalize();
+ rte_mp_channel_cleanup();
+ rte_trace_save();
+ eal_trace_fini();
+ eal_cleanup_config(&internal_config);
+ return 0;
+}
+
+enum rte_proc_type_t
+rte_eal_process_type(void)
+{
+ return rte_config.process_type;
+}
+
+int rte_eal_has_hugepages(void)
+{
+ return ! internal_config.no_hugetlbfs;
+}
+
+int rte_eal_has_pci(void)
+{
+ return !internal_config.no_pci;
+}
+
+int rte_eal_create_uio_dev(void)
+{
+ return internal_config.create_uio_dev;
+}
+
+enum rte_intr_mode
+rte_eal_vfio_intr_mode(void)
+{
+ return internal_config.vfio_intr_mode;
+}
+
+int
+rte_eal_check_module(const char *module_name)
+{
+ char sysfs_mod_name[PATH_MAX];
+ struct stat st;
+ int n;
+
+ if (NULL == module_name)
+ return -1;
+
+ /* Check if there is sysfs mounted */
+ if (stat("/sys/module", &st) != 0) {
+ RTE_LOG(DEBUG, EAL, "sysfs is not mounted! error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+
+ /* A module might be built-in, therefore try sysfs */
+ n = snprintf(sysfs_mod_name, PATH_MAX, "/sys/module/%s", module_name);
+ if (n < 0 || n > PATH_MAX) {
+ RTE_LOG(DEBUG, EAL, "Could not format module path\n");
+ return -1;
+ }
+
+ if (stat(sysfs_mod_name, &st) != 0) {
+ RTE_LOG(DEBUG, EAL, "Module %s not found! error %i (%s)\n",
+ sysfs_mod_name, errno, strerror(errno));
+ return 0;
+ }
+
+ /* Module has been found */
+ return 1;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/eal_alarm.c b/src/spdk/dpdk/lib/librte_eal/linux/eal_alarm.c
new file mode 100644
index 000000000..3252c6fa5
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/eal_alarm.c
@@ -0,0 +1,248 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <sys/time.h>
+#include <sys/timerfd.h>
+
+#include <rte_memory.h>
+#include <rte_interrupts.h>
+#include <rte_alarm.h>
+#include <rte_common.h>
+#include <rte_per_lcore.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
+#include <rte_errno.h>
+#include <rte_spinlock.h>
+#include <rte_eal_trace.h>
+
+#include <eal_private.h>
+
+#ifndef TFD_NONBLOCK
+#include <fcntl.h>
+#define TFD_NONBLOCK O_NONBLOCK
+#endif
+
+#define NS_PER_US 1000
+#define US_PER_MS 1000
+#define MS_PER_S 1000
+#ifndef US_PER_S
+#define US_PER_S (US_PER_MS * MS_PER_S)
+#endif
+
+#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
+#else
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC
+#endif
+
+struct alarm_entry {
+ LIST_ENTRY(alarm_entry) next;
+ struct timeval time;
+ rte_eal_alarm_callback cb_fn;
+ void *cb_arg;
+ volatile uint8_t executing;
+ volatile pthread_t executing_id;
+};
+
+static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
+static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER;
+
+static struct rte_intr_handle intr_handle = {.fd = -1 };
+static int handler_registered = 0;
+static void eal_alarm_callback(void *arg);
+
+int
+rte_eal_alarm_init(void)
+{
+ intr_handle.type = RTE_INTR_HANDLE_ALARM;
+ /* create a timerfd file descriptor */
+ intr_handle.fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
+ if (intr_handle.fd == -1)
+ goto error;
+
+ return 0;
+
+error:
+ rte_errno = errno;
+ return -1;
+}
+
+static void
+eal_alarm_callback(void *arg __rte_unused)
+{
+ struct timespec now;
+ struct alarm_entry *ap;
+
+ rte_spinlock_lock(&alarm_list_lk);
+ while ((ap = LIST_FIRST(&alarm_list)) !=NULL &&
+ clock_gettime(CLOCK_TYPE_ID, &now) == 0 &&
+ (ap->time.tv_sec < now.tv_sec || (ap->time.tv_sec == now.tv_sec &&
+ (ap->time.tv_usec * NS_PER_US) <= now.tv_nsec))) {
+ ap->executing = 1;
+ ap->executing_id = pthread_self();
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ ap->cb_fn(ap->cb_arg);
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ LIST_REMOVE(ap, next);
+ free(ap);
+ }
+
+ if (!LIST_EMPTY(&alarm_list)) {
+ struct itimerspec atime = { .it_interval = { 0, 0 } };
+
+ ap = LIST_FIRST(&alarm_list);
+ atime.it_value.tv_sec = ap->time.tv_sec;
+ atime.it_value.tv_nsec = ap->time.tv_usec * NS_PER_US;
+ /* perform borrow for subtraction if necessary */
+ if (now.tv_nsec > (ap->time.tv_usec * NS_PER_US))
+ atime.it_value.tv_sec--, atime.it_value.tv_nsec += US_PER_S * NS_PER_US;
+
+ atime.it_value.tv_sec -= now.tv_sec;
+ atime.it_value.tv_nsec -= now.tv_nsec;
+ timerfd_settime(intr_handle.fd, 0, &atime, NULL);
+ }
+ rte_spinlock_unlock(&alarm_list_lk);
+}
+
+int
+rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
+{
+ struct timespec now;
+ int ret = 0;
+ struct alarm_entry *ap, *new_alarm;
+
+ /* Check parameters, including that us won't cause a uint64_t overflow */
+ if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL)
+ return -EINVAL;
+
+ new_alarm = calloc(1, sizeof(*new_alarm));
+ if (new_alarm == NULL)
+ return -ENOMEM;
+
+ /* use current time to calculate absolute time of alarm */
+ clock_gettime(CLOCK_TYPE_ID, &now);
+
+ new_alarm->cb_fn = cb_fn;
+ new_alarm->cb_arg = cb_arg;
+ new_alarm->time.tv_usec = ((now.tv_nsec / NS_PER_US) + us) % US_PER_S;
+ new_alarm->time.tv_sec = now.tv_sec + (((now.tv_nsec / NS_PER_US) + us) / US_PER_S);
+
+ rte_spinlock_lock(&alarm_list_lk);
+ if (!handler_registered) {
+ /* registration can fail, callback can be registered later */
+ if (rte_intr_callback_register(&intr_handle,
+ eal_alarm_callback, NULL) == 0)
+ handler_registered = 1;
+ }
+
+ if (LIST_EMPTY(&alarm_list))
+ LIST_INSERT_HEAD(&alarm_list, new_alarm, next);
+ else {
+ LIST_FOREACH(ap, &alarm_list, next) {
+ if (ap->time.tv_sec > new_alarm->time.tv_sec ||
+ (ap->time.tv_sec == new_alarm->time.tv_sec &&
+ ap->time.tv_usec > new_alarm->time.tv_usec)){
+ LIST_INSERT_BEFORE(ap, new_alarm, next);
+ break;
+ }
+ if (LIST_NEXT(ap, next) == NULL) {
+ LIST_INSERT_AFTER(ap, new_alarm, next);
+ break;
+ }
+ }
+ }
+
+ if (LIST_FIRST(&alarm_list) == new_alarm) {
+ struct itimerspec alarm_time = {
+ .it_interval = {0, 0},
+ .it_value = {
+ .tv_sec = us / US_PER_S,
+ .tv_nsec = (us % US_PER_S) * NS_PER_US,
+ },
+ };
+ ret |= timerfd_settime(intr_handle.fd, 0, &alarm_time, NULL);
+ }
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ rte_eal_trace_alarm_set(us, cb_fn, cb_arg, ret);
+ return ret;
+}
+
+int
+rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
+{
+ struct alarm_entry *ap, *ap_prev;
+ int count = 0;
+ int err = 0;
+ int executing;
+
+ if (!cb_fn) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ do {
+ executing = 0;
+ rte_spinlock_lock(&alarm_list_lk);
+ /* remove any matches at the start of the list */
+ while ((ap = LIST_FIRST(&alarm_list)) != NULL &&
+ cb_fn == ap->cb_fn &&
+ (cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
+
+ if (ap->executing == 0) {
+ LIST_REMOVE(ap, next);
+ free(ap);
+ count++;
+ } else {
+ /* If calling from other context, mark that alarm is executing
+ * so loop can spin till it finish. Otherwise we are trying to
+ * cancel our self - mark it by EINPROGRESS */
+ if (pthread_equal(ap->executing_id, pthread_self()) == 0)
+ executing++;
+ else
+ err = EINPROGRESS;
+
+ break;
+ }
+ }
+ ap_prev = ap;
+
+ /* now go through list, removing entries not at start */
+ LIST_FOREACH(ap, &alarm_list, next) {
+ /* this won't be true first time through */
+ if (cb_fn == ap->cb_fn &&
+ (cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
+
+ if (ap->executing == 0) {
+ LIST_REMOVE(ap, next);
+ free(ap);
+ count++;
+ ap = ap_prev;
+ } else if (pthread_equal(ap->executing_id, pthread_self()) == 0)
+ executing++;
+ else
+ err = EINPROGRESS;
+ }
+ ap_prev = ap;
+ }
+ rte_spinlock_unlock(&alarm_list_lk);
+ } while (executing != 0);
+
+ if (count == 0 && err == 0)
+ rte_errno = ENOENT;
+ else if (err)
+ rte_errno = err;
+
+ rte_eal_trace_alarm_cancel(cb_fn, cb_arg, count);
+ return count;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/eal_cpuflags.c b/src/spdk/dpdk/lib/librte_eal/linux/eal_cpuflags.c
new file mode 100644
index 000000000..d38296e1e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/eal_cpuflags.c
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Red Hat, Inc.
+ */
+
+#include <elf.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
+#if __GLIBC_PREREQ(2, 16)
+#include <sys/auxv.h>
+#define HAS_AUXV 1
+#endif
+#endif
+
+#include <rte_cpuflags.h>
+
+#ifndef HAS_AUXV
+static unsigned long
+getauxval(unsigned long type __rte_unused)
+{
+ errno = ENOTSUP;
+ return 0;
+}
+#endif
+
+#ifdef RTE_ARCH_64
+typedef Elf64_auxv_t Internal_Elfx_auxv_t;
+#else
+typedef Elf32_auxv_t Internal_Elfx_auxv_t;
+#endif
+
+/**
+ * Provides a method for retrieving values from the auxiliary vector and
+ * possibly running a string comparison.
+ *
+ * @return Always returns a result. When the result is 0, check errno
+ * to see if an error occurred during processing.
+ */
+static unsigned long
+_rte_cpu_getauxval(unsigned long type, const char *str)
+{
+ unsigned long val;
+
+ errno = 0;
+ val = getauxval(type);
+
+ if (!val && (errno == ENOTSUP || errno == ENOENT)) {
+ int auxv_fd = open("/proc/self/auxv", O_RDONLY);
+ Internal_Elfx_auxv_t auxv;
+
+ if (auxv_fd == -1)
+ return 0;
+
+ errno = ENOENT;
+ while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) {
+ if (auxv.a_type == type) {
+ errno = 0;
+ val = auxv.a_un.a_val;
+ if (str)
+ val = strcmp((const char *)val, str);
+ break;
+ }
+ }
+ close(auxv_fd);
+ }
+
+ return val;
+}
+
+unsigned long
+rte_cpu_getauxval(unsigned long type)
+{
+ return _rte_cpu_getauxval(type, NULL);
+}
+
+int
+rte_cpu_strcmp_auxval(unsigned long type, const char *str)
+{
+ return _rte_cpu_getauxval(type, str);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/eal_debug.c b/src/spdk/dpdk/lib/librte_eal/linux/eal_debug.c
new file mode 100644
index 000000000..5d92500bf
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/eal_debug.c
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifdef RTE_BACKTRACE
+#include <execinfo.h>
+#endif
+#include <stdarg.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_common.h>
+#include <rte_eal.h>
+
+#define BACKTRACE_SIZE 256
+
+/* dump the stack of the calling core */
+void rte_dump_stack(void)
+{
+#ifdef RTE_BACKTRACE
+ void *func[BACKTRACE_SIZE];
+ char **symb = NULL;
+ int size;
+
+ size = backtrace(func, BACKTRACE_SIZE);
+ symb = backtrace_symbols(func, size);
+
+ if (symb == NULL)
+ return;
+
+ while (size > 0) {
+ rte_log(RTE_LOG_ERR, RTE_LOGTYPE_EAL,
+ "%d: [%s]\n", size, symb[size - 1]);
+ size --;
+ }
+
+ free(symb);
+#endif /* RTE_BACKTRACE */
+}
+
+/* not implemented in this environment */
+void rte_dump_registers(void)
+{
+ return;
+}
+
+/* call abort(), it will generate a coredump if enabled */
+void __rte_panic(const char *funcname, const char *format, ...)
+{
+ va_list ap;
+
+ rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname);
+ va_start(ap, format);
+ rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap);
+ va_end(ap);
+ rte_dump_stack();
+ rte_dump_registers();
+ abort();
+}
+
+/*
+ * Like rte_panic this terminates the application. However, no traceback is
+ * provided and no core-dump is generated.
+ */
+void
+rte_exit(int exit_code, const char *format, ...)
+{
+ va_list ap;
+
+ if (exit_code != 0)
+ RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\n"
+ " Cause: ", exit_code);
+
+ va_start(ap, format);
+ rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap);
+ va_end(ap);
+
+#ifndef RTE_EAL_ALWAYS_PANIC_ON_ERROR
+ if (rte_eal_cleanup() != 0)
+ RTE_LOG(CRIT, EAL,
+ "EAL could not release all resources\n");
+ exit(exit_code);
+#else
+ rte_dump_stack();
+ rte_dump_registers();
+ abort();
+#endif
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/eal_dev.c b/src/spdk/dpdk/lib/librte_eal/linux/eal_dev.c
new file mode 100644
index 000000000..83c9cd660
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/eal_dev.c
@@ -0,0 +1,396 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/socket.h>
+#include <linux/netlink.h>
+
+#include <rte_string_fns.h>
+#include <rte_log.h>
+#include <rte_compat.h>
+#include <rte_dev.h>
+#include <rte_malloc.h>
+#include <rte_interrupts.h>
+#include <rte_alarm.h>
+#include <rte_bus.h>
+#include <rte_eal.h>
+#include <rte_spinlock.h>
+#include <rte_errno.h>
+
+#include "eal_private.h"
+
+static struct rte_intr_handle intr_handle = {.fd = -1 };
+static bool monitor_started;
+static bool hotplug_handle;
+
+#define EAL_UEV_MSG_LEN 4096
+#define EAL_UEV_MSG_ELEM_LEN 128
+
+/*
+ * spinlock for device hot-unplug failure handling. If it try to access bus or
+ * device, such as handle sigbus on bus or handle memory failure for device
+ * just need to use this lock. It could protect the bus and the device to avoid
+ * race condition.
+ */
+static rte_spinlock_t failure_handle_lock = RTE_SPINLOCK_INITIALIZER;
+
+static struct sigaction sigbus_action_old;
+
+static int sigbus_need_recover;
+
+static void dev_uev_handler(__rte_unused void *param);
+
+/* identify the system layer which reports this event. */
+enum eal_dev_event_subsystem {
+ EAL_DEV_EVENT_SUBSYSTEM_PCI, /* PCI bus device event */
+ EAL_DEV_EVENT_SUBSYSTEM_UIO, /* UIO driver device event */
+ EAL_DEV_EVENT_SUBSYSTEM_VFIO, /* VFIO driver device event */
+ EAL_DEV_EVENT_SUBSYSTEM_MAX
+};
+
+static void
+sigbus_action_recover(void)
+{
+ if (sigbus_need_recover) {
+ sigaction(SIGBUS, &sigbus_action_old, NULL);
+ sigbus_need_recover = 0;
+ }
+}
+
+static void sigbus_handler(int signum, siginfo_t *info,
+ void *ctx __rte_unused)
+{
+ int ret;
+
+ RTE_LOG(DEBUG, EAL, "Thread catch SIGBUS, fault address:%p\n",
+ info->si_addr);
+
+ rte_spinlock_lock(&failure_handle_lock);
+ ret = rte_bus_sigbus_handler(info->si_addr);
+ rte_spinlock_unlock(&failure_handle_lock);
+ if (ret == -1) {
+ rte_exit(EXIT_FAILURE,
+ "Failed to handle SIGBUS for hot-unplug, "
+ "(rte_errno: %s)!", strerror(rte_errno));
+ } else if (ret == 1) {
+ if (sigbus_action_old.sa_flags == SA_SIGINFO
+ && sigbus_action_old.sa_sigaction) {
+ (*(sigbus_action_old.sa_sigaction))(signum,
+ info, ctx);
+ } else if (sigbus_action_old.sa_flags != SA_SIGINFO
+ && sigbus_action_old.sa_handler) {
+ (*(sigbus_action_old.sa_handler))(signum);
+ } else {
+ rte_exit(EXIT_FAILURE,
+ "Failed to handle generic SIGBUS!");
+ }
+ }
+
+ RTE_LOG(DEBUG, EAL, "Success to handle SIGBUS for hot-unplug!\n");
+}
+
+static int cmp_dev_name(const struct rte_device *dev,
+ const void *_name)
+{
+ const char *name = _name;
+
+ return strcmp(dev->name, name);
+}
+
+static int
+dev_uev_socket_fd_create(void)
+{
+ struct sockaddr_nl addr;
+ int ret;
+
+ intr_handle.fd = socket(PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC |
+ SOCK_NONBLOCK,
+ NETLINK_KOBJECT_UEVENT);
+ if (intr_handle.fd < 0) {
+ RTE_LOG(ERR, EAL, "create uevent fd failed.\n");
+ return -1;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.nl_family = AF_NETLINK;
+ addr.nl_pid = 0;
+ addr.nl_groups = 0xffffffff;
+
+ ret = bind(intr_handle.fd, (struct sockaddr *) &addr, sizeof(addr));
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Failed to bind uevent socket.\n");
+ goto err;
+ }
+
+ return 0;
+err:
+ close(intr_handle.fd);
+ intr_handle.fd = -1;
+ return ret;
+}
+
+static int
+dev_uev_parse(const char *buf, struct rte_dev_event *event, int length)
+{
+ char action[EAL_UEV_MSG_ELEM_LEN];
+ char subsystem[EAL_UEV_MSG_ELEM_LEN];
+ char pci_slot_name[EAL_UEV_MSG_ELEM_LEN];
+ int i = 0;
+
+ memset(action, 0, EAL_UEV_MSG_ELEM_LEN);
+ memset(subsystem, 0, EAL_UEV_MSG_ELEM_LEN);
+ memset(pci_slot_name, 0, EAL_UEV_MSG_ELEM_LEN);
+
+ while (i < length) {
+ for (; i < length; i++) {
+ if (*buf)
+ break;
+ buf++;
+ }
+ /**
+ * check device uevent from kernel side, no need to check
+ * uevent from udev.
+ */
+ if (!strncmp(buf, "libudev", 7)) {
+ buf += 7;
+ i += 7;
+ return -1;
+ }
+ if (!strncmp(buf, "ACTION=", 7)) {
+ buf += 7;
+ i += 7;
+ strlcpy(action, buf, sizeof(action));
+ } else if (!strncmp(buf, "SUBSYSTEM=", 10)) {
+ buf += 10;
+ i += 10;
+ strlcpy(subsystem, buf, sizeof(subsystem));
+ } else if (!strncmp(buf, "PCI_SLOT_NAME=", 14)) {
+ buf += 14;
+ i += 14;
+ strlcpy(pci_slot_name, buf, sizeof(subsystem));
+ event->devname = strdup(pci_slot_name);
+ }
+ for (; i < length; i++) {
+ if (*buf == '\0')
+ break;
+ buf++;
+ }
+ }
+
+ /* parse the subsystem layer */
+ if (!strncmp(subsystem, "uio", 3))
+ event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_UIO;
+ else if (!strncmp(subsystem, "pci", 3))
+ event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_PCI;
+ else if (!strncmp(subsystem, "vfio", 4))
+ event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_VFIO;
+ else
+ return -1;
+
+ /* parse the action type */
+ if (!strncmp(action, "add", 3))
+ event->type = RTE_DEV_EVENT_ADD;
+ else if (!strncmp(action, "remove", 6))
+ event->type = RTE_DEV_EVENT_REMOVE;
+ else
+ return -1;
+ return 0;
+}
+
+static void
+dev_delayed_unregister(void *param)
+{
+ rte_intr_callback_unregister(&intr_handle, dev_uev_handler, param);
+ close(intr_handle.fd);
+ intr_handle.fd = -1;
+}
+
+static void
+dev_uev_handler(__rte_unused void *param)
+{
+ struct rte_dev_event uevent;
+ int ret;
+ char buf[EAL_UEV_MSG_LEN];
+ struct rte_bus *bus;
+ struct rte_device *dev;
+ const char *busname = "";
+
+ memset(&uevent, 0, sizeof(struct rte_dev_event));
+ memset(buf, 0, EAL_UEV_MSG_LEN);
+
+ ret = recv(intr_handle.fd, buf, EAL_UEV_MSG_LEN, MSG_DONTWAIT);
+ if (ret < 0 && errno == EAGAIN)
+ return;
+ else if (ret <= 0) {
+ /* connection is closed or broken, can not up again. */
+ RTE_LOG(ERR, EAL, "uevent socket connection is broken.\n");
+ rte_eal_alarm_set(1, dev_delayed_unregister, NULL);
+ return;
+ }
+
+ ret = dev_uev_parse(buf, &uevent, EAL_UEV_MSG_LEN);
+ if (ret < 0) {
+ RTE_LOG(DEBUG, EAL, "It is not an valid event "
+ "that need to be handle.\n");
+ return;
+ }
+
+ RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
+ uevent.devname, uevent.type, uevent.subsystem);
+
+ switch (uevent.subsystem) {
+ case EAL_DEV_EVENT_SUBSYSTEM_PCI:
+ case EAL_DEV_EVENT_SUBSYSTEM_UIO:
+ busname = "pci";
+ break;
+ default:
+ break;
+ }
+
+ if (uevent.devname) {
+ if (uevent.type == RTE_DEV_EVENT_REMOVE && hotplug_handle) {
+ rte_spinlock_lock(&failure_handle_lock);
+ bus = rte_bus_find_by_name(busname);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
+ busname);
+ goto failure_handle_err;
+ }
+
+ dev = bus->find_device(NULL, cmp_dev_name,
+ uevent.devname);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find device (%s) on "
+ "bus (%s)\n", uevent.devname, busname);
+ goto failure_handle_err;
+ }
+
+ ret = bus->hot_unplug_handler(dev);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Can not handle hot-unplug "
+ "for device (%s)\n", dev->name);
+ }
+ rte_spinlock_unlock(&failure_handle_lock);
+ }
+ rte_dev_event_callback_process(uevent.devname, uevent.type);
+ }
+
+ return;
+
+failure_handle_err:
+ rte_spinlock_unlock(&failure_handle_lock);
+}
+
+int
+rte_dev_event_monitor_start(void)
+{
+ int ret;
+
+ if (monitor_started)
+ return 0;
+
+ ret = dev_uev_socket_fd_create();
+ if (ret) {
+ RTE_LOG(ERR, EAL, "error create device event fd.\n");
+ return -1;
+ }
+
+ intr_handle.type = RTE_INTR_HANDLE_DEV_EVENT;
+ ret = rte_intr_callback_register(&intr_handle, dev_uev_handler, NULL);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, "fail to register uevent callback.\n");
+ return -1;
+ }
+
+ monitor_started = true;
+
+ return 0;
+}
+
+int
+rte_dev_event_monitor_stop(void)
+{
+ int ret;
+
+ if (!monitor_started)
+ return 0;
+
+ ret = rte_intr_callback_unregister(&intr_handle, dev_uev_handler,
+ (void *)-1);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "fail to unregister uevent callback.\n");
+ return ret;
+ }
+
+ close(intr_handle.fd);
+ intr_handle.fd = -1;
+ monitor_started = false;
+
+ return 0;
+}
+
+int
+dev_sigbus_handler_register(void)
+{
+ sigset_t mask;
+ struct sigaction action;
+
+ rte_errno = 0;
+
+ if (sigbus_need_recover)
+ return 0;
+
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGBUS);
+ action.sa_flags = SA_SIGINFO;
+ action.sa_mask = mask;
+ action.sa_sigaction = sigbus_handler;
+ sigbus_need_recover = !sigaction(SIGBUS, &action, &sigbus_action_old);
+
+ return rte_errno;
+}
+
+int
+dev_sigbus_handler_unregister(void)
+{
+ rte_errno = 0;
+
+ sigbus_action_recover();
+
+ return rte_errno;
+}
+
+int
+rte_dev_hotplug_handle_enable(void)
+{
+ int ret = 0;
+
+ ret = dev_sigbus_handler_register();
+ if (ret < 0)
+ RTE_LOG(ERR, EAL,
+ "fail to register sigbus handler for devices.\n");
+
+ hotplug_handle = true;
+
+ return ret;
+}
+
+int
+rte_dev_hotplug_handle_disable(void)
+{
+ int ret = 0;
+
+ ret = dev_sigbus_handler_unregister();
+ if (ret < 0)
+ RTE_LOG(ERR, EAL,
+ "fail to unregister sigbus handler for devices.\n");
+
+ hotplug_handle = false;
+
+ return ret;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/eal_hugepage_info.c b/src/spdk/dpdk/lib/librte_eal/linux/eal_hugepage_info.c
new file mode 100644
index 000000000..91a4fede7
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/eal_hugepage_info.c
@@ -0,0 +1,547 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <string.h>
+#include <sys/types.h>
+#include <sys/file.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <fnmatch.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+#include <sys/stat.h>
+
+#include <linux/mman.h> /* for hugetlb-related flags */
+
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_debug.h>
+#include <rte_log.h>
+#include <rte_common.h>
+#include "rte_string_fns.h"
+#include "eal_internal_cfg.h"
+#include "eal_hugepages.h"
+#include "eal_filesystem.h"
+
+static const char sys_dir_path[] = "/sys/kernel/mm/hugepages";
+static const char sys_pages_numa_dir_path[] = "/sys/devices/system/node";
+
+/*
+ * Uses mmap to create a shared memory area for storage of data
+ * Used in this file to store the hugepage file map on disk
+ */
+static void *
+map_shared_memory(const char *filename, const size_t mem_size, int flags)
+{
+ void *retval;
+ int fd = open(filename, flags, 0600);
+ if (fd < 0)
+ return NULL;
+ if (ftruncate(fd, mem_size) < 0) {
+ close(fd);
+ return NULL;
+ }
+ retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ close(fd);
+ return retval;
+}
+
+static void *
+open_shared_memory(const char *filename, const size_t mem_size)
+{
+ return map_shared_memory(filename, mem_size, O_RDWR);
+}
+
+static void *
+create_shared_memory(const char *filename, const size_t mem_size)
+{
+ return map_shared_memory(filename, mem_size, O_RDWR | O_CREAT);
+}
+
+static int get_hp_sysfs_value(const char *subdir, const char *file, unsigned long *val)
+{
+ char path[PATH_MAX];
+
+ snprintf(path, sizeof(path), "%s/%s/%s",
+ sys_dir_path, subdir, file);
+ return eal_parse_sysfs_value(path, val);
+}
+
+/* this function is only called from eal_hugepage_info_init which itself
+ * is only called from a primary process */
+static uint32_t
+get_num_hugepages(const char *subdir)
+{
+ unsigned long resv_pages, num_pages, over_pages, surplus_pages;
+ const char *nr_hp_file = "free_hugepages";
+ const char *nr_rsvd_file = "resv_hugepages";
+ const char *nr_over_file = "nr_overcommit_hugepages";
+ const char *nr_splus_file = "surplus_hugepages";
+
+ /* first, check how many reserved pages kernel reports */
+ if (get_hp_sysfs_value(subdir, nr_rsvd_file, &resv_pages) < 0)
+ return 0;
+
+ if (get_hp_sysfs_value(subdir, nr_hp_file, &num_pages) < 0)
+ return 0;
+
+ if (get_hp_sysfs_value(subdir, nr_over_file, &over_pages) < 0)
+ over_pages = 0;
+
+ if (get_hp_sysfs_value(subdir, nr_splus_file, &surplus_pages) < 0)
+ surplus_pages = 0;
+
+ /* adjust num_pages */
+ if (num_pages >= resv_pages)
+ num_pages -= resv_pages;
+ else if (resv_pages)
+ num_pages = 0;
+
+ if (over_pages >= surplus_pages)
+ over_pages -= surplus_pages;
+ else
+ over_pages = 0;
+
+ if (num_pages == 0 && over_pages == 0)
+ RTE_LOG(WARNING, EAL, "No available hugepages reported in %s\n",
+ subdir);
+
+ num_pages += over_pages;
+ if (num_pages < over_pages) /* overflow */
+ num_pages = UINT32_MAX;
+
+ /* we want to return a uint32_t and more than this looks suspicious
+ * anyway ... */
+ if (num_pages > UINT32_MAX)
+ num_pages = UINT32_MAX;
+
+ return num_pages;
+}
+
+static uint32_t
+get_num_hugepages_on_node(const char *subdir, unsigned int socket)
+{
+ char path[PATH_MAX], socketpath[PATH_MAX];
+ DIR *socketdir;
+ unsigned long num_pages = 0;
+ const char *nr_hp_file = "free_hugepages";
+
+ snprintf(socketpath, sizeof(socketpath), "%s/node%u/hugepages",
+ sys_pages_numa_dir_path, socket);
+
+ socketdir = opendir(socketpath);
+ if (socketdir) {
+ /* Keep calm and carry on */
+ closedir(socketdir);
+ } else {
+ /* Can't find socket dir, so ignore it */
+ return 0;
+ }
+
+ snprintf(path, sizeof(path), "%s/%s/%s",
+ socketpath, subdir, nr_hp_file);
+ if (eal_parse_sysfs_value(path, &num_pages) < 0)
+ return 0;
+
+ if (num_pages == 0)
+ RTE_LOG(WARNING, EAL, "No free hugepages reported in %s\n",
+ subdir);
+
+ /*
+ * we want to return a uint32_t and more than this looks suspicious
+ * anyway ...
+ */
+ if (num_pages > UINT32_MAX)
+ num_pages = UINT32_MAX;
+
+ return num_pages;
+}
+
+static uint64_t
+get_default_hp_size(void)
+{
+ const char proc_meminfo[] = "/proc/meminfo";
+ const char str_hugepagesz[] = "Hugepagesize:";
+ unsigned hugepagesz_len = sizeof(str_hugepagesz) - 1;
+ char buffer[256];
+ unsigned long long size = 0;
+
+ FILE *fd = fopen(proc_meminfo, "r");
+ if (fd == NULL)
+ rte_panic("Cannot open %s\n", proc_meminfo);
+ while(fgets(buffer, sizeof(buffer), fd)){
+ if (strncmp(buffer, str_hugepagesz, hugepagesz_len) == 0){
+ size = rte_str_to_size(&buffer[hugepagesz_len]);
+ break;
+ }
+ }
+ fclose(fd);
+ if (size == 0)
+ rte_panic("Cannot get default hugepage size from %s\n", proc_meminfo);
+ return size;
+}
+
+static int
+get_hugepage_dir(uint64_t hugepage_sz, char *hugedir, int len)
+{
+ enum proc_mount_fieldnames {
+ DEVICE = 0,
+ MOUNTPT,
+ FSTYPE,
+ OPTIONS,
+ _FIELDNAME_MAX
+ };
+ static uint64_t default_size = 0;
+ const char proc_mounts[] = "/proc/mounts";
+ const char hugetlbfs_str[] = "hugetlbfs";
+ const size_t htlbfs_str_len = sizeof(hugetlbfs_str) - 1;
+ const char pagesize_opt[] = "pagesize=";
+ const size_t pagesize_opt_len = sizeof(pagesize_opt) - 1;
+ const char split_tok = ' ';
+ char *splitstr[_FIELDNAME_MAX];
+ char buf[BUFSIZ];
+ int retval = -1;
+
+ FILE *fd = fopen(proc_mounts, "r");
+ if (fd == NULL)
+ rte_panic("Cannot open %s\n", proc_mounts);
+
+ if (default_size == 0)
+ default_size = get_default_hp_size();
+
+ while (fgets(buf, sizeof(buf), fd)){
+ if (rte_strsplit(buf, sizeof(buf), splitstr, _FIELDNAME_MAX,
+ split_tok) != _FIELDNAME_MAX) {
+ RTE_LOG(ERR, EAL, "Error parsing %s\n", proc_mounts);
+ break; /* return NULL */
+ }
+
+ /* we have a specified --huge-dir option, only examine that dir */
+ if (internal_config.hugepage_dir != NULL &&
+ strcmp(splitstr[MOUNTPT], internal_config.hugepage_dir) != 0)
+ continue;
+
+ if (strncmp(splitstr[FSTYPE], hugetlbfs_str, htlbfs_str_len) == 0){
+ const char *pagesz_str = strstr(splitstr[OPTIONS], pagesize_opt);
+
+ /* if no explicit page size, the default page size is compared */
+ if (pagesz_str == NULL){
+ if (hugepage_sz == default_size){
+ strlcpy(hugedir, splitstr[MOUNTPT], len);
+ retval = 0;
+ break;
+ }
+ }
+ /* there is an explicit page size, so check it */
+ else {
+ uint64_t pagesz = rte_str_to_size(&pagesz_str[pagesize_opt_len]);
+ if (pagesz == hugepage_sz) {
+ strlcpy(hugedir, splitstr[MOUNTPT], len);
+ retval = 0;
+ break;
+ }
+ }
+ } /* end if strncmp hugetlbfs */
+ } /* end while fgets */
+
+ fclose(fd);
+ return retval;
+}
+
+/*
+ * Clear the hugepage directory of whatever hugepage files
+ * there are. Checks if the file is locked (i.e.
+ * if it's in use by another DPDK process).
+ */
+static int
+clear_hugedir(const char * hugedir)
+{
+ DIR *dir;
+ struct dirent *dirent;
+ int dir_fd, fd, lck_result;
+ const char filter[] = "*map_*"; /* matches hugepage files */
+
+ /* open directory */
+ dir = opendir(hugedir);
+ if (!dir) {
+ RTE_LOG(ERR, EAL, "Unable to open hugepage directory %s\n",
+ hugedir);
+ goto error;
+ }
+ dir_fd = dirfd(dir);
+
+ dirent = readdir(dir);
+ if (!dirent) {
+ RTE_LOG(ERR, EAL, "Unable to read hugepage directory %s\n",
+ hugedir);
+ goto error;
+ }
+
+ while(dirent != NULL){
+ /* skip files that don't match the hugepage pattern */
+ if (fnmatch(filter, dirent->d_name, 0) > 0) {
+ dirent = readdir(dir);
+ continue;
+ }
+
+ /* try and lock the file */
+ fd = openat(dir_fd, dirent->d_name, O_RDONLY);
+
+ /* skip to next file */
+ if (fd == -1) {
+ dirent = readdir(dir);
+ continue;
+ }
+
+ /* non-blocking lock */
+ lck_result = flock(fd, LOCK_EX | LOCK_NB);
+
+ /* if lock succeeds, remove the file */
+ if (lck_result != -1)
+ unlinkat(dir_fd, dirent->d_name, 0);
+ close (fd);
+ dirent = readdir(dir);
+ }
+
+ closedir(dir);
+ return 0;
+
+error:
+ if (dir)
+ closedir(dir);
+
+ RTE_LOG(ERR, EAL, "Error while clearing hugepage dir: %s\n",
+ strerror(errno));
+
+ return -1;
+}
+
+static int
+compare_hpi(const void *a, const void *b)
+{
+ const struct hugepage_info *hpi_a = a;
+ const struct hugepage_info *hpi_b = b;
+
+ return hpi_b->hugepage_sz - hpi_a->hugepage_sz;
+}
+
+static void
+calc_num_pages(struct hugepage_info *hpi, struct dirent *dirent)
+{
+ uint64_t total_pages = 0;
+ unsigned int i;
+
+ /*
+ * first, try to put all hugepages into relevant sockets, but
+ * if first attempts fails, fall back to collecting all pages
+ * in one socket and sorting them later
+ */
+ total_pages = 0;
+ /* we also don't want to do this for legacy init */
+ if (!internal_config.legacy_mem)
+ for (i = 0; i < rte_socket_count(); i++) {
+ int socket = rte_socket_id_by_idx(i);
+ unsigned int num_pages =
+ get_num_hugepages_on_node(
+ dirent->d_name, socket);
+ hpi->num_pages[socket] = num_pages;
+ total_pages += num_pages;
+ }
+ /*
+ * we failed to sort memory from the get go, so fall
+ * back to old way
+ */
+ if (total_pages == 0) {
+ hpi->num_pages[0] = get_num_hugepages(dirent->d_name);
+
+#ifndef RTE_ARCH_64
+ /* for 32-bit systems, limit number of hugepages to
+ * 1GB per page size */
+ hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0],
+ RTE_PGSIZE_1G / hpi->hugepage_sz);
+#endif
+ }
+}
+
+static int
+hugepage_info_init(void)
+{ const char dirent_start_text[] = "hugepages-";
+ const size_t dirent_start_len = sizeof(dirent_start_text) - 1;
+ unsigned int i, num_sizes = 0;
+ DIR *dir;
+ struct dirent *dirent;
+
+ dir = opendir(sys_dir_path);
+ if (dir == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Cannot open directory %s to read system hugepage info\n",
+ sys_dir_path);
+ return -1;
+ }
+
+ for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) {
+ struct hugepage_info *hpi;
+
+ if (strncmp(dirent->d_name, dirent_start_text,
+ dirent_start_len) != 0)
+ continue;
+
+ if (num_sizes >= MAX_HUGEPAGE_SIZES)
+ break;
+
+ hpi = &internal_config.hugepage_info[num_sizes];
+ hpi->hugepage_sz =
+ rte_str_to_size(&dirent->d_name[dirent_start_len]);
+
+ /* first, check if we have a mountpoint */
+ if (get_hugepage_dir(hpi->hugepage_sz,
+ hpi->hugedir, sizeof(hpi->hugedir)) < 0) {
+ uint32_t num_pages;
+
+ num_pages = get_num_hugepages(dirent->d_name);
+ if (num_pages > 0)
+ RTE_LOG(NOTICE, EAL,
+ "%" PRIu32 " hugepages of size "
+ "%" PRIu64 " reserved, but no mounted "
+ "hugetlbfs found for that size\n",
+ num_pages, hpi->hugepage_sz);
+ /* if we have kernel support for reserving hugepages
+ * through mmap, and we're in in-memory mode, treat this
+ * page size as valid. we cannot be in legacy mode at
+ * this point because we've checked this earlier in the
+ * init process.
+ */
+#ifdef MAP_HUGE_SHIFT
+ if (internal_config.in_memory) {
+ RTE_LOG(DEBUG, EAL, "In-memory mode enabled, "
+ "hugepages of size %" PRIu64 " bytes "
+ "will be allocated anonymously\n",
+ hpi->hugepage_sz);
+ calc_num_pages(hpi, dirent);
+ num_sizes++;
+ }
+#endif
+ continue;
+ }
+
+ /* try to obtain a writelock */
+ hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY);
+
+ /* if blocking lock failed */
+ if (flock(hpi->lock_descriptor, LOCK_EX) == -1) {
+ RTE_LOG(CRIT, EAL,
+ "Failed to lock hugepage directory!\n");
+ break;
+ }
+ /* clear out the hugepages dir from unused pages */
+ if (clear_hugedir(hpi->hugedir) == -1)
+ break;
+
+ calc_num_pages(hpi, dirent);
+
+ num_sizes++;
+ }
+ closedir(dir);
+
+ /* something went wrong, and we broke from the for loop above */
+ if (dirent != NULL)
+ return -1;
+
+ internal_config.num_hugepage_sizes = num_sizes;
+
+ /* sort the page directory entries by size, largest to smallest */
+ qsort(&internal_config.hugepage_info[0], num_sizes,
+ sizeof(internal_config.hugepage_info[0]), compare_hpi);
+
+ /* now we have all info, check we have at least one valid size */
+ for (i = 0; i < num_sizes; i++) {
+ /* pages may no longer all be on socket 0, so check all */
+ unsigned int j, num_pages = 0;
+ struct hugepage_info *hpi = &internal_config.hugepage_info[i];
+
+ for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
+ num_pages += hpi->num_pages[j];
+ if (num_pages > 0)
+ return 0;
+ }
+
+ /* no valid hugepage mounts available, return error */
+ return -1;
+}
+
+/*
+ * when we initialize the hugepage info, everything goes
+ * to socket 0 by default. it will later get sorted by memory
+ * initialization procedure.
+ */
+int
+eal_hugepage_info_init(void)
+{
+ struct hugepage_info *hpi, *tmp_hpi;
+ unsigned int i;
+
+ if (hugepage_info_init() < 0)
+ return -1;
+
+ /* for no shared files mode, we're done */
+ if (internal_config.no_shconf)
+ return 0;
+
+ hpi = &internal_config.hugepage_info[0];
+
+ tmp_hpi = create_shared_memory(eal_hugepage_info_path(),
+ sizeof(internal_config.hugepage_info));
+ if (tmp_hpi == NULL) {
+ RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
+ return -1;
+ }
+
+ memcpy(tmp_hpi, hpi, sizeof(internal_config.hugepage_info));
+
+ /* we've copied file descriptors along with everything else, but they
+ * will be invalid in secondary process, so overwrite them
+ */
+ for (i = 0; i < RTE_DIM(internal_config.hugepage_info); i++) {
+ struct hugepage_info *tmp = &tmp_hpi[i];
+ tmp->lock_descriptor = -1;
+ }
+
+ if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
+ return -1;
+ }
+ return 0;
+}
+
+int eal_hugepage_info_read(void)
+{
+ struct hugepage_info *hpi = &internal_config.hugepage_info[0];
+ struct hugepage_info *tmp_hpi;
+
+ tmp_hpi = open_shared_memory(eal_hugepage_info_path(),
+ sizeof(internal_config.hugepage_info));
+ if (tmp_hpi == NULL) {
+ RTE_LOG(ERR, EAL, "Failed to open shared memory!\n");
+ return -1;
+ }
+
+ memcpy(hpi, tmp_hpi, sizeof(internal_config.hugepage_info));
+
+ if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
+ return -1;
+ }
+ return 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/eal_interrupts.c b/src/spdk/dpdk/lib/librte_eal/linux/eal_interrupts.c
new file mode 100644
index 000000000..16e7a7d51
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/eal_interrupts.c
@@ -0,0 +1,1521 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <sys/epoll.h>
+#include <sys/signalfd.h>
+#include <sys/ioctl.h>
+#include <sys/eventfd.h>
+#include <assert.h>
+#include <stdbool.h>
+
+#include <rte_common.h>
+#include <rte_interrupts.h>
+#include <rte_memory.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_debug.h>
+#include <rte_log.h>
+#include <rte_errno.h>
+#include <rte_spinlock.h>
+#include <rte_pause.h>
+#include <rte_vfio.h>
+#include <rte_eal_trace.h>
+
+#include "eal_private.h"
+#include "eal_vfio.h"
+#include "eal_thread.h"
+
+#define EAL_INTR_EPOLL_WAIT_FOREVER (-1)
+#define NB_OTHER_INTR 1
+
+static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */
+
+/**
+ * union for pipe fds.
+ */
+union intr_pipefds{
+ struct {
+ int pipefd[2];
+ };
+ struct {
+ int readfd;
+ int writefd;
+ };
+};
+
+/**
+ * union buffer for reading on different devices
+ */
+union rte_intr_read_buffer {
+ int uio_intr_count; /* for uio device */
+#ifdef VFIO_PRESENT
+ uint64_t vfio_intr_count; /* for vfio device */
+#endif
+ uint64_t timerfd_num; /* for timerfd */
+ char charbuf[16]; /* for others */
+};
+
+TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
+TAILQ_HEAD(rte_intr_source_list, rte_intr_source);
+
+struct rte_intr_callback {
+ TAILQ_ENTRY(rte_intr_callback) next;
+ rte_intr_callback_fn cb_fn; /**< callback address */
+ void *cb_arg; /**< parameter for callback */
+ uint8_t pending_delete; /**< delete after callback is called */
+ rte_intr_unregister_callback_fn ucb_fn; /**< fn to call before cb is deleted */
+};
+
+struct rte_intr_source {
+ TAILQ_ENTRY(rte_intr_source) next;
+ struct rte_intr_handle intr_handle; /**< interrupt handle */
+ struct rte_intr_cb_list callbacks; /**< user callbacks */
+ uint32_t active;
+};
+
+/* global spinlock for interrupt data operation */
+static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* union buffer for pipe read/write */
+static union intr_pipefds intr_pipe;
+
+/* interrupt sources list */
+static struct rte_intr_source_list intr_sources;
+
+/* interrupt handling thread */
+static pthread_t intr_thread;
+
+/* VFIO interrupts */
+#ifdef VFIO_PRESENT
+
+#define IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int))
+/* irq set buffer length for queue interrupts and LSC interrupt */
+#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
+ sizeof(int) * (RTE_MAX_RXTX_INTR_VEC_ID + 1))
+
+/* enable legacy (INTx) interrupts */
+static int
+vfio_enable_intx(const struct rte_intr_handle *intr_handle) {
+ struct vfio_irq_set *irq_set;
+ char irq_set_buf[IRQ_SET_BUF_LEN];
+ int len, ret;
+ int *fd_ptr;
+
+ len = sizeof(irq_set_buf);
+
+ /* enable INTx */
+ irq_set = (struct vfio_irq_set *) irq_set_buf;
+ irq_set->argsz = len;
+ irq_set->count = 1;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
+ irq_set->start = 0;
+ fd_ptr = (int *) &irq_set->data;
+ *fd_ptr = intr_handle->fd;
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ /* unmask INTx after enabling */
+ memset(irq_set, 0, len);
+ len = sizeof(struct vfio_irq_set);
+ irq_set->argsz = len;
+ irq_set->count = 1;
+ irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
+ irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
+ irq_set->start = 0;
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+ return 0;
+}
+
+/* disable legacy (INTx) interrupts */
+static int
+vfio_disable_intx(const struct rte_intr_handle *intr_handle) {
+ struct vfio_irq_set *irq_set;
+ char irq_set_buf[IRQ_SET_BUF_LEN];
+ int len, ret;
+
+ len = sizeof(struct vfio_irq_set);
+
+ /* mask interrupts before disabling */
+ irq_set = (struct vfio_irq_set *) irq_set_buf;
+ irq_set->argsz = len;
+ irq_set->count = 1;
+ irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
+ irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
+ irq_set->start = 0;
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ /* disable INTx*/
+ memset(irq_set, 0, len);
+ irq_set->argsz = len;
+ irq_set->count = 0;
+ irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
+ irq_set->start = 0;
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL,
+ "Error disabling INTx interrupts for fd %d\n", intr_handle->fd);
+ return -1;
+ }
+ return 0;
+}
+
+/* unmask/ack legacy (INTx) interrupts */
+static int
+vfio_ack_intx(const struct rte_intr_handle *intr_handle)
+{
+ struct vfio_irq_set irq_set;
+
+ /* unmask INTx */
+ memset(&irq_set, 0, sizeof(irq_set));
+ irq_set.argsz = sizeof(irq_set);
+ irq_set.count = 1;
+ irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
+ irq_set.index = VFIO_PCI_INTX_IRQ_INDEX;
+ irq_set.start = 0;
+
+ if (ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set)) {
+ RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+ return 0;
+}
+
+/* enable MSI interrupts */
+static int
+vfio_enable_msi(const struct rte_intr_handle *intr_handle) {
+ int len, ret;
+ char irq_set_buf[IRQ_SET_BUF_LEN];
+ struct vfio_irq_set *irq_set;
+ int *fd_ptr;
+
+ len = sizeof(irq_set_buf);
+
+ irq_set = (struct vfio_irq_set *) irq_set_buf;
+ irq_set->argsz = len;
+ irq_set->count = 1;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
+ irq_set->start = 0;
+ fd_ptr = (int *) &irq_set->data;
+ *fd_ptr = intr_handle->fd;
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+ return 0;
+}
+
+/* disable MSI interrupts */
+static int
+vfio_disable_msi(const struct rte_intr_handle *intr_handle) {
+ struct vfio_irq_set *irq_set;
+ char irq_set_buf[IRQ_SET_BUF_LEN];
+ int len, ret;
+
+ len = sizeof(struct vfio_irq_set);
+
+ irq_set = (struct vfio_irq_set *) irq_set_buf;
+ irq_set->argsz = len;
+ irq_set->count = 0;
+ irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
+ irq_set->start = 0;
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret)
+ RTE_LOG(ERR, EAL,
+ "Error disabling MSI interrupts for fd %d\n", intr_handle->fd);
+
+ return ret;
+}
+
+/* enable MSI-X interrupts */
+static int
+vfio_enable_msix(const struct rte_intr_handle *intr_handle) {
+ int len, ret;
+ char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
+ struct vfio_irq_set *irq_set;
+ int *fd_ptr;
+
+ len = sizeof(irq_set_buf);
+
+ irq_set = (struct vfio_irq_set *) irq_set_buf;
+ irq_set->argsz = len;
+ /* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */
+ irq_set->count = intr_handle->max_intr ?
+ (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ?
+ RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+ irq_set->start = 0;
+ fd_ptr = (int *) &irq_set->data;
+ /* INTR vector offset 0 reserve for non-efds mapping */
+ fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd;
+ memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds,
+ sizeof(*intr_handle->efds) * intr_handle->nb_efd);
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* disable MSI-X interrupts */
+static int
+vfio_disable_msix(const struct rte_intr_handle *intr_handle) {
+ struct vfio_irq_set *irq_set;
+ char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
+ int len, ret;
+
+ len = sizeof(struct vfio_irq_set);
+
+ irq_set = (struct vfio_irq_set *) irq_set_buf;
+ irq_set->argsz = len;
+ irq_set->count = 0;
+ irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+ irq_set->start = 0;
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret)
+ RTE_LOG(ERR, EAL,
+ "Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd);
+
+ return ret;
+}
+
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+/* enable req notifier */
+static int
+vfio_enable_req(const struct rte_intr_handle *intr_handle)
+{
+ int len, ret;
+ char irq_set_buf[IRQ_SET_BUF_LEN];
+ struct vfio_irq_set *irq_set;
+ int *fd_ptr;
+
+ len = sizeof(irq_set_buf);
+
+ irq_set = (struct vfio_irq_set *) irq_set_buf;
+ irq_set->argsz = len;
+ irq_set->count = 1;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+ VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
+ irq_set->start = 0;
+ fd_ptr = (int *) &irq_set->data;
+ *fd_ptr = intr_handle->fd;
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Error enabling req interrupts for fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* disable req notifier */
+static int
+vfio_disable_req(const struct rte_intr_handle *intr_handle)
+{
+ struct vfio_irq_set *irq_set;
+ char irq_set_buf[IRQ_SET_BUF_LEN];
+ int len, ret;
+
+ len = sizeof(struct vfio_irq_set);
+
+ irq_set = (struct vfio_irq_set *) irq_set_buf;
+ irq_set->argsz = len;
+ irq_set->count = 0;
+ irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
+ irq_set->start = 0;
+
+ ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+ if (ret)
+ RTE_LOG(ERR, EAL, "Error disabling req interrupts for fd %d\n",
+ intr_handle->fd);
+
+ return ret;
+}
+#endif
+#endif
+
+static int
+uio_intx_intr_disable(const struct rte_intr_handle *intr_handle)
+{
+ unsigned char command_high;
+
+ /* use UIO config file descriptor for uio_pci_generic */
+ if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
+ RTE_LOG(ERR, EAL,
+ "Error reading interrupts status for fd %d\n",
+ intr_handle->uio_cfg_fd);
+ return -1;
+ }
+ /* disable interrupts */
+ command_high |= 0x4;
+ if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
+ RTE_LOG(ERR, EAL,
+ "Error disabling interrupts for fd %d\n",
+ intr_handle->uio_cfg_fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+uio_intx_intr_enable(const struct rte_intr_handle *intr_handle)
+{
+ unsigned char command_high;
+
+ /* use UIO config file descriptor for uio_pci_generic */
+ if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
+ RTE_LOG(ERR, EAL,
+ "Error reading interrupts status for fd %d\n",
+ intr_handle->uio_cfg_fd);
+ return -1;
+ }
+ /* enable interrupts */
+ command_high &= ~0x4;
+ if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
+ RTE_LOG(ERR, EAL,
+ "Error enabling interrupts for fd %d\n",
+ intr_handle->uio_cfg_fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+uio_intr_disable(const struct rte_intr_handle *intr_handle)
+{
+ const int value = 0;
+
+ if (write(intr_handle->fd, &value, sizeof(value)) < 0) {
+ RTE_LOG(ERR, EAL,
+ "Error disabling interrupts for fd %d (%s)\n",
+ intr_handle->fd, strerror(errno));
+ return -1;
+ }
+ return 0;
+}
+
+static int
+uio_intr_enable(const struct rte_intr_handle *intr_handle)
+{
+ const int value = 1;
+
+ if (write(intr_handle->fd, &value, sizeof(value)) < 0) {
+ RTE_LOG(ERR, EAL,
+ "Error enabling interrupts for fd %d (%s)\n",
+ intr_handle->fd, strerror(errno));
+ return -1;
+ }
+ return 0;
+}
+
+int
+rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
+ rte_intr_callback_fn cb, void *cb_arg)
+{
+ int ret, wake_thread;
+ struct rte_intr_source *src;
+ struct rte_intr_callback *callback;
+
+ wake_thread = 0;
+
+ /* first do parameter checking */
+ if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Registering with invalid input parameter\n");
+ return -EINVAL;
+ }
+
+ /* allocate a new interrupt callback entity */
+ callback = calloc(1, sizeof(*callback));
+ if (callback == NULL) {
+ RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+ return -ENOMEM;
+ }
+ callback->cb_fn = cb;
+ callback->cb_arg = cb_arg;
+ callback->pending_delete = 0;
+ callback->ucb_fn = NULL;
+
+ rte_spinlock_lock(&intr_lock);
+
+ /* check if there is at least one callback registered for the fd */
+ TAILQ_FOREACH(src, &intr_sources, next) {
+ if (src->intr_handle.fd == intr_handle->fd) {
+ /* we had no interrupts for this */
+ if (TAILQ_EMPTY(&src->callbacks))
+ wake_thread = 1;
+
+ TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
+ ret = 0;
+ break;
+ }
+ }
+
+ /* no existing callbacks for this - add new source */
+ if (src == NULL) {
+ src = calloc(1, sizeof(*src));
+ if (src == NULL) {
+ RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+ free(callback);
+ ret = -ENOMEM;
+ } else {
+ src->intr_handle = *intr_handle;
+ TAILQ_INIT(&src->callbacks);
+ TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
+ TAILQ_INSERT_TAIL(&intr_sources, src, next);
+ wake_thread = 1;
+ ret = 0;
+ }
+ }
+
+ rte_spinlock_unlock(&intr_lock);
+
+ /**
+ * check if need to notify the pipe fd waited by epoll_wait to
+ * rebuild the wait list.
+ */
+ if (wake_thread)
+ if (write(intr_pipe.writefd, "1", 1) < 0)
+ ret = -EPIPE;
+
+ rte_eal_trace_intr_callback_register(intr_handle, cb, cb_arg, ret);
+ return ret;
+}
+
+int
+rte_intr_callback_unregister_pending(const struct rte_intr_handle *intr_handle,
+ rte_intr_callback_fn cb_fn, void *cb_arg,
+ rte_intr_unregister_callback_fn ucb_fn)
+{
+ int ret;
+ struct rte_intr_source *src;
+ struct rte_intr_callback *cb, *next;
+
+ /* do parameter checking first */
+ if (intr_handle == NULL || intr_handle->fd < 0) {
+ RTE_LOG(ERR, EAL,
+ "Unregistering with invalid input parameter\n");
+ return -EINVAL;
+ }
+
+ rte_spinlock_lock(&intr_lock);
+
+ /* check if the insterrupt source for the fd is existent */
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd == intr_handle->fd)
+ break;
+
+ /* No interrupt source registered for the fd */
+ if (src == NULL) {
+ ret = -ENOENT;
+
+ /* only usable if the source is active */
+ } else if (src->active == 0) {
+ ret = -EAGAIN;
+
+ } else {
+ ret = 0;
+
+ /* walk through the callbacks and mark all that match. */
+ for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
+ next = TAILQ_NEXT(cb, next);
+ if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
+ cb->cb_arg == cb_arg)) {
+ cb->pending_delete = 1;
+ cb->ucb_fn = ucb_fn;
+ ret++;
+ }
+ }
+ }
+
+ rte_spinlock_unlock(&intr_lock);
+
+ return ret;
+}
+
+int
+rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
+ rte_intr_callback_fn cb_fn, void *cb_arg)
+{
+ int ret;
+ struct rte_intr_source *src;
+ struct rte_intr_callback *cb, *next;
+
+ /* do parameter checking first */
+ if (intr_handle == NULL || intr_handle->fd < 0) {
+ RTE_LOG(ERR, EAL,
+ "Unregistering with invalid input parameter\n");
+ return -EINVAL;
+ }
+
+ rte_spinlock_lock(&intr_lock);
+
+ /* check if the insterrupt source for the fd is existent */
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd == intr_handle->fd)
+ break;
+
+ /* No interrupt source registered for the fd */
+ if (src == NULL) {
+ ret = -ENOENT;
+
+ /* interrupt source has some active callbacks right now. */
+ } else if (src->active != 0) {
+ ret = -EAGAIN;
+
+ /* ok to remove. */
+ } else {
+ ret = 0;
+
+ /*walk through the callbacks and remove all that match. */
+ for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
+
+ next = TAILQ_NEXT(cb, next);
+
+ if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
+ cb->cb_arg == cb_arg)) {
+ TAILQ_REMOVE(&src->callbacks, cb, next);
+ free(cb);
+ ret++;
+ }
+ }
+
+ /* all callbacks for that source are removed. */
+ if (TAILQ_EMPTY(&src->callbacks)) {
+ TAILQ_REMOVE(&intr_sources, src, next);
+ free(src);
+ }
+ }
+
+ rte_spinlock_unlock(&intr_lock);
+
+ /* notify the pipe fd waited by epoll_wait to rebuild the wait list */
+ if (ret >= 0 && write(intr_pipe.writefd, "1", 1) < 0) {
+ ret = -EPIPE;
+ }
+
+ rte_eal_trace_intr_callback_unregister(intr_handle, cb_fn, cb_arg,
+ ret);
+ return ret;
+}
+
+int
+rte_intr_enable(const struct rte_intr_handle *intr_handle)
+{
+ int rc = 0;
+
+ if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) {
+ rc = 0;
+ goto out;
+ }
+
+ if (!intr_handle || intr_handle->fd < 0 ||
+ intr_handle->uio_cfg_fd < 0) {
+ rc = -1;
+ goto out;
+ }
+
+ switch (intr_handle->type){
+ /* write to the uio fd to enable the interrupt */
+ case RTE_INTR_HANDLE_UIO:
+ if (uio_intr_enable(intr_handle))
+ rc = -1;
+ break;
+ case RTE_INTR_HANDLE_UIO_INTX:
+ if (uio_intx_intr_enable(intr_handle))
+ rc = -1;
+ break;
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_ALARM:
+ rc = -1;
+ break;
+#ifdef VFIO_PRESENT
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ if (vfio_enable_msix(intr_handle))
+ rc = -1;
+ break;
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ if (vfio_enable_msi(intr_handle))
+ rc = -1;
+ break;
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ if (vfio_enable_intx(intr_handle))
+ rc = -1;
+ break;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ case RTE_INTR_HANDLE_VFIO_REQ:
+ if (vfio_enable_req(intr_handle))
+ rc = -1;
+ break;
+#endif
+#endif
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ rc = -1;
+ break;
+ /* unknown handle type */
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ rc = -1;
+ break;
+ }
+out:
+ rte_eal_trace_intr_enable(intr_handle, rc);
+ return rc;
+}
+
+/**
+ * PMD generally calls this function at the end of its IRQ callback.
+ * Internally, it unmasks the interrupt if possible.
+ *
+ * For INTx, unmasking is required as the interrupt is auto-masked prior to
+ * invoking callback.
+ *
+ * For MSI/MSI-X, unmasking is typically not needed as the interrupt is not
+ * auto-masked. In fact, for interrupt handle types VFIO_MSIX and VFIO_MSI,
+ * this function is no-op.
+ */
+int
+rte_intr_ack(const struct rte_intr_handle *intr_handle)
+{
+ if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV)
+ return 0;
+
+ if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
+ return -1;
+
+ switch (intr_handle->type) {
+ /* Both acking and enabling are same for UIO */
+ case RTE_INTR_HANDLE_UIO:
+ if (uio_intr_enable(intr_handle))
+ return -1;
+ break;
+ case RTE_INTR_HANDLE_UIO_INTX:
+ if (uio_intx_intr_enable(intr_handle))
+ return -1;
+ break;
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_ALARM:
+ return -1;
+#ifdef VFIO_PRESENT
+ /* VFIO MSI* is implicitly acked unlike INTx, nothing to do */
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ return 0;
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ if (vfio_ack_intx(intr_handle))
+ return -1;
+ break;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ case RTE_INTR_HANDLE_VFIO_REQ:
+ return -1;
+#endif
+#endif
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ return -1;
+ /* unknown handle type */
+ default:
+ RTE_LOG(ERR, EAL, "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+rte_intr_disable(const struct rte_intr_handle *intr_handle)
+{
+ int rc = 0;
+
+ if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) {
+ rc = 0;
+ goto out;
+ }
+
+ if (!intr_handle || intr_handle->fd < 0 ||
+ intr_handle->uio_cfg_fd < 0) {
+ rc = -1;
+ goto out;
+ }
+
+ switch (intr_handle->type){
+ /* write to the uio fd to disable the interrupt */
+ case RTE_INTR_HANDLE_UIO:
+ if (uio_intr_disable(intr_handle))
+ rc = -1;
+ break;
+ case RTE_INTR_HANDLE_UIO_INTX:
+ if (uio_intx_intr_disable(intr_handle))
+ rc = -1;
+ break;
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_ALARM:
+ rc = -1;
+ break;
+#ifdef VFIO_PRESENT
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ if (vfio_disable_msix(intr_handle))
+ rc = -1;
+ break;
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ if (vfio_disable_msi(intr_handle))
+ rc = -1;
+ break;
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ if (vfio_disable_intx(intr_handle))
+ rc = -1;
+ break;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ case RTE_INTR_HANDLE_VFIO_REQ:
+ if (vfio_disable_req(intr_handle))
+ rc = -1;
+ break;
+#endif
+#endif
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ rc = -1;
+ break;
+ /* unknown handle type */
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ rc = -1;
+ break;
+ }
+out:
+ rte_eal_trace_intr_disable(intr_handle, rc);
+ return rc;
+}
+
+static int
+eal_intr_process_interrupts(struct epoll_event *events, int nfds)
+{
+ bool call = false;
+ int n, bytes_read, rv;
+ struct rte_intr_source *src;
+ struct rte_intr_callback *cb, *next;
+ union rte_intr_read_buffer buf;
+ struct rte_intr_callback active_cb;
+
+ for (n = 0; n < nfds; n++) {
+
+ /**
+ * if the pipe fd is ready to read, return out to
+ * rebuild the wait list.
+ */
+ if (events[n].data.fd == intr_pipe.readfd){
+ int r = read(intr_pipe.readfd, buf.charbuf,
+ sizeof(buf.charbuf));
+ RTE_SET_USED(r);
+ return -1;
+ }
+ rte_spinlock_lock(&intr_lock);
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd ==
+ events[n].data.fd)
+ break;
+ if (src == NULL){
+ rte_spinlock_unlock(&intr_lock);
+ continue;
+ }
+
+ /* mark this interrupt source as active and release the lock. */
+ src->active = 1;
+ rte_spinlock_unlock(&intr_lock);
+
+ /* set the length to be read dor different handle type */
+ switch (src->intr_handle.type) {
+ case RTE_INTR_HANDLE_UIO:
+ case RTE_INTR_HANDLE_UIO_INTX:
+ bytes_read = sizeof(buf.uio_intr_count);
+ break;
+ case RTE_INTR_HANDLE_ALARM:
+ bytes_read = sizeof(buf.timerfd_num);
+ break;
+#ifdef VFIO_PRESENT
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ bytes_read = sizeof(buf.vfio_intr_count);
+ break;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ case RTE_INTR_HANDLE_VFIO_REQ:
+ bytes_read = 0;
+ call = true;
+ break;
+#endif
+#endif
+ case RTE_INTR_HANDLE_VDEV:
+ case RTE_INTR_HANDLE_EXT:
+ bytes_read = 0;
+ call = true;
+ break;
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ bytes_read = 0;
+ call = true;
+ break;
+ default:
+ bytes_read = 1;
+ break;
+ }
+
+ if (bytes_read > 0) {
+ /**
+ * read out to clear the ready-to-be-read flag
+ * for epoll_wait.
+ */
+ bytes_read = read(events[n].data.fd, &buf, bytes_read);
+ if (bytes_read < 0) {
+ if (errno == EINTR || errno == EWOULDBLOCK)
+ continue;
+
+ RTE_LOG(ERR, EAL, "Error reading from file "
+ "descriptor %d: %s\n",
+ events[n].data.fd,
+ strerror(errno));
+ /*
+ * The device is unplugged or buggy, remove
+ * it as an interrupt source and return to
+ * force the wait list to be rebuilt.
+ */
+ rte_spinlock_lock(&intr_lock);
+ TAILQ_REMOVE(&intr_sources, src, next);
+ rte_spinlock_unlock(&intr_lock);
+
+ for (cb = TAILQ_FIRST(&src->callbacks); cb;
+ cb = next) {
+ next = TAILQ_NEXT(cb, next);
+ TAILQ_REMOVE(&src->callbacks, cb, next);
+ free(cb);
+ }
+ free(src);
+ return -1;
+ } else if (bytes_read == 0)
+ RTE_LOG(ERR, EAL, "Read nothing from file "
+ "descriptor %d\n", events[n].data.fd);
+ else
+ call = true;
+ }
+
+ /* grab a lock, again to call callbacks and update status. */
+ rte_spinlock_lock(&intr_lock);
+
+ if (call) {
+
+ /* Finally, call all callbacks. */
+ TAILQ_FOREACH(cb, &src->callbacks, next) {
+
+ /* make a copy and unlock. */
+ active_cb = *cb;
+ rte_spinlock_unlock(&intr_lock);
+
+ /* call the actual callback */
+ active_cb.cb_fn(active_cb.cb_arg);
+
+ /*get the lock back. */
+ rte_spinlock_lock(&intr_lock);
+ }
+ }
+ /* we done with that interrupt source, release it. */
+ src->active = 0;
+
+ rv = 0;
+
+ /* check if any callback are supposed to be removed */
+ for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
+ next = TAILQ_NEXT(cb, next);
+ if (cb->pending_delete) {
+ TAILQ_REMOVE(&src->callbacks, cb, next);
+ if (cb->ucb_fn)
+ cb->ucb_fn(&src->intr_handle, cb->cb_arg);
+ free(cb);
+ rv++;
+ }
+ }
+
+ /* all callbacks for that source are removed. */
+ if (TAILQ_EMPTY(&src->callbacks)) {
+ TAILQ_REMOVE(&intr_sources, src, next);
+ free(src);
+ }
+
+ /* notify the pipe fd waited by epoll_wait to rebuild the wait list */
+ if (rv >= 0 && write(intr_pipe.writefd, "1", 1) < 0) {
+ rte_spinlock_unlock(&intr_lock);
+ return -EPIPE;
+ }
+
+ rte_spinlock_unlock(&intr_lock);
+ }
+
+ return 0;
+}
+
+/**
+ * It handles all the interrupts.
+ *
+ * @param pfd
+ * epoll file descriptor.
+ * @param totalfds
+ * The number of file descriptors added in epoll.
+ *
+ * @return
+ * void
+ */
+static void
+eal_intr_handle_interrupts(int pfd, unsigned totalfds)
+{
+ struct epoll_event events[totalfds];
+ int nfds = 0;
+
+ for(;;) {
+ nfds = epoll_wait(pfd, events, totalfds,
+ EAL_INTR_EPOLL_WAIT_FOREVER);
+ /* epoll_wait fail */
+ if (nfds < 0) {
+ if (errno == EINTR)
+ continue;
+ RTE_LOG(ERR, EAL,
+ "epoll_wait returns with fail\n");
+ return;
+ }
+ /* epoll_wait timeout, will never happens here */
+ else if (nfds == 0)
+ continue;
+ /* epoll_wait has at least one fd ready to read */
+ if (eal_intr_process_interrupts(events, nfds) < 0)
+ return;
+ }
+}
+
+/**
+ * It builds/rebuilds up the epoll file descriptor with all the
+ * file descriptors being waited on. Then handles the interrupts.
+ *
+ * @param arg
+ * pointer. (unused)
+ *
+ * @return
+ * never return;
+ */
+static __rte_noreturn void *
+eal_intr_thread_main(__rte_unused void *arg)
+{
+ /* host thread, never break out */
+ for (;;) {
+ /* build up the epoll fd with all descriptors we are to
+ * wait on then pass it to the handle_interrupts function
+ */
+ static struct epoll_event pipe_event = {
+ .events = EPOLLIN | EPOLLPRI,
+ };
+ struct rte_intr_source *src;
+ unsigned numfds = 0;
+
+ /* create epoll fd */
+ int pfd = epoll_create(1);
+ if (pfd < 0)
+ rte_panic("Cannot create epoll instance\n");
+
+ pipe_event.data.fd = intr_pipe.readfd;
+ /**
+ * add pipe fd into wait list, this pipe is used to
+ * rebuild the wait list.
+ */
+ if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd,
+ &pipe_event) < 0) {
+ rte_panic("Error adding fd to %d epoll_ctl, %s\n",
+ intr_pipe.readfd, strerror(errno));
+ }
+ numfds++;
+
+ rte_spinlock_lock(&intr_lock);
+
+ TAILQ_FOREACH(src, &intr_sources, next) {
+ struct epoll_event ev;
+
+ if (src->callbacks.tqh_first == NULL)
+ continue; /* skip those with no callbacks */
+ memset(&ev, 0, sizeof(ev));
+ ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+ ev.data.fd = src->intr_handle.fd;
+
+ /**
+ * add all the uio device file descriptor
+ * into wait list.
+ */
+ if (epoll_ctl(pfd, EPOLL_CTL_ADD,
+ src->intr_handle.fd, &ev) < 0){
+ rte_panic("Error adding fd %d epoll_ctl, %s\n",
+ src->intr_handle.fd, strerror(errno));
+ }
+ else
+ numfds++;
+ }
+ rte_spinlock_unlock(&intr_lock);
+ /* serve the interrupt */
+ eal_intr_handle_interrupts(pfd, numfds);
+
+ /**
+ * when we return, we need to rebuild the
+ * list of fds to monitor.
+ */
+ close(pfd);
+ }
+}
+
+int
+rte_eal_intr_init(void)
+{
+ int ret = 0;
+
+ /* init the global interrupt source head */
+ TAILQ_INIT(&intr_sources);
+
+ /**
+ * create a pipe which will be waited by epoll and notified to
+ * rebuild the wait list of epoll.
+ */
+ if (pipe(intr_pipe.pipefd) < 0) {
+ rte_errno = errno;
+ return -1;
+ }
+
+ /* create the host thread to wait/handle the interrupt */
+ ret = rte_ctrl_thread_create(&intr_thread, "eal-intr-thread", NULL,
+ eal_intr_thread_main, NULL);
+ if (ret != 0) {
+ rte_errno = -ret;
+ RTE_LOG(ERR, EAL,
+ "Failed to create thread for interrupt handling\n");
+ }
+
+ return ret;
+}
+
+static void
+eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
+{
+ union rte_intr_read_buffer buf;
+ int bytes_read = 0;
+ int nbytes;
+
+ switch (intr_handle->type) {
+ case RTE_INTR_HANDLE_UIO:
+ case RTE_INTR_HANDLE_UIO_INTX:
+ bytes_read = sizeof(buf.uio_intr_count);
+ break;
+#ifdef VFIO_PRESENT
+ case RTE_INTR_HANDLE_VFIO_MSIX:
+ case RTE_INTR_HANDLE_VFIO_MSI:
+ case RTE_INTR_HANDLE_VFIO_LEGACY:
+ bytes_read = sizeof(buf.vfio_intr_count);
+ break;
+#endif
+ case RTE_INTR_HANDLE_VDEV:
+ bytes_read = intr_handle->efd_counter_size;
+ /* For vdev, number of bytes to read is set by driver */
+ break;
+ case RTE_INTR_HANDLE_EXT:
+ return;
+ default:
+ bytes_read = 1;
+ RTE_LOG(INFO, EAL, "unexpected intr type\n");
+ break;
+ }
+
+ /**
+ * read out to clear the ready-to-be-read flag
+ * for epoll_wait.
+ */
+ if (bytes_read == 0)
+ return;
+ do {
+ nbytes = read(fd, &buf, bytes_read);
+ if (nbytes < 0) {
+ if (errno == EINTR || errno == EWOULDBLOCK ||
+ errno == EAGAIN)
+ continue;
+ RTE_LOG(ERR, EAL,
+ "Error reading from fd %d: %s\n",
+ fd, strerror(errno));
+ } else if (nbytes == 0)
+ RTE_LOG(ERR, EAL, "Read nothing from fd %d\n", fd);
+ return;
+ } while (1);
+}
+
+static int
+eal_epoll_process_event(struct epoll_event *evs, unsigned int n,
+ struct rte_epoll_event *events)
+{
+ unsigned int i, count = 0;
+ struct rte_epoll_event *rev;
+
+ for (i = 0; i < n; i++) {
+ rev = evs[i].data.ptr;
+ if (!rev || !rte_atomic32_cmpset(&rev->status, RTE_EPOLL_VALID,
+ RTE_EPOLL_EXEC))
+ continue;
+
+ events[count].status = RTE_EPOLL_VALID;
+ events[count].fd = rev->fd;
+ events[count].epfd = rev->epfd;
+ events[count].epdata.event = rev->epdata.event;
+ events[count].epdata.data = rev->epdata.data;
+ if (rev->epdata.cb_fun)
+ rev->epdata.cb_fun(rev->fd,
+ rev->epdata.cb_arg);
+
+ rte_compiler_barrier();
+ rev->status = RTE_EPOLL_VALID;
+ count++;
+ }
+ return count;
+}
+
+static inline int
+eal_init_tls_epfd(void)
+{
+ int pfd = epoll_create(255);
+
+ if (pfd < 0) {
+ RTE_LOG(ERR, EAL,
+ "Cannot create epoll instance\n");
+ return -1;
+ }
+ return pfd;
+}
+
+int
+rte_intr_tls_epfd(void)
+{
+ if (RTE_PER_LCORE(_epfd) == -1)
+ RTE_PER_LCORE(_epfd) = eal_init_tls_epfd();
+
+ return RTE_PER_LCORE(_epfd);
+}
+
+int
+rte_epoll_wait(int epfd, struct rte_epoll_event *events,
+ int maxevents, int timeout)
+{
+ struct epoll_event evs[maxevents];
+ int rc;
+
+ if (!events) {
+ RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
+ return -1;
+ }
+
+ /* using per thread epoll fd */
+ if (epfd == RTE_EPOLL_PER_THREAD)
+ epfd = rte_intr_tls_epfd();
+
+ while (1) {
+ rc = epoll_wait(epfd, evs, maxevents, timeout);
+ if (likely(rc > 0)) {
+ /* epoll_wait has at least one fd ready to read */
+ rc = eal_epoll_process_event(evs, rc, events);
+ break;
+ } else if (rc < 0) {
+ if (errno == EINTR)
+ continue;
+ /* epoll_wait fail */
+ RTE_LOG(ERR, EAL, "epoll_wait returns with fail %s\n",
+ strerror(errno));
+ rc = -1;
+ break;
+ } else {
+ /* rc == 0, epoll_wait timed out */
+ break;
+ }
+ }
+
+ return rc;
+}
+
+static inline void
+eal_epoll_data_safe_free(struct rte_epoll_event *ev)
+{
+ while (!rte_atomic32_cmpset(&ev->status, RTE_EPOLL_VALID,
+ RTE_EPOLL_INVALID))
+ while (ev->status != RTE_EPOLL_VALID)
+ rte_pause();
+ memset(&ev->epdata, 0, sizeof(ev->epdata));
+ ev->fd = -1;
+ ev->epfd = -1;
+}
+
+int
+rte_epoll_ctl(int epfd, int op, int fd,
+ struct rte_epoll_event *event)
+{
+ struct epoll_event ev;
+
+ if (!event) {
+ RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
+ return -1;
+ }
+
+ /* using per thread epoll fd */
+ if (epfd == RTE_EPOLL_PER_THREAD)
+ epfd = rte_intr_tls_epfd();
+
+ if (op == EPOLL_CTL_ADD) {
+ event->status = RTE_EPOLL_VALID;
+ event->fd = fd; /* ignore fd in event */
+ event->epfd = epfd;
+ ev.data.ptr = (void *)event;
+ }
+
+ ev.events = event->epdata.event;
+ if (epoll_ctl(epfd, op, fd, &ev) < 0) {
+ RTE_LOG(ERR, EAL, "Error op %d fd %d epoll_ctl, %s\n",
+ op, fd, strerror(errno));
+ if (op == EPOLL_CTL_ADD)
+ /* rollback status when CTL_ADD fail */
+ event->status = RTE_EPOLL_INVALID;
+ return -1;
+ }
+
+ if (op == EPOLL_CTL_DEL && event->status != RTE_EPOLL_INVALID)
+ eal_epoll_data_safe_free(event);
+
+ return 0;
+}
+
+int
+rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd,
+ int op, unsigned int vec, void *data)
+{
+ struct rte_epoll_event *rev;
+ struct rte_epoll_data *epdata;
+ int epfd_op;
+ unsigned int efd_idx;
+ int rc = 0;
+
+ efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ?
+ (vec - RTE_INTR_VEC_RXTX_OFFSET) : vec;
+
+ if (!intr_handle || intr_handle->nb_efd == 0 ||
+ efd_idx >= intr_handle->nb_efd) {
+ RTE_LOG(ERR, EAL, "Wrong intr vector number.\n");
+ return -EPERM;
+ }
+
+ switch (op) {
+ case RTE_INTR_EVENT_ADD:
+ epfd_op = EPOLL_CTL_ADD;
+ rev = &intr_handle->elist[efd_idx];
+ if (rev->status != RTE_EPOLL_INVALID) {
+ RTE_LOG(INFO, EAL, "Event already been added.\n");
+ return -EEXIST;
+ }
+
+ /* attach to intr vector fd */
+ epdata = &rev->epdata;
+ epdata->event = EPOLLIN | EPOLLPRI | EPOLLET;
+ epdata->data = data;
+ epdata->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr;
+ epdata->cb_arg = (void *)intr_handle;
+ rc = rte_epoll_ctl(epfd, epfd_op,
+ intr_handle->efds[efd_idx], rev);
+ if (!rc)
+ RTE_LOG(DEBUG, EAL,
+ "efd %d associated with vec %d added on epfd %d"
+ "\n", rev->fd, vec, epfd);
+ else
+ rc = -EPERM;
+ break;
+ case RTE_INTR_EVENT_DEL:
+ epfd_op = EPOLL_CTL_DEL;
+ rev = &intr_handle->elist[efd_idx];
+ if (rev->status == RTE_EPOLL_INVALID) {
+ RTE_LOG(INFO, EAL, "Event does not exist.\n");
+ return -EPERM;
+ }
+
+ rc = rte_epoll_ctl(rev->epfd, epfd_op, rev->fd, rev);
+ if (rc)
+ rc = -EPERM;
+ break;
+ default:
+ RTE_LOG(ERR, EAL, "event op type mismatch\n");
+ rc = -EPERM;
+ }
+
+ return rc;
+}
+
+void
+rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle)
+{
+ uint32_t i;
+ struct rte_epoll_event *rev;
+
+ for (i = 0; i < intr_handle->nb_efd; i++) {
+ rev = &intr_handle->elist[i];
+ if (rev->status == RTE_EPOLL_INVALID)
+ continue;
+ if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) {
+ /* force free if the entry valid */
+ eal_epoll_data_safe_free(rev);
+ rev->status = RTE_EPOLL_INVALID;
+ }
+ }
+}
+
+int
+rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
+{
+ uint32_t i;
+ int fd;
+ uint32_t n = RTE_MIN(nb_efd, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
+
+ assert(nb_efd != 0);
+
+ if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) {
+ for (i = 0; i < n; i++) {
+ fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL,
+ "can't setup eventfd, error %i (%s)\n",
+ errno, strerror(errno));
+ return -errno;
+ }
+ intr_handle->efds[i] = fd;
+ }
+ intr_handle->nb_efd = n;
+ intr_handle->max_intr = NB_OTHER_INTR + n;
+ } else if (intr_handle->type == RTE_INTR_HANDLE_VDEV) {
+ /* only check, initialization would be done in vdev driver.*/
+ if (intr_handle->efd_counter_size >
+ sizeof(union rte_intr_read_buffer)) {
+ RTE_LOG(ERR, EAL, "the efd_counter_size is oversized");
+ return -EINVAL;
+ }
+ } else {
+ intr_handle->efds[0] = intr_handle->fd;
+ intr_handle->nb_efd = RTE_MIN(nb_efd, 1U);
+ intr_handle->max_intr = NB_OTHER_INTR;
+ }
+
+ return 0;
+}
+
+void
+rte_intr_efd_disable(struct rte_intr_handle *intr_handle)
+{
+ uint32_t i;
+
+ rte_intr_free_epoll_fd(intr_handle);
+ if (intr_handle->max_intr > intr_handle->nb_efd) {
+ for (i = 0; i < intr_handle->nb_efd; i++)
+ close(intr_handle->efds[i]);
+ }
+ intr_handle->nb_efd = 0;
+ intr_handle->max_intr = 0;
+}
+
+int
+rte_intr_dp_is_en(struct rte_intr_handle *intr_handle)
+{
+ return !(!intr_handle->nb_efd);
+}
+
+int
+rte_intr_allow_others(struct rte_intr_handle *intr_handle)
+{
+ if (!rte_intr_dp_is_en(intr_handle))
+ return 1;
+ else
+ return !!(intr_handle->max_intr - intr_handle->nb_efd);
+}
+
+int
+rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
+{
+ if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX)
+ return 1;
+
+ if (intr_handle->type == RTE_INTR_HANDLE_VDEV)
+ return 1;
+
+ return 0;
+}
+
+int rte_thread_is_intr(void)
+{
+ return pthread_equal(intr_thread, pthread_self());
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/eal_lcore.c b/src/spdk/dpdk/lib/librte_eal/linux/eal_lcore.c
new file mode 100644
index 000000000..bc8965844
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/eal_lcore.c
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <unistd.h>
+#include <limits.h>
+#include <string.h>
+#include <dirent.h>
+
+#include <rte_log.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_string_fns.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_filesystem.h"
+#include "eal_thread.h"
+
+#define SYS_CPU_DIR "/sys/devices/system/cpu/cpu%u"
+#define CORE_ID_FILE "topology/core_id"
+#define NUMA_NODE_PATH "/sys/devices/system/node"
+
+/* Check if a cpu is present by the presence of the cpu information for it */
+int
+eal_cpu_detected(unsigned lcore_id)
+{
+ char path[PATH_MAX];
+ int len = snprintf(path, sizeof(path), SYS_CPU_DIR
+ "/"CORE_ID_FILE, lcore_id);
+ if (len <= 0 || (unsigned)len >= sizeof(path))
+ return 0;
+ if (access(path, F_OK) != 0)
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Get CPU socket id (NUMA node) for a logical core.
+ *
+ * This searches each nodeX directories in /sys for the symlink for the given
+ * lcore_id and returns the numa node where the lcore is found. If lcore is not
+ * found on any numa node, returns zero.
+ */
+unsigned
+eal_cpu_socket_id(unsigned lcore_id)
+{
+ unsigned socket;
+
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
+ char path[PATH_MAX];
+
+ snprintf(path, sizeof(path), "%s/node%u/cpu%u", NUMA_NODE_PATH,
+ socket, lcore_id);
+ if (access(path, F_OK) == 0)
+ return socket;
+ }
+ return 0;
+}
+
+/* Get the cpu core id value from the /sys/.../cpuX core_id value */
+unsigned
+eal_cpu_core_id(unsigned lcore_id)
+{
+ char path[PATH_MAX];
+ unsigned long id;
+
+ int len = snprintf(path, sizeof(path), SYS_CPU_DIR "/%s", lcore_id, CORE_ID_FILE);
+ if (len <= 0 || (unsigned)len >= sizeof(path))
+ goto err;
+ if (eal_parse_sysfs_value(path, &id) != 0)
+ goto err;
+ return (unsigned)id;
+
+err:
+ RTE_LOG(ERR, EAL, "Error reading core id value from %s "
+ "for lcore %u - assuming core 0\n", SYS_CPU_DIR, lcore_id);
+ return 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/eal_log.c b/src/spdk/dpdk/lib/librte_eal/linux/eal_log.c
new file mode 100644
index 000000000..43c8460bf
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/eal_log.c
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <syslog.h>
+#include <sys/queue.h>
+
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_spinlock.h>
+#include <rte_log.h>
+
+#include "eal_private.h"
+
+/*
+ * default log function
+ */
+static ssize_t
+console_log_write(__rte_unused void *c, const char *buf, size_t size)
+{
+ ssize_t ret;
+
+ /* write on stdout */
+ ret = fwrite(buf, 1, size, stdout);
+ fflush(stdout);
+
+ /* Syslog error levels are from 0 to 7, so subtract 1 to convert */
+ syslog(rte_log_cur_msg_loglevel() - 1, "%.*s", (int)size, buf);
+
+ return ret;
+}
+
+static cookie_io_functions_t console_log_func = {
+ .write = console_log_write,
+};
+
+/*
+ * set the log to default function, called during eal init process,
+ * once memzones are available.
+ */
+int
+rte_eal_log_init(const char *id, int facility)
+{
+ FILE *log_stream;
+
+ log_stream = fopencookie(NULL, "w+", console_log_func);
+ if (log_stream == NULL)
+ return -1;
+
+ openlog(id, LOG_NDELAY | LOG_PID, facility);
+
+ eal_log_set_default(log_stream);
+
+ return 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/eal_memalloc.c b/src/spdk/dpdk/lib/librte_eal/linux/eal_memalloc.c
new file mode 100644
index 000000000..2c717f8bd
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/eal_memalloc.c
@@ -0,0 +1,1607 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/queue.h>
+#include <sys/file.h>
+#include <unistd.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <signal.h>
+#include <setjmp.h>
+#ifdef F_ADD_SEALS /* if file sealing is supported, so is memfd */
+#include <linux/memfd.h>
+#define MEMFD_SUPPORTED
+#endif
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+#include <numa.h>
+#include <numaif.h>
+#endif
+#include <linux/falloc.h>
+#include <linux/mman.h> /* for hugetlb-related mmap flags */
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_eal.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_spinlock.h>
+
+#include "eal_filesystem.h"
+#include "eal_internal_cfg.h"
+#include "eal_memalloc.h"
+#include "eal_memcfg.h"
+#include "eal_private.h"
+
+const int anonymous_hugepages_supported =
+#ifdef MAP_HUGE_SHIFT
+ 1;
+#define RTE_MAP_HUGE_SHIFT MAP_HUGE_SHIFT
+#else
+ 0;
+#define RTE_MAP_HUGE_SHIFT 26
+#endif
+
+/*
+ * we've already checked memfd support at compile-time, but we also need to
+ * check if we can create hugepage files with memfd.
+ *
+ * also, this is not a constant, because while we may be *compiled* with memfd
+ * hugetlbfs support, we might not be *running* on a system that supports memfd
+ * and/or memfd with hugetlbfs, so we need to be able to adjust this flag at
+ * runtime, and fall back to anonymous memory.
+ */
+static int memfd_create_supported =
+#ifdef MFD_HUGETLB
+ 1;
+#define RTE_MFD_HUGETLB MFD_HUGETLB
+#else
+ 0;
+#define RTE_MFD_HUGETLB 4U
+#endif
+
+/*
+ * not all kernel version support fallocate on hugetlbfs, so fall back to
+ * ftruncate and disallow deallocation if fallocate is not supported.
+ */
+static int fallocate_supported = -1; /* unknown */
+
+/*
+ * we have two modes - single file segments, and file-per-page mode.
+ *
+ * for single-file segments, we use memseg_list_fd to store the segment fd,
+ * while the fds[] will not be allocated, and len will be set to 0.
+ *
+ * for file-per-page mode, each page will have its own fd, so 'memseg_list_fd'
+ * will be invalid (set to -1), and we'll use 'fds' to keep track of page fd's.
+ *
+ * we cannot know how many pages a system will have in advance, but we do know
+ * that they come in lists, and we know lengths of these lists. so, simply store
+ * a malloc'd array of fd's indexed by list and segment index.
+ *
+ * they will be initialized at startup, and filled as we allocate/deallocate
+ * segments.
+ */
+static struct {
+ int *fds; /**< dynamically allocated array of segment lock fd's */
+ int memseg_list_fd; /**< memseg list fd */
+ int len; /**< total length of the array */
+ int count; /**< entries used in an array */
+} fd_list[RTE_MAX_MEMSEG_LISTS];
+
+/** local copy of a memory map, used to synchronize memory hotplug in MP */
+static struct rte_memseg_list local_memsegs[RTE_MAX_MEMSEG_LISTS];
+
+static sigjmp_buf huge_jmpenv;
+
+static void __rte_unused huge_sigbus_handler(int signo __rte_unused)
+{
+ siglongjmp(huge_jmpenv, 1);
+}
+
+/* Put setjmp into a wrap method to avoid compiling error. Any non-volatile,
+ * non-static local variable in the stack frame calling sigsetjmp might be
+ * clobbered by a call to longjmp.
+ */
+static int __rte_unused huge_wrap_sigsetjmp(void)
+{
+ return sigsetjmp(huge_jmpenv, 1);
+}
+
+static struct sigaction huge_action_old;
+static int huge_need_recover;
+
+static void __rte_unused
+huge_register_sigbus(void)
+{
+ sigset_t mask;
+ struct sigaction action;
+
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGBUS);
+ action.sa_flags = 0;
+ action.sa_mask = mask;
+ action.sa_handler = huge_sigbus_handler;
+
+ huge_need_recover = !sigaction(SIGBUS, &action, &huge_action_old);
+}
+
+static void __rte_unused
+huge_recover_sigbus(void)
+{
+ if (huge_need_recover) {
+ sigaction(SIGBUS, &huge_action_old, NULL);
+ huge_need_recover = 0;
+ }
+}
+
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+static bool
+check_numa(void)
+{
+ bool ret = true;
+ /* Check if kernel supports NUMA. */
+ if (numa_available() != 0) {
+ RTE_LOG(DEBUG, EAL, "NUMA is not supported.\n");
+ ret = false;
+ }
+ return ret;
+}
+
+static void
+prepare_numa(int *oldpolicy, struct bitmask *oldmask, int socket_id)
+{
+ RTE_LOG(DEBUG, EAL, "Trying to obtain current memory policy.\n");
+ if (get_mempolicy(oldpolicy, oldmask->maskp,
+ oldmask->size + 1, 0, 0) < 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to get current mempolicy: %s. "
+ "Assuming MPOL_DEFAULT.\n", strerror(errno));
+ *oldpolicy = MPOL_DEFAULT;
+ }
+ RTE_LOG(DEBUG, EAL,
+ "Setting policy MPOL_PREFERRED for socket %d\n",
+ socket_id);
+ numa_set_preferred(socket_id);
+}
+
+static void
+restore_numa(int *oldpolicy, struct bitmask *oldmask)
+{
+ RTE_LOG(DEBUG, EAL,
+ "Restoring previous memory policy: %d\n", *oldpolicy);
+ if (*oldpolicy == MPOL_DEFAULT) {
+ numa_set_localalloc();
+ } else if (set_mempolicy(*oldpolicy, oldmask->maskp,
+ oldmask->size + 1) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to restore mempolicy: %s\n",
+ strerror(errno));
+ numa_set_localalloc();
+ }
+ numa_free_cpumask(oldmask);
+}
+#endif
+
+/*
+ * uses fstat to report the size of a file on disk
+ */
+static off_t
+get_file_size(int fd)
+{
+ struct stat st;
+ if (fstat(fd, &st) < 0)
+ return 0;
+ return st.st_size;
+}
+
+static int
+pagesz_flags(uint64_t page_sz)
+{
+ /* as per mmap() manpage, all page sizes are log2 of page size
+ * shifted by MAP_HUGE_SHIFT
+ */
+ int log2 = rte_log2_u64(page_sz);
+ return log2 << RTE_MAP_HUGE_SHIFT;
+}
+
+/* returns 1 on successful lock, 0 on unsuccessful lock, -1 on error */
+static int lock(int fd, int type)
+{
+ int ret;
+
+ /* flock may be interrupted */
+ do {
+ ret = flock(fd, type | LOCK_NB);
+ } while (ret && errno == EINTR);
+
+ if (ret && errno == EWOULDBLOCK) {
+ /* couldn't lock */
+ return 0;
+ } else if (ret) {
+ RTE_LOG(ERR, EAL, "%s(): error calling flock(): %s\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+ /* lock was successful */
+ return 1;
+}
+
+static int
+get_seg_memfd(struct hugepage_info *hi __rte_unused,
+ unsigned int list_idx __rte_unused,
+ unsigned int seg_idx __rte_unused)
+{
+#ifdef MEMFD_SUPPORTED
+ int fd;
+ char segname[250]; /* as per manpage, limit is 249 bytes plus null */
+
+ int flags = RTE_MFD_HUGETLB | pagesz_flags(hi->hugepage_sz);
+
+ if (internal_config.single_file_segments) {
+ fd = fd_list[list_idx].memseg_list_fd;
+
+ if (fd < 0) {
+ snprintf(segname, sizeof(segname), "seg_%i", list_idx);
+ fd = memfd_create(segname, flags);
+ if (fd < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): memfd create failed: %s\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+ fd_list[list_idx].memseg_list_fd = fd;
+ }
+ } else {
+ fd = fd_list[list_idx].fds[seg_idx];
+
+ if (fd < 0) {
+ snprintf(segname, sizeof(segname), "seg_%i-%i",
+ list_idx, seg_idx);
+ fd = memfd_create(segname, flags);
+ if (fd < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): memfd create failed: %s\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+ fd_list[list_idx].fds[seg_idx] = fd;
+ }
+ }
+ return fd;
+#endif
+ return -1;
+}
+
+static int
+get_seg_fd(char *path, int buflen, struct hugepage_info *hi,
+ unsigned int list_idx, unsigned int seg_idx)
+{
+ int fd;
+
+ /* for in-memory mode, we only make it here when we're sure we support
+ * memfd, and this is a special case.
+ */
+ if (internal_config.in_memory)
+ return get_seg_memfd(hi, list_idx, seg_idx);
+
+ if (internal_config.single_file_segments) {
+ /* create a hugepage file path */
+ eal_get_hugefile_path(path, buflen, hi->hugedir, list_idx);
+
+ fd = fd_list[list_idx].memseg_list_fd;
+
+ if (fd < 0) {
+ fd = open(path, O_CREAT | O_RDWR, 0600);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): open failed: %s\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+ /* take out a read lock and keep it indefinitely */
+ if (lock(fd, LOCK_SH) < 0) {
+ RTE_LOG(ERR, EAL, "%s(): lock failed: %s\n",
+ __func__, strerror(errno));
+ close(fd);
+ return -1;
+ }
+ fd_list[list_idx].memseg_list_fd = fd;
+ }
+ } else {
+ /* create a hugepage file path */
+ eal_get_hugefile_path(path, buflen, hi->hugedir,
+ list_idx * RTE_MAX_MEMSEG_PER_LIST + seg_idx);
+
+ fd = fd_list[list_idx].fds[seg_idx];
+
+ if (fd < 0) {
+ fd = open(path, O_CREAT | O_RDWR, 0600);
+ if (fd < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+ /* take out a read lock */
+ if (lock(fd, LOCK_SH) < 0) {
+ RTE_LOG(ERR, EAL, "%s(): lock failed: %s\n",
+ __func__, strerror(errno));
+ close(fd);
+ return -1;
+ }
+ fd_list[list_idx].fds[seg_idx] = fd;
+ }
+ }
+ return fd;
+}
+
+static int
+resize_hugefile_in_memory(int fd, uint64_t fa_offset,
+ uint64_t page_sz, bool grow)
+{
+ int flags = grow ? 0 : FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_KEEP_SIZE;
+ int ret;
+
+ /* grow or shrink the file */
+ ret = fallocate(fd, flags, fa_offset, page_sz);
+
+ if (ret < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): fallocate() failed: %s\n",
+ __func__,
+ strerror(errno));
+ return -1;
+ }
+ return 0;
+}
+
+static int
+resize_hugefile_in_filesystem(int fd, uint64_t fa_offset, uint64_t page_sz,
+ bool grow)
+{
+ bool again = false;
+
+ do {
+ if (fallocate_supported == 0) {
+ /* we cannot deallocate memory if fallocate() is not
+ * supported, and hugepage file is already locked at
+ * creation, so no further synchronization needed.
+ */
+
+ if (!grow) {
+ RTE_LOG(DEBUG, EAL, "%s(): fallocate not supported, not freeing page back to the system\n",
+ __func__);
+ return -1;
+ }
+ uint64_t new_size = fa_offset + page_sz;
+ uint64_t cur_size = get_file_size(fd);
+
+ /* fallocate isn't supported, fall back to ftruncate */
+ if (new_size > cur_size &&
+ ftruncate(fd, new_size) < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): ftruncate() failed: %s\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+ } else {
+ int flags = grow ? 0 : FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_KEEP_SIZE;
+ int ret;
+
+ /*
+ * technically, it is perfectly safe for both primary
+ * and secondary to grow and shrink the page files:
+ * growing the file repeatedly has no effect because
+ * a page can only be allocated once, while mmap ensures
+ * that secondaries hold on to the page even after the
+ * page itself is removed from the filesystem.
+ *
+ * however, leaving growing/shrinking to the primary
+ * tends to expose bugs in fdlist page count handling,
+ * so leave this here just in case.
+ */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return 0;
+
+ /* grow or shrink the file */
+ ret = fallocate(fd, flags, fa_offset, page_sz);
+
+ if (ret < 0) {
+ if (fallocate_supported == -1 &&
+ errno == ENOTSUP) {
+ RTE_LOG(ERR, EAL, "%s(): fallocate() not supported, hugepage deallocation will be disabled\n",
+ __func__);
+ again = true;
+ fallocate_supported = 0;
+ } else {
+ RTE_LOG(DEBUG, EAL, "%s(): fallocate() failed: %s\n",
+ __func__,
+ strerror(errno));
+ return -1;
+ }
+ } else
+ fallocate_supported = 1;
+ }
+ } while (again);
+
+ return 0;
+}
+
+static void
+close_hugefile(int fd, char *path, int list_idx)
+{
+ /*
+ * primary process must unlink the file, but only when not in in-memory
+ * mode (as in that case there is no file to unlink).
+ */
+ if (!internal_config.in_memory &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY &&
+ unlink(path))
+ RTE_LOG(ERR, EAL, "%s(): unlinking '%s' failed: %s\n",
+ __func__, path, strerror(errno));
+
+ close(fd);
+ fd_list[list_idx].memseg_list_fd = -1;
+}
+
+static int
+resize_hugefile(int fd, uint64_t fa_offset, uint64_t page_sz, bool grow)
+{
+ /* in-memory mode is a special case, because we can be sure that
+ * fallocate() is supported.
+ */
+ if (internal_config.in_memory)
+ return resize_hugefile_in_memory(fd, fa_offset,
+ page_sz, grow);
+
+ return resize_hugefile_in_filesystem(fd, fa_offset, page_sz,
+ grow);
+}
+
+static int
+alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
+ struct hugepage_info *hi, unsigned int list_idx,
+ unsigned int seg_idx)
+{
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ int cur_socket_id = 0;
+#endif
+ uint64_t map_offset;
+ rte_iova_t iova;
+ void *va;
+ char path[PATH_MAX];
+ int ret = 0;
+ int fd;
+ size_t alloc_sz;
+ int flags;
+ void *new_addr;
+
+ alloc_sz = hi->hugepage_sz;
+
+ /* these are checked at init, but code analyzers don't know that */
+ if (internal_config.in_memory && !anonymous_hugepages_supported) {
+ RTE_LOG(ERR, EAL, "Anonymous hugepages not supported, in-memory mode cannot allocate memory\n");
+ return -1;
+ }
+ if (internal_config.in_memory && !memfd_create_supported &&
+ internal_config.single_file_segments) {
+ RTE_LOG(ERR, EAL, "Single-file segments are not supported without memfd support\n");
+ return -1;
+ }
+
+ /* in-memory without memfd is a special case */
+ int mmap_flags;
+
+ if (internal_config.in_memory && !memfd_create_supported) {
+ const int in_memory_flags = MAP_HUGETLB | MAP_FIXED |
+ MAP_PRIVATE | MAP_ANONYMOUS;
+ int pagesz_flag;
+
+ pagesz_flag = pagesz_flags(alloc_sz);
+ fd = -1;
+ mmap_flags = in_memory_flags | pagesz_flag;
+
+ /* single-file segments codepath will never be active
+ * here because in-memory mode is incompatible with the
+ * fallback path, and it's stopped at EAL initialization
+ * stage.
+ */
+ map_offset = 0;
+ } else {
+ /* takes out a read lock on segment or segment list */
+ fd = get_seg_fd(path, sizeof(path), hi, list_idx, seg_idx);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Couldn't get fd on hugepage file\n");
+ return -1;
+ }
+
+ if (internal_config.single_file_segments) {
+ map_offset = seg_idx * alloc_sz;
+ ret = resize_hugefile(fd, map_offset, alloc_sz, true);
+ if (ret < 0)
+ goto resized;
+
+ fd_list[list_idx].count++;
+ } else {
+ map_offset = 0;
+ if (ftruncate(fd, alloc_sz) < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): ftruncate() failed: %s\n",
+ __func__, strerror(errno));
+ goto resized;
+ }
+ if (internal_config.hugepage_unlink &&
+ !internal_config.in_memory) {
+ if (unlink(path)) {
+ RTE_LOG(DEBUG, EAL, "%s(): unlink() failed: %s\n",
+ __func__, strerror(errno));
+ goto resized;
+ }
+ }
+ }
+ mmap_flags = MAP_SHARED | MAP_POPULATE | MAP_FIXED;
+ }
+
+ /*
+ * map the segment, and populate page tables, the kernel fills
+ * this segment with zeros if it's a new page.
+ */
+ va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE, mmap_flags, fd,
+ map_offset);
+
+ if (va == MAP_FAILED) {
+ RTE_LOG(DEBUG, EAL, "%s(): mmap() failed: %s\n", __func__,
+ strerror(errno));
+ /* mmap failed, but the previous region might have been
+ * unmapped anyway. try to remap it
+ */
+ goto unmapped;
+ }
+ if (va != addr) {
+ RTE_LOG(DEBUG, EAL, "%s(): wrong mmap() address\n", __func__);
+ munmap(va, alloc_sz);
+ goto resized;
+ }
+
+ /* In linux, hugetlb limitations, like cgroup, are
+ * enforced at fault time instead of mmap(), even
+ * with the option of MAP_POPULATE. Kernel will send
+ * a SIGBUS signal. To avoid to be killed, save stack
+ * environment here, if SIGBUS happens, we can jump
+ * back here.
+ */
+ if (huge_wrap_sigsetjmp()) {
+ RTE_LOG(DEBUG, EAL, "SIGBUS: Cannot mmap more hugepages of size %uMB\n",
+ (unsigned int)(alloc_sz >> 20));
+ goto mapped;
+ }
+
+ /* we need to trigger a write to the page to enforce page fault and
+ * ensure that page is accessible to us, but we can't overwrite value
+ * that is already there, so read the old value, and write itback.
+ * kernel populates the page with zeroes initially.
+ */
+ *(volatile int *)addr = *(volatile int *)addr;
+
+ iova = rte_mem_virt2iova(addr);
+ if (iova == RTE_BAD_PHYS_ADDR) {
+ RTE_LOG(DEBUG, EAL, "%s(): can't get IOVA addr\n",
+ __func__);
+ goto mapped;
+ }
+
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ ret = get_mempolicy(&cur_socket_id, NULL, 0, addr,
+ MPOL_F_NODE | MPOL_F_ADDR);
+ if (ret < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): get_mempolicy: %s\n",
+ __func__, strerror(errno));
+ goto mapped;
+ } else if (cur_socket_id != socket_id) {
+ RTE_LOG(DEBUG, EAL,
+ "%s(): allocation happened on wrong socket (wanted %d, got %d)\n",
+ __func__, socket_id, cur_socket_id);
+ goto mapped;
+ }
+#else
+ if (rte_socket_count() > 1)
+ RTE_LOG(DEBUG, EAL, "%s(): not checking hugepage NUMA node.\n",
+ __func__);
+#endif
+
+ ms->addr = addr;
+ ms->hugepage_sz = alloc_sz;
+ ms->len = alloc_sz;
+ ms->nchannel = rte_memory_get_nchannel();
+ ms->nrank = rte_memory_get_nrank();
+ ms->iova = iova;
+ ms->socket_id = socket_id;
+
+ return 0;
+
+mapped:
+ munmap(addr, alloc_sz);
+unmapped:
+ flags = MAP_FIXED;
+ new_addr = eal_get_virtual_area(addr, &alloc_sz, alloc_sz, 0, flags);
+ if (new_addr != addr) {
+ if (new_addr != NULL)
+ munmap(new_addr, alloc_sz);
+ /* we're leaving a hole in our virtual address space. if
+ * somebody else maps this hole now, we could accidentally
+ * override it in the future.
+ */
+ RTE_LOG(CRIT, EAL, "Can't mmap holes in our virtual address space\n");
+ }
+ /* roll back the ref count */
+ if (internal_config.single_file_segments)
+ fd_list[list_idx].count--;
+resized:
+ /* some codepaths will return negative fd, so exit early */
+ if (fd < 0)
+ return -1;
+
+ if (internal_config.single_file_segments) {
+ resize_hugefile(fd, map_offset, alloc_sz, false);
+ /* ignore failure, can't make it any worse */
+
+ /* if refcount is at zero, close the file */
+ if (fd_list[list_idx].count == 0)
+ close_hugefile(fd, path, list_idx);
+ } else {
+ /* only remove file if we can take out a write lock */
+ if (internal_config.hugepage_unlink == 0 &&
+ internal_config.in_memory == 0 &&
+ lock(fd, LOCK_EX) == 1)
+ unlink(path);
+ close(fd);
+ fd_list[list_idx].fds[seg_idx] = -1;
+ }
+ return -1;
+}
+
+static int
+free_seg(struct rte_memseg *ms, struct hugepage_info *hi,
+ unsigned int list_idx, unsigned int seg_idx)
+{
+ uint64_t map_offset;
+ char path[PATH_MAX];
+ int fd, ret = 0;
+ bool exit_early;
+
+ /* erase page data */
+ memset(ms->addr, 0, ms->len);
+
+ if (mmap(ms->addr, ms->len, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) ==
+ MAP_FAILED) {
+ RTE_LOG(DEBUG, EAL, "couldn't unmap page\n");
+ return -1;
+ }
+
+ if (madvise(ms->addr, ms->len, MADV_DONTDUMP) != 0)
+ RTE_LOG(DEBUG, EAL, "madvise failed: %s\n", strerror(errno));
+
+ exit_early = false;
+
+ /* if we're using anonymous hugepages, nothing to be done */
+ if (internal_config.in_memory && !memfd_create_supported)
+ exit_early = true;
+
+ /* if we've already unlinked the page, nothing needs to be done */
+ if (!internal_config.in_memory && internal_config.hugepage_unlink)
+ exit_early = true;
+
+ if (exit_early) {
+ memset(ms, 0, sizeof(*ms));
+ return 0;
+ }
+
+ /* if we are not in single file segments mode, we're going to unmap the
+ * segment and thus drop the lock on original fd, but hugepage dir is
+ * now locked so we can take out another one without races.
+ */
+ fd = get_seg_fd(path, sizeof(path), hi, list_idx, seg_idx);
+ if (fd < 0)
+ return -1;
+
+ if (internal_config.single_file_segments) {
+ map_offset = seg_idx * ms->len;
+ if (resize_hugefile(fd, map_offset, ms->len, false))
+ return -1;
+
+ if (--(fd_list[list_idx].count) == 0)
+ close_hugefile(fd, path, list_idx);
+
+ ret = 0;
+ } else {
+ /* if we're able to take out a write lock, we're the last one
+ * holding onto this page.
+ */
+ if (!internal_config.in_memory) {
+ ret = lock(fd, LOCK_EX);
+ if (ret >= 0) {
+ /* no one else is using this page */
+ if (ret == 1)
+ unlink(path);
+ }
+ }
+ /* closing fd will drop the lock */
+ close(fd);
+ fd_list[list_idx].fds[seg_idx] = -1;
+ }
+
+ memset(ms, 0, sizeof(*ms));
+
+ return ret < 0 ? -1 : 0;
+}
+
+struct alloc_walk_param {
+ struct hugepage_info *hi;
+ struct rte_memseg **ms;
+ size_t page_sz;
+ unsigned int segs_allocated;
+ unsigned int n_segs;
+ int socket;
+ bool exact;
+};
+static int
+alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct alloc_walk_param *wa = arg;
+ struct rte_memseg_list *cur_msl;
+ size_t page_sz;
+ int cur_idx, start_idx, j, dir_fd = -1;
+ unsigned int msl_idx, need, i;
+
+ if (msl->page_sz != wa->page_sz)
+ return 0;
+ if (msl->socket_id != wa->socket)
+ return 0;
+
+ page_sz = (size_t)msl->page_sz;
+
+ msl_idx = msl - mcfg->memsegs;
+ cur_msl = &mcfg->memsegs[msl_idx];
+
+ need = wa->n_segs;
+
+ /* try finding space in memseg list */
+ if (wa->exact) {
+ /* if we require exact number of pages in a list, find them */
+ cur_idx = rte_fbarray_find_next_n_free(&cur_msl->memseg_arr, 0,
+ need);
+ if (cur_idx < 0)
+ return 0;
+ start_idx = cur_idx;
+ } else {
+ int cur_len;
+
+ /* we don't require exact number of pages, so we're going to go
+ * for best-effort allocation. that means finding the biggest
+ * unused block, and going with that.
+ */
+ cur_idx = rte_fbarray_find_biggest_free(&cur_msl->memseg_arr,
+ 0);
+ if (cur_idx < 0)
+ return 0;
+ start_idx = cur_idx;
+ /* adjust the size to possibly be smaller than original
+ * request, but do not allow it to be bigger.
+ */
+ cur_len = rte_fbarray_find_contig_free(&cur_msl->memseg_arr,
+ cur_idx);
+ need = RTE_MIN(need, (unsigned int)cur_len);
+ }
+
+ /* do not allow any page allocations during the time we're allocating,
+ * because file creation and locking operations are not atomic,
+ * and we might be the first or the last ones to use a particular page,
+ * so we need to ensure atomicity of every operation.
+ *
+ * during init, we already hold a write lock, so don't try to take out
+ * another one.
+ */
+ if (wa->hi->lock_descriptor == -1 && !internal_config.in_memory) {
+ dir_fd = open(wa->hi->hugedir, O_RDONLY);
+ if (dir_fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n",
+ __func__, wa->hi->hugedir, strerror(errno));
+ return -1;
+ }
+ /* blocking writelock */
+ if (flock(dir_fd, LOCK_EX)) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot lock '%s': %s\n",
+ __func__, wa->hi->hugedir, strerror(errno));
+ close(dir_fd);
+ return -1;
+ }
+ }
+
+ for (i = 0; i < need; i++, cur_idx++) {
+ struct rte_memseg *cur;
+ void *map_addr;
+
+ cur = rte_fbarray_get(&cur_msl->memseg_arr, cur_idx);
+ map_addr = RTE_PTR_ADD(cur_msl->base_va,
+ cur_idx * page_sz);
+
+ if (alloc_seg(cur, map_addr, wa->socket, wa->hi,
+ msl_idx, cur_idx)) {
+ RTE_LOG(DEBUG, EAL, "attempted to allocate %i segments, but only %i were allocated\n",
+ need, i);
+
+ /* if exact number wasn't requested, stop */
+ if (!wa->exact)
+ goto out;
+
+ /* clean up */
+ for (j = start_idx; j < cur_idx; j++) {
+ struct rte_memseg *tmp;
+ struct rte_fbarray *arr =
+ &cur_msl->memseg_arr;
+
+ tmp = rte_fbarray_get(arr, j);
+ rte_fbarray_set_free(arr, j);
+
+ /* free_seg may attempt to create a file, which
+ * may fail.
+ */
+ if (free_seg(tmp, wa->hi, msl_idx, j))
+ RTE_LOG(DEBUG, EAL, "Cannot free page\n");
+ }
+ /* clear the list */
+ if (wa->ms)
+ memset(wa->ms, 0, sizeof(*wa->ms) * wa->n_segs);
+
+ if (dir_fd >= 0)
+ close(dir_fd);
+ return -1;
+ }
+ if (wa->ms)
+ wa->ms[i] = cur;
+
+ rte_fbarray_set_used(&cur_msl->memseg_arr, cur_idx);
+ }
+out:
+ wa->segs_allocated = i;
+ if (i > 0)
+ cur_msl->version++;
+ if (dir_fd >= 0)
+ close(dir_fd);
+ /* if we didn't allocate any segments, move on to the next list */
+ return i > 0;
+}
+
+struct free_walk_param {
+ struct hugepage_info *hi;
+ struct rte_memseg *ms;
+};
+static int
+free_seg_walk(const struct rte_memseg_list *msl, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *found_msl;
+ struct free_walk_param *wa = arg;
+ uintptr_t start_addr, end_addr;
+ int msl_idx, seg_idx, ret, dir_fd = -1;
+
+ start_addr = (uintptr_t) msl->base_va;
+ end_addr = start_addr + msl->len;
+
+ if ((uintptr_t)wa->ms->addr < start_addr ||
+ (uintptr_t)wa->ms->addr >= end_addr)
+ return 0;
+
+ msl_idx = msl - mcfg->memsegs;
+ seg_idx = RTE_PTR_DIFF(wa->ms->addr, start_addr) / msl->page_sz;
+
+ /* msl is const */
+ found_msl = &mcfg->memsegs[msl_idx];
+
+ /* do not allow any page allocations during the time we're freeing,
+ * because file creation and locking operations are not atomic,
+ * and we might be the first or the last ones to use a particular page,
+ * so we need to ensure atomicity of every operation.
+ *
+ * during init, we already hold a write lock, so don't try to take out
+ * another one.
+ */
+ if (wa->hi->lock_descriptor == -1 && !internal_config.in_memory) {
+ dir_fd = open(wa->hi->hugedir, O_RDONLY);
+ if (dir_fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n",
+ __func__, wa->hi->hugedir, strerror(errno));
+ return -1;
+ }
+ /* blocking writelock */
+ if (flock(dir_fd, LOCK_EX)) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot lock '%s': %s\n",
+ __func__, wa->hi->hugedir, strerror(errno));
+ close(dir_fd);
+ return -1;
+ }
+ }
+
+ found_msl->version++;
+
+ rte_fbarray_set_free(&found_msl->memseg_arr, seg_idx);
+
+ ret = free_seg(wa->ms, wa->hi, msl_idx, seg_idx);
+
+ if (dir_fd >= 0)
+ close(dir_fd);
+
+ if (ret < 0)
+ return -1;
+
+ return 1;
+}
+
+int
+eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs, size_t page_sz,
+ int socket, bool exact)
+{
+ int i, ret = -1;
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ bool have_numa = false;
+ int oldpolicy;
+ struct bitmask *oldmask;
+#endif
+ struct alloc_walk_param wa;
+ struct hugepage_info *hi = NULL;
+
+ memset(&wa, 0, sizeof(wa));
+
+ /* dynamic allocation not supported in legacy mode */
+ if (internal_config.legacy_mem)
+ return -1;
+
+ for (i = 0; i < (int) RTE_DIM(internal_config.hugepage_info); i++) {
+ if (page_sz ==
+ internal_config.hugepage_info[i].hugepage_sz) {
+ hi = &internal_config.hugepage_info[i];
+ break;
+ }
+ }
+ if (!hi) {
+ RTE_LOG(ERR, EAL, "%s(): can't find relevant hugepage_info entry\n",
+ __func__);
+ return -1;
+ }
+
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (check_numa()) {
+ oldmask = numa_allocate_nodemask();
+ prepare_numa(&oldpolicy, oldmask, socket);
+ have_numa = true;
+ }
+#endif
+
+ wa.exact = exact;
+ wa.hi = hi;
+ wa.ms = ms;
+ wa.n_segs = n_segs;
+ wa.page_sz = page_sz;
+ wa.socket = socket;
+ wa.segs_allocated = 0;
+
+ /* memalloc is locked, so it's safe to use thread-unsafe version */
+ ret = rte_memseg_list_walk_thread_unsafe(alloc_seg_walk, &wa);
+ if (ret == 0) {
+ RTE_LOG(ERR, EAL, "%s(): couldn't find suitable memseg_list\n",
+ __func__);
+ ret = -1;
+ } else if (ret > 0) {
+ ret = (int)wa.segs_allocated;
+ }
+
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (have_numa)
+ restore_numa(&oldpolicy, oldmask);
+#endif
+ return ret;
+}
+
+struct rte_memseg *
+eal_memalloc_alloc_seg(size_t page_sz, int socket)
+{
+ struct rte_memseg *ms;
+ if (eal_memalloc_alloc_seg_bulk(&ms, 1, page_sz, socket, true) < 0)
+ return NULL;
+ /* return pointer to newly allocated memseg */
+ return ms;
+}
+
+int
+eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs)
+{
+ int seg, ret = 0;
+
+ /* dynamic free not supported in legacy mode */
+ if (internal_config.legacy_mem)
+ return -1;
+
+ for (seg = 0; seg < n_segs; seg++) {
+ struct rte_memseg *cur = ms[seg];
+ struct hugepage_info *hi = NULL;
+ struct free_walk_param wa;
+ int i, walk_res;
+
+ /* if this page is marked as unfreeable, fail */
+ if (cur->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) {
+ RTE_LOG(DEBUG, EAL, "Page is not allowed to be freed\n");
+ ret = -1;
+ continue;
+ }
+
+ memset(&wa, 0, sizeof(wa));
+
+ for (i = 0; i < (int)RTE_DIM(internal_config.hugepage_info);
+ i++) {
+ hi = &internal_config.hugepage_info[i];
+ if (cur->hugepage_sz == hi->hugepage_sz)
+ break;
+ }
+ if (i == (int)RTE_DIM(internal_config.hugepage_info)) {
+ RTE_LOG(ERR, EAL, "Can't find relevant hugepage_info entry\n");
+ ret = -1;
+ continue;
+ }
+
+ wa.ms = cur;
+ wa.hi = hi;
+
+ /* memalloc is locked, so it's safe to use thread-unsafe version
+ */
+ walk_res = rte_memseg_list_walk_thread_unsafe(free_seg_walk,
+ &wa);
+ if (walk_res == 1)
+ continue;
+ if (walk_res == 0)
+ RTE_LOG(ERR, EAL, "Couldn't find memseg list\n");
+ ret = -1;
+ }
+ return ret;
+}
+
+int
+eal_memalloc_free_seg(struct rte_memseg *ms)
+{
+ /* dynamic free not supported in legacy mode */
+ if (internal_config.legacy_mem)
+ return -1;
+
+ return eal_memalloc_free_seg_bulk(&ms, 1);
+}
+
+static int
+sync_chunk(struct rte_memseg_list *primary_msl,
+ struct rte_memseg_list *local_msl, struct hugepage_info *hi,
+ unsigned int msl_idx, bool used, int start, int end)
+{
+ struct rte_fbarray *l_arr, *p_arr;
+ int i, ret, chunk_len, diff_len;
+
+ l_arr = &local_msl->memseg_arr;
+ p_arr = &primary_msl->memseg_arr;
+
+ /* we need to aggregate allocations/deallocations into bigger chunks,
+ * as we don't want to spam the user with per-page callbacks.
+ *
+ * to avoid any potential issues, we also want to trigger
+ * deallocation callbacks *before* we actually deallocate
+ * memory, so that the user application could wrap up its use
+ * before it goes away.
+ */
+
+ chunk_len = end - start;
+
+ /* find how many contiguous pages we can map/unmap for this chunk */
+ diff_len = used ?
+ rte_fbarray_find_contig_free(l_arr, start) :
+ rte_fbarray_find_contig_used(l_arr, start);
+
+ /* has to be at least one page */
+ if (diff_len < 1)
+ return -1;
+
+ diff_len = RTE_MIN(chunk_len, diff_len);
+
+ /* if we are freeing memory, notify the application */
+ if (!used) {
+ struct rte_memseg *ms;
+ void *start_va;
+ size_t len, page_sz;
+
+ ms = rte_fbarray_get(l_arr, start);
+ start_va = ms->addr;
+ page_sz = (size_t)primary_msl->page_sz;
+ len = page_sz * diff_len;
+
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ start_va, len);
+ }
+
+ for (i = 0; i < diff_len; i++) {
+ struct rte_memseg *p_ms, *l_ms;
+ int seg_idx = start + i;
+
+ l_ms = rte_fbarray_get(l_arr, seg_idx);
+ p_ms = rte_fbarray_get(p_arr, seg_idx);
+
+ if (l_ms == NULL || p_ms == NULL)
+ return -1;
+
+ if (used) {
+ ret = alloc_seg(l_ms, p_ms->addr,
+ p_ms->socket_id, hi,
+ msl_idx, seg_idx);
+ if (ret < 0)
+ return -1;
+ rte_fbarray_set_used(l_arr, seg_idx);
+ } else {
+ ret = free_seg(l_ms, hi, msl_idx, seg_idx);
+ rte_fbarray_set_free(l_arr, seg_idx);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ /* if we just allocated memory, notify the application */
+ if (used) {
+ struct rte_memseg *ms;
+ void *start_va;
+ size_t len, page_sz;
+
+ ms = rte_fbarray_get(l_arr, start);
+ start_va = ms->addr;
+ page_sz = (size_t)primary_msl->page_sz;
+ len = page_sz * diff_len;
+
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC,
+ start_va, len);
+ }
+
+ /* calculate how much we can advance until next chunk */
+ diff_len = used ?
+ rte_fbarray_find_contig_used(l_arr, start) :
+ rte_fbarray_find_contig_free(l_arr, start);
+ ret = RTE_MIN(chunk_len, diff_len);
+
+ return ret;
+}
+
+static int
+sync_status(struct rte_memseg_list *primary_msl,
+ struct rte_memseg_list *local_msl, struct hugepage_info *hi,
+ unsigned int msl_idx, bool used)
+{
+ struct rte_fbarray *l_arr, *p_arr;
+ int p_idx, l_chunk_len, p_chunk_len, ret;
+ int start, end;
+
+ /* this is a little bit tricky, but the basic idea is - walk both lists
+ * and spot any places where there are discrepancies. walking both lists
+ * and noting discrepancies in a single go is a hard problem, so we do
+ * it in two passes - first we spot any places where allocated segments
+ * mismatch (i.e. ensure that everything that's allocated in the primary
+ * is also allocated in the secondary), and then we do it by looking at
+ * free segments instead.
+ *
+ * we also need to aggregate changes into chunks, as we have to call
+ * callbacks per allocation, not per page.
+ */
+ l_arr = &local_msl->memseg_arr;
+ p_arr = &primary_msl->memseg_arr;
+
+ if (used)
+ p_idx = rte_fbarray_find_next_used(p_arr, 0);
+ else
+ p_idx = rte_fbarray_find_next_free(p_arr, 0);
+
+ while (p_idx >= 0) {
+ int next_chunk_search_idx;
+
+ if (used) {
+ p_chunk_len = rte_fbarray_find_contig_used(p_arr,
+ p_idx);
+ l_chunk_len = rte_fbarray_find_contig_used(l_arr,
+ p_idx);
+ } else {
+ p_chunk_len = rte_fbarray_find_contig_free(p_arr,
+ p_idx);
+ l_chunk_len = rte_fbarray_find_contig_free(l_arr,
+ p_idx);
+ }
+ /* best case scenario - no differences (or bigger, which will be
+ * fixed during next iteration), look for next chunk
+ */
+ if (l_chunk_len >= p_chunk_len) {
+ next_chunk_search_idx = p_idx + p_chunk_len;
+ goto next_chunk;
+ }
+
+ /* if both chunks start at the same point, skip parts we know
+ * are identical, and sync the rest. each call to sync_chunk
+ * will only sync contiguous segments, so we need to call this
+ * until we are sure there are no more differences in this
+ * chunk.
+ */
+ start = p_idx + l_chunk_len;
+ end = p_idx + p_chunk_len;
+ do {
+ ret = sync_chunk(primary_msl, local_msl, hi, msl_idx,
+ used, start, end);
+ start += ret;
+ } while (start < end && ret >= 0);
+ /* if ret is negative, something went wrong */
+ if (ret < 0)
+ return -1;
+
+ next_chunk_search_idx = p_idx + p_chunk_len;
+next_chunk:
+ /* skip to end of this chunk */
+ if (used) {
+ p_idx = rte_fbarray_find_next_used(p_arr,
+ next_chunk_search_idx);
+ } else {
+ p_idx = rte_fbarray_find_next_free(p_arr,
+ next_chunk_search_idx);
+ }
+ }
+ return 0;
+}
+
+static int
+sync_existing(struct rte_memseg_list *primary_msl,
+ struct rte_memseg_list *local_msl, struct hugepage_info *hi,
+ unsigned int msl_idx)
+{
+ int ret, dir_fd;
+
+ /* do not allow any page allocations during the time we're allocating,
+ * because file creation and locking operations are not atomic,
+ * and we might be the first or the last ones to use a particular page,
+ * so we need to ensure atomicity of every operation.
+ */
+ dir_fd = open(hi->hugedir, O_RDONLY);
+ if (dir_fd < 0) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n", __func__,
+ hi->hugedir, strerror(errno));
+ return -1;
+ }
+ /* blocking writelock */
+ if (flock(dir_fd, LOCK_EX)) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot lock '%s': %s\n", __func__,
+ hi->hugedir, strerror(errno));
+ close(dir_fd);
+ return -1;
+ }
+
+ /* ensure all allocated space is the same in both lists */
+ ret = sync_status(primary_msl, local_msl, hi, msl_idx, true);
+ if (ret < 0)
+ goto fail;
+
+ /* ensure all unallocated space is the same in both lists */
+ ret = sync_status(primary_msl, local_msl, hi, msl_idx, false);
+ if (ret < 0)
+ goto fail;
+
+ /* update version number */
+ local_msl->version = primary_msl->version;
+
+ close(dir_fd);
+
+ return 0;
+fail:
+ close(dir_fd);
+ return -1;
+}
+
+static int
+sync_walk(const struct rte_memseg_list *msl, void *arg __rte_unused)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *primary_msl, *local_msl;
+ struct hugepage_info *hi = NULL;
+ unsigned int i;
+ int msl_idx;
+
+ if (msl->external)
+ return 0;
+
+ msl_idx = msl - mcfg->memsegs;
+ primary_msl = &mcfg->memsegs[msl_idx];
+ local_msl = &local_memsegs[msl_idx];
+
+ for (i = 0; i < RTE_DIM(internal_config.hugepage_info); i++) {
+ uint64_t cur_sz =
+ internal_config.hugepage_info[i].hugepage_sz;
+ uint64_t msl_sz = primary_msl->page_sz;
+ if (msl_sz == cur_sz) {
+ hi = &internal_config.hugepage_info[i];
+ break;
+ }
+ }
+ if (!hi) {
+ RTE_LOG(ERR, EAL, "Can't find relevant hugepage_info entry\n");
+ return -1;
+ }
+
+ /* if versions don't match, synchronize everything */
+ if (local_msl->version != primary_msl->version &&
+ sync_existing(primary_msl, local_msl, hi, msl_idx))
+ return -1;
+ return 0;
+}
+
+
+int
+eal_memalloc_sync_with_primary(void)
+{
+ /* nothing to be done in primary */
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ return 0;
+
+ /* memalloc is locked, so it's safe to call thread-unsafe version */
+ if (rte_memseg_list_walk_thread_unsafe(sync_walk, NULL))
+ return -1;
+ return 0;
+}
+
+static int
+secondary_msl_create_walk(const struct rte_memseg_list *msl,
+ void *arg __rte_unused)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *primary_msl, *local_msl;
+ char name[PATH_MAX];
+ int msl_idx, ret;
+
+ if (msl->external)
+ return 0;
+
+ msl_idx = msl - mcfg->memsegs;
+ primary_msl = &mcfg->memsegs[msl_idx];
+ local_msl = &local_memsegs[msl_idx];
+
+ /* create distinct fbarrays for each secondary */
+ snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%i",
+ primary_msl->memseg_arr.name, getpid());
+
+ ret = rte_fbarray_init(&local_msl->memseg_arr, name,
+ primary_msl->memseg_arr.len,
+ primary_msl->memseg_arr.elt_sz);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Cannot initialize local memory map\n");
+ return -1;
+ }
+ local_msl->base_va = primary_msl->base_va;
+ local_msl->len = primary_msl->len;
+
+ return 0;
+}
+
+static int
+alloc_list(int list_idx, int len)
+{
+ int *data;
+ int i;
+
+ /* single-file segments mode does not need fd list */
+ if (!internal_config.single_file_segments) {
+ /* ensure we have space to store fd per each possible segment */
+ data = malloc(sizeof(int) * len);
+ if (data == NULL) {
+ RTE_LOG(ERR, EAL, "Unable to allocate space for file descriptors\n");
+ return -1;
+ }
+ /* set all fd's as invalid */
+ for (i = 0; i < len; i++)
+ data[i] = -1;
+ fd_list[list_idx].fds = data;
+ fd_list[list_idx].len = len;
+ } else {
+ fd_list[list_idx].fds = NULL;
+ fd_list[list_idx].len = 0;
+ }
+
+ fd_list[list_idx].count = 0;
+ fd_list[list_idx].memseg_list_fd = -1;
+
+ return 0;
+}
+
+static int
+fd_list_create_walk(const struct rte_memseg_list *msl,
+ void *arg __rte_unused)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int len;
+ int msl_idx;
+
+ if (msl->external)
+ return 0;
+
+ msl_idx = msl - mcfg->memsegs;
+ len = msl->memseg_arr.len;
+
+ return alloc_list(msl_idx, len);
+}
+
+int
+eal_memalloc_set_seg_fd(int list_idx, int seg_idx, int fd)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* single file segments mode doesn't support individual segment fd's */
+ if (internal_config.single_file_segments)
+ return -ENOTSUP;
+
+ /* if list is not allocated, allocate it */
+ if (fd_list[list_idx].len == 0) {
+ int len = mcfg->memsegs[list_idx].memseg_arr.len;
+
+ if (alloc_list(list_idx, len) < 0)
+ return -ENOMEM;
+ }
+ fd_list[list_idx].fds[seg_idx] = fd;
+
+ return 0;
+}
+
+int
+eal_memalloc_set_seg_list_fd(int list_idx, int fd)
+{
+ /* non-single file segment mode doesn't support segment list fd's */
+ if (!internal_config.single_file_segments)
+ return -ENOTSUP;
+
+ fd_list[list_idx].memseg_list_fd = fd;
+
+ return 0;
+}
+
+int
+eal_memalloc_get_seg_fd(int list_idx, int seg_idx)
+{
+ int fd;
+
+ if (internal_config.in_memory || internal_config.no_hugetlbfs) {
+#ifndef MEMFD_SUPPORTED
+ /* in in-memory or no-huge mode, we rely on memfd support */
+ return -ENOTSUP;
+#endif
+ /* memfd supported, but hugetlbfs memfd may not be */
+ if (!internal_config.no_hugetlbfs && !memfd_create_supported)
+ return -ENOTSUP;
+ }
+
+ if (internal_config.single_file_segments) {
+ fd = fd_list[list_idx].memseg_list_fd;
+ } else if (fd_list[list_idx].len == 0) {
+ /* list not initialized */
+ fd = -1;
+ } else {
+ fd = fd_list[list_idx].fds[seg_idx];
+ }
+ if (fd < 0)
+ return -ENODEV;
+ return fd;
+}
+
+static int
+test_memfd_create(void)
+{
+#ifdef MEMFD_SUPPORTED
+ unsigned int i;
+ for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
+ uint64_t pagesz = internal_config.hugepage_info[i].hugepage_sz;
+ int pagesz_flag = pagesz_flags(pagesz);
+ int flags;
+
+ flags = pagesz_flag | RTE_MFD_HUGETLB;
+ int fd = memfd_create("test", flags);
+ if (fd < 0) {
+ /* we failed - let memalloc know this isn't working */
+ if (errno == EINVAL) {
+ memfd_create_supported = 0;
+ return 0; /* not supported */
+ }
+
+ /* we got other error - something's wrong */
+ return -1; /* error */
+ }
+ close(fd);
+ return 1; /* supported */
+ }
+#endif
+ return 0; /* not supported */
+}
+
+int
+eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+ if (internal_config.in_memory || internal_config.no_hugetlbfs) {
+#ifndef MEMFD_SUPPORTED
+ /* in in-memory or no-huge mode, we rely on memfd support */
+ return -ENOTSUP;
+#endif
+ /* memfd supported, but hugetlbfs memfd may not be */
+ if (!internal_config.no_hugetlbfs && !memfd_create_supported)
+ return -ENOTSUP;
+ }
+
+ if (internal_config.single_file_segments) {
+ size_t pgsz = mcfg->memsegs[list_idx].page_sz;
+
+ /* segment not active? */
+ if (fd_list[list_idx].memseg_list_fd < 0)
+ return -ENOENT;
+ *offset = pgsz * seg_idx;
+ } else {
+ /* fd_list not initialized? */
+ if (fd_list[list_idx].len == 0)
+ return -ENODEV;
+
+ /* segment not active? */
+ if (fd_list[list_idx].fds[seg_idx] < 0)
+ return -ENOENT;
+ *offset = 0;
+ }
+ return 0;
+}
+
+int
+eal_memalloc_init(void)
+{
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+ if (rte_memseg_list_walk(secondary_msl_create_walk, NULL) < 0)
+ return -1;
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
+ internal_config.in_memory) {
+ int mfd_res = test_memfd_create();
+
+ if (mfd_res < 0) {
+ RTE_LOG(ERR, EAL, "Unable to check if memfd is supported\n");
+ return -1;
+ }
+ if (mfd_res == 1)
+ RTE_LOG(DEBUG, EAL, "Using memfd for anonymous memory\n");
+ else
+ RTE_LOG(INFO, EAL, "Using memfd is not supported, falling back to anonymous hugepages\n");
+
+ /* we only support single-file segments mode with in-memory mode
+ * if we support hugetlbfs with memfd_create. this code will
+ * test if we do.
+ */
+ if (internal_config.single_file_segments &&
+ mfd_res != 1) {
+ RTE_LOG(ERR, EAL, "Single-file segments mode cannot be used without memfd support\n");
+ return -1;
+ }
+ /* this cannot ever happen but better safe than sorry */
+ if (!anonymous_hugepages_supported) {
+ RTE_LOG(ERR, EAL, "Using anonymous memory is not supported\n");
+ return -1;
+ }
+ }
+
+ /* initialize all of the fd lists */
+ if (rte_memseg_list_walk(fd_list_create_walk, NULL))
+ return -1;
+ return 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/eal_memory.c b/src/spdk/dpdk/lib/librte_eal/linux/eal_memory.c
new file mode 100644
index 000000000..7a9c97ff8
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/eal_memory.c
@@ -0,0 +1,2481 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation.
+ * Copyright(c) 2013 6WIND S.A.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/queue.h>
+#include <sys/file.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#include <limits.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <signal.h>
+#include <setjmp.h>
+#ifdef F_ADD_SEALS /* if file sealing is supported, so is memfd */
+#include <linux/memfd.h>
+#define MEMFD_SUPPORTED
+#endif
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+#include <numa.h>
+#include <numaif.h>
+#endif
+
+#include <rte_errno.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_string_fns.h>
+
+#include "eal_private.h"
+#include "eal_memalloc.h"
+#include "eal_memcfg.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+#include "eal_hugepages.h"
+#include "eal_options.h"
+
+#define PFN_MASK_SIZE 8
+
+/**
+ * @file
+ * Huge page mapping under linux
+ *
+ * To reserve a big contiguous amount of memory, we use the hugepage
+ * feature of linux. For that, we need to have hugetlbfs mounted. This
+ * code will create many files in this directory (one per page) and
+ * map them in virtual memory. For each page, we will retrieve its
+ * physical address and remap it in order to have a virtual contiguous
+ * zone as well as a physical contiguous zone.
+ */
+
+static int phys_addrs_available = -1;
+
+#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
+
+uint64_t eal_get_baseaddr(void)
+{
+ /*
+ * Linux kernel uses a really high address as starting address for
+ * serving mmaps calls. If there exists addressing limitations and IOVA
+ * mode is VA, this starting address is likely too high for those
+ * devices. However, it is possible to use a lower address in the
+ * process virtual address space as with 64 bits there is a lot of
+ * available space.
+ *
+ * Current known limitations are 39 or 40 bits. Setting the starting
+ * address at 4GB implies there are 508GB or 1020GB for mapping the
+ * available hugepages. This is likely enough for most systems, although
+ * a device with addressing limitations should call
+ * rte_mem_check_dma_mask for ensuring all memory is within supported
+ * range.
+ */
+ return 0x100000000ULL;
+}
+
+/*
+ * Get physical address of any mapped virtual address in the current process.
+ */
+phys_addr_t
+rte_mem_virt2phy(const void *virtaddr)
+{
+ int fd, retval;
+ uint64_t page, physaddr;
+ unsigned long virt_pfn;
+ int page_size;
+ off_t offset;
+
+ if (phys_addrs_available == 0)
+ return RTE_BAD_IOVA;
+
+ /* standard page size */
+ page_size = getpagesize();
+
+ fd = open("/proc/self/pagemap", O_RDONLY);
+ if (fd < 0) {
+ RTE_LOG(INFO, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
+ __func__, strerror(errno));
+ return RTE_BAD_IOVA;
+ }
+
+ virt_pfn = (unsigned long)virtaddr / page_size;
+ offset = sizeof(uint64_t) * virt_pfn;
+ if (lseek(fd, offset, SEEK_SET) == (off_t) -1) {
+ RTE_LOG(INFO, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
+ __func__, strerror(errno));
+ close(fd);
+ return RTE_BAD_IOVA;
+ }
+
+ retval = read(fd, &page, PFN_MASK_SIZE);
+ close(fd);
+ if (retval < 0) {
+ RTE_LOG(INFO, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
+ __func__, strerror(errno));
+ return RTE_BAD_IOVA;
+ } else if (retval != PFN_MASK_SIZE) {
+ RTE_LOG(INFO, EAL, "%s(): read %d bytes from /proc/self/pagemap "
+ "but expected %d:\n",
+ __func__, retval, PFN_MASK_SIZE);
+ return RTE_BAD_IOVA;
+ }
+
+ /*
+ * the pfn (page frame number) are bits 0-54 (see
+ * pagemap.txt in linux Documentation)
+ */
+ if ((page & 0x7fffffffffffffULL) == 0)
+ return RTE_BAD_IOVA;
+
+ physaddr = ((page & 0x7fffffffffffffULL) * page_size)
+ + ((unsigned long)virtaddr % page_size);
+
+ return physaddr;
+}
+
+rte_iova_t
+rte_mem_virt2iova(const void *virtaddr)
+{
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ return (uintptr_t)virtaddr;
+ return rte_mem_virt2phy(virtaddr);
+}
+
+/*
+ * For each hugepage in hugepg_tbl, fill the physaddr value. We find
+ * it by browsing the /proc/self/pagemap special file.
+ */
+static int
+find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+ unsigned int i;
+ phys_addr_t addr;
+
+ for (i = 0; i < hpi->num_pages[0]; i++) {
+ addr = rte_mem_virt2phy(hugepg_tbl[i].orig_va);
+ if (addr == RTE_BAD_PHYS_ADDR)
+ return -1;
+ hugepg_tbl[i].physaddr = addr;
+ }
+ return 0;
+}
+
+/*
+ * For each hugepage in hugepg_tbl, fill the physaddr value sequentially.
+ */
+static int
+set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+ unsigned int i;
+ static phys_addr_t addr;
+
+ for (i = 0; i < hpi->num_pages[0]; i++) {
+ hugepg_tbl[i].physaddr = addr;
+ addr += hugepg_tbl[i].size;
+ }
+ return 0;
+}
+
+/*
+ * Check whether address-space layout randomization is enabled in
+ * the kernel. This is important for multi-process as it can prevent
+ * two processes mapping data to the same virtual address
+ * Returns:
+ * 0 - address space randomization disabled
+ * 1/2 - address space randomization enabled
+ * negative error code on error
+ */
+static int
+aslr_enabled(void)
+{
+ char c;
+ int retval, fd = open(RANDOMIZE_VA_SPACE_FILE, O_RDONLY);
+ if (fd < 0)
+ return -errno;
+ retval = read(fd, &c, 1);
+ close(fd);
+ if (retval < 0)
+ return -errno;
+ if (retval == 0)
+ return -EIO;
+ switch (c) {
+ case '0' : return 0;
+ case '1' : return 1;
+ case '2' : return 2;
+ default: return -EINVAL;
+ }
+}
+
+static sigjmp_buf huge_jmpenv;
+
+static void huge_sigbus_handler(int signo __rte_unused)
+{
+ siglongjmp(huge_jmpenv, 1);
+}
+
+/* Put setjmp into a wrap method to avoid compiling error. Any non-volatile,
+ * non-static local variable in the stack frame calling sigsetjmp might be
+ * clobbered by a call to longjmp.
+ */
+static int huge_wrap_sigsetjmp(void)
+{
+ return sigsetjmp(huge_jmpenv, 1);
+}
+
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+/* Callback for numa library. */
+void numa_error(char *where)
+{
+ RTE_LOG(ERR, EAL, "%s failed: %s\n", where, strerror(errno));
+}
+#endif
+
+/*
+ * Mmap all hugepages of hugepage table: it first open a file in
+ * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the
+ * virtual address is stored in hugepg_tbl[i].orig_va, else it is stored
+ * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to
+ * map contiguous physical blocks in contiguous virtual blocks.
+ */
+static unsigned
+map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
+ uint64_t *essential_memory __rte_unused)
+{
+ int fd;
+ unsigned i;
+ void *virtaddr;
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ int node_id = -1;
+ int essential_prev = 0;
+ int oldpolicy;
+ struct bitmask *oldmask = NULL;
+ bool have_numa = true;
+ unsigned long maxnode = 0;
+
+ /* Check if kernel supports NUMA. */
+ if (numa_available() != 0) {
+ RTE_LOG(DEBUG, EAL, "NUMA is not supported.\n");
+ have_numa = false;
+ }
+
+ if (have_numa) {
+ RTE_LOG(DEBUG, EAL, "Trying to obtain current memory policy.\n");
+ oldmask = numa_allocate_nodemask();
+ if (get_mempolicy(&oldpolicy, oldmask->maskp,
+ oldmask->size + 1, 0, 0) < 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to get current mempolicy: %s. "
+ "Assuming MPOL_DEFAULT.\n", strerror(errno));
+ oldpolicy = MPOL_DEFAULT;
+ }
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ if (internal_config.socket_mem[i])
+ maxnode = i + 1;
+ }
+#endif
+
+ for (i = 0; i < hpi->num_pages[0]; i++) {
+ struct hugepage_file *hf = &hugepg_tbl[i];
+ uint64_t hugepage_sz = hpi->hugepage_sz;
+
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (maxnode) {
+ unsigned int j;
+
+ for (j = 0; j < maxnode; j++)
+ if (essential_memory[j])
+ break;
+
+ if (j == maxnode) {
+ node_id = (node_id + 1) % maxnode;
+ while (!internal_config.socket_mem[node_id]) {
+ node_id++;
+ node_id %= maxnode;
+ }
+ essential_prev = 0;
+ } else {
+ node_id = j;
+ essential_prev = essential_memory[j];
+
+ if (essential_memory[j] < hugepage_sz)
+ essential_memory[j] = 0;
+ else
+ essential_memory[j] -= hugepage_sz;
+ }
+
+ RTE_LOG(DEBUG, EAL,
+ "Setting policy MPOL_PREFERRED for socket %d\n",
+ node_id);
+ numa_set_preferred(node_id);
+ }
+#endif
+
+ hf->file_id = i;
+ hf->size = hugepage_sz;
+ eal_get_hugefile_path(hf->filepath, sizeof(hf->filepath),
+ hpi->hugedir, hf->file_id);
+ hf->filepath[sizeof(hf->filepath) - 1] = '\0';
+
+ /* try to create hugepage file */
+ fd = open(hf->filepath, O_CREAT | O_RDWR, 0600);
+ if (fd < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__,
+ strerror(errno));
+ goto out;
+ }
+
+ /* map the segment, and populate page tables,
+ * the kernel fills this segment with zeros. we don't care where
+ * this gets mapped - we already have contiguous memory areas
+ * ready for us to map into.
+ */
+ virtaddr = mmap(NULL, hugepage_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, 0);
+ if (virtaddr == MAP_FAILED) {
+ RTE_LOG(DEBUG, EAL, "%s(): mmap failed: %s\n", __func__,
+ strerror(errno));
+ close(fd);
+ goto out;
+ }
+
+ hf->orig_va = virtaddr;
+
+ /* In linux, hugetlb limitations, like cgroup, are
+ * enforced at fault time instead of mmap(), even
+ * with the option of MAP_POPULATE. Kernel will send
+ * a SIGBUS signal. To avoid to be killed, save stack
+ * environment here, if SIGBUS happens, we can jump
+ * back here.
+ */
+ if (huge_wrap_sigsetjmp()) {
+ RTE_LOG(DEBUG, EAL, "SIGBUS: Cannot mmap more "
+ "hugepages of size %u MB\n",
+ (unsigned int)(hugepage_sz / 0x100000));
+ munmap(virtaddr, hugepage_sz);
+ close(fd);
+ unlink(hugepg_tbl[i].filepath);
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (maxnode)
+ essential_memory[node_id] =
+ essential_prev;
+#endif
+ goto out;
+ }
+ *(int *)virtaddr = 0;
+
+ /* set shared lock on the file. */
+ if (flock(fd, LOCK_SH) < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): Locking file failed:%s \n",
+ __func__, strerror(errno));
+ close(fd);
+ goto out;
+ }
+
+ close(fd);
+ }
+
+out:
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (maxnode) {
+ RTE_LOG(DEBUG, EAL,
+ "Restoring previous memory policy: %d\n", oldpolicy);
+ if (oldpolicy == MPOL_DEFAULT) {
+ numa_set_localalloc();
+ } else if (set_mempolicy(oldpolicy, oldmask->maskp,
+ oldmask->size + 1) < 0) {
+ RTE_LOG(ERR, EAL, "Failed to restore mempolicy: %s\n",
+ strerror(errno));
+ numa_set_localalloc();
+ }
+ }
+ if (oldmask != NULL)
+ numa_free_cpumask(oldmask);
+#endif
+ return i;
+}
+
+/*
+ * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge
+ * page.
+ */
+static int
+find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+ int socket_id;
+ char *end, *nodestr;
+ unsigned i, hp_count = 0;
+ uint64_t virt_addr;
+ char buf[BUFSIZ];
+ char hugedir_str[PATH_MAX];
+ FILE *f;
+
+ f = fopen("/proc/self/numa_maps", "r");
+ if (f == NULL) {
+ RTE_LOG(NOTICE, EAL, "NUMA support not available"
+ " consider that all memory is in socket_id 0\n");
+ return 0;
+ }
+
+ snprintf(hugedir_str, sizeof(hugedir_str),
+ "%s/%s", hpi->hugedir, eal_get_hugefile_prefix());
+
+ /* parse numa map */
+ while (fgets(buf, sizeof(buf), f) != NULL) {
+
+ /* ignore non huge page */
+ if (strstr(buf, " huge ") == NULL &&
+ strstr(buf, hugedir_str) == NULL)
+ continue;
+
+ /* get zone addr */
+ virt_addr = strtoull(buf, &end, 16);
+ if (virt_addr == 0 || end == buf) {
+ RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+ goto error;
+ }
+
+ /* get node id (socket id) */
+ nodestr = strstr(buf, " N");
+ if (nodestr == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+ goto error;
+ }
+ nodestr += 2;
+ end = strstr(nodestr, "=");
+ if (end == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+ goto error;
+ }
+ end[0] = '\0';
+ end = NULL;
+
+ socket_id = strtoul(nodestr, &end, 0);
+ if ((nodestr[0] == '\0') || (end == NULL) || (*end != '\0')) {
+ RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
+ goto error;
+ }
+
+ /* if we find this page in our mappings, set socket_id */
+ for (i = 0; i < hpi->num_pages[0]; i++) {
+ void *va = (void *)(unsigned long)virt_addr;
+ if (hugepg_tbl[i].orig_va == va) {
+ hugepg_tbl[i].socket_id = socket_id;
+ hp_count++;
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ RTE_LOG(DEBUG, EAL,
+ "Hugepage %s is on socket %d\n",
+ hugepg_tbl[i].filepath, socket_id);
+#endif
+ }
+ }
+ }
+
+ if (hp_count < hpi->num_pages[0])
+ goto error;
+
+ fclose(f);
+ return 0;
+
+error:
+ fclose(f);
+ return -1;
+}
+
+static int
+cmp_physaddr(const void *a, const void *b)
+{
+#ifndef RTE_ARCH_PPC_64
+ const struct hugepage_file *p1 = a;
+ const struct hugepage_file *p2 = b;
+#else
+ /* PowerPC needs memory sorted in reverse order from x86 */
+ const struct hugepage_file *p1 = b;
+ const struct hugepage_file *p2 = a;
+#endif
+ if (p1->physaddr < p2->physaddr)
+ return -1;
+ else if (p1->physaddr > p2->physaddr)
+ return 1;
+ else
+ return 0;
+}
+
+/*
+ * Uses mmap to create a shared memory area for storage of data
+ * Used in this file to store the hugepage file map on disk
+ */
+static void *
+create_shared_memory(const char *filename, const size_t mem_size)
+{
+ void *retval;
+ int fd;
+
+ /* if no shared files mode is used, create anonymous memory instead */
+ if (internal_config.no_shconf) {
+ retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (retval == MAP_FAILED)
+ return NULL;
+ return retval;
+ }
+
+ fd = open(filename, O_CREAT | O_RDWR, 0600);
+ if (fd < 0)
+ return NULL;
+ if (ftruncate(fd, mem_size) < 0) {
+ close(fd);
+ return NULL;
+ }
+ retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ close(fd);
+ if (retval == MAP_FAILED)
+ return NULL;
+ return retval;
+}
+
+/*
+ * this copies *active* hugepages from one hugepage table to another.
+ * destination is typically the shared memory.
+ */
+static int
+copy_hugepages_to_shared_mem(struct hugepage_file * dst, int dest_size,
+ const struct hugepage_file * src, int src_size)
+{
+ int src_pos, dst_pos = 0;
+
+ for (src_pos = 0; src_pos < src_size; src_pos++) {
+ if (src[src_pos].orig_va != NULL) {
+ /* error on overflow attempt */
+ if (dst_pos == dest_size)
+ return -1;
+ memcpy(&dst[dst_pos], &src[src_pos], sizeof(struct hugepage_file));
+ dst_pos++;
+ }
+ }
+ return 0;
+}
+
+static int
+unlink_hugepage_files(struct hugepage_file *hugepg_tbl,
+ unsigned num_hp_info)
+{
+ unsigned socket, size;
+ int page, nrpages = 0;
+
+ /* get total number of hugepages */
+ for (size = 0; size < num_hp_info; size++)
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++)
+ nrpages +=
+ internal_config.hugepage_info[size].num_pages[socket];
+
+ for (page = 0; page < nrpages; page++) {
+ struct hugepage_file *hp = &hugepg_tbl[page];
+
+ if (hp->orig_va != NULL && unlink(hp->filepath)) {
+ RTE_LOG(WARNING, EAL, "%s(): Removing %s failed: %s\n",
+ __func__, hp->filepath, strerror(errno));
+ }
+ }
+ return 0;
+}
+
+/*
+ * unmaps hugepages that are not going to be used. since we originally allocate
+ * ALL hugepages (not just those we need), additional unmapping needs to be done.
+ */
+static int
+unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
+ struct hugepage_info *hpi,
+ unsigned num_hp_info)
+{
+ unsigned socket, size;
+ int page, nrpages = 0;
+
+ /* get total number of hugepages */
+ for (size = 0; size < num_hp_info; size++)
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++)
+ nrpages += internal_config.hugepage_info[size].num_pages[socket];
+
+ for (size = 0; size < num_hp_info; size++) {
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
+ unsigned pages_found = 0;
+
+ /* traverse until we have unmapped all the unused pages */
+ for (page = 0; page < nrpages; page++) {
+ struct hugepage_file *hp = &hugepg_tbl[page];
+
+ /* find a page that matches the criteria */
+ if ((hp->size == hpi[size].hugepage_sz) &&
+ (hp->socket_id == (int) socket)) {
+
+ /* if we skipped enough pages, unmap the rest */
+ if (pages_found == hpi[size].num_pages[socket]) {
+ uint64_t unmap_len;
+
+ unmap_len = hp->size;
+
+ /* get start addr and len of the remaining segment */
+ munmap(hp->orig_va,
+ (size_t)unmap_len);
+
+ hp->orig_va = NULL;
+ if (unlink(hp->filepath) == -1) {
+ RTE_LOG(ERR, EAL, "%s(): Removing %s failed: %s\n",
+ __func__, hp->filepath, strerror(errno));
+ return -1;
+ }
+ } else {
+ /* lock the page and skip */
+ pages_found++;
+ }
+
+ } /* match page */
+ } /* foreach page */
+ } /* foreach socket */
+ } /* foreach pagesize */
+
+ return 0;
+}
+
+static int
+remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl;
+ struct rte_fbarray *arr;
+ int cur_page, seg_len;
+ unsigned int msl_idx;
+ int ms_idx;
+ uint64_t page_sz;
+ size_t memseg_len;
+ int socket_id;
+
+ page_sz = hugepages[seg_start].size;
+ socket_id = hugepages[seg_start].socket_id;
+ seg_len = seg_end - seg_start;
+
+ RTE_LOG(DEBUG, EAL, "Attempting to map %" PRIu64 "M on socket %i\n",
+ (seg_len * page_sz) >> 20ULL, socket_id);
+
+ /* find free space in memseg lists */
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+ bool empty;
+ msl = &mcfg->memsegs[msl_idx];
+ arr = &msl->memseg_arr;
+
+ if (msl->page_sz != page_sz)
+ continue;
+ if (msl->socket_id != socket_id)
+ continue;
+
+ /* leave space for a hole if array is not empty */
+ empty = arr->count == 0;
+ ms_idx = rte_fbarray_find_next_n_free(arr, 0,
+ seg_len + (empty ? 0 : 1));
+
+ /* memseg list is full? */
+ if (ms_idx < 0)
+ continue;
+
+ /* leave some space between memsegs, they are not IOVA
+ * contiguous, so they shouldn't be VA contiguous either.
+ */
+ if (!empty)
+ ms_idx++;
+ break;
+ }
+ if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_PER_TYPE),
+ RTE_STR(CONFIG_RTE_MAX_MEM_PER_TYPE));
+ return -1;
+ }
+
+#ifdef RTE_ARCH_PPC_64
+ /* for PPC64 we go through the list backwards */
+ for (cur_page = seg_end - 1; cur_page >= seg_start;
+ cur_page--, ms_idx++) {
+#else
+ for (cur_page = seg_start; cur_page < seg_end; cur_page++, ms_idx++) {
+#endif
+ struct hugepage_file *hfile = &hugepages[cur_page];
+ struct rte_memseg *ms = rte_fbarray_get(arr, ms_idx);
+ void *addr;
+ int fd;
+
+ fd = open(hfile->filepath, O_RDWR);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Could not open '%s': %s\n",
+ hfile->filepath, strerror(errno));
+ return -1;
+ }
+ /* set shared lock on the file. */
+ if (flock(fd, LOCK_SH) < 0) {
+ RTE_LOG(DEBUG, EAL, "Could not lock '%s': %s\n",
+ hfile->filepath, strerror(errno));
+ close(fd);
+ return -1;
+ }
+ memseg_len = (size_t)page_sz;
+ addr = RTE_PTR_ADD(msl->base_va, ms_idx * memseg_len);
+
+ /* we know this address is already mmapped by memseg list, so
+ * using MAP_FIXED here is safe
+ */
+ addr = mmap(addr, page_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd, 0);
+ if (addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Couldn't remap '%s': %s\n",
+ hfile->filepath, strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ /* we have a new address, so unmap previous one */
+#ifndef RTE_ARCH_64
+ /* in 32-bit legacy mode, we have already unmapped the page */
+ if (!internal_config.legacy_mem)
+ munmap(hfile->orig_va, page_sz);
+#else
+ munmap(hfile->orig_va, page_sz);
+#endif
+
+ hfile->orig_va = NULL;
+ hfile->final_va = addr;
+
+ /* rewrite physical addresses in IOVA as VA mode */
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ hfile->physaddr = (uintptr_t)addr;
+
+ /* set up memseg data */
+ ms->addr = addr;
+ ms->hugepage_sz = page_sz;
+ ms->len = memseg_len;
+ ms->iova = hfile->physaddr;
+ ms->socket_id = hfile->socket_id;
+ ms->nchannel = rte_memory_get_nchannel();
+ ms->nrank = rte_memory_get_nrank();
+
+ rte_fbarray_set_used(arr, ms_idx);
+
+ /* store segment fd internally */
+ if (eal_memalloc_set_seg_fd(msl_idx, ms_idx, fd) < 0)
+ RTE_LOG(ERR, EAL, "Could not store segment fd: %s\n",
+ rte_strerror(rte_errno));
+ }
+ RTE_LOG(DEBUG, EAL, "Allocated %" PRIu64 "M on socket %i\n",
+ (seg_len * page_sz) >> 20, socket_id);
+ return 0;
+}
+
+static uint64_t
+get_mem_amount(uint64_t page_sz, uint64_t max_mem)
+{
+ uint64_t area_sz, max_pages;
+
+ /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
+ max_pages = RTE_MAX_MEMSEG_PER_LIST;
+ max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);
+
+ area_sz = RTE_MIN(page_sz * max_pages, max_mem);
+
+ /* make sure the list isn't smaller than the page size */
+ area_sz = RTE_MAX(area_sz, page_sz);
+
+ return RTE_ALIGN(area_sz, page_sz);
+}
+
+static int
+free_memseg_list(struct rte_memseg_list *msl)
+{
+ if (rte_fbarray_destroy(&msl->memseg_arr)) {
+ RTE_LOG(ERR, EAL, "Cannot destroy memseg list\n");
+ return -1;
+ }
+ memset(msl, 0, sizeof(*msl));
+ return 0;
+}
+
+#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
+static int
+alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
+ int n_segs, int socket_id, int type_msl_idx)
+{
+ char name[RTE_FBARRAY_NAME_LEN];
+
+ snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
+ type_msl_idx);
+ if (rte_fbarray_init(&msl->memseg_arr, name, n_segs,
+ sizeof(struct rte_memseg))) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
+ rte_strerror(rte_errno));
+ return -1;
+ }
+
+ msl->page_sz = page_sz;
+ msl->socket_id = socket_id;
+ msl->base_va = NULL;
+ msl->heap = 1; /* mark it as a heap segment */
+
+ RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
+ (size_t)page_sz >> 10, socket_id);
+
+ return 0;
+}
+
+static int
+alloc_va_space(struct rte_memseg_list *msl)
+{
+ uint64_t page_sz;
+ size_t mem_sz;
+ void *addr;
+ int flags = 0;
+
+ page_sz = msl->page_sz;
+ mem_sz = page_sz * msl->memseg_arr.len;
+
+ addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
+ if (addr == NULL) {
+ if (rte_errno == EADDRNOTAVAIL)
+ RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - "
+ "please use '--" OPT_BASE_VIRTADDR "' option\n",
+ (unsigned long long)mem_sz, msl->base_va);
+ else
+ RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
+ return -1;
+ }
+ msl->base_va = addr;
+ msl->len = mem_sz;
+
+ return 0;
+}
+
+/*
+ * Our VA space is not preallocated yet, so preallocate it here. We need to know
+ * how many segments there are in order to map all pages into one address space,
+ * and leave appropriate holes between segments so that rte_malloc does not
+ * concatenate them into one big segment.
+ *
+ * we also need to unmap original pages to free up address space.
+ */
+static int __rte_unused
+prealloc_segments(struct hugepage_file *hugepages, int n_pages)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int cur_page, seg_start_page, end_seg, new_memseg;
+ unsigned int hpi_idx, socket, i;
+ int n_contig_segs, n_segs;
+ int msl_idx;
+
+ /* before we preallocate segments, we need to free up our VA space.
+ * we're not removing files, and we already have information about
+ * PA-contiguousness, so it is safe to unmap everything.
+ */
+ for (cur_page = 0; cur_page < n_pages; cur_page++) {
+ struct hugepage_file *hpi = &hugepages[cur_page];
+ munmap(hpi->orig_va, hpi->size);
+ hpi->orig_va = NULL;
+ }
+
+ /* we cannot know how many page sizes and sockets we have discovered, so
+ * loop over all of them
+ */
+ for (hpi_idx = 0; hpi_idx < internal_config.num_hugepage_sizes;
+ hpi_idx++) {
+ uint64_t page_sz =
+ internal_config.hugepage_info[hpi_idx].hugepage_sz;
+
+ for (i = 0; i < rte_socket_count(); i++) {
+ struct rte_memseg_list *msl;
+
+ socket = rte_socket_id_by_idx(i);
+ n_contig_segs = 0;
+ n_segs = 0;
+ seg_start_page = -1;
+
+ for (cur_page = 0; cur_page < n_pages; cur_page++) {
+ struct hugepage_file *prev, *cur;
+ int prev_seg_start_page = -1;
+
+ cur = &hugepages[cur_page];
+ prev = cur_page == 0 ? NULL :
+ &hugepages[cur_page - 1];
+
+ new_memseg = 0;
+ end_seg = 0;
+
+ if (cur->size == 0)
+ end_seg = 1;
+ else if (cur->socket_id != (int) socket)
+ end_seg = 1;
+ else if (cur->size != page_sz)
+ end_seg = 1;
+ else if (cur_page == 0)
+ new_memseg = 1;
+#ifdef RTE_ARCH_PPC_64
+ /* On PPC64 architecture, the mmap always start
+ * from higher address to lower address. Here,
+ * physical addresses are in descending order.
+ */
+ else if ((prev->physaddr - cur->physaddr) !=
+ cur->size)
+ new_memseg = 1;
+#else
+ else if ((cur->physaddr - prev->physaddr) !=
+ cur->size)
+ new_memseg = 1;
+#endif
+ if (new_memseg) {
+ /* if we're already inside a segment,
+ * new segment means end of current one
+ */
+ if (seg_start_page != -1) {
+ end_seg = 1;
+ prev_seg_start_page =
+ seg_start_page;
+ }
+ seg_start_page = cur_page;
+ }
+
+ if (end_seg) {
+ if (prev_seg_start_page != -1) {
+ /* we've found a new segment */
+ n_contig_segs++;
+ n_segs += cur_page -
+ prev_seg_start_page;
+ } else if (seg_start_page != -1) {
+ /* we didn't find new segment,
+ * but did end current one
+ */
+ n_contig_segs++;
+ n_segs += cur_page -
+ seg_start_page;
+ seg_start_page = -1;
+ continue;
+ } else {
+ /* we're skipping this page */
+ continue;
+ }
+ }
+ /* segment continues */
+ }
+ /* check if we missed last segment */
+ if (seg_start_page != -1) {
+ n_contig_segs++;
+ n_segs += cur_page - seg_start_page;
+ }
+
+ /* if no segments were found, do not preallocate */
+ if (n_segs == 0)
+ continue;
+
+ /* we now have total number of pages that we will
+ * allocate for this segment list. add separator pages
+ * to the total count, and preallocate VA space.
+ */
+ n_segs += n_contig_segs - 1;
+
+ /* now, preallocate VA space for these segments */
+
+ /* first, find suitable memseg list for this */
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
+ msl_idx++) {
+ msl = &mcfg->memsegs[msl_idx];
+
+ if (msl->base_va != NULL)
+ continue;
+ break;
+ }
+ if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL, "Not enough space in memseg lists, please increase %s\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+ return -1;
+ }
+
+ /* now, allocate fbarray itself */
+ if (alloc_memseg_list(msl, page_sz, n_segs, socket,
+ msl_idx) < 0)
+ return -1;
+
+ /* finally, allocate VA space */
+ if (alloc_va_space(msl) < 0)
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * We cannot reallocate memseg lists on the fly because PPC64 stores pages
+ * backwards, therefore we have to process the entire memseg first before
+ * remapping it into memseg list VA space.
+ */
+static int
+remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
+{
+ int cur_page, seg_start_page, new_memseg, ret;
+
+ seg_start_page = 0;
+ for (cur_page = 0; cur_page < n_pages; cur_page++) {
+ struct hugepage_file *prev, *cur;
+
+ new_memseg = 0;
+
+ cur = &hugepages[cur_page];
+ prev = cur_page == 0 ? NULL : &hugepages[cur_page - 1];
+
+ /* if size is zero, no more pages left */
+ if (cur->size == 0)
+ break;
+
+ if (cur_page == 0)
+ new_memseg = 1;
+ else if (cur->socket_id != prev->socket_id)
+ new_memseg = 1;
+ else if (cur->size != prev->size)
+ new_memseg = 1;
+#ifdef RTE_ARCH_PPC_64
+ /* On PPC64 architecture, the mmap always start from higher
+ * address to lower address. Here, physical addresses are in
+ * descending order.
+ */
+ else if ((prev->physaddr - cur->physaddr) != cur->size)
+ new_memseg = 1;
+#else
+ else if ((cur->physaddr - prev->physaddr) != cur->size)
+ new_memseg = 1;
+#endif
+
+ if (new_memseg) {
+ /* if this isn't the first time, remap segment */
+ if (cur_page != 0) {
+ ret = remap_segment(hugepages, seg_start_page,
+ cur_page);
+ if (ret != 0)
+ return -1;
+ }
+ /* remember where we started */
+ seg_start_page = cur_page;
+ }
+ /* continuation of previous memseg */
+ }
+ /* we were stopped, but we didn't remap the last segment, do it now */
+ if (cur_page != 0) {
+ ret = remap_segment(hugepages, seg_start_page,
+ cur_page);
+ if (ret != 0)
+ return -1;
+ }
+ return 0;
+}
+
+__rte_unused /* function is unused on 32-bit builds */
+static inline uint64_t
+get_socket_mem_size(int socket)
+{
+ uint64_t size = 0;
+ unsigned i;
+
+ for (i = 0; i < internal_config.num_hugepage_sizes; i++){
+ struct hugepage_info *hpi = &internal_config.hugepage_info[i];
+ size += hpi->hugepage_sz * hpi->num_pages[socket];
+ }
+
+ return size;
+}
+
+/*
+ * This function is a NUMA-aware equivalent of calc_num_pages.
+ * It takes in the list of hugepage sizes and the
+ * number of pages thereof, and calculates the best number of
+ * pages of each size to fulfill the request for <memory> ram
+ */
+static int
+calc_num_pages_per_socket(uint64_t * memory,
+ struct hugepage_info *hp_info,
+ struct hugepage_info *hp_used,
+ unsigned num_hp_info)
+{
+ unsigned socket, j, i = 0;
+ unsigned requested, available;
+ int total_num_pages = 0;
+ uint64_t remaining_mem, cur_mem;
+ uint64_t total_mem = internal_config.memory;
+
+ if (num_hp_info == 0)
+ return -1;
+
+ /* if specific memory amounts per socket weren't requested */
+ if (internal_config.force_sockets == 0) {
+ size_t total_size;
+#ifdef RTE_ARCH_64
+ int cpu_per_socket[RTE_MAX_NUMA_NODES];
+ size_t default_size;
+ unsigned lcore_id;
+
+ /* Compute number of cores per socket */
+ memset(cpu_per_socket, 0, sizeof(cpu_per_socket));
+ RTE_LCORE_FOREACH(lcore_id) {
+ cpu_per_socket[rte_lcore_to_socket_id(lcore_id)]++;
+ }
+
+ /*
+ * Automatically spread requested memory amongst detected sockets according
+ * to number of cores from cpu mask present on each socket
+ */
+ total_size = internal_config.memory;
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) {
+
+ /* Set memory amount per socket */
+ default_size = (internal_config.memory * cpu_per_socket[socket])
+ / rte_lcore_count();
+
+ /* Limit to maximum available memory on socket */
+ default_size = RTE_MIN(default_size, get_socket_mem_size(socket));
+
+ /* Update sizes */
+ memory[socket] = default_size;
+ total_size -= default_size;
+ }
+
+ /*
+ * If some memory is remaining, try to allocate it by getting all
+ * available memory from sockets, one after the other
+ */
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) {
+ /* take whatever is available */
+ default_size = RTE_MIN(get_socket_mem_size(socket) - memory[socket],
+ total_size);
+
+ /* Update sizes */
+ memory[socket] += default_size;
+ total_size -= default_size;
+ }
+#else
+ /* in 32-bit mode, allocate all of the memory only on master
+ * lcore socket
+ */
+ total_size = internal_config.memory;
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
+ socket++) {
+ struct rte_config *cfg = rte_eal_get_configuration();
+ unsigned int master_lcore_socket;
+
+ master_lcore_socket =
+ rte_lcore_to_socket_id(cfg->master_lcore);
+
+ if (master_lcore_socket != socket)
+ continue;
+
+ /* Update sizes */
+ memory[socket] = total_size;
+ break;
+ }
+#endif
+ }
+
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) {
+ /* skips if the memory on specific socket wasn't requested */
+ for (i = 0; i < num_hp_info && memory[socket] != 0; i++){
+ strlcpy(hp_used[i].hugedir, hp_info[i].hugedir,
+ sizeof(hp_used[i].hugedir));
+ hp_used[i].num_pages[socket] = RTE_MIN(
+ memory[socket] / hp_info[i].hugepage_sz,
+ hp_info[i].num_pages[socket]);
+
+ cur_mem = hp_used[i].num_pages[socket] *
+ hp_used[i].hugepage_sz;
+
+ memory[socket] -= cur_mem;
+ total_mem -= cur_mem;
+
+ total_num_pages += hp_used[i].num_pages[socket];
+
+ /* check if we have met all memory requests */
+ if (memory[socket] == 0)
+ break;
+
+ /* check if we have any more pages left at this size, if so
+ * move on to next size */
+ if (hp_used[i].num_pages[socket] == hp_info[i].num_pages[socket])
+ continue;
+ /* At this point we know that there are more pages available that are
+ * bigger than the memory we want, so lets see if we can get enough
+ * from other page sizes.
+ */
+ remaining_mem = 0;
+ for (j = i+1; j < num_hp_info; j++)
+ remaining_mem += hp_info[j].hugepage_sz *
+ hp_info[j].num_pages[socket];
+
+ /* is there enough other memory, if not allocate another page and quit */
+ if (remaining_mem < memory[socket]){
+ cur_mem = RTE_MIN(memory[socket],
+ hp_info[i].hugepage_sz);
+ memory[socket] -= cur_mem;
+ total_mem -= cur_mem;
+ hp_used[i].num_pages[socket]++;
+ total_num_pages++;
+ break; /* we are done with this socket*/
+ }
+ }
+ /* if we didn't satisfy all memory requirements per socket */
+ if (memory[socket] > 0 &&
+ internal_config.socket_mem[socket] != 0) {
+ /* to prevent icc errors */
+ requested = (unsigned) (internal_config.socket_mem[socket] /
+ 0x100000);
+ available = requested -
+ ((unsigned) (memory[socket] / 0x100000));
+ RTE_LOG(ERR, EAL, "Not enough memory available on socket %u! "
+ "Requested: %uMB, available: %uMB\n", socket,
+ requested, available);
+ return -1;
+ }
+ }
+
+ /* if we didn't satisfy total memory requirements */
+ if (total_mem > 0) {
+ requested = (unsigned) (internal_config.memory / 0x100000);
+ available = requested - (unsigned) (total_mem / 0x100000);
+ RTE_LOG(ERR, EAL, "Not enough memory available! Requested: %uMB,"
+ " available: %uMB\n", requested, available);
+ return -1;
+ }
+ return total_num_pages;
+}
+
+static inline size_t
+eal_get_hugepage_mem_size(void)
+{
+ uint64_t size = 0;
+ unsigned i, j;
+
+ for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
+ struct hugepage_info *hpi = &internal_config.hugepage_info[i];
+ if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) {
+ for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
+ size += hpi->hugepage_sz * hpi->num_pages[j];
+ }
+ }
+ }
+
+ return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
+}
+
+static struct sigaction huge_action_old;
+static int huge_need_recover;
+
+static void
+huge_register_sigbus(void)
+{
+ sigset_t mask;
+ struct sigaction action;
+
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGBUS);
+ action.sa_flags = 0;
+ action.sa_mask = mask;
+ action.sa_handler = huge_sigbus_handler;
+
+ huge_need_recover = !sigaction(SIGBUS, &action, &huge_action_old);
+}
+
+static void
+huge_recover_sigbus(void)
+{
+ if (huge_need_recover) {
+ sigaction(SIGBUS, &huge_action_old, NULL);
+ huge_need_recover = 0;
+ }
+}
+
+/*
+ * Prepare physical memory mapping: fill configuration structure with
+ * these infos, return 0 on success.
+ * 1. map N huge pages in separate files in hugetlbfs
+ * 2. find associated physical addr
+ * 3. find associated NUMA socket ID
+ * 4. sort all huge pages by physical address
+ * 5. remap these N huge pages in the correct order
+ * 6. unmap the first mapping
+ * 7. fill memsegs in configuration with contiguous zones
+ */
+static int
+eal_legacy_hugepage_init(void)
+{
+ struct rte_mem_config *mcfg;
+ struct hugepage_file *hugepage = NULL, *tmp_hp = NULL;
+ struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
+ struct rte_fbarray *arr;
+ struct rte_memseg *ms;
+
+ uint64_t memory[RTE_MAX_NUMA_NODES];
+
+ unsigned hp_offset;
+ int i, j;
+ int nr_hugefiles, nr_hugepages = 0;
+ void *addr;
+
+ memset(used_hp, 0, sizeof(used_hp));
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* hugetlbfs can be disabled */
+ if (internal_config.no_hugetlbfs) {
+ void *prealloc_addr;
+ size_t mem_sz;
+ struct rte_memseg_list *msl;
+ int n_segs, cur_seg, fd, flags;
+#ifdef MEMFD_SUPPORTED
+ int memfd;
+#endif
+ uint64_t page_sz;
+
+ /* nohuge mode is legacy mode */
+ internal_config.legacy_mem = 1;
+
+ /* nohuge mode is single-file segments mode */
+ internal_config.single_file_segments = 1;
+
+ /* create a memseg list */
+ msl = &mcfg->memsegs[0];
+
+ page_sz = RTE_PGSIZE_4K;
+ n_segs = internal_config.memory / page_sz;
+
+ if (rte_fbarray_init(&msl->memseg_arr, "nohugemem", n_segs,
+ sizeof(struct rte_memseg))) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
+ return -1;
+ }
+
+ /* set up parameters for anonymous mmap */
+ fd = -1;
+ flags = MAP_PRIVATE | MAP_ANONYMOUS;
+
+#ifdef MEMFD_SUPPORTED
+ /* create a memfd and store it in the segment fd table */
+ memfd = memfd_create("nohuge", 0);
+ if (memfd < 0) {
+ RTE_LOG(DEBUG, EAL, "Cannot create memfd: %s\n",
+ strerror(errno));
+ RTE_LOG(DEBUG, EAL, "Falling back to anonymous map\n");
+ } else {
+ /* we got an fd - now resize it */
+ if (ftruncate(memfd, internal_config.memory) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot resize memfd: %s\n",
+ strerror(errno));
+ RTE_LOG(ERR, EAL, "Falling back to anonymous map\n");
+ close(memfd);
+ } else {
+ /* creating memfd-backed file was successful.
+ * we want changes to memfd to be visible to
+ * other processes (such as vhost backend), so
+ * map it as shared memory.
+ */
+ RTE_LOG(DEBUG, EAL, "Using memfd for anonymous memory\n");
+ fd = memfd;
+ flags = MAP_SHARED;
+ }
+ }
+#endif
+ /* preallocate address space for the memory, so that it can be
+ * fit into the DMA mask.
+ */
+ mem_sz = internal_config.memory;
+ prealloc_addr = eal_get_virtual_area(
+ NULL, &mem_sz, page_sz, 0, 0);
+ if (prealloc_addr == NULL) {
+ RTE_LOG(ERR, EAL,
+ "%s: reserving memory area failed: "
+ "%s\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+ addr = mmap(prealloc_addr, mem_sz, PROT_READ | PROT_WRITE,
+ flags | MAP_FIXED, fd, 0);
+ if (addr == MAP_FAILED || addr != prealloc_addr) {
+ RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
+ strerror(errno));
+ munmap(prealloc_addr, mem_sz);
+ return -1;
+ }
+ msl->base_va = addr;
+ msl->page_sz = page_sz;
+ msl->socket_id = 0;
+ msl->len = mem_sz;
+ msl->heap = 1;
+
+ /* we're in single-file segments mode, so only the segment list
+ * fd needs to be set up.
+ */
+ if (fd != -1) {
+ if (eal_memalloc_set_seg_list_fd(0, fd) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot set up segment list fd\n");
+ /* not a serious error, proceed */
+ }
+ }
+
+ /* populate memsegs. each memseg is one page long */
+ for (cur_seg = 0; cur_seg < n_segs; cur_seg++) {
+ arr = &msl->memseg_arr;
+
+ ms = rte_fbarray_get(arr, cur_seg);
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ ms->iova = (uintptr_t)addr;
+ else
+ ms->iova = RTE_BAD_IOVA;
+ ms->addr = addr;
+ ms->hugepage_sz = page_sz;
+ ms->socket_id = 0;
+ ms->len = page_sz;
+
+ rte_fbarray_set_used(arr, cur_seg);
+
+ addr = RTE_PTR_ADD(addr, (size_t)page_sz);
+ }
+ if (mcfg->dma_maskbits &&
+ rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
+ RTE_LOG(ERR, EAL,
+ "%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask.\n",
+ __func__);
+ if (rte_eal_iova_mode() == RTE_IOVA_VA &&
+ rte_eal_using_phys_addrs())
+ RTE_LOG(ERR, EAL,
+ "%s(): Please try initializing EAL with --iova-mode=pa parameter.\n",
+ __func__);
+ goto fail;
+ }
+ return 0;
+ }
+
+ /* calculate total number of hugepages available. at this point we haven't
+ * yet started sorting them so they all are on socket 0 */
+ for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
+ /* meanwhile, also initialize used_hp hugepage sizes in used_hp */
+ used_hp[i].hugepage_sz = internal_config.hugepage_info[i].hugepage_sz;
+
+ nr_hugepages += internal_config.hugepage_info[i].num_pages[0];
+ }
+
+ /*
+ * allocate a memory area for hugepage table.
+ * this isn't shared memory yet. due to the fact that we need some
+ * processing done on these pages, shared memory will be created
+ * at a later stage.
+ */
+ tmp_hp = malloc(nr_hugepages * sizeof(struct hugepage_file));
+ if (tmp_hp == NULL)
+ goto fail;
+
+ memset(tmp_hp, 0, nr_hugepages * sizeof(struct hugepage_file));
+
+ hp_offset = 0; /* where we start the current page size entries */
+
+ huge_register_sigbus();
+
+ /* make a copy of socket_mem, needed for balanced allocation. */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ memory[i] = internal_config.socket_mem[i];
+
+ /* map all hugepages and sort them */
+ for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){
+ unsigned pages_old, pages_new;
+ struct hugepage_info *hpi;
+
+ /*
+ * we don't yet mark hugepages as used at this stage, so
+ * we just map all hugepages available to the system
+ * all hugepages are still located on socket 0
+ */
+ hpi = &internal_config.hugepage_info[i];
+
+ if (hpi->num_pages[0] == 0)
+ continue;
+
+ /* map all hugepages available */
+ pages_old = hpi->num_pages[0];
+ pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, memory);
+ if (pages_new < pages_old) {
+ RTE_LOG(DEBUG, EAL,
+ "%d not %d hugepages of size %u MB allocated\n",
+ pages_new, pages_old,
+ (unsigned)(hpi->hugepage_sz / 0x100000));
+
+ int pages = pages_old - pages_new;
+
+ nr_hugepages -= pages;
+ hpi->num_pages[0] = pages_new;
+ if (pages_new == 0)
+ continue;
+ }
+
+ if (rte_eal_using_phys_addrs() &&
+ rte_eal_iova_mode() != RTE_IOVA_VA) {
+ /* find physical addresses for each hugepage */
+ if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+ RTE_LOG(DEBUG, EAL, "Failed to find phys addr "
+ "for %u MB pages\n",
+ (unsigned int)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+ } else {
+ /* set physical addresses for each hugepage */
+ if (set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+ RTE_LOG(DEBUG, EAL, "Failed to set phys addr "
+ "for %u MB pages\n",
+ (unsigned int)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+ }
+
+ if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
+ RTE_LOG(DEBUG, EAL, "Failed to find NUMA socket for %u MB pages\n",
+ (unsigned)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+
+ qsort(&tmp_hp[hp_offset], hpi->num_pages[0],
+ sizeof(struct hugepage_file), cmp_physaddr);
+
+ /* we have processed a num of hugepages of this size, so inc offset */
+ hp_offset += hpi->num_pages[0];
+ }
+
+ huge_recover_sigbus();
+
+ if (internal_config.memory == 0 && internal_config.force_sockets == 0)
+ internal_config.memory = eal_get_hugepage_mem_size();
+
+ nr_hugefiles = nr_hugepages;
+
+
+ /* clean out the numbers of pages */
+ for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++)
+ for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
+ internal_config.hugepage_info[i].num_pages[j] = 0;
+
+ /* get hugepages for each socket */
+ for (i = 0; i < nr_hugefiles; i++) {
+ int socket = tmp_hp[i].socket_id;
+
+ /* find a hugepage info with right size and increment num_pages */
+ const int nb_hpsizes = RTE_MIN(MAX_HUGEPAGE_SIZES,
+ (int)internal_config.num_hugepage_sizes);
+ for (j = 0; j < nb_hpsizes; j++) {
+ if (tmp_hp[i].size ==
+ internal_config.hugepage_info[j].hugepage_sz) {
+ internal_config.hugepage_info[j].num_pages[socket]++;
+ }
+ }
+ }
+
+ /* make a copy of socket_mem, needed for number of pages calculation */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ memory[i] = internal_config.socket_mem[i];
+
+ /* calculate final number of pages */
+ nr_hugepages = calc_num_pages_per_socket(memory,
+ internal_config.hugepage_info, used_hp,
+ internal_config.num_hugepage_sizes);
+
+ /* error if not enough memory available */
+ if (nr_hugepages < 0)
+ goto fail;
+
+ /* reporting in! */
+ for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
+ for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
+ if (used_hp[i].num_pages[j] > 0) {
+ RTE_LOG(DEBUG, EAL,
+ "Requesting %u pages of size %uMB"
+ " from socket %i\n",
+ used_hp[i].num_pages[j],
+ (unsigned)
+ (used_hp[i].hugepage_sz / 0x100000),
+ j);
+ }
+ }
+ }
+
+ /* create shared memory */
+ hugepage = create_shared_memory(eal_hugepage_data_path(),
+ nr_hugefiles * sizeof(struct hugepage_file));
+
+ if (hugepage == NULL) {
+ RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
+ goto fail;
+ }
+ memset(hugepage, 0, nr_hugefiles * sizeof(struct hugepage_file));
+
+ /*
+ * unmap pages that we won't need (looks at used_hp).
+ * also, sets final_va to NULL on pages that were unmapped.
+ */
+ if (unmap_unneeded_hugepages(tmp_hp, used_hp,
+ internal_config.num_hugepage_sizes) < 0) {
+ RTE_LOG(ERR, EAL, "Unmapping and locking hugepages failed!\n");
+ goto fail;
+ }
+
+ /*
+ * copy stuff from malloc'd hugepage* to the actual shared memory.
+ * this procedure only copies those hugepages that have orig_va
+ * not NULL. has overflow protection.
+ */
+ if (copy_hugepages_to_shared_mem(hugepage, nr_hugefiles,
+ tmp_hp, nr_hugefiles) < 0) {
+ RTE_LOG(ERR, EAL, "Copying tables to shared memory failed!\n");
+ goto fail;
+ }
+
+#ifndef RTE_ARCH_64
+ /* for legacy 32-bit mode, we did not preallocate VA space, so do it */
+ if (internal_config.legacy_mem &&
+ prealloc_segments(hugepage, nr_hugefiles)) {
+ RTE_LOG(ERR, EAL, "Could not preallocate VA space for hugepages\n");
+ goto fail;
+ }
+#endif
+
+ /* remap all pages we do need into memseg list VA space, so that those
+ * pages become first-class citizens in DPDK memory subsystem
+ */
+ if (remap_needed_hugepages(hugepage, nr_hugefiles)) {
+ RTE_LOG(ERR, EAL, "Couldn't remap hugepage files into memseg lists\n");
+ goto fail;
+ }
+
+ /* free the hugepage backing files */
+ if (internal_config.hugepage_unlink &&
+ unlink_hugepage_files(tmp_hp, internal_config.num_hugepage_sizes) < 0) {
+ RTE_LOG(ERR, EAL, "Unlinking hugepage files failed!\n");
+ goto fail;
+ }
+
+ /* free the temporary hugepage table */
+ free(tmp_hp);
+ tmp_hp = NULL;
+
+ munmap(hugepage, nr_hugefiles * sizeof(struct hugepage_file));
+ hugepage = NULL;
+
+ /* we're not going to allocate more pages, so release VA space for
+ * unused memseg lists
+ */
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+ size_t mem_sz;
+
+ /* skip inactive lists */
+ if (msl->base_va == NULL)
+ continue;
+ /* skip lists where there is at least one page allocated */
+ if (msl->memseg_arr.count > 0)
+ continue;
+ /* this is an unused list, deallocate it */
+ mem_sz = msl->len;
+ munmap(msl->base_va, mem_sz);
+ msl->base_va = NULL;
+ msl->heap = 0;
+
+ /* destroy backing fbarray */
+ rte_fbarray_destroy(&msl->memseg_arr);
+ }
+
+ if (mcfg->dma_maskbits &&
+ rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
+ RTE_LOG(ERR, EAL,
+ "%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask.\n",
+ __func__);
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ huge_recover_sigbus();
+ free(tmp_hp);
+ if (hugepage != NULL)
+ munmap(hugepage, nr_hugefiles * sizeof(struct hugepage_file));
+
+ return -1;
+}
+
+static int __rte_unused
+hugepage_count_walk(const struct rte_memseg_list *msl, void *arg)
+{
+ struct hugepage_info *hpi = arg;
+
+ if (msl->page_sz != hpi->hugepage_sz)
+ return 0;
+
+ hpi->num_pages[msl->socket_id] += msl->memseg_arr.len;
+ return 0;
+}
+
+static int
+limits_callback(int socket_id, size_t cur_limit, size_t new_len)
+{
+ RTE_SET_USED(socket_id);
+ RTE_SET_USED(cur_limit);
+ RTE_SET_USED(new_len);
+ return -1;
+}
+
+static int
+eal_hugepage_init(void)
+{
+ struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
+ uint64_t memory[RTE_MAX_NUMA_NODES];
+ int hp_sz_idx, socket_id;
+
+ memset(used_hp, 0, sizeof(used_hp));
+
+ for (hp_sz_idx = 0;
+ hp_sz_idx < (int) internal_config.num_hugepage_sizes;
+ hp_sz_idx++) {
+#ifndef RTE_ARCH_64
+ struct hugepage_info dummy;
+ unsigned int i;
+#endif
+ /* also initialize used_hp hugepage sizes in used_hp */
+ struct hugepage_info *hpi;
+ hpi = &internal_config.hugepage_info[hp_sz_idx];
+ used_hp[hp_sz_idx].hugepage_sz = hpi->hugepage_sz;
+
+#ifndef RTE_ARCH_64
+ /* for 32-bit, limit number of pages on socket to whatever we've
+ * preallocated, as we cannot allocate more.
+ */
+ memset(&dummy, 0, sizeof(dummy));
+ dummy.hugepage_sz = hpi->hugepage_sz;
+ if (rte_memseg_list_walk(hugepage_count_walk, &dummy) < 0)
+ return -1;
+
+ for (i = 0; i < RTE_DIM(dummy.num_pages); i++) {
+ hpi->num_pages[i] = RTE_MIN(hpi->num_pages[i],
+ dummy.num_pages[i]);
+ }
+#endif
+ }
+
+ /* make a copy of socket_mem, needed for balanced allocation. */
+ for (hp_sz_idx = 0; hp_sz_idx < RTE_MAX_NUMA_NODES; hp_sz_idx++)
+ memory[hp_sz_idx] = internal_config.socket_mem[hp_sz_idx];
+
+ /* calculate final number of pages */
+ if (calc_num_pages_per_socket(memory,
+ internal_config.hugepage_info, used_hp,
+ internal_config.num_hugepage_sizes) < 0)
+ return -1;
+
+ for (hp_sz_idx = 0;
+ hp_sz_idx < (int)internal_config.num_hugepage_sizes;
+ hp_sz_idx++) {
+ for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES;
+ socket_id++) {
+ struct rte_memseg **pages;
+ struct hugepage_info *hpi = &used_hp[hp_sz_idx];
+ unsigned int num_pages = hpi->num_pages[socket_id];
+ unsigned int num_pages_alloc;
+
+ if (num_pages == 0)
+ continue;
+
+ RTE_LOG(DEBUG, EAL, "Allocating %u pages of size %" PRIu64 "M on socket %i\n",
+ num_pages, hpi->hugepage_sz >> 20, socket_id);
+
+ /* we may not be able to allocate all pages in one go,
+ * because we break up our memory map into multiple
+ * memseg lists. therefore, try allocating multiple
+ * times and see if we can get the desired number of
+ * pages from multiple allocations.
+ */
+
+ num_pages_alloc = 0;
+ do {
+ int i, cur_pages, needed;
+
+ needed = num_pages - num_pages_alloc;
+
+ pages = malloc(sizeof(*pages) * needed);
+
+ /* do not request exact number of pages */
+ cur_pages = eal_memalloc_alloc_seg_bulk(pages,
+ needed, hpi->hugepage_sz,
+ socket_id, false);
+ if (cur_pages <= 0) {
+ free(pages);
+ return -1;
+ }
+
+ /* mark preallocated pages as unfreeable */
+ for (i = 0; i < cur_pages; i++) {
+ struct rte_memseg *ms = pages[i];
+ ms->flags |= RTE_MEMSEG_FLAG_DO_NOT_FREE;
+ }
+ free(pages);
+
+ num_pages_alloc += cur_pages;
+ } while (num_pages_alloc != num_pages);
+ }
+ }
+ /* if socket limits were specified, set them */
+ if (internal_config.force_socket_limits) {
+ unsigned int i;
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+ uint64_t limit = internal_config.socket_limit[i];
+ if (limit == 0)
+ continue;
+ if (rte_mem_alloc_validator_register("socket-limit",
+ limits_callback, i, limit))
+ RTE_LOG(ERR, EAL, "Failed to register socket limits validator callback\n");
+ }
+ }
+ return 0;
+}
+
+/*
+ * uses fstat to report the size of a file on disk
+ */
+static off_t
+getFileSize(int fd)
+{
+ struct stat st;
+ if (fstat(fd, &st) < 0)
+ return 0;
+ return st.st_size;
+}
+
+/*
+ * This creates the memory mappings in the secondary process to match that of
+ * the server process. It goes through each memory segment in the DPDK runtime
+ * configuration and finds the hugepages which form that segment, mapping them
+ * in order to form a contiguous block in the virtual memory space
+ */
+static int
+eal_legacy_hugepage_attach(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct hugepage_file *hp = NULL;
+ unsigned int num_hp = 0;
+ unsigned int i = 0;
+ unsigned int cur_seg;
+ off_t size = 0;
+ int fd, fd_hugepage = -1;
+
+ if (aslr_enabled() > 0) {
+ RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization "
+ "(ASLR) is enabled in the kernel.\n");
+ RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory "
+ "into secondary processes\n");
+ }
+
+ fd_hugepage = open(eal_hugepage_data_path(), O_RDONLY);
+ if (fd_hugepage < 0) {
+ RTE_LOG(ERR, EAL, "Could not open %s\n",
+ eal_hugepage_data_path());
+ goto error;
+ }
+
+ size = getFileSize(fd_hugepage);
+ hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0);
+ if (hp == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Could not mmap %s\n",
+ eal_hugepage_data_path());
+ goto error;
+ }
+
+ num_hp = size / sizeof(struct hugepage_file);
+ RTE_LOG(DEBUG, EAL, "Analysing %u files\n", num_hp);
+
+ /* map all segments into memory to make sure we get the addrs. the
+ * segments themselves are already in memseg list (which is shared and
+ * has its VA space already preallocated), so we just need to map
+ * everything into correct addresses.
+ */
+ for (i = 0; i < num_hp; i++) {
+ struct hugepage_file *hf = &hp[i];
+ size_t map_sz = hf->size;
+ void *map_addr = hf->final_va;
+ int msl_idx, ms_idx;
+ struct rte_memseg_list *msl;
+ struct rte_memseg *ms;
+
+ /* if size is zero, no more pages left */
+ if (map_sz == 0)
+ break;
+
+ fd = open(hf->filepath, O_RDWR);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Could not open %s: %s\n",
+ hf->filepath, strerror(errno));
+ goto error;
+ }
+
+ map_addr = mmap(map_addr, map_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, fd, 0);
+ if (map_addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Could not map %s: %s\n",
+ hf->filepath, strerror(errno));
+ goto fd_error;
+ }
+
+ /* set shared lock on the file. */
+ if (flock(fd, LOCK_SH) < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): Locking file failed: %s\n",
+ __func__, strerror(errno));
+ goto mmap_error;
+ }
+
+ /* find segment data */
+ msl = rte_mem_virt2memseg_list(map_addr);
+ if (msl == NULL) {
+ RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg list\n",
+ __func__);
+ goto mmap_error;
+ }
+ ms = rte_mem_virt2memseg(map_addr, msl);
+ if (ms == NULL) {
+ RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg\n",
+ __func__);
+ goto mmap_error;
+ }
+
+ msl_idx = msl - mcfg->memsegs;
+ ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
+ if (ms_idx < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg idx\n",
+ __func__);
+ goto mmap_error;
+ }
+
+ /* store segment fd internally */
+ if (eal_memalloc_set_seg_fd(msl_idx, ms_idx, fd) < 0)
+ RTE_LOG(ERR, EAL, "Could not store segment fd: %s\n",
+ rte_strerror(rte_errno));
+ }
+ /* unmap the hugepage config file, since we are done using it */
+ munmap(hp, size);
+ close(fd_hugepage);
+ return 0;
+
+mmap_error:
+ munmap(hp[i].final_va, hp[i].size);
+fd_error:
+ close(fd);
+error:
+ /* unwind mmap's done so far */
+ for (cur_seg = 0; cur_seg < i; cur_seg++)
+ munmap(hp[cur_seg].final_va, hp[cur_seg].size);
+
+ if (hp != NULL && hp != MAP_FAILED)
+ munmap(hp, size);
+ if (fd_hugepage >= 0)
+ close(fd_hugepage);
+ return -1;
+}
+
+static int
+eal_hugepage_attach(void)
+{
+ if (eal_memalloc_sync_with_primary()) {
+ RTE_LOG(ERR, EAL, "Could not map memory from primary process\n");
+ if (aslr_enabled() > 0)
+ RTE_LOG(ERR, EAL, "It is recommended to disable ASLR in the kernel and retry running both primary and secondary processes\n");
+ return -1;
+ }
+ return 0;
+}
+
+int
+rte_eal_hugepage_init(void)
+{
+ return internal_config.legacy_mem ?
+ eal_legacy_hugepage_init() :
+ eal_hugepage_init();
+}
+
+int
+rte_eal_hugepage_attach(void)
+{
+ return internal_config.legacy_mem ?
+ eal_legacy_hugepage_attach() :
+ eal_hugepage_attach();
+}
+
+int
+rte_eal_using_phys_addrs(void)
+{
+ if (phys_addrs_available == -1) {
+ uint64_t tmp = 0;
+
+ if (rte_eal_has_hugepages() != 0 &&
+ rte_mem_virt2phy(&tmp) != RTE_BAD_PHYS_ADDR)
+ phys_addrs_available = 1;
+ else
+ phys_addrs_available = 0;
+ }
+ return phys_addrs_available;
+}
+
+static int __rte_unused
+memseg_primary_init_32(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int active_sockets, hpi_idx, msl_idx = 0;
+ unsigned int socket_id, i;
+ struct rte_memseg_list *msl;
+ uint64_t extra_mem_per_socket, total_extra_mem, total_requested_mem;
+ uint64_t max_mem;
+
+ /* no-huge does not need this at all */
+ if (internal_config.no_hugetlbfs)
+ return 0;
+
+ /* this is a giant hack, but desperate times call for desperate
+ * measures. in legacy 32-bit mode, we cannot preallocate VA space,
+ * because having upwards of 2 gigabytes of VA space already mapped will
+ * interfere with our ability to map and sort hugepages.
+ *
+ * therefore, in legacy 32-bit mode, we will be initializing memseg
+ * lists much later - in eal_memory.c, right after we unmap all the
+ * unneeded pages. this will not affect secondary processes, as those
+ * should be able to mmap the space without (too many) problems.
+ */
+ if (internal_config.legacy_mem)
+ return 0;
+
+ /* 32-bit mode is a very special case. we cannot know in advance where
+ * the user will want to allocate their memory, so we have to do some
+ * heuristics.
+ */
+ active_sockets = 0;
+ total_requested_mem = 0;
+ if (internal_config.force_sockets)
+ for (i = 0; i < rte_socket_count(); i++) {
+ uint64_t mem;
+
+ socket_id = rte_socket_id_by_idx(i);
+ mem = internal_config.socket_mem[socket_id];
+
+ if (mem == 0)
+ continue;
+
+ active_sockets++;
+ total_requested_mem += mem;
+ }
+ else
+ total_requested_mem = internal_config.memory;
+
+ max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
+ if (total_requested_mem > max_mem) {
+ RTE_LOG(ERR, EAL, "Invalid parameters: 32-bit process can at most use %uM of memory\n",
+ (unsigned int)(max_mem >> 20));
+ return -1;
+ }
+ total_extra_mem = max_mem - total_requested_mem;
+ extra_mem_per_socket = active_sockets == 0 ? total_extra_mem :
+ total_extra_mem / active_sockets;
+
+ /* the allocation logic is a little bit convoluted, but here's how it
+ * works, in a nutshell:
+ * - if user hasn't specified on which sockets to allocate memory via
+ * --socket-mem, we allocate all of our memory on master core socket.
+ * - if user has specified sockets to allocate memory on, there may be
+ * some "unused" memory left (e.g. if user has specified --socket-mem
+ * such that not all memory adds up to 2 gigabytes), so add it to all
+ * sockets that are in use equally.
+ *
+ * page sizes are sorted by size in descending order, so we can safely
+ * assume that we dispense with bigger page sizes first.
+ */
+
+ /* create memseg lists */
+ for (i = 0; i < rte_socket_count(); i++) {
+ int hp_sizes = (int) internal_config.num_hugepage_sizes;
+ uint64_t max_socket_mem, cur_socket_mem;
+ unsigned int master_lcore_socket;
+ struct rte_config *cfg = rte_eal_get_configuration();
+ bool skip;
+
+ socket_id = rte_socket_id_by_idx(i);
+
+#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ /* we can still sort pages by socket in legacy mode */
+ if (!internal_config.legacy_mem && socket_id > 0)
+ break;
+#endif
+
+ /* if we didn't specifically request memory on this socket */
+ skip = active_sockets != 0 &&
+ internal_config.socket_mem[socket_id] == 0;
+ /* ...or if we didn't specifically request memory on *any*
+ * socket, and this is not master lcore
+ */
+ master_lcore_socket = rte_lcore_to_socket_id(cfg->master_lcore);
+ skip |= active_sockets == 0 && socket_id != master_lcore_socket;
+
+ if (skip) {
+ RTE_LOG(DEBUG, EAL, "Will not preallocate memory on socket %u\n",
+ socket_id);
+ continue;
+ }
+
+ /* max amount of memory on this socket */
+ max_socket_mem = (active_sockets != 0 ?
+ internal_config.socket_mem[socket_id] :
+ internal_config.memory) +
+ extra_mem_per_socket;
+ cur_socket_mem = 0;
+
+ for (hpi_idx = 0; hpi_idx < hp_sizes; hpi_idx++) {
+ uint64_t max_pagesz_mem, cur_pagesz_mem = 0;
+ uint64_t hugepage_sz;
+ struct hugepage_info *hpi;
+ int type_msl_idx, max_segs, total_segs = 0;
+
+ hpi = &internal_config.hugepage_info[hpi_idx];
+ hugepage_sz = hpi->hugepage_sz;
+
+ /* check if pages are actually available */
+ if (hpi->num_pages[socket_id] == 0)
+ continue;
+
+ max_segs = RTE_MAX_MEMSEG_PER_TYPE;
+ max_pagesz_mem = max_socket_mem - cur_socket_mem;
+
+ /* make it multiple of page size */
+ max_pagesz_mem = RTE_ALIGN_FLOOR(max_pagesz_mem,
+ hugepage_sz);
+
+ RTE_LOG(DEBUG, EAL, "Attempting to preallocate "
+ "%" PRIu64 "M on socket %i\n",
+ max_pagesz_mem >> 20, socket_id);
+
+ type_msl_idx = 0;
+ while (cur_pagesz_mem < max_pagesz_mem &&
+ total_segs < max_segs) {
+ uint64_t cur_mem;
+ unsigned int n_segs;
+
+ if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL,
+ "No more space in memseg lists, please increase %s\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+ return -1;
+ }
+
+ msl = &mcfg->memsegs[msl_idx];
+
+ cur_mem = get_mem_amount(hugepage_sz,
+ max_pagesz_mem);
+ n_segs = cur_mem / hugepage_sz;
+
+ if (alloc_memseg_list(msl, hugepage_sz, n_segs,
+ socket_id, type_msl_idx)) {
+ /* failing to allocate a memseg list is
+ * a serious error.
+ */
+ RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
+ return -1;
+ }
+
+ if (alloc_va_space(msl)) {
+ /* if we couldn't allocate VA space, we
+ * can try with smaller page sizes.
+ */
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list, retrying with different page size\n");
+ /* deallocate memseg list */
+ if (free_memseg_list(msl))
+ return -1;
+ break;
+ }
+
+ total_segs += msl->memseg_arr.len;
+ cur_pagesz_mem = total_segs * hugepage_sz;
+ type_msl_idx++;
+ msl_idx++;
+ }
+ cur_socket_mem += cur_pagesz_mem;
+ }
+ if (cur_socket_mem == 0) {
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space on socket %u\n",
+ socket_id);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int __rte_unused
+memseg_primary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct memtype {
+ uint64_t page_sz;
+ int socket_id;
+ } *memtypes = NULL;
+ int i, hpi_idx, msl_idx, ret = -1; /* fail unless told to succeed */
+ struct rte_memseg_list *msl;
+ uint64_t max_mem, max_mem_per_type;
+ unsigned int max_seglists_per_type;
+ unsigned int n_memtypes, cur_type;
+
+ /* no-huge does not need this at all */
+ if (internal_config.no_hugetlbfs)
+ return 0;
+
+ /*
+ * figuring out amount of memory we're going to have is a long and very
+ * involved process. the basic element we're operating with is a memory
+ * type, defined as a combination of NUMA node ID and page size (so that
+ * e.g. 2 sockets with 2 page sizes yield 4 memory types in total).
+ *
+ * deciding amount of memory going towards each memory type is a
+ * balancing act between maximum segments per type, maximum memory per
+ * type, and number of detected NUMA nodes. the goal is to make sure
+ * each memory type gets at least one memseg list.
+ *
+ * the total amount of memory is limited by RTE_MAX_MEM_MB value.
+ *
+ * the total amount of memory per type is limited by either
+ * RTE_MAX_MEM_MB_PER_TYPE, or by RTE_MAX_MEM_MB divided by the number
+ * of detected NUMA nodes. additionally, maximum number of segments per
+ * type is also limited by RTE_MAX_MEMSEG_PER_TYPE. this is because for
+ * smaller page sizes, it can take hundreds of thousands of segments to
+ * reach the above specified per-type memory limits.
+ *
+ * additionally, each type may have multiple memseg lists associated
+ * with it, each limited by either RTE_MAX_MEM_MB_PER_LIST for bigger
+ * page sizes, or RTE_MAX_MEMSEG_PER_LIST segments for smaller ones.
+ *
+ * the number of memseg lists per type is decided based on the above
+ * limits, and also taking number of detected NUMA nodes, to make sure
+ * that we don't run out of memseg lists before we populate all NUMA
+ * nodes with memory.
+ *
+ * we do this in three stages. first, we collect the number of types.
+ * then, we figure out memory constraints and populate the list of
+ * would-be memseg lists. then, we go ahead and allocate the memseg
+ * lists.
+ */
+
+ /* create space for mem types */
+ n_memtypes = internal_config.num_hugepage_sizes * rte_socket_count();
+ memtypes = calloc(n_memtypes, sizeof(*memtypes));
+ if (memtypes == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot allocate space for memory types\n");
+ return -1;
+ }
+
+ /* populate mem types */
+ cur_type = 0;
+ for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
+ hpi_idx++) {
+ struct hugepage_info *hpi;
+ uint64_t hugepage_sz;
+
+ hpi = &internal_config.hugepage_info[hpi_idx];
+ hugepage_sz = hpi->hugepage_sz;
+
+ for (i = 0; i < (int) rte_socket_count(); i++, cur_type++) {
+ int socket_id = rte_socket_id_by_idx(i);
+
+#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ /* we can still sort pages by socket in legacy mode */
+ if (!internal_config.legacy_mem && socket_id > 0)
+ break;
+#endif
+ memtypes[cur_type].page_sz = hugepage_sz;
+ memtypes[cur_type].socket_id = socket_id;
+
+ RTE_LOG(DEBUG, EAL, "Detected memory type: "
+ "socket_id:%u hugepage_sz:%" PRIu64 "\n",
+ socket_id, hugepage_sz);
+ }
+ }
+ /* number of memtypes could have been lower due to no NUMA support */
+ n_memtypes = cur_type;
+
+ /* set up limits for types */
+ max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
+ max_mem_per_type = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20,
+ max_mem / n_memtypes);
+ /*
+ * limit maximum number of segment lists per type to ensure there's
+ * space for memseg lists for all NUMA nodes with all page sizes
+ */
+ max_seglists_per_type = RTE_MAX_MEMSEG_LISTS / n_memtypes;
+
+ if (max_seglists_per_type == 0) {
+ RTE_LOG(ERR, EAL, "Cannot accommodate all memory types, please increase %s\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+ goto out;
+ }
+
+ /* go through all mem types and create segment lists */
+ msl_idx = 0;
+ for (cur_type = 0; cur_type < n_memtypes; cur_type++) {
+ unsigned int cur_seglist, n_seglists, n_segs;
+ unsigned int max_segs_per_type, max_segs_per_list;
+ struct memtype *type = &memtypes[cur_type];
+ uint64_t max_mem_per_list, pagesz;
+ int socket_id;
+
+ pagesz = type->page_sz;
+ socket_id = type->socket_id;
+
+ /*
+ * we need to create segment lists for this type. we must take
+ * into account the following things:
+ *
+ * 1. total amount of memory we can use for this memory type
+ * 2. total amount of memory per memseg list allowed
+ * 3. number of segments needed to fit the amount of memory
+ * 4. number of segments allowed per type
+ * 5. number of segments allowed per memseg list
+ * 6. number of memseg lists we are allowed to take up
+ */
+
+ /* calculate how much segments we will need in total */
+ max_segs_per_type = max_mem_per_type / pagesz;
+ /* limit number of segments to maximum allowed per type */
+ max_segs_per_type = RTE_MIN(max_segs_per_type,
+ (unsigned int)RTE_MAX_MEMSEG_PER_TYPE);
+ /* limit number of segments to maximum allowed per list */
+ max_segs_per_list = RTE_MIN(max_segs_per_type,
+ (unsigned int)RTE_MAX_MEMSEG_PER_LIST);
+
+ /* calculate how much memory we can have per segment list */
+ max_mem_per_list = RTE_MIN(max_segs_per_list * pagesz,
+ (uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20);
+
+ /* calculate how many segments each segment list will have */
+ n_segs = RTE_MIN(max_segs_per_list, max_mem_per_list / pagesz);
+
+ /* calculate how many segment lists we can have */
+ n_seglists = RTE_MIN(max_segs_per_type / n_segs,
+ max_mem_per_type / max_mem_per_list);
+
+ /* limit number of segment lists according to our maximum */
+ n_seglists = RTE_MIN(n_seglists, max_seglists_per_type);
+
+ RTE_LOG(DEBUG, EAL, "Creating %i segment lists: "
+ "n_segs:%i socket_id:%i hugepage_sz:%" PRIu64 "\n",
+ n_seglists, n_segs, socket_id, pagesz);
+
+ /* create all segment lists */
+ for (cur_seglist = 0; cur_seglist < n_seglists; cur_seglist++) {
+ if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL,
+ "No more space in memseg lists, please increase %s\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+ goto out;
+ }
+ msl = &mcfg->memsegs[msl_idx++];
+
+ if (alloc_memseg_list(msl, pagesz, n_segs,
+ socket_id, cur_seglist))
+ goto out;
+
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
+ goto out;
+ }
+ }
+ }
+ /* we're successful */
+ ret = 0;
+out:
+ free(memtypes);
+ return ret;
+}
+
+static int
+memseg_secondary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int msl_idx = 0;
+ struct rte_memseg_list *msl;
+
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+
+ msl = &mcfg->memsegs[msl_idx];
+
+ /* skip empty memseg lists */
+ if (msl->memseg_arr.len == 0)
+ continue;
+
+ if (rte_fbarray_attach(&msl->memseg_arr)) {
+ RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
+ return -1;
+ }
+
+ /* preallocate VA space */
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int
+rte_eal_memseg_init(void)
+{
+ /* increase rlimit to maximum */
+ struct rlimit lim;
+
+ if (getrlimit(RLIMIT_NOFILE, &lim) == 0) {
+ /* set limit to maximum */
+ lim.rlim_cur = lim.rlim_max;
+
+ if (setrlimit(RLIMIT_NOFILE, &lim) < 0) {
+ RTE_LOG(DEBUG, EAL, "Setting maximum number of open files failed: %s\n",
+ strerror(errno));
+ } else {
+ RTE_LOG(DEBUG, EAL, "Setting maximum number of open files to %"
+ PRIu64 "\n",
+ (uint64_t)lim.rlim_cur);
+ }
+ } else {
+ RTE_LOG(ERR, EAL, "Cannot get current resource limits\n");
+ }
+#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (!internal_config.legacy_mem && rte_socket_count() > 1) {
+ RTE_LOG(WARNING, EAL, "DPDK is running on a NUMA system, but is compiled without NUMA support.\n");
+ RTE_LOG(WARNING, EAL, "This will have adverse consequences for performance and usability.\n");
+ RTE_LOG(WARNING, EAL, "Please use --"OPT_LEGACY_MEM" option, or recompile with NUMA support.\n");
+ }
+#endif
+
+ return rte_eal_process_type() == RTE_PROC_PRIMARY ?
+#ifndef RTE_ARCH_64
+ memseg_primary_init_32() :
+#else
+ memseg_primary_init() :
+#endif
+ memseg_secondary_init();
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/eal_thread.c b/src/spdk/dpdk/lib/librte_eal/linux/eal_thread.c
new file mode 100644
index 000000000..cd9d6e0eb
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/eal_thread.c
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sched.h>
+#include <sys/queue.h>
+#include <sys/syscall.h>
+
+#include <rte_debug.h>
+#include <rte_atomic.h>
+#include <rte_launch.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_per_lcore.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_eal_trace.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+
+RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
+RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
+RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
+
+/*
+ * Send a message to a slave lcore identified by slave_id to call a
+ * function f with argument arg. Once the execution is done, the
+ * remote lcore switch in FINISHED state.
+ */
+int
+rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id)
+{
+ int n;
+ char c = 0;
+ int m2s = lcore_config[slave_id].pipe_master2slave[1];
+ int s2m = lcore_config[slave_id].pipe_slave2master[0];
+ int rc = -EBUSY;
+
+ if (lcore_config[slave_id].state != WAIT)
+ goto finish;
+
+ lcore_config[slave_id].f = f;
+ lcore_config[slave_id].arg = arg;
+
+ /* send message */
+ n = 0;
+ while (n == 0 || (n < 0 && errno == EINTR))
+ n = write(m2s, &c, 1);
+ if (n < 0)
+ rte_panic("cannot write on configuration pipe\n");
+
+ /* wait ack */
+ do {
+ n = read(s2m, &c, 1);
+ } while (n < 0 && errno == EINTR);
+
+ if (n <= 0)
+ rte_panic("cannot read on configuration pipe\n");
+
+ rc = 0;
+finish:
+ rte_eal_trace_thread_remote_launch(f, arg, slave_id, rc);
+ return rc;
+}
+
+/* set affinity for current EAL thread */
+static int
+eal_thread_set_affinity(void)
+{
+ unsigned lcore_id = rte_lcore_id();
+
+ /* acquire system unique id */
+ rte_gettid();
+
+ /* update EAL thread core affinity */
+ return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset);
+}
+
+void eal_thread_init_master(unsigned lcore_id)
+{
+ /* set the lcore ID in per-lcore memory area */
+ RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+ /* set CPU affinity */
+ if (eal_thread_set_affinity() < 0)
+ rte_panic("cannot set affinity\n");
+}
+
+/* main loop of threads */
+__rte_noreturn void *
+eal_thread_loop(__rte_unused void *arg)
+{
+ char c;
+ int n, ret;
+ unsigned lcore_id;
+ pthread_t thread_id;
+ int m2s, s2m;
+ char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+
+ thread_id = pthread_self();
+
+ /* retrieve our lcore_id from the configuration structure */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (thread_id == lcore_config[lcore_id].thread_id)
+ break;
+ }
+ if (lcore_id == RTE_MAX_LCORE)
+ rte_panic("cannot retrieve lcore id\n");
+
+ m2s = lcore_config[lcore_id].pipe_master2slave[0];
+ s2m = lcore_config[lcore_id].pipe_slave2master[1];
+
+ /* set the lcore ID in per-lcore memory area */
+ RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+ /* set CPU affinity */
+ if (eal_thread_set_affinity() < 0)
+ rte_panic("cannot set affinity\n");
+
+ ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
+
+ RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
+ lcore_id, (uintptr_t)thread_id, cpuset, ret == 0 ? "" : "...");
+
+ __rte_trace_mem_per_thread_alloc();
+ rte_eal_trace_thread_lcore_ready(lcore_id, cpuset);
+
+ /* read on our pipe to get commands */
+ while (1) {
+ void *fct_arg;
+
+ /* wait command */
+ do {
+ n = read(m2s, &c, 1);
+ } while (n < 0 && errno == EINTR);
+
+ if (n <= 0)
+ rte_panic("cannot read on configuration pipe\n");
+
+ lcore_config[lcore_id].state = RUNNING;
+
+ /* send ack */
+ n = 0;
+ while (n == 0 || (n < 0 && errno == EINTR))
+ n = write(s2m, &c, 1);
+ if (n < 0)
+ rte_panic("cannot write on configuration pipe\n");
+
+ if (lcore_config[lcore_id].f == NULL)
+ rte_panic("NULL function pointer\n");
+
+ /* call the function and store the return value */
+ fct_arg = lcore_config[lcore_id].arg;
+ ret = lcore_config[lcore_id].f(fct_arg);
+ lcore_config[lcore_id].ret = ret;
+ rte_wmb();
+
+ /* when a service core returns, it should go directly to WAIT
+ * state, because the application will not lcore_wait() for it.
+ */
+ if (lcore_config[lcore_id].core_role == ROLE_SERVICE)
+ lcore_config[lcore_id].state = WAIT;
+ else
+ lcore_config[lcore_id].state = FINISHED;
+ }
+
+ /* never reached */
+ /* pthread_exit(NULL); */
+ /* return NULL; */
+}
+
+/* require calling thread tid by gettid() */
+int rte_sys_gettid(void)
+{
+ return (int)syscall(SYS_gettid);
+}
+
+int rte_thread_setname(pthread_t id, const char *name)
+{
+ int ret = ENOSYS;
+#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
+#if __GLIBC_PREREQ(2, 12)
+ ret = pthread_setname_np(id, name);
+#endif
+#endif
+ RTE_SET_USED(id);
+ RTE_SET_USED(name);
+ return -ret;
+}
+
+int rte_thread_getname(pthread_t id, char *name, size_t len)
+{
+ int ret = ENOSYS;
+#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
+#if __GLIBC_PREREQ(2, 12)
+ ret = pthread_getname_np(id, name, len);
+#endif
+#endif
+ RTE_SET_USED(id);
+ RTE_SET_USED(name);
+ RTE_SET_USED(len);
+ return -ret;
+
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/eal_timer.c b/src/spdk/dpdk/lib/librte_eal/linux/eal_timer.c
new file mode 100644
index 000000000..6dc6b565d
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/eal_timer.c
@@ -0,0 +1,232 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation.
+ * Copyright(c) 2012-2013 6WIND S.A.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_debug.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+
+enum timer_source eal_timer_source = EAL_TIMER_HPET;
+
+#ifdef RTE_LIBEAL_USE_HPET
+
+#define DEV_HPET "/dev/hpet"
+
+/* Maximum number of counters. */
+#define HPET_TIMER_NUM 3
+
+/* General capabilities register */
+#define CLK_PERIOD_SHIFT 32 /* Clock period shift. */
+#define CLK_PERIOD_MASK 0xffffffff00000000ULL /* Clock period mask. */
+
+/**
+ * HPET timer registers. From the Intel IA-PC HPET (High Precision Event
+ * Timers) Specification.
+ */
+struct eal_hpet_regs {
+ /* Memory-mapped, software visible registers */
+ uint64_t capabilities; /**< RO General Capabilities Register. */
+ uint64_t reserved0; /**< Reserved for future use. */
+ uint64_t config; /**< RW General Configuration Register. */
+ uint64_t reserved1; /**< Reserved for future use. */
+ uint64_t isr; /**< RW Clear General Interrupt Status. */
+ uint64_t reserved2[25]; /**< Reserved for future use. */
+ union {
+ uint64_t counter; /**< RW Main Counter Value Register. */
+ struct {
+ uint32_t counter_l; /**< RW Main Counter Low. */
+ uint32_t counter_h; /**< RW Main Counter High. */
+ };
+ };
+ uint64_t reserved3; /**< Reserved for future use. */
+ struct {
+ uint64_t config; /**< RW Timer Config and Capability Reg. */
+ uint64_t comp; /**< RW Timer Comparator Value Register. */
+ uint64_t fsb; /**< RW FSB Interrupt Route Register. */
+ uint64_t reserved4; /**< Reserved for future use. */
+ } timers[HPET_TIMER_NUM]; /**< Set of HPET timers. */
+};
+
+/* Mmap'd hpet registers */
+static volatile struct eal_hpet_regs *eal_hpet = NULL;
+
+/* Period at which the HPET counter increments in
+ * femtoseconds (10^-15 seconds). */
+static uint32_t eal_hpet_resolution_fs = 0;
+
+/* Frequency of the HPET counter in Hz */
+static uint64_t eal_hpet_resolution_hz = 0;
+
+/* Incremented 4 times during one 32bits hpet full count */
+static uint32_t eal_hpet_msb;
+
+static pthread_t msb_inc_thread_id;
+
+/*
+ * This function runs on a specific thread to update a global variable
+ * containing used to process MSB of the HPET (unfortunately, we need
+ * this because hpet is 32 bits by default under linux).
+ */
+static void *
+hpet_msb_inc(__rte_unused void *arg)
+{
+ uint32_t t;
+
+ while (1) {
+ t = (eal_hpet->counter_l >> 30);
+ if (t != (eal_hpet_msb & 3))
+ eal_hpet_msb ++;
+ sleep(10);
+ }
+ return NULL;
+}
+
+uint64_t
+rte_get_hpet_hz(void)
+{
+ if(internal_config.no_hpet)
+ rte_panic("Error, HPET called, but no HPET present\n");
+
+ return eal_hpet_resolution_hz;
+}
+
+uint64_t
+rte_get_hpet_cycles(void)
+{
+ uint32_t t, msb;
+ uint64_t ret;
+
+ if(internal_config.no_hpet)
+ rte_panic("Error, HPET called, but no HPET present\n");
+
+ t = eal_hpet->counter_l;
+ msb = eal_hpet_msb;
+ ret = (msb + 2 - (t >> 30)) / 4;
+ ret <<= 32;
+ ret += t;
+ return ret;
+}
+
+#endif
+
+#ifdef RTE_LIBEAL_USE_HPET
+/*
+ * Open and mmap /dev/hpet (high precision event timer) that will
+ * provide our time reference.
+ */
+int
+rte_eal_hpet_init(int make_default)
+{
+ int fd, ret;
+
+ if (internal_config.no_hpet) {
+ RTE_LOG(NOTICE, EAL, "HPET is disabled\n");
+ return -1;
+ }
+
+ fd = open(DEV_HPET, O_RDONLY);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "ERROR: Cannot open "DEV_HPET": %s!\n",
+ strerror(errno));
+ internal_config.no_hpet = 1;
+ return -1;
+ }
+ eal_hpet = mmap(NULL, 1024, PROT_READ, MAP_SHARED, fd, 0);
+ if (eal_hpet == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "ERROR: Cannot mmap "DEV_HPET"!\n"
+ "Please enable CONFIG_HPET_MMAP in your kernel configuration "
+ "to allow HPET support.\n"
+ "To run without using HPET, set CONFIG_RTE_LIBEAL_USE_HPET=n "
+ "in your build configuration or use '--no-hpet' EAL flag.\n");
+ close(fd);
+ internal_config.no_hpet = 1;
+ return -1;
+ }
+ close(fd);
+
+ eal_hpet_resolution_fs = (uint32_t)((eal_hpet->capabilities &
+ CLK_PERIOD_MASK) >>
+ CLK_PERIOD_SHIFT);
+
+ eal_hpet_resolution_hz = (1000ULL*1000ULL*1000ULL*1000ULL*1000ULL) /
+ (uint64_t)eal_hpet_resolution_fs;
+
+ RTE_LOG(INFO, EAL, "HPET frequency is ~%"PRIu64" kHz\n",
+ eal_hpet_resolution_hz/1000);
+
+ eal_hpet_msb = (eal_hpet->counter_l >> 30);
+
+ /* create a thread that will increment a global variable for
+ * msb (hpet is 32 bits by default under linux) */
+ ret = rte_ctrl_thread_create(&msb_inc_thread_id, "hpet-msb-inc", NULL,
+ hpet_msb_inc, NULL);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "ERROR: Cannot create HPET timer thread!\n");
+ internal_config.no_hpet = 1;
+ return -1;
+ }
+
+ if (make_default)
+ eal_timer_source = EAL_TIMER_HPET;
+ return 0;
+}
+#endif
+
+uint64_t
+get_tsc_freq(void)
+{
+#ifdef CLOCK_MONOTONIC_RAW
+#define NS_PER_SEC 1E9
+#define CYC_PER_10MHZ 1E7
+
+ struct timespec sleeptime = {.tv_nsec = NS_PER_SEC / 10 }; /* 1/10 second */
+
+ struct timespec t_start, t_end;
+ uint64_t tsc_hz;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &t_start) == 0) {
+ uint64_t ns, end, start = rte_rdtsc();
+ nanosleep(&sleeptime,NULL);
+ clock_gettime(CLOCK_MONOTONIC_RAW, &t_end);
+ end = rte_rdtsc();
+ ns = ((t_end.tv_sec - t_start.tv_sec) * NS_PER_SEC);
+ ns += (t_end.tv_nsec - t_start.tv_nsec);
+
+ double secs = (double)ns/NS_PER_SEC;
+ tsc_hz = (uint64_t)((end - start)/secs);
+ /* Round up to 10Mhz. 1E7 ~ 10Mhz */
+ return RTE_ALIGN_MUL_NEAR(tsc_hz, CYC_PER_10MHZ);
+ }
+#endif
+ return 0;
+}
+
+int
+rte_eal_timer_init(void)
+{
+
+ eal_timer_source = EAL_TIMER_TSC;
+
+ set_tsc_freq();
+ return 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/eal_vfio.c b/src/spdk/dpdk/lib/librte_eal/linux/eal_vfio.c
new file mode 100644
index 000000000..d26e1649a
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/eal_vfio.c
@@ -0,0 +1,2186 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#include <inttypes.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+
+#include <rte_errno.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_eal_memconfig.h>
+#include <rte_vfio.h>
+
+#include "eal_filesystem.h"
+#include "eal_memcfg.h"
+#include "eal_vfio.h"
+#include "eal_private.h"
+
+#ifdef VFIO_PRESENT
+
+#define VFIO_MEM_EVENT_CLB_NAME "vfio_mem_event_clb"
+
+/* hot plug/unplug of VFIO groups may cause all DMA maps to be dropped. we can
+ * recreate the mappings for DPDK segments, but we cannot do so for memory that
+ * was registered by the user themselves, so we need to store the user mappings
+ * somewhere, to recreate them later.
+ */
+#define VFIO_MAX_USER_MEM_MAPS 256
+struct user_mem_map {
+ uint64_t addr;
+ uint64_t iova;
+ uint64_t len;
+};
+
+struct user_mem_maps {
+ rte_spinlock_recursive_t lock;
+ int n_maps;
+ struct user_mem_map maps[VFIO_MAX_USER_MEM_MAPS];
+};
+
+struct vfio_config {
+ int vfio_enabled;
+ int vfio_container_fd;
+ int vfio_active_groups;
+ const struct vfio_iommu_type *vfio_iommu_type;
+ struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
+ struct user_mem_maps mem_maps;
+};
+
+/* per-process VFIO config */
+static struct vfio_config vfio_cfgs[VFIO_MAX_CONTAINERS];
+static struct vfio_config *default_vfio_cfg = &vfio_cfgs[0];
+
+static int vfio_type1_dma_map(int);
+static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
+static int vfio_spapr_dma_map(int);
+static int vfio_spapr_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
+static int vfio_noiommu_dma_map(int);
+static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
+static int vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr,
+ uint64_t iova, uint64_t len, int do_map);
+
+/* IOMMU types we support */
+static const struct vfio_iommu_type iommu_types[] = {
+ /* x86 IOMMU, otherwise known as type 1 */
+ {
+ .type_id = RTE_VFIO_TYPE1,
+ .name = "Type 1",
+ .dma_map_func = &vfio_type1_dma_map,
+ .dma_user_map_func = &vfio_type1_dma_mem_map
+ },
+ /* ppc64 IOMMU, otherwise known as spapr */
+ {
+ .type_id = RTE_VFIO_SPAPR,
+ .name = "sPAPR",
+ .dma_map_func = &vfio_spapr_dma_map,
+ .dma_user_map_func = &vfio_spapr_dma_mem_map
+ },
+ /* IOMMU-less mode */
+ {
+ .type_id = RTE_VFIO_NOIOMMU,
+ .name = "No-IOMMU",
+ .dma_map_func = &vfio_noiommu_dma_map,
+ .dma_user_map_func = &vfio_noiommu_dma_mem_map
+ },
+};
+
+static int
+is_null_map(const struct user_mem_map *map)
+{
+ return map->addr == 0 && map->iova == 0 && map->len == 0;
+}
+
+/* we may need to merge user mem maps together in case of user mapping/unmapping
+ * chunks of memory, so we'll need a comparator function to sort segments.
+ */
+static int
+user_mem_map_cmp(const void *a, const void *b)
+{
+ const struct user_mem_map *umm_a = a;
+ const struct user_mem_map *umm_b = b;
+
+ /* move null entries to end */
+ if (is_null_map(umm_a))
+ return 1;
+ if (is_null_map(umm_b))
+ return -1;
+
+ /* sort by iova first */
+ if (umm_a->iova < umm_b->iova)
+ return -1;
+ if (umm_a->iova > umm_b->iova)
+ return 1;
+
+ if (umm_a->addr < umm_b->addr)
+ return -1;
+ if (umm_a->addr > umm_b->addr)
+ return 1;
+
+ if (umm_a->len < umm_b->len)
+ return -1;
+ if (umm_a->len > umm_b->len)
+ return 1;
+
+ return 0;
+}
+
+/* adjust user map entry. this may result in shortening of existing map, or in
+ * splitting existing map in two pieces.
+ */
+static void
+adjust_map(struct user_mem_map *src, struct user_mem_map *end,
+ uint64_t remove_va_start, uint64_t remove_len)
+{
+ /* if va start is same as start address, we're simply moving start */
+ if (remove_va_start == src->addr) {
+ src->addr += remove_len;
+ src->iova += remove_len;
+ src->len -= remove_len;
+ } else if (remove_va_start + remove_len == src->addr + src->len) {
+ /* we're shrinking mapping from the end */
+ src->len -= remove_len;
+ } else {
+ /* we're blowing a hole in the middle */
+ struct user_mem_map tmp;
+ uint64_t total_len = src->len;
+
+ /* adjust source segment length */
+ src->len = remove_va_start - src->addr;
+
+ /* create temporary segment in the middle */
+ tmp.addr = src->addr + src->len;
+ tmp.iova = src->iova + src->len;
+ tmp.len = remove_len;
+
+ /* populate end segment - this one we will be keeping */
+ end->addr = tmp.addr + tmp.len;
+ end->iova = tmp.iova + tmp.len;
+ end->len = total_len - src->len - tmp.len;
+ }
+}
+
+/* try merging two maps into one, return 1 if succeeded */
+static int
+merge_map(struct user_mem_map *left, struct user_mem_map *right)
+{
+ if (left->addr + left->len != right->addr)
+ return 0;
+ if (left->iova + left->len != right->iova)
+ return 0;
+
+ left->len += right->len;
+
+ memset(right, 0, sizeof(*right));
+
+ return 1;
+}
+
+static struct user_mem_map *
+find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr,
+ uint64_t iova, uint64_t len)
+{
+ uint64_t va_end = addr + len;
+ uint64_t iova_end = iova + len;
+ int i;
+
+ for (i = 0; i < user_mem_maps->n_maps; i++) {
+ struct user_mem_map *map = &user_mem_maps->maps[i];
+ uint64_t map_va_end = map->addr + map->len;
+ uint64_t map_iova_end = map->iova + map->len;
+
+ /* check start VA */
+ if (addr < map->addr || addr >= map_va_end)
+ continue;
+ /* check if VA end is within boundaries */
+ if (va_end <= map->addr || va_end > map_va_end)
+ continue;
+
+ /* check start IOVA */
+ if (iova < map->iova || iova >= map_iova_end)
+ continue;
+ /* check if IOVA end is within boundaries */
+ if (iova_end <= map->iova || iova_end > map_iova_end)
+ continue;
+
+ /* we've found our map */
+ return map;
+ }
+ return NULL;
+}
+
+/* this will sort all user maps, and merge/compact any adjacent maps */
+static void
+compact_user_maps(struct user_mem_maps *user_mem_maps)
+{
+ int i, n_merged, cur_idx;
+
+ qsort(user_mem_maps->maps, user_mem_maps->n_maps,
+ sizeof(user_mem_maps->maps[0]), user_mem_map_cmp);
+
+ /* we'll go over the list backwards when merging */
+ n_merged = 0;
+ for (i = user_mem_maps->n_maps - 2; i >= 0; i--) {
+ struct user_mem_map *l, *r;
+
+ l = &user_mem_maps->maps[i];
+ r = &user_mem_maps->maps[i + 1];
+
+ if (is_null_map(l) || is_null_map(r))
+ continue;
+
+ if (merge_map(l, r))
+ n_merged++;
+ }
+
+ /* the entries are still sorted, but now they have holes in them, so
+ * walk through the list and remove the holes
+ */
+ if (n_merged > 0) {
+ cur_idx = 0;
+ for (i = 0; i < user_mem_maps->n_maps; i++) {
+ if (!is_null_map(&user_mem_maps->maps[i])) {
+ struct user_mem_map *src, *dst;
+
+ src = &user_mem_maps->maps[i];
+ dst = &user_mem_maps->maps[cur_idx++];
+
+ if (src != dst) {
+ memcpy(dst, src, sizeof(*src));
+ memset(src, 0, sizeof(*src));
+ }
+ }
+ }
+ user_mem_maps->n_maps = cur_idx;
+ }
+}
+
+static int
+vfio_open_group_fd(int iommu_group_num)
+{
+ int vfio_group_fd;
+ char filename[PATH_MAX];
+ struct rte_mp_msg mp_req, *mp_rep;
+ struct rte_mp_reply mp_reply = {0};
+ struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+ struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
+
+ /* if primary, try to open the group */
+ if (internal_config.process_type == RTE_PROC_PRIMARY) {
+ /* try regular group format */
+ snprintf(filename, sizeof(filename),
+ VFIO_GROUP_FMT, iommu_group_num);
+ vfio_group_fd = open(filename, O_RDWR);
+ if (vfio_group_fd < 0) {
+ /* if file not found, it's not an error */
+ if (errno != ENOENT) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
+ strerror(errno));
+ return -1;
+ }
+
+ /* special case: try no-IOMMU path as well */
+ snprintf(filename, sizeof(filename),
+ VFIO_NOIOMMU_GROUP_FMT,
+ iommu_group_num);
+ vfio_group_fd = open(filename, O_RDWR);
+ if (vfio_group_fd < 0) {
+ if (errno != ENOENT) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
+ strerror(errno));
+ return -1;
+ }
+ return 0;
+ }
+ /* noiommu group found */
+ }
+
+ return vfio_group_fd;
+ }
+ /* if we're in a secondary process, request group fd from the primary
+ * process via mp channel.
+ */
+ p->req = SOCKET_REQ_GROUP;
+ p->group_num = iommu_group_num;
+ strcpy(mp_req.name, EAL_VFIO_MP);
+ mp_req.len_param = sizeof(*p);
+ mp_req.num_fds = 0;
+
+ vfio_group_fd = -1;
+ if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+ mp_reply.nb_received == 1) {
+ mp_rep = &mp_reply.msgs[0];
+ p = (struct vfio_mp_param *)mp_rep->param;
+ if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
+ vfio_group_fd = mp_rep->fds[0];
+ } else if (p->result == SOCKET_NO_FD) {
+ RTE_LOG(ERR, EAL, " bad VFIO group fd\n");
+ vfio_group_fd = 0;
+ }
+ }
+
+ free(mp_reply.msgs);
+ if (vfio_group_fd < 0)
+ RTE_LOG(ERR, EAL, " cannot request group fd\n");
+ return vfio_group_fd;
+}
+
+static struct vfio_config *
+get_vfio_cfg_by_group_num(int iommu_group_num)
+{
+ struct vfio_config *vfio_cfg;
+ int i, j;
+
+ for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
+ vfio_cfg = &vfio_cfgs[i];
+ for (j = 0; j < VFIO_MAX_GROUPS; j++) {
+ if (vfio_cfg->vfio_groups[j].group_num ==
+ iommu_group_num)
+ return vfio_cfg;
+ }
+ }
+
+ return NULL;
+}
+
+static int
+vfio_get_group_fd(struct vfio_config *vfio_cfg,
+ int iommu_group_num)
+{
+ int i;
+ int vfio_group_fd;
+ struct vfio_group *cur_grp;
+
+ /* check if we already have the group descriptor open */
+ for (i = 0; i < VFIO_MAX_GROUPS; i++)
+ if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num)
+ return vfio_cfg->vfio_groups[i].fd;
+
+ /* Lets see first if there is room for a new group */
+ if (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) {
+ RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
+ return -1;
+ }
+
+ /* Now lets get an index for the new group */
+ for (i = 0; i < VFIO_MAX_GROUPS; i++)
+ if (vfio_cfg->vfio_groups[i].group_num == -1) {
+ cur_grp = &vfio_cfg->vfio_groups[i];
+ break;
+ }
+
+ /* This should not happen */
+ if (i == VFIO_MAX_GROUPS) {
+ RTE_LOG(ERR, EAL, "No VFIO group free slot found\n");
+ return -1;
+ }
+
+ vfio_group_fd = vfio_open_group_fd(iommu_group_num);
+ if (vfio_group_fd <= 0) {
+ RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num);
+ return -1;
+ }
+
+ cur_grp->group_num = iommu_group_num;
+ cur_grp->fd = vfio_group_fd;
+ vfio_cfg->vfio_active_groups++;
+
+ return vfio_group_fd;
+}
+
+static struct vfio_config *
+get_vfio_cfg_by_group_fd(int vfio_group_fd)
+{
+ struct vfio_config *vfio_cfg;
+ int i, j;
+
+ for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
+ vfio_cfg = &vfio_cfgs[i];
+ for (j = 0; j < VFIO_MAX_GROUPS; j++)
+ if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd)
+ return vfio_cfg;
+ }
+
+ return NULL;
+}
+
+static struct vfio_config *
+get_vfio_cfg_by_container_fd(int container_fd)
+{
+ int i;
+
+ if (container_fd == RTE_VFIO_DEFAULT_CONTAINER_FD)
+ return default_vfio_cfg;
+
+ for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
+ if (vfio_cfgs[i].vfio_container_fd == container_fd)
+ return &vfio_cfgs[i];
+ }
+
+ return NULL;
+}
+
+int
+rte_vfio_get_group_fd(int iommu_group_num)
+{
+ struct vfio_config *vfio_cfg;
+
+ /* get the vfio_config it belongs to */
+ vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num);
+ vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg;
+
+ return vfio_get_group_fd(vfio_cfg, iommu_group_num);
+}
+
+static int
+get_vfio_group_idx(int vfio_group_fd)
+{
+ struct vfio_config *vfio_cfg;
+ int i, j;
+
+ for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
+ vfio_cfg = &vfio_cfgs[i];
+ for (j = 0; j < VFIO_MAX_GROUPS; j++)
+ if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd)
+ return j;
+ }
+
+ return -1;
+}
+
+static void
+vfio_group_device_get(int vfio_group_fd)
+{
+ struct vfio_config *vfio_cfg;
+ int i;
+
+ vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, " invalid group fd!\n");
+ return;
+ }
+
+ i = get_vfio_group_idx(vfio_group_fd);
+ if (i < 0 || i > (VFIO_MAX_GROUPS - 1))
+ RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i);
+ else
+ vfio_cfg->vfio_groups[i].devices++;
+}
+
+static void
+vfio_group_device_put(int vfio_group_fd)
+{
+ struct vfio_config *vfio_cfg;
+ int i;
+
+ vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, " invalid group fd!\n");
+ return;
+ }
+
+ i = get_vfio_group_idx(vfio_group_fd);
+ if (i < 0 || i > (VFIO_MAX_GROUPS - 1))
+ RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i);
+ else
+ vfio_cfg->vfio_groups[i].devices--;
+}
+
+static int
+vfio_group_device_count(int vfio_group_fd)
+{
+ struct vfio_config *vfio_cfg;
+ int i;
+
+ vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, " invalid group fd!\n");
+ return -1;
+ }
+
+ i = get_vfio_group_idx(vfio_group_fd);
+ if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) {
+ RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i);
+ return -1;
+ }
+
+ return vfio_cfg->vfio_groups[i].devices;
+}
+
+static void
+vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len,
+ void *arg __rte_unused)
+{
+ rte_iova_t iova_start, iova_expected;
+ struct rte_memseg_list *msl;
+ struct rte_memseg *ms;
+ size_t cur_len = 0;
+ uint64_t va_start;
+
+ msl = rte_mem_virt2memseg_list(addr);
+
+ /* for IOVA as VA mode, no need to care for IOVA addresses */
+ if (rte_eal_iova_mode() == RTE_IOVA_VA && msl->external == 0) {
+ uint64_t vfio_va = (uint64_t)(uintptr_t)addr;
+ if (type == RTE_MEM_EVENT_ALLOC)
+ vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va,
+ len, 1);
+ else
+ vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va,
+ len, 0);
+ return;
+ }
+
+#ifdef RTE_ARCH_PPC_64
+ ms = rte_mem_virt2memseg(addr, msl);
+ while (cur_len < len) {
+ int idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
+
+ rte_fbarray_set_free(&msl->memseg_arr, idx);
+ cur_len += ms->len;
+ ++ms;
+ }
+ cur_len = 0;
+#endif
+ /* memsegs are contiguous in memory */
+ ms = rte_mem_virt2memseg(addr, msl);
+
+ /*
+ * This memory is not guaranteed to be contiguous, but it still could
+ * be, or it could have some small contiguous chunks. Since the number
+ * of VFIO mappings is limited, and VFIO appears to not concatenate
+ * adjacent mappings, we have to do this ourselves.
+ *
+ * So, find contiguous chunks, then map them.
+ */
+ va_start = ms->addr_64;
+ iova_start = iova_expected = ms->iova;
+ while (cur_len < len) {
+ bool new_contig_area = ms->iova != iova_expected;
+ bool last_seg = (len - cur_len) == ms->len;
+ bool skip_last = false;
+
+ /* only do mappings when current contiguous area ends */
+ if (new_contig_area) {
+ if (type == RTE_MEM_EVENT_ALLOC)
+ vfio_dma_mem_map(default_vfio_cfg, va_start,
+ iova_start,
+ iova_expected - iova_start, 1);
+ else
+ vfio_dma_mem_map(default_vfio_cfg, va_start,
+ iova_start,
+ iova_expected - iova_start, 0);
+ va_start = ms->addr_64;
+ iova_start = ms->iova;
+ }
+ /* some memory segments may have invalid IOVA */
+ if (ms->iova == RTE_BAD_IOVA) {
+ RTE_LOG(DEBUG, EAL, "Memory segment at %p has bad IOVA, skipping\n",
+ ms->addr);
+ skip_last = true;
+ }
+ iova_expected = ms->iova + ms->len;
+ cur_len += ms->len;
+ ++ms;
+
+ /*
+ * don't count previous segment, and don't attempt to
+ * dereference a potentially invalid pointer.
+ */
+ if (skip_last && !last_seg) {
+ iova_expected = iova_start = ms->iova;
+ va_start = ms->addr_64;
+ } else if (!skip_last && last_seg) {
+ /* this is the last segment and we're not skipping */
+ if (type == RTE_MEM_EVENT_ALLOC)
+ vfio_dma_mem_map(default_vfio_cfg, va_start,
+ iova_start,
+ iova_expected - iova_start, 1);
+ else
+ vfio_dma_mem_map(default_vfio_cfg, va_start,
+ iova_start,
+ iova_expected - iova_start, 0);
+ }
+ }
+#ifdef RTE_ARCH_PPC_64
+ cur_len = 0;
+ ms = rte_mem_virt2memseg(addr, msl);
+ while (cur_len < len) {
+ int idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
+
+ rte_fbarray_set_used(&msl->memseg_arr, idx);
+ cur_len += ms->len;
+ ++ms;
+ }
+#endif
+}
+
+static int
+vfio_sync_default_container(void)
+{
+ struct rte_mp_msg mp_req, *mp_rep;
+ struct rte_mp_reply mp_reply = {0};
+ struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+ struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
+ int iommu_type_id;
+ unsigned int i;
+
+ /* cannot be called from primary */
+ if (rte_eal_process_type() != RTE_PROC_SECONDARY)
+ return -1;
+
+ /* default container fd should have been opened in rte_vfio_enable() */
+ if (!default_vfio_cfg->vfio_enabled ||
+ default_vfio_cfg->vfio_container_fd < 0) {
+ RTE_LOG(ERR, EAL, "VFIO support is not initialized\n");
+ return -1;
+ }
+
+ /* find default container's IOMMU type */
+ p->req = SOCKET_REQ_IOMMU_TYPE;
+ strcpy(mp_req.name, EAL_VFIO_MP);
+ mp_req.len_param = sizeof(*p);
+ mp_req.num_fds = 0;
+
+ iommu_type_id = -1;
+ if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+ mp_reply.nb_received == 1) {
+ mp_rep = &mp_reply.msgs[0];
+ p = (struct vfio_mp_param *)mp_rep->param;
+ if (p->result == SOCKET_OK)
+ iommu_type_id = p->iommu_type_id;
+ }
+ free(mp_reply.msgs);
+ if (iommu_type_id < 0) {
+ RTE_LOG(ERR, EAL, "Could not get IOMMU type for default container\n");
+ return -1;
+ }
+
+ /* we now have an fd for default container, as well as its IOMMU type.
+ * now, set up default VFIO container config to match.
+ */
+ for (i = 0; i < RTE_DIM(iommu_types); i++) {
+ const struct vfio_iommu_type *t = &iommu_types[i];
+ if (t->type_id != iommu_type_id)
+ continue;
+
+ /* we found our IOMMU type */
+ default_vfio_cfg->vfio_iommu_type = t;
+
+ return 0;
+ }
+ RTE_LOG(ERR, EAL, "Could not find IOMMU type id (%i)\n",
+ iommu_type_id);
+ return -1;
+}
+
+int
+rte_vfio_clear_group(int vfio_group_fd)
+{
+ int i;
+ struct vfio_config *vfio_cfg;
+
+ vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, " invalid group fd!\n");
+ return -1;
+ }
+
+ i = get_vfio_group_idx(vfio_group_fd);
+ if (i < 0)
+ return -1;
+ vfio_cfg->vfio_groups[i].group_num = -1;
+ vfio_cfg->vfio_groups[i].fd = -1;
+ vfio_cfg->vfio_groups[i].devices = 0;
+ vfio_cfg->vfio_active_groups--;
+
+ return 0;
+}
+
+int
+rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
+ int *vfio_dev_fd, struct vfio_device_info *device_info)
+{
+ struct vfio_group_status group_status = {
+ .argsz = sizeof(group_status)
+ };
+ struct vfio_config *vfio_cfg;
+ struct user_mem_maps *user_mem_maps;
+ int vfio_container_fd;
+ int vfio_group_fd;
+ int iommu_group_num;
+ int i, ret;
+
+ /* get group number */
+ ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_num);
+ if (ret == 0) {
+ RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
+ dev_addr);
+ return 1;
+ }
+
+ /* if negative, something failed */
+ if (ret < 0)
+ return -1;
+
+ /* get the actual group fd */
+ vfio_group_fd = rte_vfio_get_group_fd(iommu_group_num);
+ if (vfio_group_fd < 0)
+ return -1;
+
+ /* if group_fd == 0, that means the device isn't managed by VFIO */
+ if (vfio_group_fd == 0) {
+ RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
+ dev_addr);
+ return 1;
+ }
+
+ /*
+ * at this point, we know that this group is viable (meaning, all devices
+ * are either bound to VFIO or not bound to anything)
+ */
+
+ /* check if the group is viable */
+ ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " %s cannot get group status, "
+ "error %i (%s)\n", dev_addr, errno, strerror(errno));
+ close(vfio_group_fd);
+ rte_vfio_clear_group(vfio_group_fd);
+ return -1;
+ } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
+ RTE_LOG(ERR, EAL, " %s VFIO group is not viable! "
+ "Not all devices in IOMMU group bound to VFIO or unbound\n",
+ dev_addr);
+ close(vfio_group_fd);
+ rte_vfio_clear_group(vfio_group_fd);
+ return -1;
+ }
+
+ /* get the vfio_config it belongs to */
+ vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num);
+ vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg;
+ vfio_container_fd = vfio_cfg->vfio_container_fd;
+ user_mem_maps = &vfio_cfg->mem_maps;
+
+ /* check if group does not have a container yet */
+ if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) {
+
+ /* add group to a container */
+ ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER,
+ &vfio_container_fd);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, "
+ "error %i (%s)\n", dev_addr, errno, strerror(errno));
+ close(vfio_group_fd);
+ rte_vfio_clear_group(vfio_group_fd);
+ return -1;
+ }
+
+ /*
+ * pick an IOMMU type and set up DMA mappings for container
+ *
+ * needs to be done only once, only when first group is
+ * assigned to a container and only in primary process.
+ * Note this can happen several times with the hotplug
+ * functionality.
+ */
+ if (internal_config.process_type == RTE_PROC_PRIMARY &&
+ vfio_cfg->vfio_active_groups == 1 &&
+ vfio_group_device_count(vfio_group_fd) == 0) {
+ const struct vfio_iommu_type *t;
+
+ /* select an IOMMU type which we will be using */
+ t = vfio_set_iommu_type(vfio_container_fd);
+ if (!t) {
+ RTE_LOG(ERR, EAL,
+ " %s failed to select IOMMU type\n",
+ dev_addr);
+ close(vfio_group_fd);
+ rte_vfio_clear_group(vfio_group_fd);
+ return -1;
+ }
+ /* lock memory hotplug before mapping and release it
+ * after registering callback, to prevent races
+ */
+ rte_mcfg_mem_read_lock();
+ if (vfio_cfg == default_vfio_cfg)
+ ret = t->dma_map_func(vfio_container_fd);
+ else
+ ret = 0;
+ if (ret) {
+ RTE_LOG(ERR, EAL,
+ " %s DMA remapping failed, error %i (%s)\n",
+ dev_addr, errno, strerror(errno));
+ close(vfio_group_fd);
+ rte_vfio_clear_group(vfio_group_fd);
+ rte_mcfg_mem_read_unlock();
+ return -1;
+ }
+
+ vfio_cfg->vfio_iommu_type = t;
+
+ /* re-map all user-mapped segments */
+ rte_spinlock_recursive_lock(&user_mem_maps->lock);
+
+ /* this IOMMU type may not support DMA mapping, but
+ * if we have mappings in the list - that means we have
+ * previously mapped something successfully, so we can
+ * be sure that DMA mapping is supported.
+ */
+ for (i = 0; i < user_mem_maps->n_maps; i++) {
+ struct user_mem_map *map;
+ map = &user_mem_maps->maps[i];
+
+ ret = t->dma_user_map_func(
+ vfio_container_fd,
+ map->addr, map->iova, map->len,
+ 1);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Couldn't map user memory for DMA: "
+ "va: 0x%" PRIx64 " "
+ "iova: 0x%" PRIx64 " "
+ "len: 0x%" PRIu64 "\n",
+ map->addr, map->iova,
+ map->len);
+ rte_spinlock_recursive_unlock(
+ &user_mem_maps->lock);
+ rte_mcfg_mem_read_unlock();
+ return -1;
+ }
+ }
+ rte_spinlock_recursive_unlock(&user_mem_maps->lock);
+
+ /* register callback for mem events */
+ if (vfio_cfg == default_vfio_cfg)
+ ret = rte_mem_event_callback_register(
+ VFIO_MEM_EVENT_CLB_NAME,
+ vfio_mem_event_callback, NULL);
+ else
+ ret = 0;
+ /* unlock memory hotplug */
+ rte_mcfg_mem_read_unlock();
+
+ if (ret && rte_errno != ENOTSUP) {
+ RTE_LOG(ERR, EAL, "Could not install memory event callback for VFIO\n");
+ return -1;
+ }
+ if (ret)
+ RTE_LOG(DEBUG, EAL, "Memory event callbacks not supported\n");
+ else
+ RTE_LOG(DEBUG, EAL, "Installed memory event callback for VFIO\n");
+ }
+ } else if (rte_eal_process_type() != RTE_PROC_PRIMARY &&
+ vfio_cfg == default_vfio_cfg &&
+ vfio_cfg->vfio_iommu_type == NULL) {
+ /* if we're not a primary process, we do not set up the VFIO
+ * container because it's already been set up by the primary
+ * process. instead, we simply ask the primary about VFIO type
+ * we are using, and set the VFIO config up appropriately.
+ */
+ ret = vfio_sync_default_container();
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Could not sync default VFIO container\n");
+ close(vfio_group_fd);
+ rte_vfio_clear_group(vfio_group_fd);
+ return -1;
+ }
+ /* we have successfully initialized VFIO, notify user */
+ const struct vfio_iommu_type *t =
+ default_vfio_cfg->vfio_iommu_type;
+ RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n",
+ t->type_id, t->name);
+ }
+
+ /* get a file descriptor for the device */
+ *vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr);
+ if (*vfio_dev_fd < 0) {
+ /* if we cannot get a device fd, this implies a problem with
+ * the VFIO group or the container not having IOMMU configured.
+ */
+
+ RTE_LOG(WARNING, EAL, "Getting a vfio_dev_fd for %s failed\n",
+ dev_addr);
+ close(vfio_group_fd);
+ rte_vfio_clear_group(vfio_group_fd);
+ return -1;
+ }
+
+ /* test and setup the device */
+ ret = ioctl(*vfio_dev_fd, VFIO_DEVICE_GET_INFO, device_info);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " %s cannot get device info, "
+ "error %i (%s)\n", dev_addr, errno,
+ strerror(errno));
+ close(*vfio_dev_fd);
+ close(vfio_group_fd);
+ rte_vfio_clear_group(vfio_group_fd);
+ return -1;
+ }
+ vfio_group_device_get(vfio_group_fd);
+
+ return 0;
+}
+
+int
+rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
+ int vfio_dev_fd)
+{
+ struct vfio_group_status group_status = {
+ .argsz = sizeof(group_status)
+ };
+ struct vfio_config *vfio_cfg;
+ int vfio_group_fd;
+ int iommu_group_num;
+ int ret;
+
+ /* we don't want any DMA mapping messages to come while we're detaching
+ * VFIO device, because this might be the last device and we might need
+ * to unregister the callback.
+ */
+ rte_mcfg_mem_read_lock();
+
+ /* get group number */
+ ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_num);
+ if (ret <= 0) {
+ RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver\n",
+ dev_addr);
+ /* This is an error at this point. */
+ ret = -1;
+ goto out;
+ }
+
+ /* get the actual group fd */
+ vfio_group_fd = rte_vfio_get_group_fd(iommu_group_num);
+ if (vfio_group_fd <= 0) {
+ RTE_LOG(INFO, EAL, "rte_vfio_get_group_fd failed for %s\n",
+ dev_addr);
+ ret = -1;
+ goto out;
+ }
+
+ /* get the vfio_config it belongs to */
+ vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num);
+ vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg;
+
+ /* At this point we got an active group. Closing it will make the
+ * container detachment. If this is the last active group, VFIO kernel
+ * code will unset the container and the IOMMU mappings.
+ */
+
+ /* Closing a device */
+ if (close(vfio_dev_fd) < 0) {
+ RTE_LOG(INFO, EAL, "Error when closing vfio_dev_fd for %s\n",
+ dev_addr);
+ ret = -1;
+ goto out;
+ }
+
+ /* An VFIO group can have several devices attached. Just when there is
+ * no devices remaining should the group be closed.
+ */
+ vfio_group_device_put(vfio_group_fd);
+ if (!vfio_group_device_count(vfio_group_fd)) {
+
+ if (close(vfio_group_fd) < 0) {
+ RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n",
+ dev_addr);
+ ret = -1;
+ goto out;
+ }
+
+ if (rte_vfio_clear_group(vfio_group_fd) < 0) {
+ RTE_LOG(INFO, EAL, "Error when clearing group for %s\n",
+ dev_addr);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* if there are no active device groups, unregister the callback to
+ * avoid spurious attempts to map/unmap memory from VFIO.
+ */
+ if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0 &&
+ rte_eal_process_type() != RTE_PROC_SECONDARY)
+ rte_mem_event_callback_unregister(VFIO_MEM_EVENT_CLB_NAME,
+ NULL);
+
+ /* success */
+ ret = 0;
+
+out:
+ rte_mcfg_mem_read_unlock();
+ return ret;
+}
+
+int
+rte_vfio_enable(const char *modname)
+{
+ /* initialize group list */
+ int i, j;
+ int vfio_available;
+
+ rte_spinlock_recursive_t lock = RTE_SPINLOCK_RECURSIVE_INITIALIZER;
+
+ for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
+ vfio_cfgs[i].vfio_container_fd = -1;
+ vfio_cfgs[i].vfio_active_groups = 0;
+ vfio_cfgs[i].vfio_iommu_type = NULL;
+ vfio_cfgs[i].mem_maps.lock = lock;
+
+ for (j = 0; j < VFIO_MAX_GROUPS; j++) {
+ vfio_cfgs[i].vfio_groups[j].fd = -1;
+ vfio_cfgs[i].vfio_groups[j].group_num = -1;
+ vfio_cfgs[i].vfio_groups[j].devices = 0;
+ }
+ }
+
+ /* inform the user that we are probing for VFIO */
+ RTE_LOG(INFO, EAL, "Probing VFIO support...\n");
+
+ /* check if vfio module is loaded */
+ vfio_available = rte_eal_check_module(modname);
+
+ /* return error directly */
+ if (vfio_available == -1) {
+ RTE_LOG(INFO, EAL, "Could not get loaded module details!\n");
+ return -1;
+ }
+
+ /* return 0 if VFIO modules not loaded */
+ if (vfio_available == 0) {
+ RTE_LOG(DEBUG, EAL, "VFIO modules not loaded, "
+ "skipping VFIO support...\n");
+ return 0;
+ }
+
+ if (internal_config.process_type == RTE_PROC_PRIMARY) {
+ /* open a new container */
+ default_vfio_cfg->vfio_container_fd =
+ rte_vfio_get_container_fd();
+ } else {
+ /* get the default container from the primary process */
+ default_vfio_cfg->vfio_container_fd =
+ vfio_get_default_container_fd();
+ }
+
+ /* check if we have VFIO driver enabled */
+ if (default_vfio_cfg->vfio_container_fd != -1) {
+ RTE_LOG(NOTICE, EAL, "VFIO support initialized\n");
+ default_vfio_cfg->vfio_enabled = 1;
+ } else {
+ RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n");
+ }
+
+ return 0;
+}
+
+int
+rte_vfio_is_enabled(const char *modname)
+{
+ const int mod_available = rte_eal_check_module(modname) > 0;
+ return default_vfio_cfg->vfio_enabled && mod_available;
+}
+
+int
+vfio_get_default_container_fd(void)
+{
+ struct rte_mp_msg mp_req, *mp_rep;
+ struct rte_mp_reply mp_reply = {0};
+ struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+ struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
+ int container_fd;
+
+ if (default_vfio_cfg->vfio_enabled)
+ return default_vfio_cfg->vfio_container_fd;
+
+ if (internal_config.process_type == RTE_PROC_PRIMARY) {
+ /* if we were secondary process we would try requesting
+ * container fd from the primary, but we're the primary
+ * process so just exit here
+ */
+ return -1;
+ }
+
+ p->req = SOCKET_REQ_DEFAULT_CONTAINER;
+ strcpy(mp_req.name, EAL_VFIO_MP);
+ mp_req.len_param = sizeof(*p);
+ mp_req.num_fds = 0;
+
+ if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+ mp_reply.nb_received == 1) {
+ mp_rep = &mp_reply.msgs[0];
+ p = (struct vfio_mp_param *)mp_rep->param;
+ if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
+ container_fd = mp_rep->fds[0];
+ free(mp_reply.msgs);
+ return container_fd;
+ }
+ }
+
+ free(mp_reply.msgs);
+ RTE_LOG(ERR, EAL, " cannot request default container fd\n");
+ return -1;
+}
+
+int
+vfio_get_iommu_type(void)
+{
+ if (default_vfio_cfg->vfio_iommu_type == NULL)
+ return -1;
+
+ return default_vfio_cfg->vfio_iommu_type->type_id;
+}
+
+const struct vfio_iommu_type *
+vfio_set_iommu_type(int vfio_container_fd)
+{
+ unsigned idx;
+ for (idx = 0; idx < RTE_DIM(iommu_types); idx++) {
+ const struct vfio_iommu_type *t = &iommu_types[idx];
+
+ int ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
+ t->type_id);
+ if (!ret) {
+ RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n",
+ t->type_id, t->name);
+ return t;
+ }
+ /* not an error, there may be more supported IOMMU types */
+ RTE_LOG(DEBUG, EAL, " set IOMMU type %d (%s) failed, "
+ "error %i (%s)\n", t->type_id, t->name, errno,
+ strerror(errno));
+ }
+ /* if we didn't find a suitable IOMMU type, fail */
+ return NULL;
+}
+
+int
+vfio_has_supported_extensions(int vfio_container_fd)
+{
+ int ret;
+ unsigned idx, n_extensions = 0;
+ for (idx = 0; idx < RTE_DIM(iommu_types); idx++) {
+ const struct vfio_iommu_type *t = &iommu_types[idx];
+
+ ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION,
+ t->type_id);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, " could not get IOMMU type, "
+ "error %i (%s)\n", errno,
+ strerror(errno));
+ close(vfio_container_fd);
+ return -1;
+ } else if (ret == 1) {
+ /* we found a supported extension */
+ n_extensions++;
+ }
+ RTE_LOG(DEBUG, EAL, " IOMMU type %d (%s) is %s\n",
+ t->type_id, t->name,
+ ret ? "supported" : "not supported");
+ }
+
+ /* if we didn't find any supported IOMMU types, fail */
+ if (!n_extensions) {
+ close(vfio_container_fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+rte_vfio_get_container_fd(void)
+{
+ int ret, vfio_container_fd;
+ struct rte_mp_msg mp_req, *mp_rep;
+ struct rte_mp_reply mp_reply = {0};
+ struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+ struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
+
+
+ /* if we're in a primary process, try to open the container */
+ if (internal_config.process_type == RTE_PROC_PRIMARY) {
+ vfio_container_fd = open(VFIO_CONTAINER_PATH, O_RDWR);
+ if (vfio_container_fd < 0) {
+ RTE_LOG(ERR, EAL, " cannot open VFIO container, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ /* check VFIO API version */
+ ret = ioctl(vfio_container_fd, VFIO_GET_API_VERSION);
+ if (ret != VFIO_API_VERSION) {
+ if (ret < 0)
+ RTE_LOG(ERR, EAL, " could not get VFIO API version, "
+ "error %i (%s)\n", errno, strerror(errno));
+ else
+ RTE_LOG(ERR, EAL, " unsupported VFIO API version!\n");
+ close(vfio_container_fd);
+ return -1;
+ }
+
+ ret = vfio_has_supported_extensions(vfio_container_fd);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " no supported IOMMU "
+ "extensions found!\n");
+ return -1;
+ }
+
+ return vfio_container_fd;
+ }
+ /*
+ * if we're in a secondary process, request container fd from the
+ * primary process via mp channel
+ */
+ p->req = SOCKET_REQ_CONTAINER;
+ strcpy(mp_req.name, EAL_VFIO_MP);
+ mp_req.len_param = sizeof(*p);
+ mp_req.num_fds = 0;
+
+ vfio_container_fd = -1;
+ if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+ mp_reply.nb_received == 1) {
+ mp_rep = &mp_reply.msgs[0];
+ p = (struct vfio_mp_param *)mp_rep->param;
+ if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
+ vfio_container_fd = mp_rep->fds[0];
+ free(mp_reply.msgs);
+ return vfio_container_fd;
+ }
+ }
+
+ free(mp_reply.msgs);
+ RTE_LOG(ERR, EAL, " cannot request container fd\n");
+ return -1;
+}
+
+int
+rte_vfio_get_group_num(const char *sysfs_base,
+ const char *dev_addr, int *iommu_group_num)
+{
+ char linkname[PATH_MAX];
+ char filename[PATH_MAX];
+ char *tok[16], *group_tok, *end;
+ int ret;
+
+ memset(linkname, 0, sizeof(linkname));
+ memset(filename, 0, sizeof(filename));
+
+ /* try to find out IOMMU group for this device */
+ snprintf(linkname, sizeof(linkname),
+ "%s/%s/iommu_group", sysfs_base, dev_addr);
+
+ ret = readlink(linkname, filename, sizeof(filename));
+
+ /* if the link doesn't exist, no VFIO for us */
+ if (ret < 0)
+ return 0;
+
+ ret = rte_strsplit(filename, sizeof(filename),
+ tok, RTE_DIM(tok), '/');
+
+ if (ret <= 0) {
+ RTE_LOG(ERR, EAL, " %s cannot get IOMMU group\n", dev_addr);
+ return -1;
+ }
+
+ /* IOMMU group is always the last token */
+ errno = 0;
+ group_tok = tok[ret - 1];
+ end = group_tok;
+ *iommu_group_num = strtol(group_tok, &end, 10);
+ if ((end != group_tok && *end != '\0') || errno != 0) {
+ RTE_LOG(ERR, EAL, " %s error parsing IOMMU number!\n", dev_addr);
+ return -1;
+ }
+
+ return 1;
+}
+
+static int
+type1_map_contig(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ size_t len, void *arg)
+{
+ int *vfio_container_fd = arg;
+
+ if (msl->external)
+ return 0;
+
+ return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
+ len, 1);
+}
+
+static int
+type1_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ void *arg)
+{
+ int *vfio_container_fd = arg;
+
+ /* skip external memory that isn't a heap */
+ if (msl->external && !msl->heap)
+ return 0;
+
+ /* skip any segments with invalid IOVA addresses */
+ if (ms->iova == RTE_BAD_IOVA)
+ return 0;
+
+ /* if IOVA mode is VA, we've already mapped the internal segments */
+ if (!msl->external && rte_eal_iova_mode() == RTE_IOVA_VA)
+ return 0;
+
+ return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
+ ms->len, 1);
+}
+
+static int
+vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map)
+{
+ struct vfio_iommu_type1_dma_map dma_map;
+ struct vfio_iommu_type1_dma_unmap dma_unmap;
+ int ret;
+
+ if (do_map != 0) {
+ memset(&dma_map, 0, sizeof(dma_map));
+ dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+ dma_map.vaddr = vaddr;
+ dma_map.size = len;
+ dma_map.iova = iova;
+ dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
+ VFIO_DMA_MAP_FLAG_WRITE;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+ if (ret) {
+ /**
+ * In case the mapping was already done EEXIST will be
+ * returned from kernel.
+ */
+ if (errno == EEXIST) {
+ RTE_LOG(DEBUG, EAL,
+ " Memory segment is already mapped,"
+ " skipping");
+ } else {
+ RTE_LOG(ERR, EAL,
+ " cannot set up DMA remapping,"
+ " error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+ }
+ } else {
+ memset(&dma_unmap, 0, sizeof(dma_unmap));
+ dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+ dma_unmap.size = len;
+ dma_unmap.iova = iova;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA,
+ &dma_unmap);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int
+vfio_type1_dma_map(int vfio_container_fd)
+{
+ if (rte_eal_iova_mode() == RTE_IOVA_VA) {
+ /* with IOVA as VA mode, we can get away with mapping contiguous
+ * chunks rather than going page-by-page.
+ */
+ int ret = rte_memseg_contig_walk(type1_map_contig,
+ &vfio_container_fd);
+ if (ret)
+ return ret;
+ /* we have to continue the walk because we've skipped the
+ * external segments during the config walk.
+ */
+ }
+ return rte_memseg_walk(type1_map, &vfio_container_fd);
+}
+
+static int
+vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map)
+{
+ struct vfio_iommu_type1_dma_map dma_map;
+ struct vfio_iommu_type1_dma_unmap dma_unmap;
+ int ret;
+ struct vfio_iommu_spapr_register_memory reg = {
+ .argsz = sizeof(reg),
+ .flags = 0
+ };
+ reg.vaddr = (uintptr_t) vaddr;
+ reg.size = len;
+
+ if (do_map != 0) {
+ ret = ioctl(vfio_container_fd,
+ VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot register vaddr for IOMMU, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ memset(&dma_map, 0, sizeof(dma_map));
+ dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+ dma_map.vaddr = vaddr;
+ dma_map.size = len;
+ dma_map.iova = iova;
+ dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
+ VFIO_DMA_MAP_FLAG_WRITE;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+ if (ret) {
+ /**
+ * In case the mapping was already done EBUSY will be
+ * returned from kernel.
+ */
+ if (errno == EBUSY) {
+ RTE_LOG(DEBUG, EAL,
+ " Memory segment is already mapped,"
+ " skipping");
+ } else {
+ RTE_LOG(ERR, EAL,
+ " cannot set up DMA remapping,"
+ " error %i (%s)\n", errno,
+ strerror(errno));
+ return -1;
+ }
+ }
+
+ } else {
+ memset(&dma_unmap, 0, sizeof(dma_unmap));
+ dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+ dma_unmap.size = len;
+ dma_unmap.iova = iova;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA,
+ &dma_unmap);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+
+ ret = ioctl(vfio_container_fd,
+ VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot unregister vaddr for IOMMU, error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int
+vfio_spapr_map_walk(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, void *arg)
+{
+ int *vfio_container_fd = arg;
+
+ /* skip external memory that isn't a heap */
+ if (msl->external && !msl->heap)
+ return 0;
+
+ /* skip any segments with invalid IOVA addresses */
+ if (ms->iova == RTE_BAD_IOVA)
+ return 0;
+
+ return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
+ ms->len, 1);
+}
+
+static int
+vfio_spapr_unmap_walk(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, void *arg)
+{
+ int *vfio_container_fd = arg;
+
+ /* skip external memory that isn't a heap */
+ if (msl->external && !msl->heap)
+ return 0;
+
+ /* skip any segments with invalid IOVA addresses */
+ if (ms->iova == RTE_BAD_IOVA)
+ return 0;
+
+ return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
+ ms->len, 0);
+}
+
+struct spapr_walk_param {
+ uint64_t window_size;
+ uint64_t hugepage_sz;
+};
+
+static int
+vfio_spapr_window_size_walk(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, void *arg)
+{
+ struct spapr_walk_param *param = arg;
+ uint64_t max = ms->iova + ms->len;
+
+ /* skip external memory that isn't a heap */
+ if (msl->external && !msl->heap)
+ return 0;
+
+ /* skip any segments with invalid IOVA addresses */
+ if (ms->iova == RTE_BAD_IOVA)
+ return 0;
+
+ if (max > param->window_size) {
+ param->hugepage_sz = ms->hugepage_sz;
+ param->window_size = max;
+ }
+
+ return 0;
+}
+
+static int
+vfio_spapr_create_new_dma_window(int vfio_container_fd,
+ struct vfio_iommu_spapr_tce_create *create) {
+ struct vfio_iommu_spapr_tce_remove remove = {
+ .argsz = sizeof(remove),
+ };
+ struct vfio_iommu_spapr_tce_info info = {
+ .argsz = sizeof(info),
+ };
+ int ret;
+
+ /* query spapr iommu info */
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot get iommu info, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ /* remove default DMA of 32 bit window */
+ remove.start_addr = info.dma32_window_start;
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot remove default DMA window, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ /* create new DMA window */
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, create);
+ if (ret) {
+#ifdef VFIO_IOMMU_SPAPR_INFO_DDW
+ /* try possible page_shift and levels for workaround */
+ uint32_t levels;
+
+ for (levels = create->levels + 1;
+ ret && levels <= info.ddw.levels; levels++) {
+ create->levels = levels;
+ ret = ioctl(vfio_container_fd,
+ VFIO_IOMMU_SPAPR_TCE_CREATE, create);
+ }
+#endif
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot create new DMA window, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+ }
+
+ if (create->start_addr != 0) {
+ RTE_LOG(ERR, EAL, " DMA window start address != 0\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map)
+{
+ struct spapr_walk_param param;
+ struct vfio_iommu_spapr_tce_create create = {
+ .argsz = sizeof(create),
+ };
+ struct vfio_config *vfio_cfg;
+ struct user_mem_maps *user_mem_maps;
+ int i, ret = 0;
+
+ vfio_cfg = get_vfio_cfg_by_container_fd(vfio_container_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, " invalid container fd!\n");
+ return -1;
+ }
+
+ user_mem_maps = &vfio_cfg->mem_maps;
+ rte_spinlock_recursive_lock(&user_mem_maps->lock);
+
+ /* check if window size needs to be adjusted */
+ memset(&param, 0, sizeof(param));
+
+ /* we're inside a callback so use thread-unsafe version */
+ if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
+ &param) < 0) {
+ RTE_LOG(ERR, EAL, "Could not get window size\n");
+ ret = -1;
+ goto out;
+ }
+
+ /* also check user maps */
+ for (i = 0; i < user_mem_maps->n_maps; i++) {
+ uint64_t max = user_mem_maps->maps[i].iova +
+ user_mem_maps->maps[i].len;
+ param.window_size = RTE_MAX(param.window_size, max);
+ }
+
+ /* sPAPR requires window size to be a power of 2 */
+ create.window_size = rte_align64pow2(param.window_size);
+ create.page_shift = __builtin_ctzll(param.hugepage_sz);
+ create.levels = 1;
+
+ if (do_map) {
+ /* re-create window and remap the entire memory */
+ if (iova + len > create.window_size) {
+ /* release all maps before recreating the window */
+ if (rte_memseg_walk_thread_unsafe(vfio_spapr_unmap_walk,
+ &vfio_container_fd) < 0) {
+ RTE_LOG(ERR, EAL, "Could not release DMA maps\n");
+ ret = -1;
+ goto out;
+ }
+ /* release all user maps */
+ for (i = 0; i < user_mem_maps->n_maps; i++) {
+ struct user_mem_map *map =
+ &user_mem_maps->maps[i];
+ if (vfio_spapr_dma_do_map(vfio_container_fd,
+ map->addr, map->iova, map->len,
+ 0)) {
+ RTE_LOG(ERR, EAL, "Could not release user DMA maps\n");
+ ret = -1;
+ goto out;
+ }
+ }
+ create.window_size = rte_align64pow2(iova + len);
+ if (vfio_spapr_create_new_dma_window(vfio_container_fd,
+ &create) < 0) {
+ RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
+ ret = -1;
+ goto out;
+ }
+ /* we're inside a callback, so use thread-unsafe version
+ */
+ if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
+ &vfio_container_fd) < 0) {
+ RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
+ ret = -1;
+ goto out;
+ }
+ /* remap all user maps */
+ for (i = 0; i < user_mem_maps->n_maps; i++) {
+ struct user_mem_map *map =
+ &user_mem_maps->maps[i];
+ if (vfio_spapr_dma_do_map(vfio_container_fd,
+ map->addr, map->iova, map->len,
+ 1)) {
+ RTE_LOG(ERR, EAL, "Could not recreate user DMA maps\n");
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+ if (vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 1)) {
+ RTE_LOG(ERR, EAL, "Failed to map DMA\n");
+ ret = -1;
+ goto out;
+ }
+ } else {
+ /* for unmap, check if iova within DMA window */
+ if (iova > create.window_size) {
+ RTE_LOG(ERR, EAL, "iova beyond DMA window for unmap");
+ ret = -1;
+ goto out;
+ }
+
+ vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 0);
+ }
+out:
+ rte_spinlock_recursive_unlock(&user_mem_maps->lock);
+ return ret;
+}
+
+static int
+vfio_spapr_dma_map(int vfio_container_fd)
+{
+ struct vfio_iommu_spapr_tce_create create = {
+ .argsz = sizeof(create),
+ };
+ struct spapr_walk_param param;
+
+ memset(&param, 0, sizeof(param));
+
+ /* create DMA window from 0 to max(phys_addr + len) */
+ rte_memseg_walk(vfio_spapr_window_size_walk, &param);
+
+ /* sPAPR requires window size to be a power of 2 */
+ create.window_size = rte_align64pow2(param.window_size);
+ create.page_shift = __builtin_ctzll(param.hugepage_sz);
+ create.levels = 1;
+
+ if (vfio_spapr_create_new_dma_window(vfio_container_fd, &create) < 0) {
+ RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
+ return -1;
+ }
+
+ /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
+ if (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int
+vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
+{
+ /* No-IOMMU mode does not need DMA mapping */
+ return 0;
+}
+
+static int
+vfio_noiommu_dma_mem_map(int __rte_unused vfio_container_fd,
+ uint64_t __rte_unused vaddr,
+ uint64_t __rte_unused iova, uint64_t __rte_unused len,
+ int __rte_unused do_map)
+{
+ /* No-IOMMU mode does not need DMA mapping */
+ return 0;
+}
+
+static int
+vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map)
+{
+ const struct vfio_iommu_type *t = vfio_cfg->vfio_iommu_type;
+
+ if (!t) {
+ RTE_LOG(ERR, EAL, " VFIO support not initialized\n");
+ rte_errno = ENODEV;
+ return -1;
+ }
+
+ if (!t->dma_user_map_func) {
+ RTE_LOG(ERR, EAL,
+ " VFIO custom DMA region maping not supported by IOMMU %s\n",
+ t->name);
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ return t->dma_user_map_func(vfio_cfg->vfio_container_fd, vaddr, iova,
+ len, do_map);
+}
+
+static int
+container_dma_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
+ uint64_t len)
+{
+ struct user_mem_map *new_map;
+ struct user_mem_maps *user_mem_maps;
+ int ret = 0;
+
+ user_mem_maps = &vfio_cfg->mem_maps;
+ rte_spinlock_recursive_lock(&user_mem_maps->lock);
+ if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) {
+ RTE_LOG(ERR, EAL, "No more space for user mem maps\n");
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+ /* map the entry */
+ if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 1)) {
+ /* technically, this will fail if there are currently no devices
+ * plugged in, even if a device were added later, this mapping
+ * might have succeeded. however, since we cannot verify if this
+ * is a valid mapping without having a device attached, consider
+ * this to be unsupported, because we can't just store any old
+ * mapping and pollute list of active mappings willy-nilly.
+ */
+ RTE_LOG(ERR, EAL, "Couldn't map new region for DMA\n");
+ ret = -1;
+ goto out;
+ }
+ /* create new user mem map entry */
+ new_map = &user_mem_maps->maps[user_mem_maps->n_maps++];
+ new_map->addr = vaddr;
+ new_map->iova = iova;
+ new_map->len = len;
+
+ compact_user_maps(user_mem_maps);
+out:
+ rte_spinlock_recursive_unlock(&user_mem_maps->lock);
+ return ret;
+}
+
+static int
+container_dma_unmap(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
+ uint64_t len)
+{
+ struct user_mem_map *map, *new_map = NULL;
+ struct user_mem_maps *user_mem_maps;
+ int ret = 0;
+
+ user_mem_maps = &vfio_cfg->mem_maps;
+ rte_spinlock_recursive_lock(&user_mem_maps->lock);
+
+ /* find our mapping */
+ map = find_user_mem_map(user_mem_maps, vaddr, iova, len);
+ if (!map) {
+ RTE_LOG(ERR, EAL, "Couldn't find previously mapped region\n");
+ rte_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+ if (map->addr != vaddr || map->iova != iova || map->len != len) {
+ /* we're partially unmapping a previously mapped region, so we
+ * need to split entry into two.
+ */
+ if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) {
+ RTE_LOG(ERR, EAL, "Not enough space to store partial mapping\n");
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+ new_map = &user_mem_maps->maps[user_mem_maps->n_maps++];
+ }
+
+ /* unmap the entry */
+ if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 0)) {
+ /* there may not be any devices plugged in, so unmapping will
+ * fail with ENODEV/ENOTSUP rte_errno values, but that doesn't
+ * stop us from removing the mapping, as the assumption is we
+ * won't be needing this memory any more and thus will want to
+ * prevent it from being remapped again on hotplug. so, only
+ * fail if we indeed failed to unmap (e.g. if the mapping was
+ * within our mapped range but had invalid alignment).
+ */
+ if (rte_errno != ENODEV && rte_errno != ENOTSUP) {
+ RTE_LOG(ERR, EAL, "Couldn't unmap region for DMA\n");
+ ret = -1;
+ goto out;
+ } else {
+ RTE_LOG(DEBUG, EAL, "DMA unmapping failed, but removing mappings anyway\n");
+ }
+ }
+ /* remove map from the list of active mappings */
+ if (new_map != NULL) {
+ adjust_map(map, new_map, vaddr, len);
+
+ /* if we've created a new map by splitting, sort everything */
+ if (!is_null_map(new_map)) {
+ compact_user_maps(user_mem_maps);
+ } else {
+ /* we've created a new mapping, but it was unused */
+ user_mem_maps->n_maps--;
+ }
+ } else {
+ memset(map, 0, sizeof(*map));
+ compact_user_maps(user_mem_maps);
+ user_mem_maps->n_maps--;
+ }
+
+out:
+ rte_spinlock_recursive_unlock(&user_mem_maps->lock);
+ return ret;
+}
+
+int
+rte_vfio_noiommu_is_enabled(void)
+{
+ int fd;
+ ssize_t cnt;
+ char c;
+
+ fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
+ if (fd < 0) {
+ if (errno != ENOENT) {
+ RTE_LOG(ERR, EAL, " cannot open vfio noiommu file %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+ /*
+ * else the file does not exists
+ * i.e. noiommu is not enabled
+ */
+ return 0;
+ }
+
+ cnt = read(fd, &c, 1);
+ close(fd);
+ if (cnt != 1) {
+ RTE_LOG(ERR, EAL, " unable to read from vfio noiommu "
+ "file %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ return c == 'Y';
+}
+
+int
+rte_vfio_container_create(void)
+{
+ int i;
+
+ /* Find an empty slot to store new vfio config */
+ for (i = 1; i < VFIO_MAX_CONTAINERS; i++) {
+ if (vfio_cfgs[i].vfio_container_fd == -1)
+ break;
+ }
+
+ if (i == VFIO_MAX_CONTAINERS) {
+ RTE_LOG(ERR, EAL, "exceed max vfio container limit\n");
+ return -1;
+ }
+
+ vfio_cfgs[i].vfio_container_fd = rte_vfio_get_container_fd();
+ if (vfio_cfgs[i].vfio_container_fd < 0) {
+ RTE_LOG(NOTICE, EAL, "fail to create a new container\n");
+ return -1;
+ }
+
+ return vfio_cfgs[i].vfio_container_fd;
+}
+
+int
+rte_vfio_container_destroy(int container_fd)
+{
+ struct vfio_config *vfio_cfg;
+ int i;
+
+ vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, "Invalid container fd\n");
+ return -1;
+ }
+
+ for (i = 0; i < VFIO_MAX_GROUPS; i++)
+ if (vfio_cfg->vfio_groups[i].group_num != -1)
+ rte_vfio_container_group_unbind(container_fd,
+ vfio_cfg->vfio_groups[i].group_num);
+
+ close(container_fd);
+ vfio_cfg->vfio_container_fd = -1;
+ vfio_cfg->vfio_active_groups = 0;
+ vfio_cfg->vfio_iommu_type = NULL;
+
+ return 0;
+}
+
+int
+rte_vfio_container_group_bind(int container_fd, int iommu_group_num)
+{
+ struct vfio_config *vfio_cfg;
+
+ vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, "Invalid container fd\n");
+ return -1;
+ }
+
+ return vfio_get_group_fd(vfio_cfg, iommu_group_num);
+}
+
+int
+rte_vfio_container_group_unbind(int container_fd, int iommu_group_num)
+{
+ struct vfio_config *vfio_cfg;
+ struct vfio_group *cur_grp = NULL;
+ int i;
+
+ vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, "Invalid container fd\n");
+ return -1;
+ }
+
+ for (i = 0; i < VFIO_MAX_GROUPS; i++) {
+ if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num) {
+ cur_grp = &vfio_cfg->vfio_groups[i];
+ break;
+ }
+ }
+
+ /* This should not happen */
+ if (i == VFIO_MAX_GROUPS || cur_grp == NULL) {
+ RTE_LOG(ERR, EAL, "Specified group number not found\n");
+ return -1;
+ }
+
+ if (cur_grp->fd >= 0 && close(cur_grp->fd) < 0) {
+ RTE_LOG(ERR, EAL, "Error when closing vfio_group_fd for"
+ " iommu_group_num %d\n", iommu_group_num);
+ return -1;
+ }
+ cur_grp->group_num = -1;
+ cur_grp->fd = -1;
+ cur_grp->devices = 0;
+ vfio_cfg->vfio_active_groups--;
+
+ return 0;
+}
+
+int
+rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len)
+{
+ struct vfio_config *vfio_cfg;
+
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, "Invalid container fd\n");
+ return -1;
+ }
+
+ return container_dma_map(vfio_cfg, vaddr, iova, len);
+}
+
+int
+rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len)
+{
+ struct vfio_config *vfio_cfg;
+
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
+ if (vfio_cfg == NULL) {
+ RTE_LOG(ERR, EAL, "Invalid container fd\n");
+ return -1;
+ }
+
+ return container_dma_unmap(vfio_cfg, vaddr, iova, len);
+}
+
+#else
+
+int
+rte_vfio_setup_device(__rte_unused const char *sysfs_base,
+ __rte_unused const char *dev_addr,
+ __rte_unused int *vfio_dev_fd,
+ __rte_unused struct vfio_device_info *device_info)
+{
+ return -1;
+}
+
+int
+rte_vfio_release_device(__rte_unused const char *sysfs_base,
+ __rte_unused const char *dev_addr, __rte_unused int fd)
+{
+ return -1;
+}
+
+int
+rte_vfio_enable(__rte_unused const char *modname)
+{
+ return -1;
+}
+
+int
+rte_vfio_is_enabled(__rte_unused const char *modname)
+{
+ return -1;
+}
+
+int
+rte_vfio_noiommu_is_enabled(void)
+{
+ return -1;
+}
+
+int
+rte_vfio_clear_group(__rte_unused int vfio_group_fd)
+{
+ return -1;
+}
+
+int
+rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
+ __rte_unused const char *dev_addr,
+ __rte_unused int *iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_get_container_fd(void)
+{
+ return -1;
+}
+
+int
+rte_vfio_get_group_fd(__rte_unused int iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_create(void)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_destroy(__rte_unused int container_fd)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_group_bind(__rte_unused int container_fd,
+ __rte_unused int iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_group_unbind(__rte_unused int container_fd,
+ __rte_unused int iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_dma_map(__rte_unused int container_fd,
+ __rte_unused uint64_t vaddr,
+ __rte_unused uint64_t iova,
+ __rte_unused uint64_t len)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_dma_unmap(__rte_unused int container_fd,
+ __rte_unused uint64_t vaddr,
+ __rte_unused uint64_t iova,
+ __rte_unused uint64_t len)
+{
+ return -1;
+}
+
+#endif /* VFIO_PRESENT */
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/eal_vfio.h b/src/spdk/dpdk/lib/librte_eal/linux/eal_vfio.h
new file mode 100644
index 000000000..cb2d35fb1
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/eal_vfio.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef EAL_VFIO_H_
+#define EAL_VFIO_H_
+
+#include <rte_common.h>
+
+/*
+ * determine if VFIO is present on the system
+ */
+#if !defined(VFIO_PRESENT) && defined(RTE_EAL_VFIO)
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
+#define VFIO_PRESENT
+#else
+#pragma message("VFIO configured but not supported by this kernel, disabling.")
+#endif /* kernel version >= 3.6.0 */
+#endif /* RTE_EAL_VFIO */
+
+#ifdef VFIO_PRESENT
+
+#include <stdint.h>
+#include <linux/vfio.h>
+
+#define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU
+
+#ifndef VFIO_SPAPR_TCE_v2_IOMMU
+#define RTE_VFIO_SPAPR 7
+#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17)
+#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18)
+#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19)
+#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20)
+
+struct vfio_iommu_spapr_register_memory {
+ uint32_t argsz;
+ uint32_t flags;
+ uint64_t vaddr;
+ uint64_t size;
+};
+
+struct vfio_iommu_spapr_tce_create {
+ uint32_t argsz;
+ uint32_t flags;
+ /* in */
+ uint32_t page_shift;
+ uint32_t __resv1;
+ uint64_t window_size;
+ uint32_t levels;
+ uint32_t __resv2;
+ /* out */
+ uint64_t start_addr;
+};
+
+struct vfio_iommu_spapr_tce_remove {
+ uint32_t argsz;
+ uint32_t flags;
+ /* in */
+ uint64_t start_addr;
+};
+
+struct vfio_iommu_spapr_tce_ddw_info {
+ uint64_t pgsizes;
+ uint32_t max_dynamic_windows_supported;
+ uint32_t levels;
+};
+
+/* SPAPR_v2 is not present, but SPAPR might be */
+#ifndef VFIO_SPAPR_TCE_IOMMU
+#define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
+
+struct vfio_iommu_spapr_tce_info {
+ uint32_t argsz;
+ uint32_t flags;
+ uint32_t dma32_window_start;
+ uint32_t dma32_window_size;
+ struct vfio_iommu_spapr_tce_ddw_info ddw;
+};
+#endif /* VFIO_SPAPR_TCE_IOMMU */
+
+#else /* VFIO_SPAPR_TCE_v2_IOMMU */
+#define RTE_VFIO_SPAPR VFIO_SPAPR_TCE_v2_IOMMU
+#endif
+
+#define VFIO_MAX_GROUPS RTE_MAX_VFIO_GROUPS
+#define VFIO_MAX_CONTAINERS RTE_MAX_VFIO_CONTAINERS
+
+/*
+ * we don't need to store device fd's anywhere since they can be obtained from
+ * the group fd via an ioctl() call.
+ */
+struct vfio_group {
+ int group_num;
+ int fd;
+ int devices;
+};
+
+/* DMA mapping function prototype.
+ * Takes VFIO container fd as a parameter.
+ * Returns 0 on success, -1 on error.
+ * */
+typedef int (*vfio_dma_func_t)(int);
+
+/* Custom memory region DMA mapping function prototype.
+ * Takes VFIO container fd, virtual address, phisical address, length and
+ * operation type (0 to unmap 1 for map) as a parameters.
+ * Returns 0 on success, -1 on error.
+ **/
+typedef int (*vfio_dma_user_func_t)(int fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map);
+
+struct vfio_iommu_type {
+ int type_id;
+ const char *name;
+ vfio_dma_user_func_t dma_user_map_func;
+ vfio_dma_func_t dma_map_func;
+};
+
+/* get the vfio container that devices are bound to by default */
+int vfio_get_default_container_fd(void);
+
+/* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */
+const struct vfio_iommu_type *
+vfio_set_iommu_type(int vfio_container_fd);
+
+int
+vfio_get_iommu_type(void);
+
+/* check if we have any supported extensions */
+int
+vfio_has_supported_extensions(int vfio_container_fd);
+
+int vfio_mp_sync_setup(void);
+
+#define EAL_VFIO_MP "eal_vfio_mp_sync"
+
+#define SOCKET_REQ_CONTAINER 0x100
+#define SOCKET_REQ_GROUP 0x200
+#define SOCKET_REQ_DEFAULT_CONTAINER 0x400
+#define SOCKET_REQ_IOMMU_TYPE 0x800
+#define SOCKET_OK 0x0
+#define SOCKET_NO_FD 0x1
+#define SOCKET_ERR 0xFF
+
+struct vfio_mp_param {
+ int req;
+ int result;
+ RTE_STD_C11
+ union {
+ int group_num;
+ int iommu_type_id;
+ };
+};
+
+#endif /* VFIO_PRESENT */
+
+#endif /* EAL_VFIO_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/eal_vfio_mp_sync.c b/src/spdk/dpdk/lib/librte_eal/linux/eal_vfio_mp_sync.c
new file mode 100644
index 000000000..5f2a5fc1d
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/eal_vfio_mp_sync.c
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#include <unistd.h>
+#include <string.h>
+
+#include <rte_compat.h>
+#include <rte_errno.h>
+#include <rte_log.h>
+#include <rte_vfio.h>
+#include <rte_eal.h>
+
+#include "eal_vfio.h"
+
+/**
+ * @file
+ * VFIO socket for communication between primary and secondary processes.
+ *
+ * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y".
+ */
+
+#ifdef VFIO_PRESENT
+
+static int
+vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer)
+{
+ int fd = -1;
+ int ret;
+ struct rte_mp_msg reply;
+ struct vfio_mp_param *r = (struct vfio_mp_param *)reply.param;
+ const struct vfio_mp_param *m =
+ (const struct vfio_mp_param *)msg->param;
+
+ if (msg->len_param != sizeof(*m)) {
+ RTE_LOG(ERR, EAL, "vfio received invalid message!\n");
+ return -1;
+ }
+
+ memset(&reply, 0, sizeof(reply));
+
+ switch (m->req) {
+ case SOCKET_REQ_GROUP:
+ r->req = SOCKET_REQ_GROUP;
+ r->group_num = m->group_num;
+ fd = rte_vfio_get_group_fd(m->group_num);
+ if (fd < 0)
+ r->result = SOCKET_ERR;
+ else if (fd == 0)
+ /* if VFIO group exists but isn't bound to VFIO driver */
+ r->result = SOCKET_NO_FD;
+ else {
+ /* if group exists and is bound to VFIO driver */
+ r->result = SOCKET_OK;
+ reply.num_fds = 1;
+ reply.fds[0] = fd;
+ }
+ break;
+ case SOCKET_REQ_CONTAINER:
+ r->req = SOCKET_REQ_CONTAINER;
+ fd = rte_vfio_get_container_fd();
+ if (fd < 0)
+ r->result = SOCKET_ERR;
+ else {
+ r->result = SOCKET_OK;
+ reply.num_fds = 1;
+ reply.fds[0] = fd;
+ }
+ break;
+ case SOCKET_REQ_DEFAULT_CONTAINER:
+ r->req = SOCKET_REQ_DEFAULT_CONTAINER;
+ fd = vfio_get_default_container_fd();
+ if (fd < 0)
+ r->result = SOCKET_ERR;
+ else {
+ r->result = SOCKET_OK;
+ reply.num_fds = 1;
+ reply.fds[0] = fd;
+ }
+ break;
+ case SOCKET_REQ_IOMMU_TYPE:
+ {
+ int iommu_type_id;
+
+ r->req = SOCKET_REQ_IOMMU_TYPE;
+
+ iommu_type_id = vfio_get_iommu_type();
+
+ if (iommu_type_id < 0)
+ r->result = SOCKET_ERR;
+ else {
+ r->iommu_type_id = iommu_type_id;
+ r->result = SOCKET_OK;
+ }
+ break;
+ }
+ default:
+ RTE_LOG(ERR, EAL, "vfio received invalid message!\n");
+ return -1;
+ }
+
+ strcpy(reply.name, EAL_VFIO_MP);
+ reply.len_param = sizeof(*r);
+
+ ret = rte_mp_reply(&reply, peer);
+ if (m->req == SOCKET_REQ_CONTAINER && fd >= 0)
+ close(fd);
+ return ret;
+}
+
+int
+vfio_mp_sync_setup(void)
+{
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ int ret = rte_mp_action_register(EAL_VFIO_MP, vfio_mp_primary);
+ if (ret && rte_errno != ENOTSUP)
+ return -1;
+ }
+
+ return 0;
+}
+
+#endif
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/include/meson.build b/src/spdk/dpdk/lib/librte_eal/linux/include/meson.build
new file mode 100644
index 000000000..1241894b3
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/include/meson.build
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2020 Mellanox Technologies, Ltd
+
+includes += include_directories('.')
+
+headers += files(
+ 'rte_kni_common.h',
+ 'rte_os.h',
+)
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/include/rte_kni_common.h b/src/spdk/dpdk/lib/librte_eal/linux/include/rte_kni_common.h
new file mode 100644
index 000000000..7313ef504
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/include/rte_kni_common.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: (BSD-3-Clause OR LGPL-2.1) */
+/*
+ * Copyright(c) 2007-2014 Intel Corporation.
+ */
+
+#ifndef _RTE_KNI_COMMON_H_
+#define _RTE_KNI_COMMON_H_
+
+#ifdef __KERNEL__
+#include <linux/if.h>
+#include <asm/barrier.h>
+#define RTE_STD_C11
+#else
+#include <rte_common.h>
+#include <rte_config.h>
+#endif
+
+/*
+ * KNI name is part of memzone name. Must not exceed IFNAMSIZ.
+ */
+#define RTE_KNI_NAMESIZE 16
+
+#define RTE_CACHE_LINE_MIN_SIZE 64
+
+/*
+ * Request id.
+ */
+enum rte_kni_req_id {
+ RTE_KNI_REQ_UNKNOWN = 0,
+ RTE_KNI_REQ_CHANGE_MTU,
+ RTE_KNI_REQ_CFG_NETWORK_IF,
+ RTE_KNI_REQ_CHANGE_MAC_ADDR,
+ RTE_KNI_REQ_CHANGE_PROMISC,
+ RTE_KNI_REQ_CHANGE_ALLMULTI,
+ RTE_KNI_REQ_MAX,
+};
+
+/*
+ * Structure for KNI request.
+ */
+struct rte_kni_request {
+ uint32_t req_id; /**< Request id */
+ RTE_STD_C11
+ union {
+ uint32_t new_mtu; /**< New MTU */
+ uint8_t if_up; /**< 1: interface up, 0: interface down */
+ uint8_t mac_addr[6]; /**< MAC address for interface */
+ uint8_t promiscusity;/**< 1: promisc mode enable, 0: disable */
+ uint8_t allmulti; /**< 1: all-multicast mode enable, 0: disable */
+ };
+ int32_t result; /**< Result for processing request */
+} __attribute__((__packed__));
+
+/*
+ * Fifo struct mapped in a shared memory. It describes a circular buffer FIFO
+ * Write and read should wrap around. Fifo is empty when write == read
+ * Writing should never overwrite the read position
+ */
+struct rte_kni_fifo {
+#ifdef RTE_USE_C11_MEM_MODEL
+ unsigned write; /**< Next position to be written*/
+ unsigned read; /**< Next position to be read */
+#else
+ volatile unsigned write; /**< Next position to be written*/
+ volatile unsigned read; /**< Next position to be read */
+#endif
+ unsigned len; /**< Circular buffer length */
+ unsigned elem_size; /**< Pointer size - for 32/64 bit OS */
+ void *volatile buffer[]; /**< The buffer contains mbuf pointers */
+};
+
+/*
+ * The kernel image of the rte_mbuf struct, with only the relevant fields.
+ * Padding is necessary to assure the offsets of these fields
+ */
+struct rte_kni_mbuf {
+ void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE)));
+ uint64_t buf_physaddr;
+ uint16_t data_off; /**< Start address of data in segment buffer. */
+ char pad1[2];
+ uint16_t nb_segs; /**< Number of segments. */
+ char pad4[2];
+ uint64_t ol_flags; /**< Offload features. */
+ char pad2[4];
+ uint32_t pkt_len; /**< Total pkt len: sum of all segment data_len. */
+ uint16_t data_len; /**< Amount of data in segment buffer. */
+
+ /* fields on second cache line */
+ char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
+ void *pool;
+ void *next; /**< Physical address of next mbuf in kernel. */
+};
+
+/*
+ * Struct used to create a KNI device. Passed to the kernel in IOCTL call
+ */
+
+struct rte_kni_device_info {
+ char name[RTE_KNI_NAMESIZE]; /**< Network device name for KNI */
+
+ phys_addr_t tx_phys;
+ phys_addr_t rx_phys;
+ phys_addr_t alloc_phys;
+ phys_addr_t free_phys;
+
+ /* Used by Ethtool */
+ phys_addr_t req_phys;
+ phys_addr_t resp_phys;
+ phys_addr_t sync_phys;
+ void * sync_va;
+
+ /* mbuf mempool */
+ void * mbuf_va;
+ phys_addr_t mbuf_phys;
+
+ uint16_t group_id; /**< Group ID */
+ uint32_t core_id; /**< core ID to bind for kernel thread */
+
+ __extension__
+ uint8_t force_bind : 1; /**< Flag for kernel thread binding */
+
+ /* mbuf size */
+ unsigned mbuf_size;
+ unsigned int mtu;
+ unsigned int min_mtu;
+ unsigned int max_mtu;
+ uint8_t mac_addr[6];
+ uint8_t iova_mode;
+};
+
+#define KNI_DEVICE "kni"
+
+#define RTE_KNI_IOCTL_TEST _IOWR(0, 1, int)
+#define RTE_KNI_IOCTL_CREATE _IOWR(0, 2, struct rte_kni_device_info)
+#define RTE_KNI_IOCTL_RELEASE _IOWR(0, 3, struct rte_kni_device_info)
+
+#endif /* _RTE_KNI_COMMON_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/include/rte_os.h b/src/spdk/dpdk/lib/librte_eal/linux/include/rte_os.h
new file mode 100644
index 000000000..218d4fa86
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/include/rte_os.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2019 Intel Corporation
+ */
+
+#ifndef _RTE_OS_H_
+#define _RTE_OS_H_
+
+/**
+ * This is header should contain any function/macro definition
+ * which are not supported natively or named differently in the
+ * linux OS. Functions will be added in future releases.
+ */
+
+#include <sched.h>
+
+typedef cpu_set_t rte_cpuset_t;
+#define RTE_CPU_AND(dst, src1, src2) CPU_AND(dst, src1, src2)
+#define RTE_CPU_OR(dst, src1, src2) CPU_OR(dst, src1, src2)
+#define RTE_CPU_FILL(set) do \
+{ \
+ unsigned int i; \
+ CPU_ZERO(set); \
+ for (i = 0; i < CPU_SETSIZE; i++) \
+ CPU_SET(i, set); \
+} while (0)
+#define RTE_CPU_NOT(dst, src) do \
+{ \
+ cpu_set_t tmp; \
+ RTE_CPU_FILL(&tmp); \
+ CPU_XOR(dst, &tmp, src); \
+} while (0)
+
+#endif /* _RTE_OS_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/linux/meson.build b/src/spdk/dpdk/lib/librte_eal/linux/meson.build
new file mode 100644
index 000000000..7742aa475
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/linux/meson.build
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+
+subdir('include')
+
+sources += files(
+ 'eal.c',
+ 'eal_alarm.c',
+ 'eal_cpuflags.c',
+ 'eal_debug.c',
+ 'eal_dev.c',
+ 'eal_hugepage_info.c',
+ 'eal_interrupts.c',
+ 'eal_lcore.c',
+ 'eal_log.c',
+ 'eal_memalloc.c',
+ 'eal_memory.c',
+ 'eal_thread.c',
+ 'eal_timer.c',
+ 'eal_vfio.c',
+ 'eal_vfio_mp_sync.c',
+)
+
+deps += ['kvargs', 'telemetry']
+if has_libnuma == 1
+ dpdk_conf.set10('RTE_EAL_NUMA_AWARE_HUGEPAGES', true)
+endif
diff --git a/src/spdk/dpdk/lib/librte_eal/meson.build b/src/spdk/dpdk/lib/librte_eal/meson.build
new file mode 100644
index 000000000..e301f4558
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/meson.build
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017-2019 Intel Corporation
+
+includes += global_inc
+subdir('include')
+
+subdir('common')
+
+dpdk_conf.set('RTE_EXEC_ENV_' + exec_env.to_upper(), 1)
+subdir(exec_env)
+
+subdir(arch_subdir)
+
+deps += ['kvargs']
+if not is_windows
+ deps += ['telemetry']
+endif
+if dpdk_conf.has('RTE_USE_LIBBSD')
+ ext_deps += libbsd
+endif
+if cc.has_function('getentropy', prefix : '#include <unistd.h>')
+ cflags += '-DRTE_LIBEAL_USE_GETENTROPY'
+endif
+if cc.has_header('getopt.h')
+ cflags += ['-DHAVE_GETOPT_H', '-DHAVE_GETOPT', '-DHAVE_GETOPT_LONG']
+endif
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/include/meson.build b/src/spdk/dpdk/lib/librte_eal/ppc/include/meson.build
new file mode 100644
index 000000000..ab4bd2809
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/include/meson.build
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+arch_headers = files(
+ 'rte_altivec.h',
+ 'rte_atomic.h',
+ 'rte_byteorder.h',
+ 'rte_cpuflags.h',
+ 'rte_cycles.h',
+ 'rte_io.h',
+ 'rte_memcpy.h',
+ 'rte_pause.h',
+ 'rte_prefetch.h',
+ 'rte_rwlock.h',
+ 'rte_spinlock.h',
+ 'rte_vect.h',
+)
+install_headers(arch_headers, subdir: get_option('include_subdir_arch'))
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_altivec.h b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_altivec.h
new file mode 100644
index 000000000..1551a9454
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_altivec.h
@@ -0,0 +1,22 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) Mellanox 2020.
+ */
+
+#ifndef _RTE_ALTIVEC_H_
+#define _RTE_ALTIVEC_H_
+
+/* To include altivec.h, GCC version must be >= 4.8 */
+#include <altivec.h>
+
+/*
+ * Compilation workaround for PPC64 when AltiVec is fully enabled, e.g. std=c11.
+ * Otherwise there would be a type conflict between stdbool and altivec.
+ */
+#if defined(__PPC64__) && !defined(__APPLE_ALTIVEC__)
+#undef bool
+/* redefine as in stdbool.h */
+#define bool _Bool
+#endif
+
+#endif /* _RTE_ALTIVEC_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_atomic.h b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_atomic.h
new file mode 100644
index 000000000..7e3e13118
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_atomic.h
@@ -0,0 +1,413 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Inspired from FreeBSD src/sys/powerpc/include/atomic.h
+ * Copyright (c) 2008 Marcel Moolenaar
+ * Copyright (c) 2001 Benno Rice
+ * Copyright (c) 2001 David E. O'Brien
+ * Copyright (c) 1998 Doug Rabson
+ * All rights reserved.
+ */
+
+#ifndef _RTE_ATOMIC_PPC_64_H_
+#define _RTE_ATOMIC_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include "generic/rte_atomic.h"
+
+#define rte_mb() asm volatile("sync" : : : "memory")
+
+#define rte_wmb() asm volatile("sync" : : : "memory")
+
+#define rte_rmb() asm volatile("sync" : : : "memory")
+
+#define rte_smp_mb() rte_mb()
+
+#define rte_smp_wmb() rte_wmb()
+
+#define rte_smp_rmb() rte_rmb()
+
+#define rte_io_mb() rte_mb()
+
+#define rte_io_wmb() rte_wmb()
+
+#define rte_io_rmb() rte_rmb()
+
+#define rte_cio_wmb() rte_wmb()
+
+#define rte_cio_rmb() rte_rmb()
+
+/*------------------------- 16 bit atomic operations -------------------------*/
+/* To be compatible with Power7, use GCC built-in functions for 16 bit
+ * operations */
+
+#ifndef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
+{
+ return __atomic_compare_exchange(dst, &exp, &src, 0, __ATOMIC_ACQUIRE,
+ __ATOMIC_ACQUIRE) ? 1 : 0;
+}
+
+static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
+{
+ return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
+}
+
+static inline void
+rte_atomic16_inc(rte_atomic16_t *v)
+{
+ __atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE);
+}
+
+static inline void
+rte_atomic16_dec(rte_atomic16_t *v)
+{
+ __atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE);
+}
+
+static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
+{
+ return __atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0;
+}
+
+static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
+{
+ return __atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0;
+}
+
+static inline uint16_t
+rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val)
+{
+ return __atomic_exchange_2(dst, val, __ATOMIC_SEQ_CST);
+}
+
+/*------------------------- 32 bit atomic operations -------------------------*/
+
+static inline int
+rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
+{
+ unsigned int ret = 0;
+
+ asm volatile(
+ "\tlwsync\n"
+ "1:\tlwarx %[ret], 0, %[dst]\n"
+ "cmplw %[exp], %[ret]\n"
+ "bne 2f\n"
+ "stwcx. %[src], 0, %[dst]\n"
+ "bne- 1b\n"
+ "li %[ret], 1\n"
+ "b 3f\n"
+ "2:\n"
+ "stwcx. %[ret], 0, %[dst]\n"
+ "li %[ret], 0\n"
+ "3:\n"
+ "isync\n"
+ : [ret] "=&r" (ret), "=m" (*dst)
+ : [dst] "r" (dst),
+ [exp] "r" (exp),
+ [src] "r" (src),
+ "m" (*dst)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
+{
+ return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1);
+}
+
+static inline void
+rte_atomic32_inc(rte_atomic32_t *v)
+{
+ int t;
+
+ asm volatile(
+ "1: lwarx %[t],0,%[cnt]\n"
+ "addic %[t],%[t],1\n"
+ "stwcx. %[t],0,%[cnt]\n"
+ "bne- 1b\n"
+ : [t] "=&r" (t), "=m" (v->cnt)
+ : [cnt] "r" (&v->cnt), "m" (v->cnt)
+ : "cc", "xer", "memory");
+}
+
+static inline void
+rte_atomic32_dec(rte_atomic32_t *v)
+{
+ int t;
+
+ asm volatile(
+ "1: lwarx %[t],0,%[cnt]\n"
+ "addic %[t],%[t],-1\n"
+ "stwcx. %[t],0,%[cnt]\n"
+ "bne- 1b\n"
+ : [t] "=&r" (t), "=m" (v->cnt)
+ : [cnt] "r" (&v->cnt), "m" (v->cnt)
+ : "cc", "xer", "memory");
+}
+
+static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
+{
+ int ret;
+
+ asm volatile(
+ "\n\tlwsync\n"
+ "1: lwarx %[ret],0,%[cnt]\n"
+ "addic %[ret],%[ret],1\n"
+ "stwcx. %[ret],0,%[cnt]\n"
+ "bne- 1b\n"
+ "isync\n"
+ : [ret] "=&r" (ret)
+ : [cnt] "r" (&v->cnt)
+ : "cc", "xer", "memory");
+
+ return ret == 0;
+}
+
+static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
+{
+ int ret;
+
+ asm volatile(
+ "\n\tlwsync\n"
+ "1: lwarx %[ret],0,%[cnt]\n"
+ "addic %[ret],%[ret],-1\n"
+ "stwcx. %[ret],0,%[cnt]\n"
+ "bne- 1b\n"
+ "isync\n"
+ : [ret] "=&r" (ret)
+ : [cnt] "r" (&v->cnt)
+ : "cc", "xer", "memory");
+
+ return ret == 0;
+}
+
+static inline uint32_t
+rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val)
+{
+ return __atomic_exchange_4(dst, val, __ATOMIC_SEQ_CST);
+}
+
+/*------------------------- 64 bit atomic operations -------------------------*/
+
+static inline int
+rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
+{
+ unsigned int ret = 0;
+
+ asm volatile (
+ "\tlwsync\n"
+ "1: ldarx %[ret], 0, %[dst]\n"
+ "cmpld %[exp], %[ret]\n"
+ "bne 2f\n"
+ "stdcx. %[src], 0, %[dst]\n"
+ "bne- 1b\n"
+ "li %[ret], 1\n"
+ "b 3f\n"
+ "2:\n"
+ "stdcx. %[ret], 0, %[dst]\n"
+ "li %[ret], 0\n"
+ "3:\n"
+ "isync\n"
+ : [ret] "=&r" (ret), "=m" (*dst)
+ : [dst] "r" (dst),
+ [exp] "r" (exp),
+ [src] "r" (src),
+ "m" (*dst)
+ : "cc", "memory");
+ return ret;
+}
+
+static inline void
+rte_atomic64_init(rte_atomic64_t *v)
+{
+ v->cnt = 0;
+}
+
+static inline int64_t
+rte_atomic64_read(rte_atomic64_t *v)
+{
+ long ret;
+
+ asm volatile("ld%U1%X1 %[ret],%[cnt]"
+ : [ret] "=r"(ret)
+ : [cnt] "m"(v->cnt));
+
+ return ret;
+}
+
+static inline void
+rte_atomic64_set(rte_atomic64_t *v, int64_t new_value)
+{
+ asm volatile("std%U0%X0 %[new_value],%[cnt]"
+ : [cnt] "=m"(v->cnt)
+ : [new_value] "r"(new_value));
+}
+
+static inline void
+rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
+{
+ long t;
+
+ asm volatile(
+ "1: ldarx %[t],0,%[cnt]\n"
+ "add %[t],%[inc],%[t]\n"
+ "stdcx. %[t],0,%[cnt]\n"
+ "bne- 1b\n"
+ : [t] "=&r" (t), "=m" (v->cnt)
+ : [cnt] "r" (&v->cnt), [inc] "r" (inc), "m" (v->cnt)
+ : "cc", "memory");
+}
+
+static inline void
+rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
+{
+ long t;
+
+ asm volatile(
+ "1: ldarx %[t],0,%[cnt]\n"
+ "subf %[t],%[dec],%[t]\n"
+ "stdcx. %[t],0,%[cnt]\n"
+ "bne- 1b\n"
+ : [t] "=&r" (t), "+m" (v->cnt)
+ : [cnt] "r" (&v->cnt), [dec] "r" (dec), "m" (v->cnt)
+ : "cc", "memory");
+}
+
+static inline void
+rte_atomic64_inc(rte_atomic64_t *v)
+{
+ long t;
+
+ asm volatile(
+ "1: ldarx %[t],0,%[cnt]\n"
+ "addic %[t],%[t],1\n"
+ "stdcx. %[t],0,%[cnt]\n"
+ "bne- 1b\n"
+ : [t] "=&r" (t), "+m" (v->cnt)
+ : [cnt] "r" (&v->cnt), "m" (v->cnt)
+ : "cc", "xer", "memory");
+}
+
+static inline void
+rte_atomic64_dec(rte_atomic64_t *v)
+{
+ long t;
+
+ asm volatile(
+ "1: ldarx %[t],0,%[cnt]\n"
+ "addic %[t],%[t],-1\n"
+ "stdcx. %[t],0,%[cnt]\n"
+ "bne- 1b\n"
+ : [t] "=&r" (t), "+m" (v->cnt)
+ : [cnt] "r" (&v->cnt), "m" (v->cnt)
+ : "cc", "xer", "memory");
+}
+
+static inline int64_t
+rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
+{
+ long ret;
+
+ asm volatile(
+ "\n\tlwsync\n"
+ "1: ldarx %[ret],0,%[cnt]\n"
+ "add %[ret],%[inc],%[ret]\n"
+ "stdcx. %[ret],0,%[cnt]\n"
+ "bne- 1b\n"
+ "isync\n"
+ : [ret] "=&r" (ret)
+ : [inc] "r" (inc), [cnt] "r" (&v->cnt)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline int64_t
+rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
+{
+ long ret;
+
+ asm volatile(
+ "\n\tlwsync\n"
+ "1: ldarx %[ret],0,%[cnt]\n"
+ "subf %[ret],%[dec],%[ret]\n"
+ "stdcx. %[ret],0,%[cnt]\n"
+ "bne- 1b\n"
+ "isync\n"
+ : [ret] "=&r" (ret)
+ : [dec] "r" (dec), [cnt] "r" (&v->cnt)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v)
+{
+ long ret;
+
+ asm volatile(
+ "\n\tlwsync\n"
+ "1: ldarx %[ret],0,%[cnt]\n"
+ "addic %[ret],%[ret],1\n"
+ "stdcx. %[ret],0,%[cnt]\n"
+ "bne- 1b\n"
+ "isync\n"
+ : [ret] "=&r" (ret)
+ : [cnt] "r" (&v->cnt)
+ : "cc", "xer", "memory");
+
+ return ret == 0;
+}
+
+static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v)
+{
+ long ret;
+
+ asm volatile(
+ "\n\tlwsync\n"
+ "1: ldarx %[ret],0,%[cnt]\n"
+ "addic %[ret],%[ret],-1\n"
+ "stdcx. %[ret],0,%[cnt]\n"
+ "bne- 1b\n"
+ "isync\n"
+ : [ret] "=&r" (ret)
+ : [cnt] "r" (&v->cnt)
+ : "cc", "xer", "memory");
+
+ return ret == 0;
+}
+
+static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
+{
+ return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1);
+}
+/**
+ * Atomically set a 64-bit counter to 0.
+ *
+ * @param v
+ * A pointer to the atomic counter.
+ */
+static inline void rte_atomic64_clear(rte_atomic64_t *v)
+{
+ v->cnt = 0;
+}
+
+static inline uint64_t
+rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val)
+{
+ return __atomic_exchange_8(dst, val, __ATOMIC_SEQ_CST);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_PPC_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_byteorder.h b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_byteorder.h
new file mode 100644
index 000000000..bfdded40f
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_byteorder.h
@@ -0,0 +1,120 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Inspired from FreeBSD src/sys/powerpc/include/endian.h
+ * Copyright (c) 1987, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ */
+
+#ifndef _RTE_BYTEORDER_PPC_64_H_
+#define _RTE_BYTEORDER_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include "generic/rte_byteorder.h"
+
+/*
+ * An architecture-optimized byte swap for a 16-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap16().
+ */
+static inline uint16_t rte_arch_bswap16(uint16_t _x)
+{
+ return (_x >> 8) | ((_x << 8) & 0xff00);
+}
+
+/*
+ * An architecture-optimized byte swap for a 32-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap32().
+ */
+static inline uint32_t rte_arch_bswap32(uint32_t _x)
+{
+ return (_x >> 24) | ((_x >> 8) & 0xff00) | ((_x << 8) & 0xff0000) |
+ ((_x << 24) & 0xff000000);
+}
+
+/*
+ * An architecture-optimized byte swap for a 64-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap64().
+ */
+/* 64-bit mode */
+static inline uint64_t rte_arch_bswap64(uint64_t _x)
+{
+ return (_x >> 56) | ((_x >> 40) & 0xff00) | ((_x >> 24) & 0xff0000) |
+ ((_x >> 8) & 0xff000000) | ((_x << 8) & (0xffULL << 32)) |
+ ((_x << 24) & (0xffULL << 40)) |
+ ((_x << 40) & (0xffULL << 48)) | ((_x << 56));
+}
+
+#ifndef RTE_FORCE_INTRINSICS
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap16(x) : \
+ rte_arch_bswap16(x)))
+
+#define rte_bswap32(x) ((uint32_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap32(x) : \
+ rte_arch_bswap32(x)))
+
+#define rte_bswap64(x) ((uint64_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap64(x) : \
+ rte_arch_bswap64(x)))
+#else
+/*
+ * __builtin_bswap16 is only available gcc 4.8 and upwards
+ */
+#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap16(x) : \
+ rte_arch_bswap16(x)))
+#endif
+#endif
+
+/* Power 8 have both little endian and big endian mode
+ * Power 7 only support big endian
+ */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+
+#define rte_cpu_to_le_16(x) (x)
+#define rte_cpu_to_le_32(x) (x)
+#define rte_cpu_to_le_64(x) (x)
+
+#define rte_cpu_to_be_16(x) rte_bswap16(x)
+#define rte_cpu_to_be_32(x) rte_bswap32(x)
+#define rte_cpu_to_be_64(x) rte_bswap64(x)
+
+#define rte_le_to_cpu_16(x) (x)
+#define rte_le_to_cpu_32(x) (x)
+#define rte_le_to_cpu_64(x) (x)
+
+#define rte_be_to_cpu_16(x) rte_bswap16(x)
+#define rte_be_to_cpu_32(x) rte_bswap32(x)
+#define rte_be_to_cpu_64(x) rte_bswap64(x)
+
+#else /* RTE_BIG_ENDIAN */
+
+#define rte_cpu_to_le_16(x) rte_bswap16(x)
+#define rte_cpu_to_le_32(x) rte_bswap32(x)
+#define rte_cpu_to_le_64(x) rte_bswap64(x)
+
+#define rte_cpu_to_be_16(x) (x)
+#define rte_cpu_to_be_32(x) (x)
+#define rte_cpu_to_be_64(x) (x)
+
+#define rte_le_to_cpu_16(x) rte_bswap16(x)
+#define rte_le_to_cpu_32(x) rte_bswap32(x)
+#define rte_le_to_cpu_64(x) rte_bswap64(x)
+
+#define rte_be_to_cpu_16(x) (x)
+#define rte_be_to_cpu_32(x) (x)
+#define rte_be_to_cpu_64(x) (x)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BYTEORDER_PPC_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_cpuflags.h b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_cpuflags.h
new file mode 100644
index 000000000..a88355d17
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_cpuflags.h
@@ -0,0 +1,61 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) IBM Corporation 2014.
+ */
+
+#ifndef _RTE_CPUFLAGS_PPC_64_H_
+#define _RTE_CPUFLAGS_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Enumeration of all CPU features supported
+ */
+enum rte_cpu_flag_t {
+ RTE_CPUFLAG_PPC_LE = 0,
+ RTE_CPUFLAG_TRUE_LE,
+ RTE_CPUFLAG_PSERIES_PERFMON_COMPAT,
+ RTE_CPUFLAG_VSX,
+ RTE_CPUFLAG_ARCH_2_06,
+ RTE_CPUFLAG_POWER6_EXT,
+ RTE_CPUFLAG_DFP,
+ RTE_CPUFLAG_PA6T,
+ RTE_CPUFLAG_ARCH_2_05,
+ RTE_CPUFLAG_ICACHE_SNOOP,
+ RTE_CPUFLAG_SMT,
+ RTE_CPUFLAG_BOOKE,
+ RTE_CPUFLAG_CELLBE,
+ RTE_CPUFLAG_POWER5_PLUS,
+ RTE_CPUFLAG_POWER5,
+ RTE_CPUFLAG_POWER4,
+ RTE_CPUFLAG_NOTB,
+ RTE_CPUFLAG_EFP_DOUBLE,
+ RTE_CPUFLAG_EFP_SINGLE,
+ RTE_CPUFLAG_SPE,
+ RTE_CPUFLAG_UNIFIED_CACHE,
+ RTE_CPUFLAG_4xxMAC,
+ RTE_CPUFLAG_MMU,
+ RTE_CPUFLAG_FPU,
+ RTE_CPUFLAG_ALTIVEC,
+ RTE_CPUFLAG_PPC601,
+ RTE_CPUFLAG_PPC64,
+ RTE_CPUFLAG_PPC32,
+ RTE_CPUFLAG_TAR,
+ RTE_CPUFLAG_LSEL,
+ RTE_CPUFLAG_EBB,
+ RTE_CPUFLAG_DSCR,
+ RTE_CPUFLAG_HTM,
+ RTE_CPUFLAG_ARCH_2_07,
+ /* The last item */
+ RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */
+};
+
+#include "generic/rte_cpuflags.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CPUFLAGS_PPC_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_cycles.h b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_cycles.h
new file mode 100644
index 000000000..5585f9273
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_cycles.h
@@ -0,0 +1,46 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) IBM Corporation 2014.
+ */
+
+#ifndef _RTE_CYCLES_PPC_64_H_
+#define _RTE_CYCLES_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/platform/ppc.h>
+
+#include "generic/rte_cycles.h"
+
+#include <rte_byteorder.h>
+#include <rte_common.h>
+
+/**
+ * Read the time base register.
+ *
+ * @return
+ * The time base for this lcore.
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+ return __ppc_get_timebase();
+}
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+ rte_mb();
+ return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_PPC_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_io.h b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_io.h
new file mode 100644
index 000000000..01455065e
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_io.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Cavium, Inc
+ */
+
+#ifndef _RTE_IO_PPC_64_H_
+#define _RTE_IO_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_io.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_IO_PPC_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_mcslock.h b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_mcslock.h
new file mode 100644
index 000000000..c58a6edc1
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_mcslock.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_MCSLOCK_PPC_64_H_
+#define _RTE_MCSLOCK_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_mcslock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MCSLOCK_PPC_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_memcpy.h b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_memcpy.h
new file mode 100644
index 000000000..c2a1f356d
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_memcpy.h
@@ -0,0 +1,209 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) IBM Corporation 2014.
+ */
+
+#ifndef _RTE_MEMCPY_PPC_64_H_
+#define _RTE_MEMCPY_PPC_64_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include "rte_altivec.h"
+#include "rte_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcpy.h"
+
+#if (GCC_VERSION >= 90000 && GCC_VERSION < 90400)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
+#endif
+
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+ vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+ vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+ vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+ vec_vsx_st(vec_vsx_ld(32, src), 32, dst);
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+ vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+ vec_vsx_st(vec_vsx_ld(32, src), 32, dst);
+ vec_vsx_st(vec_vsx_ld(48, src), 48, dst);
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ vec_vsx_st(vec_vsx_ld(0, src), 0, dst);
+ vec_vsx_st(vec_vsx_ld(16, src), 16, dst);
+ vec_vsx_st(vec_vsx_ld(32, src), 32, dst);
+ vec_vsx_st(vec_vsx_ld(48, src), 48, dst);
+ vec_vsx_st(vec_vsx_ld(64, src), 64, dst);
+ vec_vsx_st(vec_vsx_ld(80, src), 80, dst);
+ vec_vsx_st(vec_vsx_ld(96, src), 96, dst);
+ vec_vsx_st(vec_vsx_ld(112, src), 112, dst);
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov128(dst, src);
+ rte_mov128(dst + 128, src + 128);
+}
+
+#define rte_memcpy(dst, src, n) \
+ __extension__ ({ \
+ (__builtin_constant_p(n)) ? \
+ memcpy((dst), (src), (n)) : \
+ rte_memcpy_func((dst), (src), (n)); })
+
+static inline void *
+rte_memcpy_func(void *dst, const void *src, size_t n)
+{
+ void *ret = dst;
+
+ /* We can't copy < 16 bytes using XMM registers so do it manually. */
+ if (n < 16) {
+ if (n & 0x01) {
+ *(uint8_t *)dst = *(const uint8_t *)src;
+ dst = (uint8_t *)dst + 1;
+ src = (const uint8_t *)src + 1;
+ }
+ if (n & 0x02) {
+ *(uint16_t *)dst = *(const uint16_t *)src;
+ dst = (uint16_t *)dst + 1;
+ src = (const uint16_t *)src + 1;
+ }
+ if (n & 0x04) {
+ *(uint32_t *)dst = *(const uint32_t *)src;
+ dst = (uint32_t *)dst + 1;
+ src = (const uint32_t *)src + 1;
+ }
+ if (n & 0x08)
+ *(uint64_t *)dst = *(const uint64_t *)src;
+ return ret;
+ }
+
+ /* Special fast cases for <= 128 bytes */
+ if (n <= 32) {
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst - 16 + n,
+ (const uint8_t *)src - 16 + n);
+ return ret;
+ }
+
+ if (n <= 64) {
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov32((uint8_t *)dst - 32 + n,
+ (const uint8_t *)src - 32 + n);
+ return ret;
+ }
+
+ if (n <= 128) {
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov64((uint8_t *)dst - 64 + n,
+ (const uint8_t *)src - 64 + n);
+ return ret;
+ }
+
+ /*
+ * For large copies > 128 bytes. This combination of 256, 64 and 16 byte
+ * copies was found to be faster than doing 128 and 32 byte copies as
+ * well.
+ */
+ for ( ; n >= 256; n -= 256) {
+ rte_mov256((uint8_t *)dst, (const uint8_t *)src);
+ dst = (uint8_t *)dst + 256;
+ src = (const uint8_t *)src + 256;
+ }
+
+ /*
+ * We split the remaining bytes (which will be less than 256) into
+ * 64byte (2^6) chunks.
+ * Using incrementing integers in the case labels of a switch statement
+ * encourages the compiler to use a jump table. To get incrementing
+ * integers, we shift the 2 relevant bits to the LSB position to first
+ * get decrementing integers, and then subtract.
+ */
+ switch (3 - (n >> 6)) {
+ case 0x00:
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ n -= 64;
+ dst = (uint8_t *)dst + 64;
+ src = (const uint8_t *)src + 64; /* fallthrough */
+ case 0x01:
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ n -= 64;
+ dst = (uint8_t *)dst + 64;
+ src = (const uint8_t *)src + 64; /* fallthrough */
+ case 0x02:
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ n -= 64;
+ dst = (uint8_t *)dst + 64;
+ src = (const uint8_t *)src + 64; /* fallthrough */
+ default:
+ ;
+ }
+
+ /*
+ * We split the remaining bytes (which will be less than 64) into
+ * 16byte (2^4) chunks, using the same switch structure as above.
+ */
+ switch (3 - (n >> 4)) {
+ case 0x00:
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ n -= 16;
+ dst = (uint8_t *)dst + 16;
+ src = (const uint8_t *)src + 16; /* fallthrough */
+ case 0x01:
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ n -= 16;
+ dst = (uint8_t *)dst + 16;
+ src = (const uint8_t *)src + 16; /* fallthrough */
+ case 0x02:
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ n -= 16;
+ dst = (uint8_t *)dst + 16;
+ src = (const uint8_t *)src + 16; /* fallthrough */
+ default:
+ ;
+ }
+
+ /* Copy any remaining bytes, without going beyond end of buffers */
+ if (n != 0)
+ rte_mov16((uint8_t *)dst - 16 + n,
+ (const uint8_t *)src - 16 + n);
+ return ret;
+}
+
+#if (GCC_VERSION >= 90000 && GCC_VERSION < 90400)
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCPY_PPC_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_pause.h b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_pause.h
new file mode 100644
index 000000000..16e47ce22
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_pause.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Cavium, Inc
+ */
+
+#ifndef _RTE_PAUSE_PPC64_H_
+#define _RTE_PAUSE_PPC64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "rte_atomic.h"
+
+#include "generic/rte_pause.h"
+
+static inline void rte_pause(void)
+{
+ /* Set hardware multi-threading low priority */
+ asm volatile("or 1,1,1");
+ /* Set hardware multi-threading medium priority */
+ asm volatile("or 2,2,2");
+ rte_compiler_barrier();
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PAUSE_PPC64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_prefetch.h b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_prefetch.h
new file mode 100644
index 000000000..9ba07c815
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_prefetch.h
@@ -0,0 +1,41 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) IBM Corporation 2014.
+ */
+
+#ifndef _RTE_PREFETCH_PPC_64_H_
+#define _RTE_PREFETCH_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_prefetch.h"
+
+static inline void rte_prefetch0(const volatile void *p)
+{
+ asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+ asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+ asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p));
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ /* non-temporal version not available, fallback to rte_prefetch0 */
+ rte_prefetch0(p);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_PPC_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_rwlock.h b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_rwlock.h
new file mode 100644
index 000000000..9fadc0407
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_rwlock.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ */
+#ifndef _RTE_RWLOCK_PPC_64_H_
+#define _RTE_RWLOCK_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_rwlock.h"
+
+static inline void
+rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_read_lock(rwl);
+}
+
+static inline void
+rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_read_unlock(rwl);
+}
+
+static inline void
+rte_rwlock_write_lock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_write_lock(rwl);
+}
+
+static inline void
+rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl)
+{
+ rte_rwlock_write_unlock(rwl);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RWLOCK_PPC_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_spinlock.h b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_spinlock.h
new file mode 100644
index 000000000..149ec245c
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_spinlock.h
@@ -0,0 +1,88 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) IBM Corporation 2014.
+ */
+
+#ifndef _RTE_SPINLOCK_PPC_64_H_
+#define _RTE_SPINLOCK_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include <rte_pause.h>
+#include "generic/rte_spinlock.h"
+
+/* Fixme: Use intrinsics to implement the spinlock on Power architecture */
+
+#ifndef RTE_FORCE_INTRINSICS
+
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl)
+{
+ while (__sync_lock_test_and_set(&sl->locked, 1))
+ while (sl->locked)
+ rte_pause();
+}
+
+static inline void
+rte_spinlock_unlock(rte_spinlock_t *sl)
+{
+ __sync_lock_release(&sl->locked);
+}
+
+static inline int
+rte_spinlock_trylock(rte_spinlock_t *sl)
+{
+ return __sync_lock_test_and_set(&sl->locked, 1) == 0;
+}
+
+#endif
+
+static inline int rte_tm_supported(void)
+{
+ return 0;
+}
+
+static inline void
+rte_spinlock_lock_tm(rte_spinlock_t *sl)
+{
+ rte_spinlock_lock(sl); /* fall-back */
+}
+
+static inline int
+rte_spinlock_trylock_tm(rte_spinlock_t *sl)
+{
+ return rte_spinlock_trylock(sl);
+}
+
+static inline void
+rte_spinlock_unlock_tm(rte_spinlock_t *sl)
+{
+ rte_spinlock_unlock(sl);
+}
+
+static inline void
+rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr)
+{
+ rte_spinlock_recursive_lock(slr); /* fall-back */
+}
+
+static inline void
+rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr)
+{
+ rte_spinlock_recursive_unlock(slr);
+}
+
+static inline int
+rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
+{
+ return rte_spinlock_recursive_trylock(slr);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SPINLOCK_PPC_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_ticketlock.h b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_ticketlock.h
new file mode 100644
index 000000000..c175e9eab
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_ticketlock.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_TICKETLOCK_PPC_64_H_
+#define _RTE_TICKETLOCK_PPC_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_ticketlock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_TICKETLOCK_PPC_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_vect.h b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_vect.h
new file mode 100644
index 000000000..b0545c878
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/include/rte_vect.h
@@ -0,0 +1,36 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) IBM Corporation 2016.
+ */
+
+#ifndef _RTE_VECT_PPC_64_H_
+#define _RTE_VECT_PPC_64_H_
+
+#include "rte_altivec.h"
+
+#include "generic/rte_vect.h"
+#include "rte_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef vector signed int xmm_t;
+
+#define XMM_SIZE (sizeof(xmm_t))
+#define XMM_MASK (XMM_SIZE - 1)
+
+typedef union rte_xmm {
+ xmm_t x;
+ uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
+ uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
+ uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
+ uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
+ double pd[XMM_SIZE / sizeof(double)];
+} __rte_aligned(16) rte_xmm_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_VECT_PPC_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/meson.build b/src/spdk/dpdk/lib/librte_eal/ppc/meson.build
new file mode 100644
index 000000000..f4b6d95c4
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/meson.build
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+subdir('include')
+
+sources += files(
+ 'rte_cpuflags.c',
+ 'rte_cycles.c',
+ 'rte_hypervisor.c',
+)
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/rte_cpuflags.c b/src/spdk/dpdk/lib/librte_eal/ppc/rte_cpuflags.c
new file mode 100644
index 000000000..3bb7563ce
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/rte_cpuflags.c
@@ -0,0 +1,110 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) IBM Corporation 2014.
+ */
+
+#include "rte_cpuflags.h"
+
+#include <elf.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <unistd.h>
+
+/* Symbolic values for the entries in the auxiliary table */
+#define AT_HWCAP 16
+#define AT_HWCAP2 26
+
+/* software based registers */
+enum cpu_register_t {
+ REG_NONE = 0,
+ REG_HWCAP,
+ REG_HWCAP2,
+ REG_MAX
+};
+
+typedef uint32_t hwcap_registers_t[REG_MAX];
+
+struct feature_entry {
+ uint32_t reg;
+ uint32_t bit;
+#define CPU_FLAG_NAME_MAX_LEN 64
+ char name[CPU_FLAG_NAME_MAX_LEN];
+};
+
+#define FEAT_DEF(name, reg, bit) \
+ [RTE_CPUFLAG_##name] = {reg, bit, #name},
+
+const struct feature_entry rte_cpu_feature_table[] = {
+ FEAT_DEF(PPC_LE, REG_HWCAP, 0)
+ FEAT_DEF(TRUE_LE, REG_HWCAP, 1)
+ FEAT_DEF(PSERIES_PERFMON_COMPAT, REG_HWCAP, 6)
+ FEAT_DEF(VSX, REG_HWCAP, 7)
+ FEAT_DEF(ARCH_2_06, REG_HWCAP, 8)
+ FEAT_DEF(POWER6_EXT, REG_HWCAP, 9)
+ FEAT_DEF(DFP, REG_HWCAP, 10)
+ FEAT_DEF(PA6T, REG_HWCAP, 11)
+ FEAT_DEF(ARCH_2_05, REG_HWCAP, 12)
+ FEAT_DEF(ICACHE_SNOOP, REG_HWCAP, 13)
+ FEAT_DEF(SMT, REG_HWCAP, 14)
+ FEAT_DEF(BOOKE, REG_HWCAP, 15)
+ FEAT_DEF(CELLBE, REG_HWCAP, 16)
+ FEAT_DEF(POWER5_PLUS, REG_HWCAP, 17)
+ FEAT_DEF(POWER5, REG_HWCAP, 18)
+ FEAT_DEF(POWER4, REG_HWCAP, 19)
+ FEAT_DEF(NOTB, REG_HWCAP, 20)
+ FEAT_DEF(EFP_DOUBLE, REG_HWCAP, 21)
+ FEAT_DEF(EFP_SINGLE, REG_HWCAP, 22)
+ FEAT_DEF(SPE, REG_HWCAP, 23)
+ FEAT_DEF(UNIFIED_CACHE, REG_HWCAP, 24)
+ FEAT_DEF(4xxMAC, REG_HWCAP, 25)
+ FEAT_DEF(MMU, REG_HWCAP, 26)
+ FEAT_DEF(FPU, REG_HWCAP, 27)
+ FEAT_DEF(ALTIVEC, REG_HWCAP, 28)
+ FEAT_DEF(PPC601, REG_HWCAP, 29)
+ FEAT_DEF(PPC64, REG_HWCAP, 30)
+ FEAT_DEF(PPC32, REG_HWCAP, 31)
+ FEAT_DEF(TAR, REG_HWCAP2, 26)
+ FEAT_DEF(LSEL, REG_HWCAP2, 27)
+ FEAT_DEF(EBB, REG_HWCAP2, 28)
+ FEAT_DEF(DSCR, REG_HWCAP2, 29)
+ FEAT_DEF(HTM, REG_HWCAP2, 30)
+ FEAT_DEF(ARCH_2_07, REG_HWCAP2, 31)
+};
+
+/*
+ * Read AUXV software register and get cpu features for Power
+ */
+static void
+rte_cpu_get_features(hwcap_registers_t out)
+{
+ out[REG_HWCAP] = rte_cpu_getauxval(AT_HWCAP);
+ out[REG_HWCAP2] = rte_cpu_getauxval(AT_HWCAP2);
+}
+
+/*
+ * Checks if a particular flag is available on current machine.
+ */
+int
+rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
+{
+ const struct feature_entry *feat;
+ hwcap_registers_t regs = {0};
+
+ if (feature >= RTE_CPUFLAG_NUMFLAGS)
+ return -ENOENT;
+
+ feat = &rte_cpu_feature_table[feature];
+ if (feat->reg == REG_NONE)
+ return -EFAULT;
+
+ rte_cpu_get_features(regs);
+ return (regs[feat->reg] >> feat->bit) & 1;
+}
+
+const char *
+rte_cpu_get_flag_name(enum rte_cpu_flag_t feature)
+{
+ if (feature >= RTE_CPUFLAG_NUMFLAGS)
+ return NULL;
+ return rte_cpu_feature_table[feature].name;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/rte_cycles.c b/src/spdk/dpdk/lib/librte_eal/ppc/rte_cycles.c
new file mode 100644
index 000000000..c96a2143b
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/rte_cycles.c
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) IBM Corporation 2019.
+ */
+
+#include "eal_private.h"
+
+uint64_t
+get_tsc_freq_arch(void)
+{
+ return 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/ppc/rte_hypervisor.c b/src/spdk/dpdk/lib/librte_eal/ppc/rte_hypervisor.c
new file mode 100644
index 000000000..08a1c97d1
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/ppc/rte_hypervisor.c
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 Mellanox Technologies, Ltd
+ */
+
+#include "rte_hypervisor.h"
+
+enum rte_hypervisor
+rte_hypervisor_get(void)
+{
+ return RTE_HYPERVISOR_UNKNOWN;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/rte_eal_exports.def b/src/spdk/dpdk/lib/librte_eal/rte_eal_exports.def
new file mode 100644
index 000000000..12a6c79d6
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/rte_eal_exports.def
@@ -0,0 +1,9 @@
+EXPORTS
+ __rte_panic
+ rte_eal_get_configuration
+ rte_eal_init
+ rte_eal_mp_remote_launch
+ rte_eal_mp_wait_lcore
+ rte_eal_remote_launch
+ rte_log
+ rte_vlog
diff --git a/src/spdk/dpdk/lib/librte_eal/rte_eal_version.map b/src/spdk/dpdk/lib/librte_eal/rte_eal_version.map
new file mode 100644
index 000000000..d8038749a
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/rte_eal_version.map
@@ -0,0 +1,389 @@
+DPDK_20.0 {
+ global:
+
+ __rte_panic;
+ eal_parse_sysfs_value;
+ eal_timer_source;
+ per_lcore__lcore_id;
+ per_lcore__rte_errno;
+ rte_bus_dump;
+ rte_bus_find;
+ rte_bus_find_by_device;
+ rte_bus_find_by_name;
+ rte_bus_get_iommu_class;
+ rte_bus_probe;
+ rte_bus_register;
+ rte_bus_scan;
+ rte_bus_unregister;
+ rte_calloc;
+ rte_calloc_socket;
+ rte_cpu_get_flag_enabled;
+ rte_cpu_get_flag_name;
+ rte_cpu_is_supported;
+ rte_ctrl_thread_create;
+ rte_cycles_vmware_tsc_map;
+ rte_delay_us;
+ rte_delay_us_block;
+ rte_delay_us_callback_register;
+ rte_dev_is_probed;
+ rte_dev_probe;
+ rte_dev_remove;
+ rte_devargs_add;
+ rte_devargs_dump;
+ rte_devargs_insert;
+ rte_devargs_next;
+ rte_devargs_parse;
+ rte_devargs_parsef;
+ rte_devargs_remove;
+ rte_devargs_type_count;
+ rte_dump_physmem_layout;
+ rte_dump_registers;
+ rte_dump_stack;
+ rte_dump_tailq;
+ rte_eal_alarm_cancel;
+ rte_eal_alarm_set;
+ rte_eal_cleanup;
+ rte_eal_create_uio_dev;
+ rte_eal_get_lcore_state;
+ rte_eal_get_physmem_size;
+ rte_eal_get_runtime_dir;
+ rte_eal_has_hugepages;
+ rte_eal_has_pci;
+ rte_eal_hotplug_add;
+ rte_eal_hotplug_remove;
+ rte_eal_hpet_init;
+ rte_eal_init;
+ rte_eal_iopl_init;
+ rte_eal_iova_mode;
+ rte_eal_lcore_role;
+ rte_eal_mbuf_user_pool_ops;
+ rte_eal_mp_remote_launch;
+ rte_eal_mp_wait_lcore;
+ rte_eal_primary_proc_alive;
+ rte_eal_process_type;
+ rte_eal_remote_launch;
+ rte_eal_tailq_lookup;
+ rte_eal_tailq_register;
+ rte_eal_using_phys_addrs;
+ rte_eal_vfio_intr_mode;
+ rte_eal_wait_lcore;
+ rte_epoll_ctl;
+ rte_epoll_wait;
+ rte_exit;
+ rte_free;
+ rte_get_hpet_cycles;
+ rte_get_hpet_hz;
+ rte_get_master_lcore;
+ rte_get_next_lcore;
+ rte_get_tsc_hz;
+ rte_hexdump;
+ rte_hypervisor_get;
+ rte_hypervisor_get_name;
+ rte_intr_allow_others;
+ rte_intr_callback_register;
+ rte_intr_callback_unregister;
+ rte_intr_cap_multiple;
+ rte_intr_disable;
+ rte_intr_dp_is_en;
+ rte_intr_efd_disable;
+ rte_intr_efd_enable;
+ rte_intr_enable;
+ rte_intr_free_epoll_fd;
+ rte_intr_rx_ctl;
+ rte_intr_tls_epfd;
+ rte_keepalive_create;
+ rte_keepalive_dispatch_pings;
+ rte_keepalive_mark_alive;
+ rte_keepalive_mark_sleep;
+ rte_keepalive_register_core;
+ rte_keepalive_register_relay_callback;
+ rte_lcore_count;
+ rte_lcore_has_role;
+ rte_lcore_index;
+ rte_lcore_is_enabled;
+ rte_lcore_to_socket_id;
+ rte_log;
+ rte_log_cur_msg_loglevel;
+ rte_log_cur_msg_logtype;
+ rte_log_dump;
+ rte_log_get_global_level;
+ rte_log_get_level;
+ rte_log_register;
+ rte_log_set_global_level;
+ rte_log_set_level;
+ rte_log_set_level_pattern;
+ rte_log_set_level_regexp;
+ rte_logs;
+ rte_malloc;
+ rte_malloc_dump_stats;
+ rte_malloc_get_socket_stats;
+ rte_malloc_set_limit;
+ rte_malloc_socket;
+ rte_malloc_validate;
+ rte_malloc_virt2iova;
+ rte_mcfg_mem_read_lock;
+ rte_mcfg_mem_read_unlock;
+ rte_mcfg_mem_write_lock;
+ rte_mcfg_mem_write_unlock;
+ rte_mcfg_mempool_read_lock;
+ rte_mcfg_mempool_read_unlock;
+ rte_mcfg_mempool_write_lock;
+ rte_mcfg_mempool_write_unlock;
+ rte_mcfg_tailq_read_lock;
+ rte_mcfg_tailq_read_unlock;
+ rte_mcfg_tailq_write_lock;
+ rte_mcfg_tailq_write_unlock;
+ rte_mem_lock_page;
+ rte_mem_virt2iova;
+ rte_mem_virt2phy;
+ rte_memdump;
+ rte_memory_get_nchannel;
+ rte_memory_get_nrank;
+ rte_memzone_dump;
+ rte_memzone_free;
+ rte_memzone_lookup;
+ rte_memzone_reserve;
+ rte_memzone_reserve_aligned;
+ rte_memzone_reserve_bounded;
+ rte_memzone_walk;
+ rte_openlog_stream;
+ rte_rand;
+ rte_realloc;
+ rte_reciprocal_value;
+ rte_reciprocal_value_u64;
+ rte_rtm_supported;
+ rte_service_attr_get;
+ rte_service_attr_reset_all;
+ rte_service_component_register;
+ rte_service_component_runstate_set;
+ rte_service_component_unregister;
+ rte_service_dump;
+ rte_service_finalize;
+ rte_service_get_by_id;
+ rte_service_get_by_name;
+ rte_service_get_count;
+ rte_service_get_name;
+ rte_service_lcore_add;
+ rte_service_lcore_attr_get;
+ rte_service_lcore_attr_reset_all;
+ rte_service_lcore_count;
+ rte_service_lcore_count_services;
+ rte_service_lcore_del;
+ rte_service_lcore_list;
+ rte_service_lcore_reset_all;
+ rte_service_lcore_start;
+ rte_service_lcore_stop;
+ rte_service_map_lcore_get;
+ rte_service_map_lcore_set;
+ rte_service_may_be_active;
+ rte_service_probe_capability;
+ rte_service_run_iter_on_app_lcore;
+ rte_service_runstate_get;
+ rte_service_runstate_set;
+ rte_service_set_runstate_mapped_check;
+ rte_service_set_stats_enable;
+ rte_service_start_with_defaults;
+ rte_set_application_usage_hook;
+ rte_socket_count;
+ rte_socket_id;
+ rte_socket_id_by_idx;
+ rte_srand;
+ rte_strerror;
+ rte_strscpy;
+ rte_strsplit;
+ rte_sys_gettid;
+ rte_thread_get_affinity;
+ rte_thread_set_affinity;
+ rte_thread_setname;
+ rte_uuid_compare;
+ rte_uuid_is_null;
+ rte_uuid_parse;
+ rte_uuid_unparse;
+ rte_vfio_clear_group;
+ rte_vfio_container_create;
+ rte_vfio_container_destroy;
+ rte_vfio_container_dma_map;
+ rte_vfio_container_dma_unmap;
+ rte_vfio_container_group_bind;
+ rte_vfio_container_group_unbind;
+ rte_vfio_enable;
+ rte_vfio_get_container_fd;
+ rte_vfio_get_group_fd;
+ rte_vfio_get_group_num;
+ rte_vfio_is_enabled;
+ rte_vfio_noiommu_is_enabled;
+ rte_vfio_release_device;
+ rte_vfio_setup_device;
+ rte_vlog;
+ rte_zmalloc;
+ rte_zmalloc_socket;
+
+ local: *;
+};
+
+EXPERIMENTAL {
+ global:
+
+ # added in 18.02
+ rte_mp_action_register;
+ rte_mp_action_unregister;
+ rte_mp_reply;
+ rte_mp_sendmsg;
+
+ # added in 18.05
+ rte_dev_event_callback_register;
+ rte_dev_event_callback_unregister;
+ rte_dev_event_monitor_start;
+ rte_dev_event_monitor_stop;
+ rte_fbarray_attach;
+ rte_fbarray_destroy;
+ rte_fbarray_detach;
+ rte_fbarray_dump_metadata;
+ rte_fbarray_find_contig_free;
+ rte_fbarray_find_contig_used;
+ rte_fbarray_find_idx;
+ rte_fbarray_find_next_free;
+ rte_fbarray_find_next_n_free;
+ rte_fbarray_find_next_n_used;
+ rte_fbarray_find_next_used;
+ rte_fbarray_get;
+ rte_fbarray_init;
+ rte_fbarray_is_used;
+ rte_fbarray_set_free;
+ rte_fbarray_set_used;
+ rte_log_register_type_and_pick_level;
+ rte_malloc_dump_heaps;
+ rte_mem_alloc_validator_register;
+ rte_mem_alloc_validator_unregister;
+ rte_mem_check_dma_mask;
+ rte_mem_event_callback_register;
+ rte_mem_event_callback_unregister;
+ rte_mem_iova2virt;
+ rte_mem_virt2memseg;
+ rte_mem_virt2memseg_list;
+ rte_memseg_contig_walk;
+ rte_memseg_list_walk;
+ rte_memseg_walk;
+ rte_mp_request_async;
+ rte_mp_request_sync;
+
+ # added in 18.08
+ rte_class_find;
+ rte_class_find_by_name;
+ rte_class_register;
+ rte_class_unregister;
+ rte_dev_iterator_init;
+ rte_dev_iterator_next;
+ rte_fbarray_find_prev_free;
+ rte_fbarray_find_prev_n_free;
+ rte_fbarray_find_prev_n_used;
+ rte_fbarray_find_prev_used;
+ rte_fbarray_find_rev_contig_free;
+ rte_fbarray_find_rev_contig_used;
+ rte_memseg_contig_walk_thread_unsafe;
+ rte_memseg_list_walk_thread_unsafe;
+ rte_memseg_walk_thread_unsafe;
+
+ # added in 18.11
+ rte_delay_us_sleep;
+ rte_dev_event_callback_process;
+ rte_dev_hotplug_handle_disable;
+ rte_dev_hotplug_handle_enable;
+ rte_malloc_heap_create;
+ rte_malloc_heap_destroy;
+ rte_malloc_heap_get_socket;
+ rte_malloc_heap_memory_add;
+ rte_malloc_heap_memory_attach;
+ rte_malloc_heap_memory_detach;
+ rte_malloc_heap_memory_remove;
+ rte_malloc_heap_socket_is_external;
+ rte_mem_check_dma_mask_thread_unsafe;
+ rte_mem_set_dma_mask;
+ rte_memseg_get_fd;
+ rte_memseg_get_fd_offset;
+ rte_memseg_get_fd_offset_thread_unsafe;
+ rte_memseg_get_fd_thread_unsafe;
+
+ # added in 19.02
+ rte_extmem_attach;
+ rte_extmem_detach;
+ rte_extmem_register;
+ rte_extmem_unregister;
+
+ # added in 19.05
+ rte_dev_dma_map;
+ rte_dev_dma_unmap;
+ rte_fbarray_find_biggest_free;
+ rte_fbarray_find_biggest_used;
+ rte_fbarray_find_rev_biggest_free;
+ rte_fbarray_find_rev_biggest_used;
+ rte_intr_callback_unregister_pending;
+ rte_realloc_socket;
+
+ # added in 19.08
+ rte_intr_ack;
+ rte_lcore_cpuset;
+ rte_lcore_to_cpu_id;
+ rte_mcfg_timer_lock;
+ rte_mcfg_timer_unlock;
+ rte_rand_max;
+
+ # added in 19.11
+ rte_log_get_stream;
+ rte_mcfg_get_single_file_segments;
+
+ # added in 20.02
+ rte_thread_is_intr;
+
+ # added in 20.05
+ __rte_eal_trace_alarm_cancel;
+ __rte_eal_trace_alarm_set;
+ __rte_eal_trace_generic_double;
+ __rte_eal_trace_generic_float;
+ __rte_eal_trace_generic_func;
+ __rte_eal_trace_generic_i16;
+ __rte_eal_trace_generic_i32;
+ __rte_eal_trace_generic_i64;
+ __rte_eal_trace_generic_i8;
+ __rte_eal_trace_generic_int;
+ __rte_eal_trace_generic_long;
+ __rte_eal_trace_generic_ptr;
+ __rte_eal_trace_generic_str;
+ __rte_eal_trace_generic_u16;
+ __rte_eal_trace_generic_u32;
+ __rte_eal_trace_generic_u64;
+ __rte_eal_trace_generic_u8;
+ __rte_eal_trace_generic_void;
+ __rte_eal_trace_intr_callback_register;
+ __rte_eal_trace_intr_callback_unregister;
+ __rte_eal_trace_intr_enable;
+ __rte_eal_trace_intr_disable;
+ __rte_eal_trace_mem_free;
+ __rte_eal_trace_mem_malloc;
+ __rte_eal_trace_mem_realloc;
+ __rte_eal_trace_mem_zmalloc;
+ __rte_eal_trace_memzone_free;
+ __rte_eal_trace_memzone_lookup;
+ __rte_eal_trace_memzone_reserve;
+ __rte_eal_trace_thread_lcore_ready;
+ __rte_eal_trace_thread_remote_launch;
+ __rte_trace_mem_per_thread_alloc;
+ __rte_trace_point_emit_field;
+ __rte_trace_point_register;
+ per_lcore_trace_mem;
+ per_lcore_trace_point_sz;
+ rte_log_can_log;
+ rte_thread_getname;
+ rte_trace_dump;
+ rte_trace_is_enabled;
+ rte_trace_metadata_dump;
+ rte_trace_mode_get;
+ rte_trace_mode_set;
+ rte_trace_pattern;
+ rte_trace_point_disable;
+ rte_trace_point_enable;
+ rte_trace_point_is_enabled;
+ rte_trace_point_lookup;
+ rte_trace_regexp;
+ rte_trace_save;
+};
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/eal.c b/src/spdk/dpdk/lib/librte_eal/windows/eal.c
new file mode 100644
index 000000000..d084606a6
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/eal.c
@@ -0,0 +1,276 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#include <fcntl.h>
+#include <io.h>
+#include <share.h>
+#include <sys/stat.h>
+
+#include <rte_debug.h>
+#include <rte_eal.h>
+#include <eal_memcfg.h>
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <eal_thread.h>
+#include <eal_internal_cfg.h>
+#include <eal_filesystem.h>
+#include <eal_options.h>
+#include <eal_private.h>
+
+#include "eal_windows.h"
+
+ /* Allow the application to print its usage message too if set */
+static rte_usage_hook_t rte_application_usage_hook;
+
+/* define fd variable here, because file needs to be kept open for the
+ * duration of the program, as we hold a write lock on it in the primary proc
+ */
+static int mem_cfg_fd = -1;
+
+/* early configuration structure, when memory config is not mmapped */
+static struct rte_mem_config early_mem_config;
+
+/* Address of global and public configuration */
+static struct rte_config rte_config = {
+ .mem_config = &early_mem_config,
+};
+
+/* internal configuration (per-core) */
+struct lcore_config lcore_config[RTE_MAX_LCORE];
+
+/* internal configuration */
+struct internal_config internal_config;
+
+/* platform-specific runtime dir */
+static char runtime_dir[PATH_MAX];
+
+const char *
+rte_eal_get_runtime_dir(void)
+{
+ return runtime_dir;
+}
+
+/* Return a pointer to the configuration structure */
+struct rte_config *
+rte_eal_get_configuration(void)
+{
+ return &rte_config;
+}
+
+/* Detect if we are a primary or a secondary process */
+enum rte_proc_type_t
+eal_proc_type_detect(void)
+{
+ enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
+ const char *pathname = eal_runtime_config_path();
+
+ /* if we can open the file but not get a write-lock we are a secondary
+ * process. NOTE: if we get a file handle back, we keep that open
+ * and don't close it to prevent a race condition between multiple opens
+ */
+ errno_t err = _sopen_s(&mem_cfg_fd, pathname,
+ _O_RDWR, _SH_DENYNO, _S_IREAD | _S_IWRITE);
+ if (err == 0) {
+ OVERLAPPED soverlapped = { 0 };
+ soverlapped.Offset = sizeof(*rte_config.mem_config);
+ soverlapped.OffsetHigh = 0;
+
+ HANDLE hwinfilehandle = (HANDLE)_get_osfhandle(mem_cfg_fd);
+
+ if (!LockFileEx(hwinfilehandle,
+ LOCKFILE_EXCLUSIVE_LOCK | LOCKFILE_FAIL_IMMEDIATELY, 0,
+ sizeof(*rte_config.mem_config), 0, &soverlapped))
+ ptype = RTE_PROC_SECONDARY;
+ }
+
+ RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
+ ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
+
+ return ptype;
+}
+
+/* display usage */
+static void
+eal_usage(const char *prgname)
+{
+ printf("\nUsage: %s ", prgname);
+ eal_common_usage();
+ /* Allow the application to print its usage message too
+ * if hook is set
+ */
+ if (rte_application_usage_hook) {
+ printf("===== Application Usage =====\n\n");
+ rte_application_usage_hook(prgname);
+ }
+}
+
+/* Parse the arguments for --log-level only */
+static void
+eal_log_level_parse(int argc, char **argv)
+{
+ int opt;
+ char **argvopt;
+ int option_index;
+
+ argvopt = argv;
+
+ eal_reset_internal_config(&internal_config);
+
+ while ((opt = getopt_long(argc, argvopt, eal_short_options,
+ eal_long_options, &option_index)) != EOF) {
+
+ int ret;
+
+ /* getopt is not happy, stop right now */
+ if (opt == '?')
+ break;
+
+ ret = (opt == OPT_LOG_LEVEL_NUM) ?
+ eal_parse_common_option(opt, optarg,
+ &internal_config) : 0;
+
+ /* common parser is not happy */
+ if (ret < 0)
+ break;
+ }
+
+ optind = 0; /* reset getopt lib */
+}
+
+/* Parse the argument given in the command line of the application */
+__attribute__((optnone)) static int
+eal_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, eal_short_options,
+ eal_long_options, &option_index)) != EOF) {
+
+ int ret;
+
+ /* getopt is not happy, stop right now */
+ if (opt == '?') {
+ eal_usage(prgname);
+ return -1;
+ }
+
+ ret = eal_parse_common_option(opt, optarg, &internal_config);
+ /* common parser is not happy */
+ if (ret < 0) {
+ eal_usage(prgname);
+ return -1;
+ }
+ /* common parser handled this option */
+ if (ret == 0)
+ continue;
+
+ switch (opt) {
+ case 'h':
+ eal_usage(prgname);
+ exit(EXIT_SUCCESS);
+ default:
+ if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
+ RTE_LOG(ERR, EAL, "Option %c is not supported "
+ "on Windows\n", opt);
+ } else if (opt >= OPT_LONG_MIN_NUM &&
+ opt < OPT_LONG_MAX_NUM) {
+ RTE_LOG(ERR, EAL, "Option %s is not supported "
+ "on Windows\n",
+ eal_long_options[option_index].name);
+ } else {
+ RTE_LOG(ERR, EAL, "Option %d is not supported "
+ "on Windows\n", opt);
+ }
+ eal_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (eal_adjust_config(&internal_config) != 0)
+ return -1;
+
+ /* sanity checks */
+ if (eal_check_common_options(&internal_config) != 0) {
+ eal_usage(prgname);
+ return -1;
+ }
+
+ if (optind >= 0)
+ argv[optind - 1] = prgname;
+ ret = optind - 1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+static int
+sync_func(void *arg __rte_unused)
+{
+ return 0;
+}
+
+static void
+rte_eal_init_alert(const char *msg)
+{
+ fprintf(stderr, "EAL: FATAL: %s\n", msg);
+ RTE_LOG(ERR, EAL, "%s\n", msg);
+}
+
+ /* Launch threads, called at application init(). */
+int
+rte_eal_init(int argc, char **argv)
+{
+ int i, fctret;
+
+ rte_eal_log_init(NULL, 0);
+
+ eal_log_level_parse(argc, argv);
+
+ /* create a map of all processors in the system */
+ eal_create_cpu_map();
+
+ if (rte_eal_cpu_init() < 0) {
+ rte_eal_init_alert("Cannot detect lcores.");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ fctret = eal_parse_args(argc, argv);
+ if (fctret < 0)
+ exit(1);
+
+ eal_thread_init_master(rte_config.master_lcore);
+
+ RTE_LCORE_FOREACH_SLAVE(i) {
+
+ /*
+ * create communication pipes between master thread
+ * and children
+ */
+ if (_pipe(lcore_config[i].pipe_master2slave,
+ sizeof(char), _O_BINARY) < 0)
+ rte_panic("Cannot create pipe\n");
+ if (_pipe(lcore_config[i].pipe_slave2master,
+ sizeof(char), _O_BINARY) < 0)
+ rte_panic("Cannot create pipe\n");
+
+ lcore_config[i].state = WAIT;
+
+ /* create a thread for each lcore */
+ if (eal_thread_create(&lcore_config[i].thread_id) != 0)
+ rte_panic("Cannot create thread\n");
+ }
+
+ /*
+ * Launch a dummy function on all slave lcores, so that master lcore
+ * knows they are all ready when this function returns.
+ */
+ rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
+ rte_eal_mp_wait_lcore();
+ return fctret;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/eal_debug.c b/src/spdk/dpdk/lib/librte_eal/windows/eal_debug.c
new file mode 100644
index 000000000..669be6ff9
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/eal_debug.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+
+ /* call abort(), it will generate a coredump if enabled */
+void
+__rte_panic(const char *funcname, const char *format, ...)
+{
+ va_list ap;
+
+ rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname);
+ va_start(ap, format);
+ rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap);
+ va_end(ap);
+ abort();
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/eal_lcore.c b/src/spdk/dpdk/lib/librte_eal/windows/eal_lcore.c
new file mode 100644
index 000000000..82ee45413
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/eal_lcore.c
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#include <pthread.h>
+#include <stdint.h>
+
+#include <rte_common.h>
+
+#include "eal_private.h"
+#include "eal_thread.h"
+#include "eal_windows.h"
+
+/* global data structure that contains the CPU map */
+static struct _wcpu_map {
+ unsigned int total_procs;
+ unsigned int proc_sockets;
+ unsigned int proc_cores;
+ unsigned int reserved;
+ struct _win_lcore_map {
+ uint8_t socket_id;
+ uint8_t core_id;
+ } wlcore_map[RTE_MAX_LCORE];
+} wcpu_map = { 0 };
+
+/*
+ * Create a map of all processors and associated cores on the system
+ */
+void
+eal_create_cpu_map()
+{
+ wcpu_map.total_procs =
+ GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
+
+ LOGICAL_PROCESSOR_RELATIONSHIP lprocRel;
+ DWORD lprocInfoSize = 0;
+ BOOL ht_enabled = FALSE;
+
+ /* First get the processor package information */
+ lprocRel = RelationProcessorPackage;
+ /* Determine the size of buffer we need (pass NULL) */
+ GetLogicalProcessorInformationEx(lprocRel, NULL, &lprocInfoSize);
+ wcpu_map.proc_sockets = lprocInfoSize / 48;
+
+ lprocInfoSize = 0;
+ /* Next get the processor core information */
+ lprocRel = RelationProcessorCore;
+ GetLogicalProcessorInformationEx(lprocRel, NULL, &lprocInfoSize);
+ wcpu_map.proc_cores = lprocInfoSize / 48;
+
+ if (wcpu_map.total_procs > wcpu_map.proc_cores)
+ ht_enabled = TRUE;
+
+ /* Distribute the socket and core ids appropriately
+ * across the logical cores. For now, split the cores
+ * equally across the sockets.
+ */
+ unsigned int lcore = 0;
+ for (unsigned int socket = 0; socket <
+ wcpu_map.proc_sockets; ++socket) {
+ for (unsigned int core = 0;
+ core < (wcpu_map.proc_cores / wcpu_map.proc_sockets);
+ ++core) {
+ wcpu_map.wlcore_map[lcore]
+ .socket_id = socket;
+ wcpu_map.wlcore_map[lcore]
+ .core_id = core;
+ lcore++;
+ if (ht_enabled) {
+ wcpu_map.wlcore_map[lcore]
+ .socket_id = socket;
+ wcpu_map.wlcore_map[lcore]
+ .core_id = core;
+ lcore++;
+ }
+ }
+ }
+}
+
+/*
+ * Check if a cpu is present by the presence of the cpu information for it
+ */
+int
+eal_cpu_detected(unsigned int lcore_id)
+{
+ return (lcore_id < wcpu_map.total_procs);
+}
+
+/*
+ * Get CPU socket id for a logical core
+ */
+unsigned
+eal_cpu_socket_id(unsigned int lcore_id)
+{
+ return wcpu_map.wlcore_map[lcore_id].socket_id;
+}
+
+/*
+ * Get CPU socket id (NUMA node) for a logical core
+ */
+unsigned
+eal_cpu_core_id(unsigned int lcore_id)
+{
+ return wcpu_map.wlcore_map[lcore_id].core_id;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/eal_log.c b/src/spdk/dpdk/lib/librte_eal/windows/eal_log.c
new file mode 100644
index 000000000..875981f13
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/eal_log.c
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include "eal_private.h"
+
+/* set the log to default function, called during eal init process. */
+int
+rte_eal_log_init(__rte_unused const char *id, __rte_unused int facility)
+{
+ rte_openlog_stream(stderr);
+
+ eal_log_set_default(stderr);
+
+ return 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/eal_thread.c b/src/spdk/dpdk/lib/librte_eal/windows/eal_thread.c
new file mode 100644
index 000000000..e149199a6
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/eal_thread.c
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#include <io.h>
+
+#include <rte_atomic.h>
+#include <rte_debug.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <eal_thread.h>
+
+#include "eal_private.h"
+#include "eal_windows.h"
+
+RTE_DEFINE_PER_LCORE(unsigned int, _lcore_id) = LCORE_ID_ANY;
+RTE_DEFINE_PER_LCORE(unsigned int, _socket_id) = (unsigned int)SOCKET_ID_ANY;
+RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
+
+/*
+ * Send a message to a slave lcore identified by slave_id to call a
+ * function f with argument arg. Once the execution is done, the
+ * remote lcore switch in FINISHED state.
+ */
+int
+rte_eal_remote_launch(lcore_function_t *f, void *arg, unsigned int slave_id)
+{
+ int n;
+ char c = 0;
+ int m2s = lcore_config[slave_id].pipe_master2slave[1];
+ int s2m = lcore_config[slave_id].pipe_slave2master[0];
+
+ if (lcore_config[slave_id].state != WAIT)
+ return -EBUSY;
+
+ lcore_config[slave_id].f = f;
+ lcore_config[slave_id].arg = arg;
+
+ /* send message */
+ n = 0;
+ while (n == 0 || (n < 0 && errno == EINTR))
+ n = _write(m2s, &c, 1);
+ if (n < 0)
+ rte_panic("cannot write on configuration pipe\n");
+
+ /* wait ack */
+ do {
+ n = _read(s2m, &c, 1);
+ } while (n < 0 && errno == EINTR);
+
+ if (n <= 0)
+ rte_panic("cannot read on configuration pipe\n");
+
+ return 0;
+}
+
+void
+eal_thread_init_master(unsigned int lcore_id)
+{
+ /* set the lcore ID in per-lcore memory area */
+ RTE_PER_LCORE(_lcore_id) = lcore_id;
+}
+
+static inline pthread_t
+eal_thread_self(void)
+{
+ return GetCurrentThreadId();
+}
+
+/* main loop of threads */
+void *
+eal_thread_loop(void *arg __rte_unused)
+{
+ char c;
+ int n, ret;
+ unsigned int lcore_id;
+ pthread_t thread_id;
+ int m2s, s2m;
+ char cpuset[RTE_CPU_AFFINITY_STR_LEN];
+
+ thread_id = eal_thread_self();
+
+ /* retrieve our lcore_id from the configuration structure */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (thread_id == lcore_config[lcore_id].thread_id)
+ break;
+ }
+ if (lcore_id == RTE_MAX_LCORE)
+ rte_panic("cannot retrieve lcore id\n");
+
+ m2s = lcore_config[lcore_id].pipe_master2slave[0];
+ s2m = lcore_config[lcore_id].pipe_slave2master[1];
+
+ /* set the lcore ID in per-lcore memory area */
+ RTE_PER_LCORE(_lcore_id) = lcore_id;
+
+ RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s])\n",
+ lcore_id, (uintptr_t)thread_id, cpuset);
+
+ /* read on our pipe to get commands */
+ while (1) {
+ void *fct_arg;
+
+ /* wait command */
+ do {
+ n = _read(m2s, &c, 1);
+ } while (n < 0 && errno == EINTR);
+
+ if (n <= 0)
+ rte_panic("cannot read on configuration pipe\n");
+
+ lcore_config[lcore_id].state = RUNNING;
+
+ /* send ack */
+ n = 0;
+ while (n == 0 || (n < 0 && errno == EINTR))
+ n = _write(s2m, &c, 1);
+ if (n < 0)
+ rte_panic("cannot write on configuration pipe\n");
+
+ if (lcore_config[lcore_id].f == NULL)
+ rte_panic("NULL function pointer\n");
+
+ /* call the function and store the return value */
+ fct_arg = lcore_config[lcore_id].arg;
+ ret = lcore_config[lcore_id].f(fct_arg);
+ lcore_config[lcore_id].ret = ret;
+ rte_wmb();
+
+ /* when a service core returns, it should go directly to WAIT
+ * state, because the application will not lcore_wait() for it.
+ */
+ if (lcore_config[lcore_id].core_role == ROLE_SERVICE)
+ lcore_config[lcore_id].state = WAIT;
+ else
+ lcore_config[lcore_id].state = FINISHED;
+ }
+}
+
+/* function to create threads */
+int
+eal_thread_create(pthread_t *thread)
+{
+ HANDLE th;
+
+ th = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)eal_thread_loop,
+ NULL, 0, (LPDWORD)thread);
+ if (!th)
+ return -1;
+
+ SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);
+ SetThreadPriority(th, THREAD_PRIORITY_TIME_CRITICAL);
+
+ return 0;
+}
+
+int
+rte_thread_setname(__rte_unused pthread_t id, __rte_unused const char *name)
+{
+ /* TODO */
+ /* This is a stub, not the expected result */
+ return 0;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/eal_windows.h b/src/spdk/dpdk/lib/librte_eal/windows/eal_windows.h
new file mode 100644
index 000000000..fadd676b2
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/eal_windows.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2020 Dmitry Kozlyuk
+ */
+
+#ifndef _EAL_WINDOWS_H_
+#define _EAL_WINDOWS_H_
+
+/**
+ * @file Facilities private to Windows EAL
+ */
+
+#include <rte_windows.h>
+
+/**
+ * Create a map of processors and cores on the system.
+ */
+void eal_create_cpu_map(void);
+
+/**
+ * Create a thread.
+ *
+ * @param thread
+ * The location to store the thread id if successful.
+ * @return
+ * 0 for success, -1 if the thread is not created.
+ */
+int eal_thread_create(pthread_t *thread);
+
+#endif /* _EAL_WINDOWS_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/fnmatch.c b/src/spdk/dpdk/lib/librte_eal/windows/fnmatch.c
new file mode 100644
index 000000000..f622bf54c
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/fnmatch.c
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 1989, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Guido van Rossum.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)fnmatch.c 8.2 (Berkeley) 4/16/94";
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
+ * Compares a filename or pathname to a pattern.
+ */
+
+#include <ctype.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "fnmatch.h"
+
+#define EOS '\0'
+
+static const char *rangematch(const char *, char, int);
+
+int
+fnmatch(const char *pattern, const char *string, int flags)
+{
+ const char *stringstart;
+ char c, test;
+
+ for (stringstart = string;;)
+ switch (c = *pattern++) {
+ case EOS:
+ if ((flags & FNM_LEADING_DIR) && *string == '/')
+ return (0);
+ return (*string == EOS ? 0 : FNM_NOMATCH);
+ case '?':
+ if (*string == EOS)
+ return (FNM_NOMATCH);
+ if (*string == '/' && (flags & FNM_PATHNAME))
+ return (FNM_NOMATCH);
+ if (*string == '.' && (flags & FNM_PERIOD) &&
+ (string == stringstart ||
+ ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
+ return (FNM_NOMATCH);
+ ++string;
+ break;
+ case '*':
+ c = *pattern;
+ /* Collapse multiple stars. */
+ while (c == '*')
+ c = *++pattern;
+
+ if (*string == '.' && (flags & FNM_PERIOD) &&
+ (string == stringstart ||
+ ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
+ return (FNM_NOMATCH);
+
+ /* Optimize for pattern with * at end or before /. */
+ if (c == EOS)
+ if (flags & FNM_PATHNAME)
+ return ((flags & FNM_LEADING_DIR) ||
+ strchr(string, '/') == NULL ?
+ 0 : FNM_NOMATCH);
+ else
+ return (0);
+ else if (c == '/' && flags & FNM_PATHNAME) {
+ string = strchr(string, '/');
+ if (string == NULL)
+ return (FNM_NOMATCH);
+ break;
+ }
+
+ /* General case, use recursion. */
+ while ((test = *string) != EOS) {
+ if (!fnmatch(pattern, string,
+ flags & ~FNM_PERIOD))
+ return (0);
+ if (test == '/' && flags & FNM_PATHNAME)
+ break;
+ ++string;
+ }
+ return (FNM_NOMATCH);
+ case '[':
+ if (*string == EOS)
+ return (FNM_NOMATCH);
+ if (*string == '/' && flags & FNM_PATHNAME)
+ return (FNM_NOMATCH);
+ pattern = rangematch(pattern, *string, flags);
+ if (pattern == NULL)
+ return (FNM_NOMATCH);
+ ++string;
+ break;
+ case '\\':
+ if (!(flags & FNM_NOESCAPE)) {
+ c = *pattern++;
+ if (c == EOS) {
+ c = '\\';
+ --pattern;
+ }
+ }
+ /* FALLTHROUGH */
+ default:
+ if (c == *string)
+ ;
+ else if ((flags & FNM_CASEFOLD) &&
+ (tolower((unsigned char)c) ==
+ tolower((unsigned char)*string)))
+ ;
+ else if ((flags & FNM_PREFIX_DIRS) && *string == EOS &&
+ ((c == '/' && string != stringstart) ||
+ (string == stringstart+1 && *stringstart == '/')))
+ return (0);
+ else
+ return (FNM_NOMATCH);
+ string++;
+ break;
+ }
+ /* NOTREACHED */
+}
+
+static const char *
+rangematch(const char *pattern, char test, int flags)
+{
+ int negate, ok;
+ char c, c2;
+
+ /*
+ * A bracket expression starting with an unquoted circumflex
+ * character produces unspecified results (IEEE 1003.2-1992,
+ * 3.13.2). This implementation treats it like '!', for
+ * consistency with the regular expression syntax.
+ * J.T. Conklin (conklin@ngai.kaleida.com)
+ */
+ negate = (*pattern == '!' || *pattern == '^');
+ if (negate)
+ ++pattern;
+
+ if (flags & FNM_CASEFOLD)
+ test = tolower((unsigned char)test);
+
+ for (ok = 0; (c = *pattern++) != ']';) {
+ if (c == '\\' && !(flags & FNM_NOESCAPE))
+ c = *pattern++;
+ if (c == EOS)
+ return (NULL);
+
+ if (flags & FNM_CASEFOLD)
+ c = tolower((unsigned char)c);
+
+ c2 = *(pattern + 1);
+ if (*pattern == '-' && c2 != EOS && c2 != ']') {
+ pattern += 2;
+ if (c2 == '\\' && !(flags & FNM_NOESCAPE))
+ c2 = *pattern++;
+ if (c2 == EOS)
+ return (NULL);
+
+ if (flags & FNM_CASEFOLD)
+ c2 = tolower((unsigned char)c2);
+
+ if ((unsigned char)c <= (unsigned char)test &&
+ (unsigned char)test <= (unsigned char)c2)
+ ok = 1;
+ } else if (c == test)
+ ok = 1;
+ }
+ return (ok == negate ? NULL : pattern);
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/getopt.c b/src/spdk/dpdk/lib/librte_eal/windows/getopt.c
new file mode 100644
index 000000000..170c9b5e0
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/getopt.c
@@ -0,0 +1,470 @@
+/* SPDX-License-Identifier: ISC AND BSD-2-Clause
+ * Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F39502-99-1-0512.
+ */
+/*
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Dieter Baron and Thomas Klausner.
+ */
+
+#include <getopt.h>
+
+#ifdef NEED_USUAL_GETOPT
+
+#include <string.h>
+#include <stdlib.h>
+
+const char *optarg; /* argument associated with option */
+int opterr = 1; /* if error message should be printed */
+int optind = 1; /* index into parent argv vector */
+int optopt = '?'; /* character checked for validity */
+
+static void pass(void) {}
+#define warnx(a, ...) pass()
+
+#define PRINT_ERROR ((opterr) && (*options != ':'))
+
+#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */
+#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */
+#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */
+
+/* return values */
+#define BADCH ((int)'?')
+#define BADARG ((*options == ':') ? (int)':' : (int)'?')
+#define INORDER 1
+
+#define EMSG ""
+
+static const char *place = EMSG; /* option letter processing */
+
+/* XXX: set optreset to 1 rather than these two */
+static int nonopt_start = -1; /* first non option argument (for permute) */
+static int nonopt_end = -1; /* first option after non options (for permute) */
+
+/* Error messages */
+static const char recargchar[] = "option requires an argument -- %c";
+static const char recargstring[] = "option requires an argument -- %s";
+static const char ambig[] = "ambiguous option -- %.*s";
+static const char noarg[] = "option doesn't take an argument -- %.*s";
+static const char illoptchar[] = "unknown option -- %c";
+static const char illoptstring[] = "unknown option -- %s";
+
+/*
+ * Compute the greatest common divisor of a and b.
+ */
+static int
+gcd(int a, int b)
+{
+ int c;
+
+ c = a % b;
+ while (c != 0) {
+ a = b;
+ b = c;
+ c = a % b;
+ }
+
+ return (b);
+}
+
+/*
+ * Exchange the block from nonopt_start to nonopt_end with the block
+ * from nonopt_end to opt_end (keeping the same order of arguments
+ * in each block).
+ */
+static void
+permute_args(int panonopt_start, int panonopt_end, int opt_end,
+ char **nargv)
+{
+ int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos;
+ char *swap;
+
+ /*
+ * compute lengths of blocks and number and size of cycles
+ */
+ nnonopts = panonopt_end - panonopt_start;
+ nopts = opt_end - panonopt_end;
+ ncycle = gcd(nnonopts, nopts);
+ cyclelen = (opt_end - panonopt_start) / ncycle;
+
+ for (i = 0; i < ncycle; i++) {
+ cstart = panonopt_end+i;
+ pos = cstart;
+ for (j = 0; j < cyclelen; j++) {
+ if (pos >= panonopt_end)
+ pos -= nnonopts;
+ else
+ pos += nopts;
+ swap = nargv[pos];
+ /* LINTED const cast */
+ ((char **) nargv)[pos] = nargv[cstart];
+ /* LINTED const cast */
+ ((char **)nargv)[cstart] = swap;
+ }
+ }
+}
+
+/*
+ * parse_long_options --
+ * Parse long options in argc/argv argument vector.
+ * Returns -1 if short_too is set and the option does not match long_options.
+ */
+static int
+parse_long_options(char **nargv, const char *options,
+ const struct option *long_options, int *idx, int short_too)
+{
+ const char *current_argv;
+ char *has_equal;
+ size_t current_argv_len;
+ int i, match;
+
+ current_argv = place;
+ match = -1;
+
+ optind++;
+
+ has_equal = strchr(current_argv, '=');
+ if (has_equal != NULL) {
+ /* argument found (--option=arg) */
+ current_argv_len = has_equal - current_argv;
+ has_equal++;
+ } else
+ current_argv_len = strlen(current_argv);
+
+ for (i = 0; long_options[i].name; i++) {
+ /* find matching long option */
+ if (strncmp(current_argv, long_options[i].name,
+ current_argv_len))
+ continue;
+
+ if (strlen(long_options[i].name) == current_argv_len) {
+ /* exact match */
+ match = i;
+ break;
+ }
+ /*
+ * If this is a known short option, don't allow
+ * a partial match of a single character.
+ */
+ if (short_too && current_argv_len == 1)
+ continue;
+
+ if (match == -1) /* partial match */
+ match = i;
+ else {
+ /* ambiguous abbreviation */
+ if (PRINT_ERROR)
+ warnx(ambig, (int)current_argv_len,
+ current_argv);
+ optopt = 0;
+ return BADCH;
+ }
+ }
+ if (match != -1) { /* option found */
+ if (long_options[match].has_arg == no_argument
+ && has_equal) {
+ if (PRINT_ERROR)
+ warnx(noarg, (int)current_argv_len,
+ current_argv);
+ /*
+ * XXX: GNU sets optopt to val regardless of flag
+ */
+ if (long_options[match].flag == NULL)
+ optopt = long_options[match].val;
+ else
+ optopt = 0;
+ return BADARG;
+ }
+ if (long_options[match].has_arg == required_argument ||
+ long_options[match].has_arg == optional_argument) {
+ if (has_equal)
+ optarg = has_equal;
+ else if (long_options[match].has_arg ==
+ required_argument) {
+ /*
+ * optional argument doesn't use next nargv
+ */
+ optarg = nargv[optind++];
+ }
+ }
+ if ((long_options[match].has_arg == required_argument)
+ && (optarg == NULL)) {
+ /*
+ * Missing argument; leading ':' indicates no error
+ * should be generated.
+ */
+ if (PRINT_ERROR)
+ warnx(recargstring,
+ current_argv);
+ /*
+ * XXX: GNU sets optopt to val regardless of flag
+ */
+ if (long_options[match].flag == NULL)
+ optopt = long_options[match].val;
+ else
+ optopt = 0;
+ --optind;
+ return BADARG;
+ }
+ } else { /* unknown option */
+ if (short_too) {
+ --optind;
+ return (-1);
+ }
+ if (PRINT_ERROR)
+ warnx(illoptstring, current_argv);
+ optopt = 0;
+ return BADCH;
+ }
+ if (idx)
+ *idx = match;
+ if (long_options[match].flag) {
+ *long_options[match].flag = long_options[match].val;
+ return 0;
+ } else
+ return (long_options[match].val);
+}
+
+/*
+ * getopt_internal --
+ * Parse argc/argv argument vector. Called by user level routines.
+ */
+static int
+getopt_internal(int nargc, char **nargv, const char *options,
+ const struct option *long_options, int *idx, int flags)
+{
+ char *oli; /* option letter list index */
+ int optchar, short_too;
+ static int posixly_correct = -1;
+ char *buf;
+ size_t len;
+ int optreset = 0;
+
+ if (options == NULL)
+ return (-1);
+
+ /*
+ * Disable GNU extensions if POSIXLY_CORRECT is set or options
+ * string begins with a '+'.
+ */
+ if (posixly_correct == -1)
+ posixly_correct = _dupenv_s(&buf, &len, "POSIXLY_CORRECT");
+ if (!posixly_correct || *options == '+')
+ flags &= ~FLAG_PERMUTE;
+ else if (*options == '-')
+ flags |= FLAG_ALLARGS;
+ if (*options == '+' || *options == '-')
+ options++;
+ if (!posixly_correct)
+ free(buf);
+ /*
+ * reset if requested
+ */
+ if (optind == 0)
+ optind = optreset = 1;
+
+ optarg = NULL;
+ if (optreset)
+ nonopt_start = nonopt_end = -1;
+start:
+ if (optreset || !*place) { /* update scanning pointer */
+ optreset = 0;
+ if (optind >= nargc) { /* end of argument vector */
+ place = EMSG;
+ if (nonopt_end != -1) {
+ /* do permutation, if we have to */
+ permute_args(nonopt_start, nonopt_end,
+ optind, nargv);
+ optind -= nonopt_end - nonopt_start;
+ } else if (nonopt_start != -1) {
+ /*
+ * If we skipped non-options, set optind
+ * to the first of them.
+ */
+ optind = nonopt_start;
+ }
+ nonopt_start = nonopt_end = -1;
+ return (-1);
+ }
+ place = nargv[optind];
+ if (*place != '-' ||
+ (place[1] == '\0' && strchr(options, '-') == NULL)) {
+ place = EMSG; /* found non-option */
+ if (flags & FLAG_ALLARGS) {
+ /*
+ * GNU extension:
+ * return non-option as argument to option 1
+ */
+ optarg = nargv[optind++];
+ return INORDER;
+ }
+ if (!(flags & FLAG_PERMUTE)) {
+ /*
+ * If no permutation wanted, stop parsing
+ * at first non-option.
+ */
+ return (-1);
+ }
+ /* do permutation */
+ if (nonopt_start == -1)
+ nonopt_start = optind;
+ else if (nonopt_end != -1) {
+ permute_args(nonopt_start, nonopt_end,
+ optind, nargv);
+ nonopt_start = optind -
+ (nonopt_end - nonopt_start);
+ nonopt_end = -1;
+ }
+ optind++;
+ /* process next argument */
+ goto start;
+ }
+ if (nonopt_start != -1 && nonopt_end == -1)
+ nonopt_end = optind;
+
+ /*
+ * If we have "-" do nothing, if "--" we are done.
+ */
+ if (place[1] != '\0' && *++place == '-' && place[1] == '\0') {
+ optind++;
+ place = EMSG;
+ /*
+ * We found an option (--), so if we skipped
+ * non-options, we have to permute.
+ */
+ if (nonopt_end != -1) {
+ permute_args(nonopt_start, nonopt_end,
+ optind, nargv);
+ optind -= nonopt_end - nonopt_start;
+ }
+ nonopt_start = nonopt_end = -1;
+ return (-1);
+ }
+ }
+
+ /*
+ * Check long options if:
+ * 1) we were passed some
+ * 2) the arg is not just "-"
+ * 3) either the arg starts with -- we are getopt_long_only()
+ */
+ if (long_options != NULL && place != nargv[optind] &&
+ (*place == '-' || (flags & FLAG_LONGONLY))) {
+ short_too = 0;
+ if (*place == '-')
+ place++; /* --foo long option */
+ else if (*place != ':' && strchr(options, *place) != NULL)
+ short_too = 1; /* could be short option too */
+
+ optchar = parse_long_options(nargv, options, long_options,
+ idx, short_too);
+ if (optchar != -1) {
+ place = EMSG;
+ return optchar;
+ }
+ }
+
+ optchar = (int)*place++;
+ oli = strchr(options, optchar);
+ if (optchar == (int)':' ||
+ (optchar == (int)'-' && *place != '\0') ||
+ oli == NULL) {
+ /*
+ * If the user specified "-" and '-' isn't listed in
+ * options, return -1 (non-option) as per POSIX.
+ * Otherwise, it is an unknown option character (or ':').
+ */
+ if (optchar == (int)'-' && *place == '\0')
+ return (-1);
+ if (!*place)
+ ++optind;
+ if (PRINT_ERROR)
+ warnx(illoptchar, optchar);
+ optopt = optchar;
+ return BADCH;
+ }
+ if (long_options != NULL && optchar == 'W' && oli[1] == ';') {
+ /* -W long-option */
+ if (*place)
+ ;
+ else if (++optind >= nargc) { /* no arg */
+ place = EMSG;
+ if (PRINT_ERROR)
+ warnx(recargchar, optchar);
+ optopt = optchar;
+ return BADARG;
+ } /* white space */
+ place = nargv[optind];
+ optchar = parse_long_options(nargv, options, long_options,
+ idx, 0);
+ place = EMSG;
+ return optchar;
+ }
+ if (*++oli != ':') { /* doesn't take argument */
+ if (!*place)
+ ++optind;
+ } else { /* takes (optional) argument */
+ optarg = NULL;
+ if (*place) /* no white space */
+ optarg = place;
+ else if (oli[1] != ':') { /* arg not optional */
+ if (++optind >= nargc) { /* no arg */
+ place = EMSG;
+ if (PRINT_ERROR)
+ warnx(recargchar, optchar);
+ optopt = optchar;
+ return BADARG;
+ }
+ optarg = nargv[optind];
+ }
+ place = EMSG;
+ ++optind;
+ }
+ /* dump back option letter */
+ return optchar;
+}
+
+/*
+ * getopt --
+ * Parse argc/argv argument vector.
+ */
+int
+getopt(int nargc, char *nargv[], const char *options)
+{
+ return getopt_internal(nargc, nargv, options, NULL, NULL,
+ FLAG_PERMUTE);
+}
+
+/*
+ * getopt_long --
+ * Parse argc/argv argument vector.
+ */
+int
+getopt_long(int nargc, char *nargv[], const char *options,
+ const struct option *long_options, int *idx)
+{
+
+ return (getopt_internal(nargc, nargv, options, long_options, idx,
+ FLAG_PERMUTE));
+}
+
+/*
+ * getopt_long_only --
+ * Parse argc/argv argument vector.
+ */
+int
+getopt_long_only(int nargc, char *nargv[], const char *options,
+ const struct option *long_options, int *idx)
+{
+
+ return (getopt_internal(nargc, nargv, options, long_options, idx,
+ FLAG_PERMUTE|FLAG_LONGONLY));
+}
+
+#endif /* NEED_USUAL_GETOPT */
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/include/dirent.h b/src/spdk/dpdk/lib/librte_eal/windows/include/dirent.h
new file mode 100644
index 000000000..869a59837
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/include/dirent.h
@@ -0,0 +1,664 @@
+/* SPDX-License-Identifier: MIT
+ * Dirent interface for Microsoft Visual Studio
+ * Version 1.21
+ * Copyright (C) 2006-2012 Toni Ronkko
+ * https://github.com/tronkko/dirent
+ */
+
+#ifndef DIRENT_H
+#define DIRENT_H
+
+/*
+ * Include windows.h without Windows Sockets 1.1 to prevent conflicts with
+ * Windows Sockets 2.0.
+ */
+#ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+#endif
+
+#include <windows.h>
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <malloc.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+
+/* Maximum length of file name */
+#if !defined(PATH_MAX)
+# define PATH_MAX MAX_PATH
+#endif
+
+/* File type flags for d_type */
+#define DT_UNKNOWN 0
+#define DT_REG S_IFREG
+#define DT_DIR S_IFDIR
+#define DT_CHR S_IFCHR
+
+/*
+ * File type macros. Note that block devices, sockets and links cannot be
+ * distinguished on Windows and the macros S_ISBLK, S_ISSOCK and S_ISLNK are
+ * only defined for compatibility. These macros should always return false
+ * on Windows.
+ */
+#if !defined(S_ISDIR)
+# define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR)
+#endif
+#if !defined(S_ISREG)
+# define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG)
+#endif
+
+/* Wide-character version */
+struct _wdirent {
+ /* Always zero */
+ long d_ino;
+
+ /* Structure size */
+ unsigned short d_reclen;
+
+ /* Length of name without \0 */
+ size_t d_namlen;
+
+ /* File type */
+ int d_type;
+
+ /* File name */
+ wchar_t d_name[PATH_MAX];
+};
+typedef struct _wdirent _wdirent;
+
+struct _WDIR {
+ /* Current directory entry */
+ struct _wdirent ent;
+
+ /* Private file data */
+ WIN32_FIND_DATAW data;
+
+ /* True if data is valid */
+ int cached;
+
+ /* Win32 search handle */
+ HANDLE handle;
+
+ /* Initial directory name */
+ wchar_t *patt;
+};
+typedef struct _WDIR _WDIR;
+
+static _WDIR *_wopendir(const wchar_t *dirname);
+static int _wclosedir(_WDIR *dirp);
+
+/* For compatibility with Symbian */
+#define wdirent _wdirent
+#define WDIR _WDIR
+#define wopendir _wopendir
+#define wclosedir _wclosedir
+
+/* Multi-byte character versions */
+struct dirent {
+ /* Always zero */
+ long d_ino;
+
+ /* Structure size */
+ unsigned short d_reclen;
+
+ /* Length of name without \0 */
+ size_t d_namlen;
+
+ /* File type */
+ int d_type;
+
+ /* File name */
+ char d_name[PATH_MAX];
+};
+typedef struct dirent dirent;
+
+struct DIR {
+ struct dirent ent;
+ struct _WDIR *wdirp;
+};
+typedef struct DIR DIR;
+
+static DIR *opendir(const char *dirname);
+static struct dirent *readdir(DIR *dirp);
+static int closedir(DIR *dirp);
+
+/* Internal utility functions */
+static WIN32_FIND_DATAW *dirent_first(_WDIR *dirp);
+static WIN32_FIND_DATAW *dirent_next(_WDIR *dirp);
+
+static int dirent_mbstowcs_s(
+ size_t *pReturnValue,
+ wchar_t *wcstr,
+ size_t sizeInWords,
+ const char *mbstr,
+ size_t count);
+
+static int dirent_wcstombs_s(
+ size_t *pReturnValue,
+ char *mbstr,
+ size_t sizeInBytes,
+ const wchar_t *wcstr,
+ size_t count);
+
+static void dirent_set_errno(int error);
+
+/*
+ * Open directory stream DIRNAME for read and return a pointer to the
+ * internal working area that is used to retrieve individual directory
+ * entries.
+ */
+static _WDIR*
+_wopendir(const wchar_t *dirname)
+{
+ _WDIR *dirp = NULL;
+ int error;
+
+ /* Must have directory name */
+ if (dirname == NULL || dirname[0] == '\0') {
+ dirent_set_errno(ENOENT);
+ return NULL;
+ }
+
+ /* Allocate new _WDIR structure */
+ dirp = (_WDIR *)malloc(sizeof(struct _WDIR));
+ if (dirp != NULL) {
+ DWORD n;
+
+ /* Reset _WDIR structure */
+ dirp->handle = INVALID_HANDLE_VALUE;
+ dirp->patt = NULL;
+ dirp->cached = 0;
+
+ /* Compute the length of full path plus zero terminator
+ *
+ * Note that on WinRT there's no way to convert relative paths
+ * into absolute paths, so just assume its an absolute path.
+ */
+ #if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_PHONE_APP)
+ n = wcslen(dirname);
+ #else
+ n = GetFullPathNameW(dirname, 0, NULL, NULL);
+ #endif
+
+ /* Allocate room for absolute directory name and search
+ * pattern
+ */
+ dirp->patt = (wchar_t *)malloc(sizeof(wchar_t) * n + 16);
+ if (dirp->patt) {
+ /* Convert relative directory name to an
+ * absolute one. This allows rewinddir() to
+ * function correctly even when current working
+ * directory is changed between opendir()
+ * and rewinddir().
+ *
+ * Note that on WinRT there's no way to convert
+ * relative paths into absolute paths, so just
+ * assume its an absolute path.
+ */
+ #if defined(WINAPI_FAMILY) && \
+ (WINAPI_FAMILY == WINAPI_FAMILY_PHONE_APP)
+ wcsncpy_s(dirp->patt, n + 1, dirname, n);
+ #else
+ n = GetFullPathNameW(dirname, n, dirp->patt, NULL);
+ #endif
+ if (n > 0) {
+ wchar_t *p;
+
+ /* Append search pattern \* to the directory
+ * name
+ */
+ p = dirp->patt + n;
+ if (dirp->patt < p) {
+ switch (p[-1]) {
+ case '\\':
+ case '/':
+ case ':':
+ /* Directory ends in path separator,
+ * e.g.c:\temp\
+ */
+ /*NOP*/;
+ break;
+
+ default:
+ /* Directory name doesn't end in path
+ * separator
+ */
+ *p++ = '\\';
+ }
+ }
+ *p++ = '*';
+ *p = '\0';
+
+ /* Open directory stream and retrieve the first
+ * entry
+ */
+ if (dirent_first(dirp)) {
+ /* Directory stream opened successfully */
+ error = 0;
+ } else {
+ /* Cannot retrieve first entry */
+ error = 1;
+ dirent_set_errno(ENOENT);
+ }
+
+ } else {
+ /* Cannot retrieve full path name */
+ dirent_set_errno(ENOENT);
+ error = 1;
+ }
+
+ } else {
+ /* Cannot allocate memory for search pattern */
+ error = 1;
+ }
+
+ } else {
+ /* Cannot allocate _WDIR structure */
+ error = 1;
+ }
+
+ /* Clean up in case of error */
+ if (error && dirp) {
+ _wclosedir(dirp);
+ dirp = NULL;
+ }
+
+ return dirp;
+}
+
+/*
+ * Close directory stream opened by opendir() function.
+ * This invalidates the DIR structure as well as any directory
+ * entry read previously by _wreaddir().
+ */
+static int
+_wclosedir(_WDIR *dirp)
+{
+ int ok;
+ if (dirp) {
+
+ /* Release search handle */
+ if (dirp->handle != INVALID_HANDLE_VALUE) {
+ FindClose(dirp->handle);
+ dirp->handle = INVALID_HANDLE_VALUE;
+ }
+
+ /* Release search pattern */
+ if (dirp->patt) {
+ free(dirp->patt);
+ dirp->patt = NULL;
+ }
+
+ /* Release directory structure */
+ free(dirp);
+ ok = /*success*/0;
+
+ } else {
+ /* Invalid directory stream */
+ dirent_set_errno(EBADF);
+ ok = /*failure*/-1;
+ }
+ return ok;
+}
+
+/* Get first directory entry (internal) */
+static WIN32_FIND_DATAW*
+dirent_first(_WDIR *dirp)
+{
+ WIN32_FIND_DATAW *datap;
+
+ /* Open directory and retrieve the first entry */
+ dirp->handle = FindFirstFileExW(
+ dirp->patt, FindExInfoStandard, &dirp->data,
+ FindExSearchNameMatch, NULL, 0);
+ if (dirp->handle != INVALID_HANDLE_VALUE) {
+
+ /* a directory entry is now waiting in memory */
+ datap = &dirp->data;
+ dirp->cached = 1;
+
+ } else {
+
+ /* Failed to re-open directory: no directory entry in memory */
+ dirp->cached = 0;
+ datap = NULL;
+
+ }
+ return datap;
+}
+
+/* Get next directory entry (internal) */
+static WIN32_FIND_DATAW*
+dirent_next(_WDIR *dirp)
+{
+ WIN32_FIND_DATAW *p;
+
+ /* Get next directory entry */
+ if (dirp->cached != 0) {
+
+ /* A valid directory entry already in memory */
+ p = &dirp->data;
+ dirp->cached = 0;
+
+ } else if (dirp->handle != INVALID_HANDLE_VALUE) {
+
+ /* Get the next directory entry from stream */
+ if (FindNextFileW(dirp->handle, &dirp->data) != FALSE) {
+ /* Got a file */
+ p = &dirp->data;
+ } else {
+ /* The very last entry has been processed
+ *or an error occurred
+ */
+ FindClose(dirp->handle);
+ dirp->handle = INVALID_HANDLE_VALUE;
+ p = NULL;
+ }
+
+ } else {
+
+ /* End of directory stream reached */
+ p = NULL;
+
+ }
+
+ return p;
+}
+
+/*
+ * Open directory stream using plain old C-string.
+ */
+static DIR*
+opendir(const char *dirname)
+{
+ struct DIR *dirp;
+ int error;
+
+ /* Must have directory name */
+ if (dirname == NULL || dirname[0] == '\0') {
+ dirent_set_errno(ENOENT);
+ return NULL;
+ }
+
+ /* Allocate memory for DIR structure */
+ dirp = (DIR *)malloc(sizeof(struct DIR));
+ if (dirp) {
+ wchar_t wname[PATH_MAX];
+ size_t n;
+
+ /* Convert directory name to wide-character string */
+ error = dirent_mbstowcs_s(&n, wname, PATH_MAX,
+ dirname, PATH_MAX);
+ if (!error) {
+
+ /* Open directory stream using wide-character name */
+ dirp->wdirp = _wopendir(wname);
+ if (dirp->wdirp) {
+ /* Directory stream opened */
+ error = 0;
+ } else {
+ /* Failed to open directory stream */
+ error = 1;
+ }
+
+ } else {
+ /*
+ * Cannot convert file name to wide-character string.
+ * This occurs if the string contains invalid multi-byte
+ * sequences or the output buffer is too small to
+ * contain the resulting string.
+ */
+ error = 1;
+ }
+
+ } else {
+ /* Cannot allocate DIR structure */
+ error = 1;
+ }
+
+ /* Clean up in case of error */
+ if (error && dirp) {
+ free(dirp);
+ dirp = NULL;
+ }
+
+ return dirp;
+}
+
+/*
+ * Read next directory entry.
+ *
+ * When working with text consoles, please note that file names
+ * returned by readdir() are represented in the default ANSI code
+ * page while any output toconsole is typically formatted on another
+ * code page. Thus, non-ASCII characters in file names will not usually
+ * display correctly on console. The problem can be fixed in two ways:
+ * (1) change the character set of console to 1252 using chcp utility
+ * and use Lucida Console font, or (2) use _cprintf function when
+ * writing to console. The _cprinf() will re-encode ANSI strings to the
+ * console code page so many non-ASCII characters will display correctly.
+ */
+static struct dirent*
+readdir(DIR *dirp)
+{
+ WIN32_FIND_DATAW *datap;
+ struct dirent *entp;
+
+ /* Read next directory entry */
+ datap = dirent_next(dirp->wdirp);
+ if (datap) {
+ size_t n;
+ int error;
+
+ /* Attempt to convert file name to multi-byte string */
+ error = dirent_wcstombs_s(&n, dirp->ent.d_name,
+ PATH_MAX, datap->cFileName, PATH_MAX);
+
+ /*
+ * If the file name cannot be represented by a multi-byte
+ * string, then attempt to use old 8+3 file name.
+ * This allows traditional Unix-code to access some file
+ * names despite of unicode characters, although file names
+ * may seem unfamiliar to the user.
+ *
+ * Be ware that the code below cannot come up with a short
+ * file name unless the file system provides one. At least
+ * VirtualBox shared folders fail to do this.
+ */
+ if (error && datap->cAlternateFileName[0] != '\0') {
+ error = dirent_wcstombs_s(
+ &n, dirp->ent.d_name, PATH_MAX,
+ datap->cAlternateFileName, PATH_MAX);
+ }
+
+ if (!error) {
+ DWORD attr;
+
+ /* Initialize directory entry for return */
+ entp = &dirp->ent;
+
+ /* Length of file name excluding zero terminator */
+ entp->d_namlen = n - 1;
+
+ /* File attributes */
+ attr = datap->dwFileAttributes;
+ if ((attr & FILE_ATTRIBUTE_DEVICE) != 0)
+ entp->d_type = DT_CHR;
+ else if ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0)
+ entp->d_type = DT_DIR;
+ else
+ entp->d_type = DT_REG;
+
+ /* Reset dummy fields */
+ entp->d_ino = 0;
+ entp->d_reclen = sizeof(struct dirent);
+
+ } else {
+ /*
+ * Cannot convert file name to multi-byte string so
+ * construct an erroneous directory entry and return
+ * that. Note that we cannot return NULL as that would
+ * stop the processing of directory entries completely.
+ */
+ entp = &dirp->ent;
+ entp->d_name[0] = '?';
+ entp->d_name[1] = '\0';
+ entp->d_namlen = 1;
+ entp->d_type = DT_UNKNOWN;
+ entp->d_ino = 0;
+ entp->d_reclen = 0;
+ }
+
+ } else {
+ /* No more directory entries */
+ entp = NULL;
+ }
+
+ return entp;
+}
+
+/*
+ * Close directory stream.
+ */
+static int
+closedir(DIR *dirp)
+{
+ int ok;
+ if (dirp) {
+
+ /* Close wide-character directory stream */
+ ok = _wclosedir(dirp->wdirp);
+ dirp->wdirp = NULL;
+
+ /* Release multi-byte character version */
+ free(dirp);
+
+ } else {
+
+ /* Invalid directory stream */
+ dirent_set_errno(EBADF);
+ ok = /*failure*/-1;
+
+ }
+ return ok;
+}
+
+/* Convert multi-byte string to wide character string */
+static int
+dirent_mbstowcs_s(
+ size_t *pReturnValue,
+ wchar_t *wcstr,
+ size_t sizeInWords,
+ const char *mbstr,
+ size_t count)
+{
+ int error;
+
+ #if defined(_MSC_VER) && _MSC_VER >= 1400
+ /* Microsoft Visual Studio 2005 or later */
+ error = mbstowcs_s(pReturnValue, wcstr,
+ sizeInWords, mbstr, count);
+ #else
+
+ /* Older Visual Studio or non-Microsoft compiler */
+ size_t n;
+
+ /* Convert to wide-character string (or count characters) */
+ n = mbstowcs(wcstr, mbstr, sizeInWords);
+ if (!wcstr || n < count) {
+
+ /* Zero-terminate output buffer */
+ if (wcstr && sizeInWords) {
+ if (n >= sizeInWords)
+ n = sizeInWords - 1;
+ wcstr[n] = 0;
+ }
+
+ /* Length of resuting multi-byte string WITH zero
+ *terminator
+ */
+ if (pReturnValue)
+ *pReturnValue = n + 1;
+
+ /* Success */
+ error = 0;
+
+ } else {
+
+ /* Could not convert string */
+ error = 1;
+
+ }
+ #endif
+
+ return error;
+}
+
+/* Convert wide-character string to multi-byte string */
+static int
+dirent_wcstombs_s(
+ size_t *pReturnValue,
+ char *mbstr,
+ size_t sizeInBytes, /* max size of mbstr */
+ const wchar_t *wcstr,
+ size_t count)
+{
+ int error;
+
+ #if defined(_MSC_VER) && _MSC_VER >= 1400
+ /* Microsoft Visual Studio 2005 or later */
+ error = wcstombs_s(pReturnValue, mbstr, sizeInBytes, wcstr, count);
+ #else
+ /* Older Visual Studio or non-Microsoft compiler */
+ size_t n;
+
+ /* Convert to multi-byte string
+ * (or count the number of bytes needed)
+ */
+ n = wcstombs(mbstr, wcstr, sizeInBytes);
+ if (!mbstr || n < count) {
+ /* Zero-terminate output buffer */
+ if (mbstr && sizeInBytes) {
+ if (n >= sizeInBytes)
+ n = sizeInBytes - 1;
+ mbstr[n] = '\0';
+ }
+ /* Length of resulting multi-bytes string WITH
+ *zero-terminator
+ */
+ if (pReturnValue)
+ *pReturnValue = n + 1;
+ /* Success */
+ error = 0;
+ } else {
+ /* Cannot convert string */
+ error = 1;
+ }
+ #endif
+
+ return error;
+}
+
+/* Set errno variable */
+static void
+dirent_set_errno(int error)
+{
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+ /* Microsoft Visual Studio 2005 and later */
+ _set_errno(error);
+#else
+
+ /* Non-Microsoft compiler or older Microsoft compiler */
+ errno = error;
+#endif
+}
+
+#ifdef __cplusplus
+}
+#endif
+#endif /*DIRENT_H*/
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/include/fnmatch.h b/src/spdk/dpdk/lib/librte_eal/windows/include/fnmatch.h
new file mode 100644
index 000000000..142753c35
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/include/fnmatch.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _FNMATCH_H_
+#define _FNMATCH_H_
+
+/**
+ * This file is required to support the common code in eal_common_log.c
+ * as Microsoft libc does not contain fnmatch.h. This may be removed in
+ * future releases.
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+
+#define FNM_NOMATCH 1
+
+#define FNM_NOESCAPE 0x01
+#define FNM_PATHNAME 0x02
+#define FNM_PERIOD 0x04
+#define FNM_LEADING_DIR 0x08
+#define FNM_CASEFOLD 0x10
+#define FNM_PREFIX_DIRS 0x20
+
+/**
+ * This function is used for searhing a given string source
+ * with the given regular expression pattern.
+ *
+ * @param pattern
+ * regular expression notation decribing the pattern to match
+ *
+ * @param string
+ * source string to searcg for the pattern
+ *
+ * @param flag
+ * containing information about the pattern
+ *
+ * @return
+ * if the pattern is found then return 0 or else FNM_NOMATCH
+ */
+int fnmatch(const char *pattern, const char *string, int flags);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _FNMATCH_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/include/getopt.h b/src/spdk/dpdk/lib/librte_eal/windows/include/getopt.h
new file mode 100644
index 000000000..6f57af454
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/include/getopt.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: BSD-2-Clause
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Dieter Baron and Thomas Klausner.
+ */
+
+/**
+ * @file
+ * getopt compat.
+ *
+ * This module provides getopt() and getopt_long().
+ */
+
+#ifndef _USUAL_GETOPT_H_
+#define _USUAL_GETOPT_H_
+
+#ifndef NEED_USUAL_GETOPT
+#if !defined(HAVE_GETOPT_H) || !defined(HAVE_GETOPT) || \
+ !defined(HAVE_GETOPT_LONG)
+#define NEED_USUAL_GETOPT
+#endif
+#endif
+
+#ifndef NEED_USUAL_GETOPT
+
+/* Use system getopt */
+#ifdef RTE_TOOLCHAIN_GCC
+#include_next <getopt.h>
+#else
+#include <getopt.h>
+#endif
+
+#else /* NEED_USUAL_GETOPT */
+
+/* avoid name collision */
+#define optarg usual_optarg
+#define opterr usual_opterr
+#define optind usual_optind
+#define optopt usual_optopt
+#define getopt(a, b, c) usual_getopt(a, b, c)
+#define getopt_long(a, b, c, d, e) usual_getopt_long(a, b, c, d, e)
+
+
+/** argument to current option, or NULL if it has none */
+extern const char *optarg;
+/** Current position in arg string. Starts from 1.
+ * Setting to 0 resets state.
+ */
+extern int optind;
+/** whether getopt() should print error messages on problems. Default: 1. */
+extern int opterr;
+/** Option char which caused error */
+extern int optopt;
+
+/** long option takes no argument */
+#define no_argument 0
+/** long option requires argument */
+#define required_argument 1
+/** long option has optional argument */
+#define optional_argument 2
+
+/** Long option description */
+struct option {
+ /** name of long option */
+ const char *name;
+
+ /**
+ * whether option takes an argument.
+ * One of no_argument, required_argument, and optional_argument.
+ */
+ int has_arg;
+
+ /** if not NULL, set *flag to val when option found */
+ int *flag;
+
+ /** if flag not NULL, value to set *flag to; else return value */
+ int val;
+};
+
+/** Compat: getopt */
+int getopt(int argc, char *argv[], const char *options);
+
+/** Compat: getopt_long */
+int getopt_long(int argc, char *argv[], const char *options,
+ const struct option *longopts, int *longindex);
+
+/** Compat: getopt_long_only */
+int getopt_long_only(int nargc, char *argv[], const char *options,
+ const struct option *long_options, int *idx);
+
+
+#endif /* NEED_USUAL_GETOPT */
+
+#endif /* !_USUAL_GETOPT_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/include/meson.build b/src/spdk/dpdk/lib/librte_eal/windows/include/meson.build
new file mode 100644
index 000000000..5fb1962ac
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/include/meson.build
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2020 Mellanox Technologies, Ltd
+
+includes += include_directories('.')
+
+headers += files(
+ 'rte_os.h',
+ 'rte_windows.h',
+)
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/include/pthread.h b/src/spdk/dpdk/lib/librte_eal/windows/include/pthread.h
new file mode 100644
index 000000000..0bbed5c3b
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/include/pthread.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _PTHREAD_H_
+#define _PTHREAD_H_
+
+#include <stdint.h>
+
+/**
+ * This file is required to support the common code in eal_common_proc.c,
+ * eal_common_thread.c and common\include\rte_per_lcore.h as Microsoft libc
+ * does not contain pthread.h. This may be removed in future releases.
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <windows.h>
+#include <rte_common.h>
+
+#define PTHREAD_BARRIER_SERIAL_THREAD TRUE
+
+/* defining pthread_t type on Windows since there is no in Microsoft libc*/
+typedef uintptr_t pthread_t;
+
+/* defining pthread_attr_t type on Windows since there is no in Microsoft libc*/
+typedef void *pthread_attr_t;
+
+typedef SYNCHRONIZATION_BARRIER pthread_barrier_t;
+
+#define pthread_barrier_init(barrier, attr, count) \
+ InitializeSynchronizationBarrier(barrier, count, -1)
+#define pthread_barrier_wait(barrier) EnterSynchronizationBarrier(barrier, \
+ SYNCHRONIZATION_BARRIER_FLAGS_BLOCK_ONLY)
+#define pthread_barrier_destroy(barrier) \
+ DeleteSynchronizationBarrier(barrier)
+#define pthread_cancel(thread) TerminateThread((HANDLE) thread, 0)
+
+/* pthread function overrides */
+#define pthread_self() \
+ ((pthread_t)GetCurrentThreadId())
+#define pthread_setaffinity_np(thread, size, cpuset) \
+ eal_set_thread_affinity_mask(thread, (unsigned long *) cpuset)
+#define pthread_getaffinity_np(thread, size, cpuset) \
+ eal_get_thread_affinity_mask(thread, (unsigned long *) cpuset)
+#define pthread_create(threadid, threadattr, threadfunc, args) \
+ eal_create_thread(threadid, threadfunc, args)
+
+static inline int
+eal_set_thread_affinity_mask(pthread_t threadid, unsigned long *cpuset)
+{
+ SetThreadAffinityMask((HANDLE) threadid, *cpuset);
+ return 0;
+}
+
+static inline int
+eal_get_thread_affinity_mask(pthread_t threadid, unsigned long *cpuset)
+{
+ /* Workaround for the lack of a GetThreadAffinityMask()
+ *API in Windows
+ */
+ /* obtain previous mask by setting dummy mask */
+ DWORD dwprevaffinitymask =
+ SetThreadAffinityMask((HANDLE) threadid, 0x1);
+ /* set it back! */
+ SetThreadAffinityMask((HANDLE) threadid, dwprevaffinitymask);
+ *cpuset = dwprevaffinitymask;
+ return 0;
+}
+
+static inline int
+eal_create_thread(void *threadid, void *threadfunc, void *args)
+{
+ HANDLE hThread;
+ hThread = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)threadfunc,
+ args, 0, (LPDWORD)threadid);
+ if (hThread) {
+ SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);
+ SetThreadPriority(hThread, THREAD_PRIORITY_TIME_CRITICAL);
+ }
+ return ((hThread != NULL) ? 0 : E_FAIL);
+}
+
+static inline int
+pthread_join(__rte_unused pthread_t thread,
+ __rte_unused void **value_ptr)
+{
+ return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PTHREAD_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/include/regex.h b/src/spdk/dpdk/lib/librte_eal/windows/include/regex.h
new file mode 100644
index 000000000..827f93841
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/include/regex.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _REGEX_H_
+#define _REGEX_H_
+
+/**
+ * This file is required to support the common code in eal_common_log.c
+ * as Microsoft libc does not contain regex.h. This may be removed in
+ * future releases.
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define REG_NOMATCH 1
+#define REG_ESPACE 12
+
+#include <rte_common.h>
+
+/* defining regex_t for Windows */
+typedef void *regex_t;
+/* defining regmatch_t for Windows */
+typedef void *regmatch_t;
+
+/**
+ * The regcomp() function will compile the regular expression
+ * contained in the string pointed to by the pattern argument
+ * and place the results in the structure pointed to by preg.
+ * The cflags argument is the bitwise inclusive OR of zero or
+ * more of the flags
+ */
+static inline int regcomp(__rte_unused regex_t *preg,
+ __rte_unused const char *regex, __rte_unused int cflags)
+{
+ /* TODO */
+ /* This is a stub, not the expected result */
+ return REG_ESPACE;
+}
+
+/**
+ * The regexec() function compares the null-terminated string
+ * specified by string with the compiled regular expression
+ * preg initialised by a previous call to regcomp(). If it finds
+ * a match, regexec() returns 0; otherwise it returns non-zero
+ * indicating either no match or an error. The eflags argument
+ * is the bitwise inclusive OR of zero or more of the flags.
+ */
+static inline int regexec(__rte_unused const regex_t *preg,
+ __rte_unused const char *string, __rte_unused size_t nmatch,
+ __rte_unused regmatch_t pmatch[], __rte_unused int eflags)
+{
+ /* TODO */
+ /* This is a stub, not the expected result */
+ return REG_NOMATCH;
+}
+
+/**
+ * The regerror() function provides a mapping from error codes
+ * returned by regcomp() and regexec() to unspecified printable strings.
+ */
+static inline size_t regerror(__rte_unused int errcode,
+ __rte_unused const regex_t *preg, char *errbuf,
+ __rte_unused size_t errbuf_size)
+{
+ /* TODO */
+ /* This is a stub, not the expected result */
+ if (errbuf) {
+ *errbuf = '\0';
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * The regfree() function frees any memory allocated by regcomp()
+ * associated with preg.
+ */
+static inline void regfree(__rte_unused regex_t *preg)
+{
+ /* TODO */
+ /* This is a stub, not the expected result */
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _REGEX_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/include/rte_os.h b/src/spdk/dpdk/lib/librte_eal/windows/include/rte_os.h
new file mode 100644
index 000000000..510e39e03
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/include/rte_os.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2019 Intel Corporation
+ */
+
+#ifndef _RTE_OS_H_
+#define _RTE_OS_H_
+
+/**
+ * This is header should contain any function/macro definition
+ * which are not supported natively or named differently in the
+ * Windows OS. It must not include Windows-specific headers.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* limits.h replacement, value as in <windows.h> */
+#ifndef PATH_MAX
+#define PATH_MAX _MAX_PATH
+#endif
+
+#define strerror_r(a, b, c) strerror_s(b, c, a)
+
+/* strdup is deprecated in Microsoft libc and _strdup is preferred */
+#define strdup(str) _strdup(str)
+
+#define strtok_r(str, delim, saveptr) strtok_s(str, delim, saveptr)
+
+#define index(a, b) strchr(a, b)
+#define rindex(a, b) strrchr(a, b)
+
+#define strncasecmp(s1, s2, count) _strnicmp(s1, s2, count)
+
+/* cpu_set macros implementation */
+#define RTE_CPU_AND(dst, src1, src2) CPU_AND(dst, src1, src2)
+#define RTE_CPU_OR(dst, src1, src2) CPU_OR(dst, src1, src2)
+#define RTE_CPU_FILL(set) CPU_FILL(set)
+#define RTE_CPU_NOT(dst, src) CPU_NOT(dst, src)
+
+/* as in <windows.h> */
+typedef long long ssize_t;
+
+#ifndef RTE_TOOLCHAIN_GCC
+static inline int
+asprintf(char **buffer, const char *format, ...)
+{
+ int size, ret;
+ va_list arg;
+
+ va_start(arg, format);
+ size = vsnprintf(NULL, 0, format, arg);
+ va_end(arg);
+ if (size < 0)
+ return -1;
+ size++;
+
+ *buffer = malloc(size);
+ if (*buffer == NULL)
+ return -1;
+
+ va_start(arg, format);
+ ret = vsnprintf(*buffer, size, format, arg);
+ va_end(arg);
+ if (ret != size - 1) {
+ free(*buffer);
+ return -1;
+ }
+ return ret;
+}
+#endif /* RTE_TOOLCHAIN_GCC */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_OS_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/include/rte_windows.h b/src/spdk/dpdk/lib/librte_eal/windows/include/rte_windows.h
new file mode 100644
index 000000000..ed6e4c148
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/include/rte_windows.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2020 Dmitry Kozlyuk
+ */
+
+#ifndef _RTE_WINDOWS_H_
+#define _RTE_WINDOWS_H_
+
+/**
+ * @file Windows-specific facilities
+ *
+ * This file should be included by DPDK libraries and applications
+ * that need access to Windows API. It includes platform SDK headers
+ * in compatible order with proper options and defines error-handling macros.
+ */
+
+/* Disable excessive libraries. */
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+
+/* Must come first. */
+#include <windows.h>
+
+#include <basetsd.h>
+#include <psapi.h>
+
+/* Have GUIDs defined. */
+#ifndef INITGUID
+#define INITGUID
+#endif
+#include <initguid.h>
+
+/**
+ * Log GetLastError() with context, usually a Win32 API function and arguments.
+ */
+#define RTE_LOG_WIN32_ERR(...) \
+ RTE_LOG(DEBUG, EAL, RTE_FMT("GetLastError()=%lu: " \
+ RTE_FMT_HEAD(__VA_ARGS__,) "\n", GetLastError(), \
+ RTE_FMT_TAIL(__VA_ARGS__,)))
+
+#endif /* _RTE_WINDOWS_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/include/sched.h b/src/spdk/dpdk/lib/librte_eal/windows/include/sched.h
new file mode 100644
index 000000000..fbe07f742
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/include/sched.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _SCHED_H_
+#define _SCHED_H_
+
+/**
+ * This file is added to support the common code in eal_common_thread.c
+ * as Microsoft libc does not contain sched.h. This may be removed
+ * in future releases.
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef CPU_SETSIZE
+#define CPU_SETSIZE RTE_MAX_LCORE
+#endif
+
+#define _BITS_PER_SET (sizeof(long long) * 8)
+#define _BIT_SET_MASK (_BITS_PER_SET - 1)
+
+#define _NUM_SETS(b) (((b) + _BIT_SET_MASK) / _BITS_PER_SET)
+#define _WHICH_SET(b) ((b) / _BITS_PER_SET)
+#define _WHICH_BIT(b) ((b) & (_BITS_PER_SET - 1))
+
+typedef struct _rte_cpuset_s {
+ long long _bits[_NUM_SETS(CPU_SETSIZE)];
+} rte_cpuset_t;
+
+#define CPU_SET(b, s) ((s)->_bits[_WHICH_SET(b)] |= (1LL << _WHICH_BIT(b)))
+
+#define CPU_ZERO(s) \
+ do { \
+ unsigned int _i; \
+ \
+ for (_i = 0; _i < _NUM_SETS(CPU_SETSIZE); _i++) \
+ (s)->_bits[_i] = 0LL; \
+ } while (0)
+
+#define CPU_ISSET(b, s) (((s)->_bits[_WHICH_SET(b)] & \
+ (1LL << _WHICH_BIT(b))) != 0LL)
+
+static inline int
+count_cpu(rte_cpuset_t *s)
+{
+ unsigned int _i;
+ int count = 0;
+
+ for (_i = 0; _i < _NUM_SETS(CPU_SETSIZE); _i++)
+ if (CPU_ISSET(_i, s) != 0LL)
+ count++;
+ return count;
+}
+#define CPU_COUNT(s) count_cpu(s)
+
+#define CPU_AND(dst, src1, src2) \
+do { \
+ unsigned int _i; \
+ \
+ for (_i = 0; _i < _NUM_SETS(CPU_SETSIZE); _i++) \
+ (dst)->_bits[_i] = (src1)->_bits[_i] & (src2)->_bits[_i]; \
+} while (0)
+
+#define CPU_OR(dst, src1, src2) \
+do { \
+ unsigned int _i; \
+ \
+ for (_i = 0; _i < _NUM_SETS(CPU_SETSIZE); _i++) \
+ (dst)->_bits[_i] = (src1)->_bits[_i] | (src2)->_bits[_i]; \
+} while (0)
+
+#define CPU_FILL(s) \
+do { \
+ unsigned int _i; \
+ for (_i = 0; _i < _NUM_SETS(CPU_SETSIZE); _i++) \
+ (s)->_bits[_i] = -1LL; \
+} while (0)
+
+#define CPU_NOT(dst, src) \
+do { \
+ unsigned int _i; \
+ for (_i = 0; _i < _NUM_SETS(CPU_SETSIZE); _i++) \
+ (dst)->_bits[_i] = (src)->_bits[_i] ^ -1LL; \
+} while (0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SCHED_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/include/sys/queue.h b/src/spdk/dpdk/lib/librte_eal/windows/include/sys/queue.h
new file mode 100644
index 000000000..a65949a78
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/include/sys/queue.h
@@ -0,0 +1,302 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ */
+
+#ifndef _SYS_QUEUE_H_
+#define _SYS_QUEUE_H_
+
+/*
+ * This file defines tail queues.
+ *
+ * A tail queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or
+ * after an existing element, at the head of the list, or at the end of
+ * the list. A tail queue may be traversed in either direction.
+ *
+ * Below is a summary of implemented functions where:
+ * + means the macro is available
+ * - means the macro is not available
+ * s means the macro is available but is slow (runs in O(n) time)
+ *
+ * TAILQ
+ * _HEAD +
+ * _CLASS_HEAD +
+ * _HEAD_INITIALIZER +
+ * _ENTRY +
+ * _CLASS_ENTRY +
+ * _INIT +
+ * _EMPTY +
+ * _FIRST +
+ * _NEXT +
+ * _PREV +
+ * _LAST +
+ * _LAST_FAST +
+ * _FOREACH +
+ * _FOREACH_FROM +
+ * _FOREACH_SAFE +
+ * _FOREACH_FROM_SAFE +
+ * _FOREACH_REVERSE +
+ * _FOREACH_REVERSE_FROM +
+ * _FOREACH_REVERSE_SAFE +
+ * _FOREACH_REVERSE_FROM_SAFE +
+ * _INSERT_HEAD +
+ * _INSERT_BEFORE +
+ * _INSERT_AFTER +
+ * _INSERT_TAIL +
+ * _CONCAT +
+ * _REMOVE_AFTER -
+ * _REMOVE_HEAD -
+ * _REMOVE +
+ * _SWAP +
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * List definitions.
+ */
+#define LIST_HEAD(name, type) \
+struct name { \
+ struct type *lh_first; /* first element */ \
+}
+
+#define QMD_TRACE_ELEM(elem)
+#define QMD_TRACE_HEAD(head)
+#define TRACEBUF
+#define TRACEBUF_INITIALIZER
+
+#define TRASHIT(x)
+#define QMD_IS_TRASHED(x) 0
+
+#define QMD_SAVELINK(name, link)
+
+#ifdef __cplusplus
+/*
+ * In C++ there can be structure lists and class lists:
+ */
+#define QUEUE_TYPEOF(type) type
+#else
+#define QUEUE_TYPEOF(type) struct type
+#endif
+
+/*
+ * Tail queue declarations.
+ */
+#define TAILQ_HEAD(name, type) \
+struct name { \
+ struct type *tqh_first; /* first element */ \
+ struct type **tqh_last; /* addr of last next element */ \
+ TRACEBUF \
+}
+
+#define TAILQ_CLASS_HEAD(name, type) \
+struct name { \
+ class type *tqh_first; /* first element */ \
+ class type **tqh_last; /* addr of last next element */ \
+ TRACEBUF \
+}
+
+#define TAILQ_HEAD_INITIALIZER(head) \
+ { NULL, &(head).tqh_first, TRACEBUF_INITIALIZER }
+
+#define TAILQ_ENTRY(type) \
+struct { \
+ struct type *tqe_next; /* next element */ \
+ struct type **tqe_prev; /* address of previous next element */ \
+ TRACEBUF \
+}
+
+#define TAILQ_CLASS_ENTRY(type) \
+struct { \
+ class type *tqe_next; /* next element */ \
+ class type **tqe_prev; /* address of previous next element */ \
+ TRACEBUF \
+}
+
+/*
+ * Tail queue functions.
+ */
+#define QMD_TAILQ_CHECK_HEAD(head, field)
+#define QMD_TAILQ_CHECK_TAIL(head, headname)
+#define QMD_TAILQ_CHECK_NEXT(elm, field)
+#define QMD_TAILQ_CHECK_PREV(elm, field)
+
+#define TAILQ_CONCAT(head1, head2, field) do { \
+ if (!TAILQ_EMPTY(head2)) { \
+ *(head1)->tqh_last = (head2)->tqh_first; \
+ (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \
+ (head1)->tqh_last = (head2)->tqh_last; \
+ TAILQ_INIT((head2)); \
+ QMD_TRACE_HEAD(head1); \
+ QMD_TRACE_HEAD(head2); \
+ } \
+} while (0)
+
+#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL)
+
+#define TAILQ_FIRST(head) ((head)->tqh_first)
+
+#define TAILQ_FOREACH(var, head, field) \
+ for ((var) = TAILQ_FIRST((head)); \
+ (var); \
+ (var) = TAILQ_NEXT((var), field))
+
+#define TAILQ_FOREACH_FROM(var, head, field) \
+ for ((var) = ((var) ? (var) : TAILQ_FIRST((head))); \
+ (var); \
+ (var) = TAILQ_NEXT((var), field))
+
+#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = TAILQ_FIRST((head)); \
+ (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define TAILQ_FOREACH_FROM_SAFE(var, head, field, tvar) \
+ for ((var) = ((var) ? (var) : TAILQ_FIRST((head))); \
+ (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \
+ for ((var) = TAILQ_LAST((head), headname); \
+ (var); \
+ (var) = TAILQ_PREV((var), headname, field))
+
+#define TAILQ_FOREACH_REVERSE_FROM(var, head, headname, field) \
+ for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname)); \
+ (var); \
+ (var) = TAILQ_PREV((var), headname, field))
+
+#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \
+ for ((var) = TAILQ_LAST((head), headname); \
+ (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \
+ (var) = (tvar))
+
+#define TAILQ_FOREACH_REVERSE_FROM_SAFE(var, head, headname, field, tvar) \
+ for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname)); \
+ (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \
+ (var) = (tvar))
+
+#define TAILQ_INIT(head) do { \
+ TAILQ_FIRST((head)) = NULL; \
+ (head)->tqh_last = &TAILQ_FIRST((head)); \
+ QMD_TRACE_HEAD(head); \
+} while (0)
+
+#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ QMD_TAILQ_CHECK_NEXT(listelm, field); \
+ TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field); \
+ if (TAILQ_NEXT((listelm), field) != NULL) \
+ TAILQ_NEXT((elm), field)->field.tqe_prev = \
+ &TAILQ_NEXT((elm), field); \
+ else { \
+ (head)->tqh_last = &TAILQ_NEXT((elm), field); \
+ QMD_TRACE_HEAD(head); \
+ } \
+ TAILQ_NEXT((listelm), field) = (elm); \
+ (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+ QMD_TRACE_ELEM(&(listelm)->field); \
+} while (0)
+
+#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
+ QMD_TAILQ_CHECK_PREV(listelm, field); \
+ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
+ TAILQ_NEXT((elm), field) = (listelm); \
+ *(listelm)->field.tqe_prev = (elm); \
+ (listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+ QMD_TRACE_ELEM(&(listelm)->field); \
+} while (0)
+
+#define TAILQ_INSERT_HEAD(head, elm, field) do { \
+ QMD_TAILQ_CHECK_HEAD(head, field); \
+ TAILQ_NEXT((elm), field) = TAILQ_FIRST((head)); \
+ if (TAILQ_FIRST((head)) != NULL) \
+ TAILQ_FIRST((head))->field.tqe_prev = \
+ &TAILQ_NEXT((elm), field); \
+ else \
+ (head)->tqh_last = &TAILQ_NEXT((elm), field); \
+ TAILQ_FIRST((head)) = (elm); \
+ (elm)->field.tqe_prev = &TAILQ_FIRST((head)); \
+ QMD_TRACE_HEAD(head); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+} while (0)
+
+#define TAILQ_INSERT_TAIL(head, elm, field) do { \
+ QMD_TAILQ_CHECK_TAIL(head, field); \
+ TAILQ_NEXT((elm), field) = NULL; \
+ (elm)->field.tqe_prev = (head)->tqh_last; \
+ *(head)->tqh_last = (elm); \
+ (head)->tqh_last = &TAILQ_NEXT((elm), field); \
+ QMD_TRACE_HEAD(head); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+} while (0)
+
+#define TAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+
+/*
+ * The FAST function is fast in that it causes no data access other
+ * then the access to the head. The standard LAST function above
+ * will cause a data access of both the element you want and
+ * the previous element. FAST is very useful for instances when
+ * you may want to prefetch the last data element.
+ */
+#define TAILQ_LAST_FAST(head, type, field) \
+ (TAILQ_EMPTY(head) ? NULL : __containerof((head)->tqh_last, \
+ QUEUE_TYPEOF(type), field.tqe_next))
+
+#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
+
+#define TAILQ_PREV(elm, headname, field) \
+ (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
+
+#define TAILQ_REMOVE(head, elm, field) do { \
+ QMD_SAVELINK(oldnext, (elm)->field.tqe_next); \
+ QMD_SAVELINK(oldprev, (elm)->field.tqe_prev); \
+ QMD_TAILQ_CHECK_NEXT(elm, field); \
+ QMD_TAILQ_CHECK_PREV(elm, field); \
+ if ((TAILQ_NEXT((elm), field)) != NULL) \
+ TAILQ_NEXT((elm), field)->field.tqe_prev = \
+ (elm)->field.tqe_prev; \
+ else { \
+ (head)->tqh_last = (elm)->field.tqe_prev; \
+ QMD_TRACE_HEAD(head); \
+ } \
+ *(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \
+ TRASHIT(*oldnext); \
+ TRASHIT(*oldprev); \
+ QMD_TRACE_ELEM(&(elm)->field); \
+} while (0)
+
+#define TAILQ_SWAP(head1, head2, type, field) do { \
+ QUEUE_TYPEOF(type) * swap_first = (head1)->tqh_first; \
+ QUEUE_TYPEOF(type) * *swap_last = (head1)->tqh_last; \
+ (head1)->tqh_first = (head2)->tqh_first; \
+ (head1)->tqh_last = (head2)->tqh_last; \
+ (head2)->tqh_first = swap_first; \
+ (head2)->tqh_last = swap_last; \
+ swap_first = (head1)->tqh_first; \
+ if (swap_first != NULL) \
+ swap_first->field.tqe_prev = &(head1)->tqh_first; \
+ else \
+ (head1)->tqh_last = &(head1)->tqh_first; \
+ swap_first = (head2)->tqh_first; \
+ if (swap_first != NULL) \
+ swap_first->field.tqe_prev = &(head2)->tqh_first; \
+ else \
+ (head2)->tqh_last = &(head2)->tqh_first; \
+} while (0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_QUEUE_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/include/unistd.h b/src/spdk/dpdk/lib/librte_eal/windows/include/unistd.h
new file mode 100644
index 000000000..757b7f3c5
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/include/unistd.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _UNISTD_H_
+#define _UNISTD_H_
+/**
+ * This file is added to support common code in eal_common_lcore.c
+ * as Microsoft libc does not contain unistd.h. This may be removed
+ * in future releases.
+ */
+#endif /* _UNISTD_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/windows/meson.build b/src/spdk/dpdk/lib/librte_eal/windows/meson.build
new file mode 100644
index 000000000..adfc8b9b7
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/windows/meson.build
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2019 Intel Corporation
+
+subdir('include')
+
+sources += files(
+ 'eal.c',
+ 'eal_debug.c',
+ 'eal_lcore.c',
+ 'eal_log.c',
+ 'eal_thread.c',
+ 'fnmatch.c',
+ 'getopt.c',
+)
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/meson.build b/src/spdk/dpdk/lib/librte_eal/x86/include/meson.build
new file mode 100644
index 000000000..f0e998c2f
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/meson.build
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+
+arch_headers = files(
+ 'rte_atomic_32.h',
+ 'rte_atomic_64.h',
+ 'rte_atomic.h',
+ 'rte_byteorder_32.h',
+ 'rte_byteorder_64.h',
+ 'rte_byteorder.h',
+ 'rte_cpuflags.h',
+ 'rte_cycles.h',
+ 'rte_io.h',
+ 'rte_memcpy.h',
+ 'rte_prefetch.h',
+ 'rte_pause.h',
+ 'rte_rtm.h',
+ 'rte_rwlock.h',
+ 'rte_spinlock.h',
+ 'rte_vect.h',
+)
+install_headers(arch_headers, subdir: get_option('include_subdir_arch'))
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_atomic.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_atomic.h
new file mode 100644
index 000000000..b9dcd30ab
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_atomic.h
@@ -0,0 +1,270 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_ATOMIC_X86_H_
+#define _RTE_ATOMIC_X86_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <rte_common.h>
+#include <rte_config.h>
+#include <emmintrin.h>
+#include "generic/rte_atomic.h"
+
+#if RTE_MAX_LCORE == 1
+#define MPLOCKED /**< No need to insert MP lock prefix. */
+#else
+#define MPLOCKED "lock ; " /**< Insert MP lock prefix. */
+#endif
+
+#define rte_mb() _mm_mfence()
+
+#define rte_wmb() _mm_sfence()
+
+#define rte_rmb() _mm_lfence()
+
+#define rte_smp_wmb() rte_compiler_barrier()
+
+#define rte_smp_rmb() rte_compiler_barrier()
+
+/*
+ * From Intel Software Development Manual; Vol 3;
+ * 8.2.2 Memory Ordering in P6 and More Recent Processor Families:
+ * ...
+ * . Reads are not reordered with other reads.
+ * . Writes are not reordered with older reads.
+ * . Writes to memory are not reordered with other writes,
+ * with the following exceptions:
+ * . streaming stores (writes) executed with the non-temporal move
+ * instructions (MOVNTI, MOVNTQ, MOVNTDQ, MOVNTPS, and MOVNTPD); and
+ * . string operations (see Section 8.2.4.1).
+ * ...
+ * . Reads may be reordered with older writes to different locations but not
+ * with older writes to the same location.
+ * . Reads or writes cannot be reordered with I/O instructions,
+ * locked instructions, or serializing instructions.
+ * . Reads cannot pass earlier LFENCE and MFENCE instructions.
+ * . Writes ... cannot pass earlier LFENCE, SFENCE, and MFENCE instructions.
+ * . LFENCE instructions cannot pass earlier reads.
+ * . SFENCE instructions cannot pass earlier writes ...
+ * . MFENCE instructions cannot pass earlier reads, writes ...
+ *
+ * As pointed by Java guys, that makes possible to use lock-prefixed
+ * instructions to get the same effect as mfence and on most modern HW
+ * that gives a better performance then using mfence:
+ * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
+ * Basic idea is to use lock prefixed add with some dummy memory location
+ * as the destination. From their experiments 128B(2 cache lines) below
+ * current stack pointer looks like a good candidate.
+ * So below we use that techinque for rte_smp_mb() implementation.
+ */
+
+static __rte_always_inline void
+rte_smp_mb(void)
+{
+#ifdef RTE_ARCH_I686
+ asm volatile("lock addl $0, -128(%%esp); " ::: "memory");
+#else
+ asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
+#endif
+}
+
+#define rte_io_mb() rte_mb()
+
+#define rte_io_wmb() rte_compiler_barrier()
+
+#define rte_io_rmb() rte_compiler_barrier()
+
+#define rte_cio_wmb() rte_compiler_barrier()
+
+#define rte_cio_rmb() rte_compiler_barrier()
+
+/*------------------------- 16 bit atomic operations -------------------------*/
+
+#ifndef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
+{
+ uint8_t res;
+
+ asm volatile(
+ MPLOCKED
+ "cmpxchgw %[src], %[dst];"
+ "sete %[res];"
+ : [res] "=a" (res), /* output */
+ [dst] "=m" (*dst)
+ : [src] "r" (src), /* input */
+ "a" (exp),
+ "m" (*dst)
+ : "memory"); /* no-clobber list */
+ return res;
+}
+
+static inline uint16_t
+rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val)
+{
+ asm volatile(
+ MPLOCKED
+ "xchgw %0, %1;"
+ : "=r" (val), "=m" (*dst)
+ : "0" (val), "m" (*dst)
+ : "memory"); /* no-clobber list */
+ return val;
+}
+
+static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
+{
+ return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
+}
+
+static inline void
+rte_atomic16_inc(rte_atomic16_t *v)
+{
+ asm volatile(
+ MPLOCKED
+ "incw %[cnt]"
+ : [cnt] "=m" (v->cnt) /* output */
+ : "m" (v->cnt) /* input */
+ );
+}
+
+static inline void
+rte_atomic16_dec(rte_atomic16_t *v)
+{
+ asm volatile(
+ MPLOCKED
+ "decw %[cnt]"
+ : [cnt] "=m" (v->cnt) /* output */
+ : "m" (v->cnt) /* input */
+ );
+}
+
+static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
+{
+ uint8_t ret;
+
+ asm volatile(
+ MPLOCKED
+ "incw %[cnt] ; "
+ "sete %[ret]"
+ : [cnt] "+m" (v->cnt), /* output */
+ [ret] "=qm" (ret)
+ );
+ return ret != 0;
+}
+
+static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
+{
+ uint8_t ret;
+
+ asm volatile(MPLOCKED
+ "decw %[cnt] ; "
+ "sete %[ret]"
+ : [cnt] "+m" (v->cnt), /* output */
+ [ret] "=qm" (ret)
+ );
+ return ret != 0;
+}
+
+/*------------------------- 32 bit atomic operations -------------------------*/
+
+static inline int
+rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
+{
+ uint8_t res;
+
+ asm volatile(
+ MPLOCKED
+ "cmpxchgl %[src], %[dst];"
+ "sete %[res];"
+ : [res] "=a" (res), /* output */
+ [dst] "=m" (*dst)
+ : [src] "r" (src), /* input */
+ "a" (exp),
+ "m" (*dst)
+ : "memory"); /* no-clobber list */
+ return res;
+}
+
+static inline uint32_t
+rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val)
+{
+ asm volatile(
+ MPLOCKED
+ "xchgl %0, %1;"
+ : "=r" (val), "=m" (*dst)
+ : "0" (val), "m" (*dst)
+ : "memory"); /* no-clobber list */
+ return val;
+}
+
+static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
+{
+ return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1);
+}
+
+static inline void
+rte_atomic32_inc(rte_atomic32_t *v)
+{
+ asm volatile(
+ MPLOCKED
+ "incl %[cnt]"
+ : [cnt] "=m" (v->cnt) /* output */
+ : "m" (v->cnt) /* input */
+ );
+}
+
+static inline void
+rte_atomic32_dec(rte_atomic32_t *v)
+{
+ asm volatile(
+ MPLOCKED
+ "decl %[cnt]"
+ : [cnt] "=m" (v->cnt) /* output */
+ : "m" (v->cnt) /* input */
+ );
+}
+
+static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
+{
+ uint8_t ret;
+
+ asm volatile(
+ MPLOCKED
+ "incl %[cnt] ; "
+ "sete %[ret]"
+ : [cnt] "+m" (v->cnt), /* output */
+ [ret] "=qm" (ret)
+ );
+ return ret != 0;
+}
+
+static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
+{
+ uint8_t ret;
+
+ asm volatile(MPLOCKED
+ "decl %[cnt] ; "
+ "sete %[ret]"
+ : [cnt] "+m" (v->cnt), /* output */
+ [ret] "=qm" (ret)
+ );
+ return ret != 0;
+}
+#endif
+
+#ifdef RTE_ARCH_I686
+#include "rte_atomic_32.h"
+#else
+#include "rte_atomic_64.h"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_X86_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_atomic_32.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_atomic_32.h
new file mode 100644
index 000000000..f63b7fa27
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_atomic_32.h
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation.
+ */
+
+/*
+ * Inspired from FreeBSD src/sys/i386/include/atomic.h
+ * Copyright (c) 1998 Doug Rabson
+ * All rights reserved.
+ */
+
+#ifndef _RTE_ATOMIC_X86_H_
+#error do not include this file directly, use <rte_atomic.h> instead
+#endif
+
+#ifndef _RTE_ATOMIC_I686_H_
+#define _RTE_ATOMIC_I686_H_
+
+#include <stdint.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+
+/*------------------------- 64 bit atomic operations -------------------------*/
+
+#ifndef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
+{
+ uint8_t res;
+ RTE_STD_C11
+ union {
+ struct {
+ uint32_t l32;
+ uint32_t h32;
+ };
+ uint64_t u64;
+ } _exp, _src;
+
+ _exp.u64 = exp;
+ _src.u64 = src;
+
+#ifndef __PIC__
+ asm volatile (
+ MPLOCKED
+ "cmpxchg8b (%[dst]);"
+ "setz %[res];"
+ : [res] "=a" (res) /* result in eax */
+ : [dst] "S" (dst), /* esi */
+ "b" (_src.l32), /* ebx */
+ "c" (_src.h32), /* ecx */
+ "a" (_exp.l32), /* eax */
+ "d" (_exp.h32) /* edx */
+ : "memory" ); /* no-clobber list */
+#else
+ asm volatile (
+ "xchgl %%ebx, %%edi;\n"
+ MPLOCKED
+ "cmpxchg8b (%[dst]);"
+ "setz %[res];"
+ "xchgl %%ebx, %%edi;\n"
+ : [res] "=a" (res) /* result in eax */
+ : [dst] "S" (dst), /* esi */
+ "D" (_src.l32), /* ebx */
+ "c" (_src.h32), /* ecx */
+ "a" (_exp.l32), /* eax */
+ "d" (_exp.h32) /* edx */
+ : "memory" ); /* no-clobber list */
+#endif
+
+ return res;
+}
+
+static inline uint64_t
+rte_atomic64_exchange(volatile uint64_t *dest, uint64_t val)
+{
+ uint64_t old;
+
+ do {
+ old = *dest;
+ } while (rte_atomic64_cmpset(dest, old, val) == 0);
+
+ return old;
+}
+
+static inline void
+rte_atomic64_init(rte_atomic64_t *v)
+{
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, 0);
+ }
+}
+
+static inline int64_t
+rte_atomic64_read(rte_atomic64_t *v)
+{
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ /* replace the value by itself */
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, tmp);
+ }
+ return tmp;
+}
+
+static inline void
+rte_atomic64_set(rte_atomic64_t *v, int64_t new_value)
+{
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, new_value);
+ }
+}
+
+static inline void
+rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
+{
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, tmp + inc);
+ }
+}
+
+static inline void
+rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
+{
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, tmp - dec);
+ }
+}
+
+static inline void
+rte_atomic64_inc(rte_atomic64_t *v)
+{
+ rte_atomic64_add(v, 1);
+}
+
+static inline void
+rte_atomic64_dec(rte_atomic64_t *v)
+{
+ rte_atomic64_sub(v, 1);
+}
+
+static inline int64_t
+rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
+{
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, tmp + inc);
+ }
+
+ return tmp + inc;
+}
+
+static inline int64_t
+rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
+{
+ int success = 0;
+ uint64_t tmp;
+
+ while (success == 0) {
+ tmp = v->cnt;
+ success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt,
+ tmp, tmp - dec);
+ }
+
+ return tmp - dec;
+}
+
+static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v)
+{
+ return rte_atomic64_add_return(v, 1) == 0;
+}
+
+static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v)
+{
+ return rte_atomic64_sub_return(v, 1) == 0;
+}
+
+static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
+{
+ return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1);
+}
+
+static inline void rte_atomic64_clear(rte_atomic64_t *v)
+{
+ rte_atomic64_set(v, 0);
+}
+#endif
+
+#endif /* _RTE_ATOMIC_I686_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_atomic_64.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_atomic_64.h
new file mode 100644
index 000000000..cfe7067dd
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_atomic_64.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation.
+ */
+
+/*
+ * Inspired from FreeBSD src/sys/amd64/include/atomic.h
+ * Copyright (c) 1998 Doug Rabson
+ * Copyright (c) 2019 Intel Corporation
+ * All rights reserved.
+ */
+
+#ifndef _RTE_ATOMIC_X86_H_
+#error do not include this file directly, use <rte_atomic.h> instead
+#endif
+
+#ifndef _RTE_ATOMIC_X86_64_H_
+#define _RTE_ATOMIC_X86_64_H_
+
+#include <stdint.h>
+#include <rte_common.h>
+#include <rte_compat.h>
+#include <rte_atomic.h>
+
+/*------------------------- 64 bit atomic operations -------------------------*/
+
+#ifndef RTE_FORCE_INTRINSICS
+static inline int
+rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
+{
+ uint8_t res;
+
+
+ asm volatile(
+ MPLOCKED
+ "cmpxchgq %[src], %[dst];"
+ "sete %[res];"
+ : [res] "=a" (res), /* output */
+ [dst] "=m" (*dst)
+ : [src] "r" (src), /* input */
+ "a" (exp),
+ "m" (*dst)
+ : "memory"); /* no-clobber list */
+
+ return res;
+}
+
+static inline uint64_t
+rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val)
+{
+ asm volatile(
+ MPLOCKED
+ "xchgq %0, %1;"
+ : "=r" (val), "=m" (*dst)
+ : "0" (val), "m" (*dst)
+ : "memory"); /* no-clobber list */
+ return val;
+}
+
+static inline void
+rte_atomic64_init(rte_atomic64_t *v)
+{
+ v->cnt = 0;
+}
+
+static inline int64_t
+rte_atomic64_read(rte_atomic64_t *v)
+{
+ return v->cnt;
+}
+
+static inline void
+rte_atomic64_set(rte_atomic64_t *v, int64_t new_value)
+{
+ v->cnt = new_value;
+}
+
+static inline void
+rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
+{
+ asm volatile(
+ MPLOCKED
+ "addq %[inc], %[cnt]"
+ : [cnt] "=m" (v->cnt) /* output */
+ : [inc] "ir" (inc), /* input */
+ "m" (v->cnt)
+ );
+}
+
+static inline void
+rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
+{
+ asm volatile(
+ MPLOCKED
+ "subq %[dec], %[cnt]"
+ : [cnt] "=m" (v->cnt) /* output */
+ : [dec] "ir" (dec), /* input */
+ "m" (v->cnt)
+ );
+}
+
+static inline void
+rte_atomic64_inc(rte_atomic64_t *v)
+{
+ asm volatile(
+ MPLOCKED
+ "incq %[cnt]"
+ : [cnt] "=m" (v->cnt) /* output */
+ : "m" (v->cnt) /* input */
+ );
+}
+
+static inline void
+rte_atomic64_dec(rte_atomic64_t *v)
+{
+ asm volatile(
+ MPLOCKED
+ "decq %[cnt]"
+ : [cnt] "=m" (v->cnt) /* output */
+ : "m" (v->cnt) /* input */
+ );
+}
+
+static inline int64_t
+rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
+{
+ int64_t prev = inc;
+
+ asm volatile(
+ MPLOCKED
+ "xaddq %[prev], %[cnt]"
+ : [prev] "+r" (prev), /* output */
+ [cnt] "=m" (v->cnt)
+ : "m" (v->cnt) /* input */
+ );
+ return prev + inc;
+}
+
+static inline int64_t
+rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
+{
+ return rte_atomic64_add_return(v, -dec);
+}
+
+static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v)
+{
+ uint8_t ret;
+
+ asm volatile(
+ MPLOCKED
+ "incq %[cnt] ; "
+ "sete %[ret]"
+ : [cnt] "+m" (v->cnt), /* output */
+ [ret] "=qm" (ret)
+ );
+
+ return ret != 0;
+}
+
+static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v)
+{
+ uint8_t ret;
+
+ asm volatile(
+ MPLOCKED
+ "decq %[cnt] ; "
+ "sete %[ret]"
+ : [cnt] "+m" (v->cnt), /* output */
+ [ret] "=qm" (ret)
+ );
+ return ret != 0;
+}
+
+static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
+{
+ return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1);
+}
+
+static inline void rte_atomic64_clear(rte_atomic64_t *v)
+{
+ v->cnt = 0;
+}
+#endif
+
+/*------------------------ 128 bit atomic operations -------------------------*/
+
+__rte_experimental
+static inline int
+rte_atomic128_cmp_exchange(rte_int128_t *dst,
+ rte_int128_t *exp,
+ const rte_int128_t *src,
+ unsigned int weak,
+ int success,
+ int failure)
+{
+ RTE_SET_USED(weak);
+ RTE_SET_USED(success);
+ RTE_SET_USED(failure);
+ uint8_t res;
+
+ asm volatile (
+ MPLOCKED
+ "cmpxchg16b %[dst];"
+ " sete %[res]"
+ : [dst] "=m" (dst->val[0]),
+ "=a" (exp->val[0]),
+ "=d" (exp->val[1]),
+ [res] "=r" (res)
+ : "b" (src->val[0]),
+ "c" (src->val[1]),
+ "a" (exp->val[0]),
+ "d" (exp->val[1]),
+ "m" (dst->val[0])
+ : "memory");
+
+ return res;
+}
+
+#endif /* _RTE_ATOMIC_X86_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_byteorder.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_byteorder.h
new file mode 100644
index 000000000..a2dfecc1f
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_byteorder.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_BYTEORDER_X86_H_
+#define _RTE_BYTEORDER_X86_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <rte_common.h>
+#include <rte_config.h>
+#include "generic/rte_byteorder.h"
+
+#ifndef RTE_BYTE_ORDER
+#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN
+#endif
+
+/*
+ * An architecture-optimized byte swap for a 16-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap16().
+ */
+static inline uint16_t rte_arch_bswap16(uint16_t _x)
+{
+ uint16_t x = _x;
+ asm volatile ("xchgb %b[x1],%h[x2]"
+ : [x1] "=Q" (x)
+ : [x2] "0" (x)
+ );
+ return x;
+}
+
+/*
+ * An architecture-optimized byte swap for a 32-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap32().
+ */
+static inline uint32_t rte_arch_bswap32(uint32_t _x)
+{
+ uint32_t x = _x;
+ asm volatile ("bswap %[x]"
+ : [x] "+r" (x)
+ );
+ return x;
+}
+
+#ifndef RTE_FORCE_INTRINSICS
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap16(x) : \
+ rte_arch_bswap16(x)))
+
+#define rte_bswap32(x) ((uint32_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap32(x) : \
+ rte_arch_bswap32(x)))
+
+#define rte_bswap64(x) ((uint64_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap64(x) : \
+ rte_arch_bswap64(x)))
+#else
+/*
+ * __builtin_bswap16 is only available gcc 4.8 and upwards
+ */
+#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \
+ rte_constant_bswap16(x) : \
+ rte_arch_bswap16(x)))
+#endif
+#endif
+
+#define rte_cpu_to_le_16(x) (x)
+#define rte_cpu_to_le_32(x) (x)
+#define rte_cpu_to_le_64(x) (x)
+
+#define rte_cpu_to_be_16(x) rte_bswap16(x)
+#define rte_cpu_to_be_32(x) rte_bswap32(x)
+#define rte_cpu_to_be_64(x) rte_bswap64(x)
+
+#define rte_le_to_cpu_16(x) (x)
+#define rte_le_to_cpu_32(x) (x)
+#define rte_le_to_cpu_64(x) (x)
+
+#define rte_be_to_cpu_16(x) rte_bswap16(x)
+#define rte_be_to_cpu_32(x) rte_bswap32(x)
+#define rte_be_to_cpu_64(x) rte_bswap64(x)
+
+#ifdef RTE_ARCH_I686
+#include "rte_byteorder_32.h"
+#else
+#include "rte_byteorder_64.h"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BYTEORDER_X86_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_byteorder_32.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_byteorder_32.h
new file mode 100644
index 000000000..d5a768e52
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_byteorder_32.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_BYTEORDER_X86_H_
+#error do not include this file directly, use <rte_byteorder.h> instead
+#endif
+
+#ifndef _RTE_BYTEORDER_I686_H_
+#define _RTE_BYTEORDER_I686_H_
+
+#include <stdint.h>
+#include <rte_byteorder.h>
+
+/*
+ * An architecture-optimized byte swap for a 64-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap64().
+ */
+/* Compat./Leg. mode */
+static inline uint64_t rte_arch_bswap64(uint64_t x)
+{
+ uint64_t ret = 0;
+ ret |= ((uint64_t)rte_arch_bswap32(x & 0xffffffffUL) << 32);
+ ret |= ((uint64_t)rte_arch_bswap32((x >> 32) & 0xffffffffUL));
+ return ret;
+}
+
+#endif /* _RTE_BYTEORDER_I686_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_byteorder_64.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_byteorder_64.h
new file mode 100644
index 000000000..8c6cf285b
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_byteorder_64.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_BYTEORDER_X86_H_
+#error do not include this file directly, use <rte_byteorder.h> instead
+#endif
+
+#ifndef _RTE_BYTEORDER_X86_64_H_
+#define _RTE_BYTEORDER_X86_64_H_
+
+#include <stdint.h>
+#include <rte_common.h>
+
+/*
+ * An architecture-optimized byte swap for a 64-bit value.
+ *
+ * Do not use this function directly. The preferred function is rte_bswap64().
+ */
+/* 64-bit mode */
+static inline uint64_t rte_arch_bswap64(uint64_t _x)
+{
+ uint64_t x = _x;
+ asm volatile ("bswap %[x]"
+ : [x] "+r" (x)
+ );
+ return x;
+}
+
+#endif /* _RTE_BYTEORDER_X86_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_cpuflags.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_cpuflags.h
new file mode 100644
index 000000000..c1d20364d
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_cpuflags.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_CPUFLAGS_X86_64_H_
+#define _RTE_CPUFLAGS_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum rte_cpu_flag_t {
+ /* (EAX 01h) ECX features*/
+ RTE_CPUFLAG_SSE3 = 0, /**< SSE3 */
+ RTE_CPUFLAG_PCLMULQDQ, /**< PCLMULQDQ */
+ RTE_CPUFLAG_DTES64, /**< DTES64 */
+ RTE_CPUFLAG_MONITOR, /**< MONITOR */
+ RTE_CPUFLAG_DS_CPL, /**< DS_CPL */
+ RTE_CPUFLAG_VMX, /**< VMX */
+ RTE_CPUFLAG_SMX, /**< SMX */
+ RTE_CPUFLAG_EIST, /**< EIST */
+ RTE_CPUFLAG_TM2, /**< TM2 */
+ RTE_CPUFLAG_SSSE3, /**< SSSE3 */
+ RTE_CPUFLAG_CNXT_ID, /**< CNXT_ID */
+ RTE_CPUFLAG_FMA, /**< FMA */
+ RTE_CPUFLAG_CMPXCHG16B, /**< CMPXCHG16B */
+ RTE_CPUFLAG_XTPR, /**< XTPR */
+ RTE_CPUFLAG_PDCM, /**< PDCM */
+ RTE_CPUFLAG_PCID, /**< PCID */
+ RTE_CPUFLAG_DCA, /**< DCA */
+ RTE_CPUFLAG_SSE4_1, /**< SSE4_1 */
+ RTE_CPUFLAG_SSE4_2, /**< SSE4_2 */
+ RTE_CPUFLAG_X2APIC, /**< X2APIC */
+ RTE_CPUFLAG_MOVBE, /**< MOVBE */
+ RTE_CPUFLAG_POPCNT, /**< POPCNT */
+ RTE_CPUFLAG_TSC_DEADLINE, /**< TSC_DEADLINE */
+ RTE_CPUFLAG_AES, /**< AES */
+ RTE_CPUFLAG_XSAVE, /**< XSAVE */
+ RTE_CPUFLAG_OSXSAVE, /**< OSXSAVE */
+ RTE_CPUFLAG_AVX, /**< AVX */
+ RTE_CPUFLAG_F16C, /**< F16C */
+ RTE_CPUFLAG_RDRAND, /**< RDRAND */
+ RTE_CPUFLAG_HYPERVISOR, /**< Running in a VM */
+
+ /* (EAX 01h) EDX features */
+ RTE_CPUFLAG_FPU, /**< FPU */
+ RTE_CPUFLAG_VME, /**< VME */
+ RTE_CPUFLAG_DE, /**< DE */
+ RTE_CPUFLAG_PSE, /**< PSE */
+ RTE_CPUFLAG_TSC, /**< TSC */
+ RTE_CPUFLAG_MSR, /**< MSR */
+ RTE_CPUFLAG_PAE, /**< PAE */
+ RTE_CPUFLAG_MCE, /**< MCE */
+ RTE_CPUFLAG_CX8, /**< CX8 */
+ RTE_CPUFLAG_APIC, /**< APIC */
+ RTE_CPUFLAG_SEP, /**< SEP */
+ RTE_CPUFLAG_MTRR, /**< MTRR */
+ RTE_CPUFLAG_PGE, /**< PGE */
+ RTE_CPUFLAG_MCA, /**< MCA */
+ RTE_CPUFLAG_CMOV, /**< CMOV */
+ RTE_CPUFLAG_PAT, /**< PAT */
+ RTE_CPUFLAG_PSE36, /**< PSE36 */
+ RTE_CPUFLAG_PSN, /**< PSN */
+ RTE_CPUFLAG_CLFSH, /**< CLFSH */
+ RTE_CPUFLAG_DS, /**< DS */
+ RTE_CPUFLAG_ACPI, /**< ACPI */
+ RTE_CPUFLAG_MMX, /**< MMX */
+ RTE_CPUFLAG_FXSR, /**< FXSR */
+ RTE_CPUFLAG_SSE, /**< SSE */
+ RTE_CPUFLAG_SSE2, /**< SSE2 */
+ RTE_CPUFLAG_SS, /**< SS */
+ RTE_CPUFLAG_HTT, /**< HTT */
+ RTE_CPUFLAG_TM, /**< TM */
+ RTE_CPUFLAG_PBE, /**< PBE */
+
+ /* (EAX 06h) EAX features */
+ RTE_CPUFLAG_DIGTEMP, /**< DIGTEMP */
+ RTE_CPUFLAG_TRBOBST, /**< TRBOBST */
+ RTE_CPUFLAG_ARAT, /**< ARAT */
+ RTE_CPUFLAG_PLN, /**< PLN */
+ RTE_CPUFLAG_ECMD, /**< ECMD */
+ RTE_CPUFLAG_PTM, /**< PTM */
+
+ /* (EAX 06h) ECX features */
+ RTE_CPUFLAG_MPERF_APERF_MSR, /**< MPERF_APERF_MSR */
+ RTE_CPUFLAG_ACNT2, /**< ACNT2 */
+ RTE_CPUFLAG_ENERGY_EFF, /**< ENERGY_EFF */
+
+ /* (EAX 07h, ECX 0h) EBX features */
+ RTE_CPUFLAG_FSGSBASE, /**< FSGSBASE */
+ RTE_CPUFLAG_BMI1, /**< BMI1 */
+ RTE_CPUFLAG_HLE, /**< Hardware Lock elision */
+ RTE_CPUFLAG_AVX2, /**< AVX2 */
+ RTE_CPUFLAG_SMEP, /**< SMEP */
+ RTE_CPUFLAG_BMI2, /**< BMI2 */
+ RTE_CPUFLAG_ERMS, /**< ERMS */
+ RTE_CPUFLAG_INVPCID, /**< INVPCID */
+ RTE_CPUFLAG_RTM, /**< Transactional memory */
+ RTE_CPUFLAG_AVX512F, /**< AVX512F */
+ RTE_CPUFLAG_RDSEED, /**< RDSEED instruction */
+
+ /* (EAX 80000001h) ECX features */
+ RTE_CPUFLAG_LAHF_SAHF, /**< LAHF_SAHF */
+ RTE_CPUFLAG_LZCNT, /**< LZCNT */
+
+ /* (EAX 80000001h) EDX features */
+ RTE_CPUFLAG_SYSCALL, /**< SYSCALL */
+ RTE_CPUFLAG_XD, /**< XD */
+ RTE_CPUFLAG_1GB_PG, /**< 1GB_PG */
+ RTE_CPUFLAG_RDTSCP, /**< RDTSCP */
+ RTE_CPUFLAG_EM64T, /**< EM64T */
+
+ /* (EAX 80000007h) EDX features */
+ RTE_CPUFLAG_INVTSC, /**< INVTSC */
+
+ RTE_CPUFLAG_AVX512DQ, /**< AVX512 Doubleword and Quadword */
+ RTE_CPUFLAG_AVX512IFMA, /**< AVX512 Integer Fused Multiply-Add */
+ RTE_CPUFLAG_AVX512CD, /**< AVX512 Conflict Detection*/
+ RTE_CPUFLAG_AVX512BW, /**< AVX512 Byte and Word */
+ RTE_CPUFLAG_AVX512VL, /**< AVX512 Vector Length */
+ RTE_CPUFLAG_AVX512VBMI, /**< AVX512 Vector Bit Manipulation */
+ RTE_CPUFLAG_AVX512VBMI2, /**< AVX512 Vector Bit Manipulation 2 */
+ RTE_CPUFLAG_GFNI, /**< Galois Field New Instructions */
+ RTE_CPUFLAG_VAES, /**< Vector AES */
+ RTE_CPUFLAG_VPCLMULQDQ, /**< Vector Carry-less Multiply */
+ RTE_CPUFLAG_AVX512VNNI,
+ /**< AVX512 Vector Neural Network Instructions */
+ RTE_CPUFLAG_AVX512BITALG, /**< AVX512 Bit Algorithms */
+ RTE_CPUFLAG_AVX512VPOPCNTDQ, /**< AVX512 Vector Popcount */
+ RTE_CPUFLAG_CLDEMOTE, /**< Cache Line Demote */
+ RTE_CPUFLAG_MOVDIRI, /**< Direct Store Instructions */
+ RTE_CPUFLAG_MOVDIR64B, /**< Direct Store Instructions 64B */
+ RTE_CPUFLAG_AVX512VP2INTERSECT, /**< AVX512 Two Register Intersection */
+
+ /* The last item */
+ RTE_CPUFLAG_NUMFLAGS, /**< This should always be the last! */
+};
+
+#include "generic/rte_cpuflags.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CPUFLAGS_X86_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_cycles.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_cycles.h
new file mode 100644
index 000000000..a461a4d73
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_cycles.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation.
+ * Copyright(c) 2013 6WIND S.A.
+ */
+
+#ifndef _RTE_CYCLES_X86_64_H_
+#define _RTE_CYCLES_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
+/* Global switch to use VMWARE mapping of TSC instead of RDTSC */
+extern int rte_cycles_vmware_tsc_map;
+#include <rte_branch_prediction.h>
+#endif
+#include <rte_common.h>
+#include <rte_config.h>
+
+static inline uint64_t
+rte_rdtsc(void)
+{
+ union {
+ uint64_t tsc_64;
+ RTE_STD_C11
+ struct {
+ uint32_t lo_32;
+ uint32_t hi_32;
+ };
+ } tsc;
+
+#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
+ if (unlikely(rte_cycles_vmware_tsc_map)) {
+ /* ecx = 0x10000 corresponds to the physical TSC for VMware */
+ asm volatile("rdpmc" :
+ "=a" (tsc.lo_32),
+ "=d" (tsc.hi_32) :
+ "c"(0x10000));
+ return tsc.tsc_64;
+ }
+#endif
+
+ asm volatile("rdtsc" :
+ "=a" (tsc.lo_32),
+ "=d" (tsc.hi_32));
+ return tsc.tsc_64;
+}
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+ rte_mb();
+ return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_X86_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_io.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_io.h
new file mode 100644
index 000000000..2db71b1b0
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_io.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 Cavium, Inc
+ */
+
+#ifndef _RTE_IO_X86_H_
+#define _RTE_IO_X86_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_io.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_IO_X86_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_mcslock.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_mcslock.h
new file mode 100644
index 000000000..a8f041a72
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_mcslock.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_MCSLOCK_X86_64_H_
+#define _RTE_MCSLOCK_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_mcslock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MCSLOCK_X86_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_memcpy.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_memcpy.h
new file mode 100644
index 000000000..9c67232df
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_memcpy.h
@@ -0,0 +1,885 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_MEMCPY_X86_64_H_
+#define _RTE_MEMCPY_X86_64_H_
+
+/**
+ * @file
+ *
+ * Functions for SSE/AVX/AVX2/AVX512 implementation of memcpy().
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <rte_vect.h>
+#include <rte_common.h>
+#include <rte_config.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 100000)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstringop-overflow"
+#endif
+
+/**
+ * Copy bytes from one location to another. The locations must not overlap.
+ *
+ * @note This is implemented as a macro, so it's address should not be taken
+ * and care is needed as parameter expressions may be evaluated multiple times.
+ *
+ * @param dst
+ * Pointer to the destination of the data.
+ * @param src
+ * Pointer to the source data.
+ * @param n
+ * Number of bytes to copy.
+ * @return
+ * Pointer to the destination data.
+ */
+static __rte_always_inline void *
+rte_memcpy(void *dst, const void *src, size_t n);
+
+#ifdef RTE_MACHINE_CPUFLAG_AVX512F
+
+#define ALIGNMENT_MASK 0x3F
+
+/**
+ * AVX512 implementation below
+ */
+
+/**
+ * Copy 16 bytes from one location to another,
+ * locations should not overlap.
+ */
+static __rte_always_inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ __m128i xmm0;
+
+ xmm0 = _mm_loadu_si128((const __m128i *)src);
+ _mm_storeu_si128((__m128i *)dst, xmm0);
+}
+
+/**
+ * Copy 32 bytes from one location to another,
+ * locations should not overlap.
+ */
+static __rte_always_inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ __m256i ymm0;
+
+ ymm0 = _mm256_loadu_si256((const __m256i *)src);
+ _mm256_storeu_si256((__m256i *)dst, ymm0);
+}
+
+/**
+ * Copy 64 bytes from one location to another,
+ * locations should not overlap.
+ */
+static __rte_always_inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ __m512i zmm0;
+
+ zmm0 = _mm512_loadu_si512((const void *)src);
+ _mm512_storeu_si512((void *)dst, zmm0);
+}
+
+/**
+ * Copy 128 bytes from one location to another,
+ * locations should not overlap.
+ */
+static __rte_always_inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov64(dst + 0 * 64, src + 0 * 64);
+ rte_mov64(dst + 1 * 64, src + 1 * 64);
+}
+
+/**
+ * Copy 256 bytes from one location to another,
+ * locations should not overlap.
+ */
+static __rte_always_inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov64(dst + 0 * 64, src + 0 * 64);
+ rte_mov64(dst + 1 * 64, src + 1 * 64);
+ rte_mov64(dst + 2 * 64, src + 2 * 64);
+ rte_mov64(dst + 3 * 64, src + 3 * 64);
+}
+
+/**
+ * Copy 128-byte blocks from one location to another,
+ * locations should not overlap.
+ */
+static __rte_always_inline void
+rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n)
+{
+ __m512i zmm0, zmm1;
+
+ while (n >= 128) {
+ zmm0 = _mm512_loadu_si512((const void *)(src + 0 * 64));
+ n -= 128;
+ zmm1 = _mm512_loadu_si512((const void *)(src + 1 * 64));
+ src = src + 128;
+ _mm512_storeu_si512((void *)(dst + 0 * 64), zmm0);
+ _mm512_storeu_si512((void *)(dst + 1 * 64), zmm1);
+ dst = dst + 128;
+ }
+}
+
+/**
+ * Copy 512-byte blocks from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov512blocks(uint8_t *dst, const uint8_t *src, size_t n)
+{
+ __m512i zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7;
+
+ while (n >= 512) {
+ zmm0 = _mm512_loadu_si512((const void *)(src + 0 * 64));
+ n -= 512;
+ zmm1 = _mm512_loadu_si512((const void *)(src + 1 * 64));
+ zmm2 = _mm512_loadu_si512((const void *)(src + 2 * 64));
+ zmm3 = _mm512_loadu_si512((const void *)(src + 3 * 64));
+ zmm4 = _mm512_loadu_si512((const void *)(src + 4 * 64));
+ zmm5 = _mm512_loadu_si512((const void *)(src + 5 * 64));
+ zmm6 = _mm512_loadu_si512((const void *)(src + 6 * 64));
+ zmm7 = _mm512_loadu_si512((const void *)(src + 7 * 64));
+ src = src + 512;
+ _mm512_storeu_si512((void *)(dst + 0 * 64), zmm0);
+ _mm512_storeu_si512((void *)(dst + 1 * 64), zmm1);
+ _mm512_storeu_si512((void *)(dst + 2 * 64), zmm2);
+ _mm512_storeu_si512((void *)(dst + 3 * 64), zmm3);
+ _mm512_storeu_si512((void *)(dst + 4 * 64), zmm4);
+ _mm512_storeu_si512((void *)(dst + 5 * 64), zmm5);
+ _mm512_storeu_si512((void *)(dst + 6 * 64), zmm6);
+ _mm512_storeu_si512((void *)(dst + 7 * 64), zmm7);
+ dst = dst + 512;
+ }
+}
+
+static __rte_always_inline void *
+rte_memcpy_generic(void *dst, const void *src, size_t n)
+{
+ uintptr_t dstu = (uintptr_t)dst;
+ uintptr_t srcu = (uintptr_t)src;
+ void *ret = dst;
+ size_t dstofss;
+ size_t bits;
+
+ /**
+ * Copy less than 16 bytes
+ */
+ if (n < 16) {
+ if (n & 0x01) {
+ *(uint8_t *)dstu = *(const uint8_t *)srcu;
+ srcu = (uintptr_t)((const uint8_t *)srcu + 1);
+ dstu = (uintptr_t)((uint8_t *)dstu + 1);
+ }
+ if (n & 0x02) {
+ *(uint16_t *)dstu = *(const uint16_t *)srcu;
+ srcu = (uintptr_t)((const uint16_t *)srcu + 1);
+ dstu = (uintptr_t)((uint16_t *)dstu + 1);
+ }
+ if (n & 0x04) {
+ *(uint32_t *)dstu = *(const uint32_t *)srcu;
+ srcu = (uintptr_t)((const uint32_t *)srcu + 1);
+ dstu = (uintptr_t)((uint32_t *)dstu + 1);
+ }
+ if (n & 0x08)
+ *(uint64_t *)dstu = *(const uint64_t *)srcu;
+ return ret;
+ }
+
+ /**
+ * Fast way when copy size doesn't exceed 512 bytes
+ */
+ if (n <= 32) {
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst - 16 + n,
+ (const uint8_t *)src - 16 + n);
+ return ret;
+ }
+ if (n <= 64) {
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov32((uint8_t *)dst - 32 + n,
+ (const uint8_t *)src - 32 + n);
+ return ret;
+ }
+ if (n <= 512) {
+ if (n >= 256) {
+ n -= 256;
+ rte_mov256((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + 256;
+ dst = (uint8_t *)dst + 256;
+ }
+ if (n >= 128) {
+ n -= 128;
+ rte_mov128((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + 128;
+ dst = (uint8_t *)dst + 128;
+ }
+COPY_BLOCK_128_BACK63:
+ if (n > 64) {
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov64((uint8_t *)dst - 64 + n,
+ (const uint8_t *)src - 64 + n);
+ return ret;
+ }
+ if (n > 0)
+ rte_mov64((uint8_t *)dst - 64 + n,
+ (const uint8_t *)src - 64 + n);
+ return ret;
+ }
+
+ /**
+ * Make store aligned when copy size exceeds 512 bytes
+ */
+ dstofss = ((uintptr_t)dst & 0x3F);
+ if (dstofss > 0) {
+ dstofss = 64 - dstofss;
+ n -= dstofss;
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + dstofss;
+ dst = (uint8_t *)dst + dstofss;
+ }
+
+ /**
+ * Copy 512-byte blocks.
+ * Use copy block function for better instruction order control,
+ * which is important when load is unaligned.
+ */
+ rte_mov512blocks((uint8_t *)dst, (const uint8_t *)src, n);
+ bits = n;
+ n = n & 511;
+ bits -= n;
+ src = (const uint8_t *)src + bits;
+ dst = (uint8_t *)dst + bits;
+
+ /**
+ * Copy 128-byte blocks.
+ * Use copy block function for better instruction order control,
+ * which is important when load is unaligned.
+ */
+ if (n >= 128) {
+ rte_mov128blocks((uint8_t *)dst, (const uint8_t *)src, n);
+ bits = n;
+ n = n & 127;
+ bits -= n;
+ src = (const uint8_t *)src + bits;
+ dst = (uint8_t *)dst + bits;
+ }
+
+ /**
+ * Copy whatever left
+ */
+ goto COPY_BLOCK_128_BACK63;
+}
+
+#elif defined RTE_MACHINE_CPUFLAG_AVX2
+
+#define ALIGNMENT_MASK 0x1F
+
+/**
+ * AVX2 implementation below
+ */
+
+/**
+ * Copy 16 bytes from one location to another,
+ * locations should not overlap.
+ */
+static __rte_always_inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ __m128i xmm0;
+
+ xmm0 = _mm_loadu_si128((const __m128i *)src);
+ _mm_storeu_si128((__m128i *)dst, xmm0);
+}
+
+/**
+ * Copy 32 bytes from one location to another,
+ * locations should not overlap.
+ */
+static __rte_always_inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ __m256i ymm0;
+
+ ymm0 = _mm256_loadu_si256((const __m256i *)src);
+ _mm256_storeu_si256((__m256i *)dst, ymm0);
+}
+
+/**
+ * Copy 64 bytes from one location to another,
+ * locations should not overlap.
+ */
+static __rte_always_inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
+ rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32);
+}
+
+/**
+ * Copy 128 bytes from one location to another,
+ * locations should not overlap.
+ */
+static __rte_always_inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
+ rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32);
+ rte_mov32((uint8_t *)dst + 2 * 32, (const uint8_t *)src + 2 * 32);
+ rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32);
+}
+
+/**
+ * Copy 128-byte blocks from one location to another,
+ * locations should not overlap.
+ */
+static __rte_always_inline void
+rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n)
+{
+ __m256i ymm0, ymm1, ymm2, ymm3;
+
+ while (n >= 128) {
+ ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32));
+ n -= 128;
+ ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32));
+ ymm2 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 2 * 32));
+ ymm3 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 3 * 32));
+ src = (const uint8_t *)src + 128;
+ _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0);
+ _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1);
+ _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 2 * 32), ymm2);
+ _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 3 * 32), ymm3);
+ dst = (uint8_t *)dst + 128;
+ }
+}
+
+static __rte_always_inline void *
+rte_memcpy_generic(void *dst, const void *src, size_t n)
+{
+ uintptr_t dstu = (uintptr_t)dst;
+ uintptr_t srcu = (uintptr_t)src;
+ void *ret = dst;
+ size_t dstofss;
+ size_t bits;
+
+ /**
+ * Copy less than 16 bytes
+ */
+ if (n < 16) {
+ if (n & 0x01) {
+ *(uint8_t *)dstu = *(const uint8_t *)srcu;
+ srcu = (uintptr_t)((const uint8_t *)srcu + 1);
+ dstu = (uintptr_t)((uint8_t *)dstu + 1);
+ }
+ if (n & 0x02) {
+ *(uint16_t *)dstu = *(const uint16_t *)srcu;
+ srcu = (uintptr_t)((const uint16_t *)srcu + 1);
+ dstu = (uintptr_t)((uint16_t *)dstu + 1);
+ }
+ if (n & 0x04) {
+ *(uint32_t *)dstu = *(const uint32_t *)srcu;
+ srcu = (uintptr_t)((const uint32_t *)srcu + 1);
+ dstu = (uintptr_t)((uint32_t *)dstu + 1);
+ }
+ if (n & 0x08) {
+ *(uint64_t *)dstu = *(const uint64_t *)srcu;
+ }
+ return ret;
+ }
+
+ /**
+ * Fast way when copy size doesn't exceed 256 bytes
+ */
+ if (n <= 32) {
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst - 16 + n,
+ (const uint8_t *)src - 16 + n);
+ return ret;
+ }
+ if (n <= 48) {
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst + 16, (const uint8_t *)src + 16);
+ rte_mov16((uint8_t *)dst - 16 + n,
+ (const uint8_t *)src - 16 + n);
+ return ret;
+ }
+ if (n <= 64) {
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov32((uint8_t *)dst - 32 + n,
+ (const uint8_t *)src - 32 + n);
+ return ret;
+ }
+ if (n <= 256) {
+ if (n >= 128) {
+ n -= 128;
+ rte_mov128((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + 128;
+ dst = (uint8_t *)dst + 128;
+ }
+COPY_BLOCK_128_BACK31:
+ if (n >= 64) {
+ n -= 64;
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + 64;
+ dst = (uint8_t *)dst + 64;
+ }
+ if (n > 32) {
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov32((uint8_t *)dst - 32 + n,
+ (const uint8_t *)src - 32 + n);
+ return ret;
+ }
+ if (n > 0) {
+ rte_mov32((uint8_t *)dst - 32 + n,
+ (const uint8_t *)src - 32 + n);
+ }
+ return ret;
+ }
+
+ /**
+ * Make store aligned when copy size exceeds 256 bytes
+ */
+ dstofss = (uintptr_t)dst & 0x1F;
+ if (dstofss > 0) {
+ dstofss = 32 - dstofss;
+ n -= dstofss;
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + dstofss;
+ dst = (uint8_t *)dst + dstofss;
+ }
+
+ /**
+ * Copy 128-byte blocks
+ */
+ rte_mov128blocks((uint8_t *)dst, (const uint8_t *)src, n);
+ bits = n;
+ n = n & 127;
+ bits -= n;
+ src = (const uint8_t *)src + bits;
+ dst = (uint8_t *)dst + bits;
+
+ /**
+ * Copy whatever left
+ */
+ goto COPY_BLOCK_128_BACK31;
+}
+
+#else /* RTE_MACHINE_CPUFLAG */
+
+#define ALIGNMENT_MASK 0x0F
+
+/**
+ * SSE & AVX implementation below
+ */
+
+/**
+ * Copy 16 bytes from one location to another,
+ * locations should not overlap.
+ */
+static __rte_always_inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+ __m128i xmm0;
+
+ xmm0 = _mm_loadu_si128((const __m128i *)(const __m128i *)src);
+ _mm_storeu_si128((__m128i *)dst, xmm0);
+}
+
+/**
+ * Copy 32 bytes from one location to another,
+ * locations should not overlap.
+ */
+static __rte_always_inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
+ rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16);
+}
+
+/**
+ * Copy 64 bytes from one location to another,
+ * locations should not overlap.
+ */
+static __rte_always_inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
+ rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16);
+ rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16);
+ rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16);
+}
+
+/**
+ * Copy 128 bytes from one location to another,
+ * locations should not overlap.
+ */
+static __rte_always_inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
+ rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16);
+ rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16);
+ rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16);
+ rte_mov16((uint8_t *)dst + 4 * 16, (const uint8_t *)src + 4 * 16);
+ rte_mov16((uint8_t *)dst + 5 * 16, (const uint8_t *)src + 5 * 16);
+ rte_mov16((uint8_t *)dst + 6 * 16, (const uint8_t *)src + 6 * 16);
+ rte_mov16((uint8_t *)dst + 7 * 16, (const uint8_t *)src + 7 * 16);
+}
+
+/**
+ * Copy 256 bytes from one location to another,
+ * locations should not overlap.
+ */
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+ rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
+ rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16);
+ rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16);
+ rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16);
+ rte_mov16((uint8_t *)dst + 4 * 16, (const uint8_t *)src + 4 * 16);
+ rte_mov16((uint8_t *)dst + 5 * 16, (const uint8_t *)src + 5 * 16);
+ rte_mov16((uint8_t *)dst + 6 * 16, (const uint8_t *)src + 6 * 16);
+ rte_mov16((uint8_t *)dst + 7 * 16, (const uint8_t *)src + 7 * 16);
+ rte_mov16((uint8_t *)dst + 8 * 16, (const uint8_t *)src + 8 * 16);
+ rte_mov16((uint8_t *)dst + 9 * 16, (const uint8_t *)src + 9 * 16);
+ rte_mov16((uint8_t *)dst + 10 * 16, (const uint8_t *)src + 10 * 16);
+ rte_mov16((uint8_t *)dst + 11 * 16, (const uint8_t *)src + 11 * 16);
+ rte_mov16((uint8_t *)dst + 12 * 16, (const uint8_t *)src + 12 * 16);
+ rte_mov16((uint8_t *)dst + 13 * 16, (const uint8_t *)src + 13 * 16);
+ rte_mov16((uint8_t *)dst + 14 * 16, (const uint8_t *)src + 14 * 16);
+ rte_mov16((uint8_t *)dst + 15 * 16, (const uint8_t *)src + 15 * 16);
+}
+
+/**
+ * Macro for copying unaligned block from one location to another with constant load offset,
+ * 47 bytes leftover maximum,
+ * locations should not overlap.
+ * Requirements:
+ * - Store is aligned
+ * - Load offset is <offset>, which must be immediate value within [1, 15]
+ * - For <src>, make sure <offset> bit backwards & <16 - offset> bit forwards are available for loading
+ * - <dst>, <src>, <len> must be variables
+ * - __m128i <xmm0> ~ <xmm8> must be pre-defined
+ */
+#define MOVEUNALIGNED_LEFT47_IMM(dst, src, len, offset) \
+__extension__ ({ \
+ size_t tmp; \
+ while (len >= 128 + 16 - offset) { \
+ xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \
+ len -= 128; \
+ xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16)); \
+ xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16)); \
+ xmm3 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 3 * 16)); \
+ xmm4 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 4 * 16)); \
+ xmm5 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 5 * 16)); \
+ xmm6 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 6 * 16)); \
+ xmm7 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 7 * 16)); \
+ xmm8 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 8 * 16)); \
+ src = (const uint8_t *)src + 128; \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 2 * 16), _mm_alignr_epi8(xmm3, xmm2, offset)); \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 3 * 16), _mm_alignr_epi8(xmm4, xmm3, offset)); \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 4 * 16), _mm_alignr_epi8(xmm5, xmm4, offset)); \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 5 * 16), _mm_alignr_epi8(xmm6, xmm5, offset)); \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 6 * 16), _mm_alignr_epi8(xmm7, xmm6, offset)); \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 7 * 16), _mm_alignr_epi8(xmm8, xmm7, offset)); \
+ dst = (uint8_t *)dst + 128; \
+ } \
+ tmp = len; \
+ len = ((len - 16 + offset) & 127) + 16 - offset; \
+ tmp -= len; \
+ src = (const uint8_t *)src + tmp; \
+ dst = (uint8_t *)dst + tmp; \
+ if (len >= 32 + 16 - offset) { \
+ while (len >= 32 + 16 - offset) { \
+ xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \
+ len -= 32; \
+ xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16)); \
+ xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16)); \
+ src = (const uint8_t *)src + 32; \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \
+ dst = (uint8_t *)dst + 32; \
+ } \
+ tmp = len; \
+ len = ((len - 16 + offset) & 31) + 16 - offset; \
+ tmp -= len; \
+ src = (const uint8_t *)src + tmp; \
+ dst = (uint8_t *)dst + tmp; \
+ } \
+})
+
+/**
+ * Macro for copying unaligned block from one location to another,
+ * 47 bytes leftover maximum,
+ * locations should not overlap.
+ * Use switch here because the aligning instruction requires immediate value for shift count.
+ * Requirements:
+ * - Store is aligned
+ * - Load offset is <offset>, which must be within [1, 15]
+ * - For <src>, make sure <offset> bit backwards & <16 - offset> bit forwards are available for loading
+ * - <dst>, <src>, <len> must be variables
+ * - __m128i <xmm0> ~ <xmm8> used in MOVEUNALIGNED_LEFT47_IMM must be pre-defined
+ */
+#define MOVEUNALIGNED_LEFT47(dst, src, len, offset) \
+__extension__ ({ \
+ switch (offset) { \
+ case 0x01: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x01); break; \
+ case 0x02: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x02); break; \
+ case 0x03: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x03); break; \
+ case 0x04: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x04); break; \
+ case 0x05: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x05); break; \
+ case 0x06: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x06); break; \
+ case 0x07: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x07); break; \
+ case 0x08: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x08); break; \
+ case 0x09: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x09); break; \
+ case 0x0A: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0A); break; \
+ case 0x0B: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0B); break; \
+ case 0x0C: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0C); break; \
+ case 0x0D: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0D); break; \
+ case 0x0E: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0E); break; \
+ case 0x0F: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0F); break; \
+ default:; \
+ } \
+})
+
+static __rte_always_inline void *
+rte_memcpy_generic(void *dst, const void *src, size_t n)
+{
+ __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
+ uintptr_t dstu = (uintptr_t)dst;
+ uintptr_t srcu = (uintptr_t)src;
+ void *ret = dst;
+ size_t dstofss;
+ size_t srcofs;
+
+ /**
+ * Copy less than 16 bytes
+ */
+ if (n < 16) {
+ if (n & 0x01) {
+ *(uint8_t *)dstu = *(const uint8_t *)srcu;
+ srcu = (uintptr_t)((const uint8_t *)srcu + 1);
+ dstu = (uintptr_t)((uint8_t *)dstu + 1);
+ }
+ if (n & 0x02) {
+ *(uint16_t *)dstu = *(const uint16_t *)srcu;
+ srcu = (uintptr_t)((const uint16_t *)srcu + 1);
+ dstu = (uintptr_t)((uint16_t *)dstu + 1);
+ }
+ if (n & 0x04) {
+ *(uint32_t *)dstu = *(const uint32_t *)srcu;
+ srcu = (uintptr_t)((const uint32_t *)srcu + 1);
+ dstu = (uintptr_t)((uint32_t *)dstu + 1);
+ }
+ if (n & 0x08) {
+ *(uint64_t *)dstu = *(const uint64_t *)srcu;
+ }
+ return ret;
+ }
+
+ /**
+ * Fast way when copy size doesn't exceed 512 bytes
+ */
+ if (n <= 32) {
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+ return ret;
+ }
+ if (n <= 48) {
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+ return ret;
+ }
+ if (n <= 64) {
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst + 32, (const uint8_t *)src + 32);
+ rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+ return ret;
+ }
+ if (n <= 128) {
+ goto COPY_BLOCK_128_BACK15;
+ }
+ if (n <= 512) {
+ if (n >= 256) {
+ n -= 256;
+ rte_mov128((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov128((uint8_t *)dst + 128, (const uint8_t *)src + 128);
+ src = (const uint8_t *)src + 256;
+ dst = (uint8_t *)dst + 256;
+ }
+COPY_BLOCK_255_BACK15:
+ if (n >= 128) {
+ n -= 128;
+ rte_mov128((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + 128;
+ dst = (uint8_t *)dst + 128;
+ }
+COPY_BLOCK_128_BACK15:
+ if (n >= 64) {
+ n -= 64;
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + 64;
+ dst = (uint8_t *)dst + 64;
+ }
+COPY_BLOCK_64_BACK15:
+ if (n >= 32) {
+ n -= 32;
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + 32;
+ dst = (uint8_t *)dst + 32;
+ }
+ if (n > 16) {
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+ return ret;
+ }
+ if (n > 0) {
+ rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+ }
+ return ret;
+ }
+
+ /**
+ * Make store aligned when copy size exceeds 512 bytes,
+ * and make sure the first 15 bytes are copied, because
+ * unaligned copy functions require up to 15 bytes
+ * backwards access.
+ */
+ dstofss = (uintptr_t)dst & 0x0F;
+ if (dstofss > 0) {
+ dstofss = 16 - dstofss + 16;
+ n -= dstofss;
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ src = (const uint8_t *)src + dstofss;
+ dst = (uint8_t *)dst + dstofss;
+ }
+ srcofs = ((uintptr_t)src & 0x0F);
+
+ /**
+ * For aligned copy
+ */
+ if (srcofs == 0) {
+ /**
+ * Copy 256-byte blocks
+ */
+ for (; n >= 256; n -= 256) {
+ rte_mov256((uint8_t *)dst, (const uint8_t *)src);
+ dst = (uint8_t *)dst + 256;
+ src = (const uint8_t *)src + 256;
+ }
+
+ /**
+ * Copy whatever left
+ */
+ goto COPY_BLOCK_255_BACK15;
+ }
+
+ /**
+ * For copy with unaligned load
+ */
+ MOVEUNALIGNED_LEFT47(dst, src, n, srcofs);
+
+ /**
+ * Copy whatever left
+ */
+ goto COPY_BLOCK_64_BACK15;
+}
+
+#endif /* RTE_MACHINE_CPUFLAG */
+
+static __rte_always_inline void *
+rte_memcpy_aligned(void *dst, const void *src, size_t n)
+{
+ void *ret = dst;
+
+ /* Copy size <= 16 bytes */
+ if (n < 16) {
+ if (n & 0x01) {
+ *(uint8_t *)dst = *(const uint8_t *)src;
+ src = (const uint8_t *)src + 1;
+ dst = (uint8_t *)dst + 1;
+ }
+ if (n & 0x02) {
+ *(uint16_t *)dst = *(const uint16_t *)src;
+ src = (const uint16_t *)src + 1;
+ dst = (uint16_t *)dst + 1;
+ }
+ if (n & 0x04) {
+ *(uint32_t *)dst = *(const uint32_t *)src;
+ src = (const uint32_t *)src + 1;
+ dst = (uint32_t *)dst + 1;
+ }
+ if (n & 0x08)
+ *(uint64_t *)dst = *(const uint64_t *)src;
+
+ return ret;
+ }
+
+ /* Copy 16 <= size <= 32 bytes */
+ if (n <= 32) {
+ rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov16((uint8_t *)dst - 16 + n,
+ (const uint8_t *)src - 16 + n);
+
+ return ret;
+ }
+
+ /* Copy 32 < size <= 64 bytes */
+ if (n <= 64) {
+ rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+ rte_mov32((uint8_t *)dst - 32 + n,
+ (const uint8_t *)src - 32 + n);
+
+ return ret;
+ }
+
+ /* Copy 64 bytes blocks */
+ for (; n >= 64; n -= 64) {
+ rte_mov64((uint8_t *)dst, (const uint8_t *)src);
+ dst = (uint8_t *)dst + 64;
+ src = (const uint8_t *)src + 64;
+ }
+
+ /* Copy whatever left */
+ rte_mov64((uint8_t *)dst - 64 + n,
+ (const uint8_t *)src - 64 + n);
+
+ return ret;
+}
+
+static __rte_always_inline void *
+rte_memcpy(void *dst, const void *src, size_t n)
+{
+ if (!(((uintptr_t)dst | (uintptr_t)src) & ALIGNMENT_MASK))
+ return rte_memcpy_aligned(dst, src, n);
+ else
+ return rte_memcpy_generic(dst, src, n);
+}
+
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 100000)
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCPY_X86_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_pause.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_pause.h
new file mode 100644
index 000000000..b4cf1df1d
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_pause.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Cavium, Inc
+ */
+
+#ifndef _RTE_PAUSE_X86_H_
+#define _RTE_PAUSE_X86_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_pause.h"
+
+#include <emmintrin.h>
+static inline void rte_pause(void)
+{
+ _mm_pause();
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PAUSE_X86_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_prefetch.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_prefetch.h
new file mode 100644
index 000000000..384c6b3ef
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_prefetch.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2015 Intel Corporation
+ */
+
+#ifndef _RTE_PREFETCH_X86_64_H_
+#define _RTE_PREFETCH_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+#include "generic/rte_prefetch.h"
+
+static inline void rte_prefetch0(const volatile void *p)
+{
+ asm volatile ("prefetcht0 %[p]" : : [p] "m" (*(const volatile char *)p));
+}
+
+static inline void rte_prefetch1(const volatile void *p)
+{
+ asm volatile ("prefetcht1 %[p]" : : [p] "m" (*(const volatile char *)p));
+}
+
+static inline void rte_prefetch2(const volatile void *p)
+{
+ asm volatile ("prefetcht2 %[p]" : : [p] "m" (*(const volatile char *)p));
+}
+
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+ asm volatile ("prefetchnta %[p]" : : [p] "m" (*(const volatile char *)p));
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PREFETCH_X86_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_rtm.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_rtm.h
new file mode 100644
index 000000000..36bf49846
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_rtm.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2012,2013 Intel Corporation
+ */
+
+#ifndef _RTE_RTM_H_
+#define _RTE_RTM_H_ 1
+
+
+/* Official RTM intrinsics interface matching gcc/icc, but works
+ on older gcc compatible compilers and binutils. */
+
+#include <rte_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#define RTE_XBEGIN_STARTED (~0u)
+#define RTE_XABORT_EXPLICIT (1 << 0)
+#define RTE_XABORT_RETRY (1 << 1)
+#define RTE_XABORT_CONFLICT (1 << 2)
+#define RTE_XABORT_CAPACITY (1 << 3)
+#define RTE_XABORT_DEBUG (1 << 4)
+#define RTE_XABORT_NESTED (1 << 5)
+#define RTE_XABORT_CODE(x) (((x) >> 24) & 0xff)
+
+static __rte_always_inline
+unsigned int rte_xbegin(void)
+{
+ unsigned int ret = RTE_XBEGIN_STARTED;
+
+ asm volatile(".byte 0xc7,0xf8 ; .long 0" : "+a" (ret) :: "memory");
+ return ret;
+}
+
+static __rte_always_inline
+void rte_xend(void)
+{
+ asm volatile(".byte 0x0f,0x01,0xd5" ::: "memory");
+}
+
+/* not an inline function to workaround a clang bug with -O0 */
+#define rte_xabort(status) do { \
+ asm volatile(".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory"); \
+} while (0)
+
+static __rte_always_inline
+int rte_xtest(void)
+{
+ unsigned char out;
+
+ asm volatile(".byte 0x0f,0x01,0xd6 ; setnz %0" :
+ "=r" (out) :: "memory");
+ return out;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RTM_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_rwlock.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_rwlock.h
new file mode 100644
index 000000000..eec4c7123
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_rwlock.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 Intel Corporation
+ */
+
+#ifndef _RTE_RWLOCK_X86_64_H_
+#define _RTE_RWLOCK_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_rwlock.h"
+#include "rte_spinlock.h"
+
+static inline void
+rte_rwlock_read_lock_tm(rte_rwlock_t *rwl)
+{
+ if (likely(rte_try_tm(&rwl->cnt)))
+ return;
+ rte_rwlock_read_lock(rwl);
+}
+
+static inline void
+rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl)
+{
+ if (unlikely(rwl->cnt))
+ rte_rwlock_read_unlock(rwl);
+ else
+ rte_xend();
+}
+
+static inline void
+rte_rwlock_write_lock_tm(rte_rwlock_t *rwl)
+{
+ if (likely(rte_try_tm(&rwl->cnt)))
+ return;
+ rte_rwlock_write_lock(rwl);
+}
+
+static inline void
+rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl)
+{
+ if (unlikely(rwl->cnt))
+ rte_rwlock_write_unlock(rwl);
+ else
+ rte_xend();
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RWLOCK_X86_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_spinlock.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_spinlock.h
new file mode 100644
index 000000000..e2e2b2643
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_spinlock.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_SPINLOCK_X86_64_H_
+#define _RTE_SPINLOCK_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_spinlock.h"
+#include "rte_rtm.h"
+#include "rte_cpuflags.h"
+#include "rte_branch_prediction.h"
+#include "rte_common.h"
+#include "rte_pause.h"
+#include "rte_cycles.h"
+
+#define RTE_RTM_MAX_RETRIES (20)
+#define RTE_XABORT_LOCK_BUSY (0xff)
+
+#ifndef RTE_FORCE_INTRINSICS
+static inline void
+rte_spinlock_lock(rte_spinlock_t *sl)
+{
+ int lock_val = 1;
+ asm volatile (
+ "1:\n"
+ "xchg %[locked], %[lv]\n"
+ "test %[lv], %[lv]\n"
+ "jz 3f\n"
+ "2:\n"
+ "pause\n"
+ "cmpl $0, %[locked]\n"
+ "jnz 2b\n"
+ "jmp 1b\n"
+ "3:\n"
+ : [locked] "=m" (sl->locked), [lv] "=q" (lock_val)
+ : "[lv]" (lock_val)
+ : "memory");
+}
+
+static inline void
+rte_spinlock_unlock (rte_spinlock_t *sl)
+{
+ int unlock_val = 0;
+ asm volatile (
+ "xchg %[locked], %[ulv]\n"
+ : [locked] "=m" (sl->locked), [ulv] "=q" (unlock_val)
+ : "[ulv]" (unlock_val)
+ : "memory");
+}
+
+static inline int
+rte_spinlock_trylock (rte_spinlock_t *sl)
+{
+ int lockval = 1;
+
+ asm volatile (
+ "xchg %[locked], %[lockval]"
+ : [locked] "=m" (sl->locked), [lockval] "=q" (lockval)
+ : "[lockval]" (lockval)
+ : "memory");
+
+ return lockval == 0;
+}
+#endif
+
+extern uint8_t rte_rtm_supported;
+
+static inline int rte_tm_supported(void)
+{
+ return rte_rtm_supported;
+}
+
+static inline int
+rte_try_tm(volatile int *lock)
+{
+ int i, retries;
+
+ if (!rte_rtm_supported)
+ return 0;
+
+ retries = RTE_RTM_MAX_RETRIES;
+
+ while (likely(retries--)) {
+
+ unsigned int status = rte_xbegin();
+
+ if (likely(RTE_XBEGIN_STARTED == status)) {
+ if (unlikely(*lock))
+ rte_xabort(RTE_XABORT_LOCK_BUSY);
+ else
+ return 1;
+ }
+ while (*lock)
+ rte_pause();
+
+ if ((status & RTE_XABORT_CONFLICT) ||
+ ((status & RTE_XABORT_EXPLICIT) &&
+ (RTE_XABORT_CODE(status) == RTE_XABORT_LOCK_BUSY))) {
+ /* add a small delay before retrying, basing the
+ * delay on the number of times we've already tried,
+ * to give a back-off type of behaviour. We
+ * randomize trycount by taking bits from the tsc count
+ */
+ int try_count = RTE_RTM_MAX_RETRIES - retries;
+ int pause_count = (rte_rdtsc() & 0x7) | 1;
+ pause_count <<= try_count;
+ for (i = 0; i < pause_count; i++)
+ rte_pause();
+ continue;
+ }
+
+ if ((status & RTE_XABORT_RETRY) == 0) /* do not retry */
+ break;
+ }
+ return 0;
+}
+
+static inline void
+rte_spinlock_lock_tm(rte_spinlock_t *sl)
+{
+ if (likely(rte_try_tm(&sl->locked)))
+ return;
+
+ rte_spinlock_lock(sl); /* fall-back */
+}
+
+static inline int
+rte_spinlock_trylock_tm(rte_spinlock_t *sl)
+{
+ if (likely(rte_try_tm(&sl->locked)))
+ return 1;
+
+ return rte_spinlock_trylock(sl);
+}
+
+static inline void
+rte_spinlock_unlock_tm(rte_spinlock_t *sl)
+{
+ if (unlikely(sl->locked))
+ rte_spinlock_unlock(sl);
+ else
+ rte_xend();
+}
+
+static inline void
+rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr)
+{
+ if (likely(rte_try_tm(&slr->sl.locked)))
+ return;
+
+ rte_spinlock_recursive_lock(slr); /* fall-back */
+}
+
+static inline void
+rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr)
+{
+ if (unlikely(slr->sl.locked))
+ rte_spinlock_recursive_unlock(slr);
+ else
+ rte_xend();
+}
+
+static inline int
+rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr)
+{
+ if (likely(rte_try_tm(&slr->sl.locked)))
+ return 1;
+
+ return rte_spinlock_recursive_trylock(slr);
+}
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_SPINLOCK_X86_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_ticketlock.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_ticketlock.h
new file mode 100644
index 000000000..0cc01f6b9
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_ticketlock.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_TICKETLOCK_X86_64_H_
+#define _RTE_TICKETLOCK_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_ticketlock.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_TICKETLOCK_X86_64_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/include/rte_vect.h b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_vect.h
new file mode 100644
index 000000000..df5a60762
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/include/rte_vect.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2015 Intel Corporation
+ */
+
+#ifndef _RTE_VECT_X86_H_
+#define _RTE_VECT_X86_H_
+
+/**
+ * @file
+ *
+ * RTE SSE/AVX related header.
+ */
+
+#include <stdint.h>
+#include <rte_config.h>
+#include "generic/rte_vect.h"
+
+#if (defined(__ICC) || \
+ (defined(_WIN64)) || \
+ (__GNUC__ == 4 && __GNUC_MINOR__ < 4))
+
+#include <smmintrin.h> /* SSE4 */
+
+#if defined(__AVX__)
+#include <immintrin.h>
+#endif
+
+#else
+
+#include <x86intrin.h>
+
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef __m128i xmm_t;
+
+#define XMM_SIZE (sizeof(xmm_t))
+#define XMM_MASK (XMM_SIZE - 1)
+
+typedef union rte_xmm {
+ xmm_t x;
+ uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
+ uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
+ uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
+ uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
+ double pd[XMM_SIZE / sizeof(double)];
+} rte_xmm_t;
+
+#ifdef __AVX__
+
+typedef __m256i ymm_t;
+
+#define YMM_SIZE (sizeof(ymm_t))
+#define YMM_MASK (YMM_SIZE - 1)
+
+typedef union rte_ymm {
+ ymm_t y;
+ xmm_t x[YMM_SIZE / sizeof(xmm_t)];
+ uint8_t u8[YMM_SIZE / sizeof(uint8_t)];
+ uint16_t u16[YMM_SIZE / sizeof(uint16_t)];
+ uint32_t u32[YMM_SIZE / sizeof(uint32_t)];
+ uint64_t u64[YMM_SIZE / sizeof(uint64_t)];
+ double pd[YMM_SIZE / sizeof(double)];
+} rte_ymm_t;
+
+#endif /* __AVX__ */
+
+#ifdef RTE_ARCH_I686
+#define _mm_cvtsi128_si64(a) \
+__extension__ ({ \
+ rte_xmm_t m; \
+ m.x = (a); \
+ (m.u64[0]); \
+})
+#endif
+
+/*
+ * Prior to version 12.1 icc doesn't support _mm_set_epi64x.
+ */
+#if (defined(__ICC) && __ICC < 1210)
+#define _mm_set_epi64x(a, b) \
+__extension__ ({ \
+ rte_xmm_t m; \
+ m.u64[0] = b; \
+ m.u64[1] = a; \
+ (m.x); \
+})
+#endif /* (defined(__ICC) && __ICC < 1210) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_VECT_X86_H_ */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/meson.build b/src/spdk/dpdk/lib/librte_eal/x86/meson.build
new file mode 100644
index 000000000..e78f29002
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/meson.build
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+
+subdir('include')
+
+sources += files(
+ 'rte_cpuflags.c',
+ 'rte_cycles.c',
+ 'rte_hypervisor.c',
+ 'rte_spinlock.c',
+)
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/rte_cpuflags.c b/src/spdk/dpdk/lib/librte_eal/x86/rte_cpuflags.c
new file mode 100644
index 000000000..30439e795
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/rte_cpuflags.c
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2015 Intel Corporation
+ */
+
+#include "rte_cpuflags.h"
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdint.h>
+
+#include "rte_cpuid.h"
+
+/**
+ * Struct to hold a processor feature entry
+ */
+struct feature_entry {
+ uint32_t leaf; /**< cpuid leaf */
+ uint32_t subleaf; /**< cpuid subleaf */
+ uint32_t reg; /**< cpuid register */
+ uint32_t bit; /**< cpuid register bit */
+#define CPU_FLAG_NAME_MAX_LEN 64
+ char name[CPU_FLAG_NAME_MAX_LEN]; /**< String for printing */
+};
+
+#define FEAT_DEF(name, leaf, subleaf, reg, bit) \
+ [RTE_CPUFLAG_##name] = {leaf, subleaf, reg, bit, #name },
+
+const struct feature_entry rte_cpu_feature_table[] = {
+ FEAT_DEF(SSE3, 0x00000001, 0, RTE_REG_ECX, 0)
+ FEAT_DEF(PCLMULQDQ, 0x00000001, 0, RTE_REG_ECX, 1)
+ FEAT_DEF(DTES64, 0x00000001, 0, RTE_REG_ECX, 2)
+ FEAT_DEF(MONITOR, 0x00000001, 0, RTE_REG_ECX, 3)
+ FEAT_DEF(DS_CPL, 0x00000001, 0, RTE_REG_ECX, 4)
+ FEAT_DEF(VMX, 0x00000001, 0, RTE_REG_ECX, 5)
+ FEAT_DEF(SMX, 0x00000001, 0, RTE_REG_ECX, 6)
+ FEAT_DEF(EIST, 0x00000001, 0, RTE_REG_ECX, 7)
+ FEAT_DEF(TM2, 0x00000001, 0, RTE_REG_ECX, 8)
+ FEAT_DEF(SSSE3, 0x00000001, 0, RTE_REG_ECX, 9)
+ FEAT_DEF(CNXT_ID, 0x00000001, 0, RTE_REG_ECX, 10)
+ FEAT_DEF(FMA, 0x00000001, 0, RTE_REG_ECX, 12)
+ FEAT_DEF(CMPXCHG16B, 0x00000001, 0, RTE_REG_ECX, 13)
+ FEAT_DEF(XTPR, 0x00000001, 0, RTE_REG_ECX, 14)
+ FEAT_DEF(PDCM, 0x00000001, 0, RTE_REG_ECX, 15)
+ FEAT_DEF(PCID, 0x00000001, 0, RTE_REG_ECX, 17)
+ FEAT_DEF(DCA, 0x00000001, 0, RTE_REG_ECX, 18)
+ FEAT_DEF(SSE4_1, 0x00000001, 0, RTE_REG_ECX, 19)
+ FEAT_DEF(SSE4_2, 0x00000001, 0, RTE_REG_ECX, 20)
+ FEAT_DEF(X2APIC, 0x00000001, 0, RTE_REG_ECX, 21)
+ FEAT_DEF(MOVBE, 0x00000001, 0, RTE_REG_ECX, 22)
+ FEAT_DEF(POPCNT, 0x00000001, 0, RTE_REG_ECX, 23)
+ FEAT_DEF(TSC_DEADLINE, 0x00000001, 0, RTE_REG_ECX, 24)
+ FEAT_DEF(AES, 0x00000001, 0, RTE_REG_ECX, 25)
+ FEAT_DEF(XSAVE, 0x00000001, 0, RTE_REG_ECX, 26)
+ FEAT_DEF(OSXSAVE, 0x00000001, 0, RTE_REG_ECX, 27)
+ FEAT_DEF(AVX, 0x00000001, 0, RTE_REG_ECX, 28)
+ FEAT_DEF(F16C, 0x00000001, 0, RTE_REG_ECX, 29)
+ FEAT_DEF(RDRAND, 0x00000001, 0, RTE_REG_ECX, 30)
+ FEAT_DEF(HYPERVISOR, 0x00000001, 0, RTE_REG_ECX, 31)
+
+ FEAT_DEF(FPU, 0x00000001, 0, RTE_REG_EDX, 0)
+ FEAT_DEF(VME, 0x00000001, 0, RTE_REG_EDX, 1)
+ FEAT_DEF(DE, 0x00000001, 0, RTE_REG_EDX, 2)
+ FEAT_DEF(PSE, 0x00000001, 0, RTE_REG_EDX, 3)
+ FEAT_DEF(TSC, 0x00000001, 0, RTE_REG_EDX, 4)
+ FEAT_DEF(MSR, 0x00000001, 0, RTE_REG_EDX, 5)
+ FEAT_DEF(PAE, 0x00000001, 0, RTE_REG_EDX, 6)
+ FEAT_DEF(MCE, 0x00000001, 0, RTE_REG_EDX, 7)
+ FEAT_DEF(CX8, 0x00000001, 0, RTE_REG_EDX, 8)
+ FEAT_DEF(APIC, 0x00000001, 0, RTE_REG_EDX, 9)
+ FEAT_DEF(SEP, 0x00000001, 0, RTE_REG_EDX, 11)
+ FEAT_DEF(MTRR, 0x00000001, 0, RTE_REG_EDX, 12)
+ FEAT_DEF(PGE, 0x00000001, 0, RTE_REG_EDX, 13)
+ FEAT_DEF(MCA, 0x00000001, 0, RTE_REG_EDX, 14)
+ FEAT_DEF(CMOV, 0x00000001, 0, RTE_REG_EDX, 15)
+ FEAT_DEF(PAT, 0x00000001, 0, RTE_REG_EDX, 16)
+ FEAT_DEF(PSE36, 0x00000001, 0, RTE_REG_EDX, 17)
+ FEAT_DEF(PSN, 0x00000001, 0, RTE_REG_EDX, 18)
+ FEAT_DEF(CLFSH, 0x00000001, 0, RTE_REG_EDX, 19)
+ FEAT_DEF(DS, 0x00000001, 0, RTE_REG_EDX, 21)
+ FEAT_DEF(ACPI, 0x00000001, 0, RTE_REG_EDX, 22)
+ FEAT_DEF(MMX, 0x00000001, 0, RTE_REG_EDX, 23)
+ FEAT_DEF(FXSR, 0x00000001, 0, RTE_REG_EDX, 24)
+ FEAT_DEF(SSE, 0x00000001, 0, RTE_REG_EDX, 25)
+ FEAT_DEF(SSE2, 0x00000001, 0, RTE_REG_EDX, 26)
+ FEAT_DEF(SS, 0x00000001, 0, RTE_REG_EDX, 27)
+ FEAT_DEF(HTT, 0x00000001, 0, RTE_REG_EDX, 28)
+ FEAT_DEF(TM, 0x00000001, 0, RTE_REG_EDX, 29)
+ FEAT_DEF(PBE, 0x00000001, 0, RTE_REG_EDX, 31)
+
+ FEAT_DEF(DIGTEMP, 0x00000006, 0, RTE_REG_EAX, 0)
+ FEAT_DEF(TRBOBST, 0x00000006, 0, RTE_REG_EAX, 1)
+ FEAT_DEF(ARAT, 0x00000006, 0, RTE_REG_EAX, 2)
+ FEAT_DEF(PLN, 0x00000006, 0, RTE_REG_EAX, 4)
+ FEAT_DEF(ECMD, 0x00000006, 0, RTE_REG_EAX, 5)
+ FEAT_DEF(PTM, 0x00000006, 0, RTE_REG_EAX, 6)
+
+ FEAT_DEF(MPERF_APERF_MSR, 0x00000006, 0, RTE_REG_ECX, 0)
+ FEAT_DEF(ACNT2, 0x00000006, 0, RTE_REG_ECX, 1)
+ FEAT_DEF(ENERGY_EFF, 0x00000006, 0, RTE_REG_ECX, 3)
+
+ FEAT_DEF(FSGSBASE, 0x00000007, 0, RTE_REG_EBX, 0)
+ FEAT_DEF(BMI1, 0x00000007, 0, RTE_REG_EBX, 2)
+ FEAT_DEF(HLE, 0x00000007, 0, RTE_REG_EBX, 4)
+ FEAT_DEF(AVX2, 0x00000007, 0, RTE_REG_EBX, 5)
+ FEAT_DEF(SMEP, 0x00000007, 0, RTE_REG_EBX, 6)
+ FEAT_DEF(BMI2, 0x00000007, 0, RTE_REG_EBX, 7)
+ FEAT_DEF(ERMS, 0x00000007, 0, RTE_REG_EBX, 8)
+ FEAT_DEF(INVPCID, 0x00000007, 0, RTE_REG_EBX, 10)
+ FEAT_DEF(RTM, 0x00000007, 0, RTE_REG_EBX, 11)
+ FEAT_DEF(AVX512F, 0x00000007, 0, RTE_REG_EBX, 16)
+ FEAT_DEF(RDSEED, 0x00000007, 0, RTE_REG_EBX, 18)
+
+ FEAT_DEF(LAHF_SAHF, 0x80000001, 0, RTE_REG_ECX, 0)
+ FEAT_DEF(LZCNT, 0x80000001, 0, RTE_REG_ECX, 4)
+
+ FEAT_DEF(SYSCALL, 0x80000001, 0, RTE_REG_EDX, 11)
+ FEAT_DEF(XD, 0x80000001, 0, RTE_REG_EDX, 20)
+ FEAT_DEF(1GB_PG, 0x80000001, 0, RTE_REG_EDX, 26)
+ FEAT_DEF(RDTSCP, 0x80000001, 0, RTE_REG_EDX, 27)
+ FEAT_DEF(EM64T, 0x80000001, 0, RTE_REG_EDX, 29)
+
+ FEAT_DEF(INVTSC, 0x80000007, 0, RTE_REG_EDX, 8)
+
+ FEAT_DEF(AVX512DQ, 0x00000007, 0, RTE_REG_EBX, 17)
+ FEAT_DEF(AVX512IFMA, 0x00000007, 0, RTE_REG_EBX, 21)
+ FEAT_DEF(AVX512CD, 0x00000007, 0, RTE_REG_EBX, 28)
+ FEAT_DEF(AVX512BW, 0x00000007, 0, RTE_REG_EBX, 30)
+ FEAT_DEF(AVX512VL, 0x00000007, 0, RTE_REG_EBX, 31)
+ FEAT_DEF(AVX512VBMI, 0x00000007, 0, RTE_REG_ECX, 1)
+ FEAT_DEF(AVX512VBMI2, 0x00000007, 0, RTE_REG_ECX, 6)
+ FEAT_DEF(GFNI, 0x00000007, 0, RTE_REG_ECX, 8)
+ FEAT_DEF(VAES, 0x00000007, 0, RTE_REG_ECX, 9)
+ FEAT_DEF(VPCLMULQDQ, 0x00000007, 0, RTE_REG_ECX, 10)
+ FEAT_DEF(AVX512VNNI, 0x00000007, 0, RTE_REG_ECX, 11)
+ FEAT_DEF(AVX512BITALG, 0x00000007, 0, RTE_REG_ECX, 12)
+ FEAT_DEF(AVX512VPOPCNTDQ, 0x00000007, 0, RTE_REG_ECX, 14)
+ FEAT_DEF(CLDEMOTE, 0x00000007, 0, RTE_REG_ECX, 25)
+ FEAT_DEF(MOVDIRI, 0x00000007, 0, RTE_REG_ECX, 27)
+ FEAT_DEF(MOVDIR64B, 0x00000007, 0, RTE_REG_ECX, 28)
+ FEAT_DEF(AVX512VP2INTERSECT, 0x00000007, 0, RTE_REG_EDX, 8)
+};
+
+int
+rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
+{
+ const struct feature_entry *feat;
+ cpuid_registers_t regs;
+ unsigned int maxleaf;
+
+ if (feature >= RTE_CPUFLAG_NUMFLAGS)
+ /* Flag does not match anything in the feature tables */
+ return -ENOENT;
+
+ feat = &rte_cpu_feature_table[feature];
+
+ if (!feat->leaf)
+ /* This entry in the table wasn't filled out! */
+ return -EFAULT;
+
+ maxleaf = __get_cpuid_max(feat->leaf & 0x80000000, NULL);
+
+ if (maxleaf < feat->leaf)
+ return 0;
+
+ __cpuid_count(feat->leaf, feat->subleaf,
+ regs[RTE_REG_EAX], regs[RTE_REG_EBX],
+ regs[RTE_REG_ECX], regs[RTE_REG_EDX]);
+
+ /* check if the feature is enabled */
+ return (regs[feat->reg] >> feat->bit) & 1;
+}
+
+const char *
+rte_cpu_get_flag_name(enum rte_cpu_flag_t feature)
+{
+ if (feature >= RTE_CPUFLAG_NUMFLAGS)
+ return NULL;
+ return rte_cpu_feature_table[feature].name;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/rte_cpuid.h b/src/spdk/dpdk/lib/librte_eal/x86/rte_cpuid.h
new file mode 100644
index 000000000..b773ad931
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/rte_cpuid.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2015 Intel Corporation
+ */
+
+#ifndef RTE_CPUID_H
+#define RTE_CPUID_H
+
+#include <cpuid.h>
+
+enum cpu_register_t {
+ RTE_REG_EAX = 0,
+ RTE_REG_EBX,
+ RTE_REG_ECX,
+ RTE_REG_EDX,
+};
+
+typedef uint32_t cpuid_registers_t[4];
+
+#endif /* RTE_CPUID_H */
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/rte_cycles.c b/src/spdk/dpdk/lib/librte_eal/x86/rte_cycles.c
new file mode 100644
index 000000000..edd9621ab
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/rte_cycles.c
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Intel Corporation
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <cpuid.h>
+
+#include <rte_common.h>
+
+#include "eal_private.h"
+
+static unsigned int
+rte_cpu_get_model(uint32_t fam_mod_step)
+{
+ uint32_t family, model, ext_model;
+
+ family = (fam_mod_step >> 8) & 0xf;
+ model = (fam_mod_step >> 4) & 0xf;
+
+ if (family == 6 || family == 15) {
+ ext_model = (fam_mod_step >> 16) & 0xf;
+ model += (ext_model << 4);
+ }
+
+ return model;
+}
+
+static int32_t
+rdmsr(int msr, uint64_t *val)
+{
+#ifdef RTE_EXEC_ENV_LINUX
+ int fd;
+ int ret;
+
+ fd = open("/dev/cpu/0/msr", O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ ret = pread(fd, val, sizeof(uint64_t), msr);
+
+ close(fd);
+
+ return ret;
+#else
+ RTE_SET_USED(msr);
+ RTE_SET_USED(val);
+
+ return -1;
+#endif
+}
+
+static uint32_t
+check_model_wsm_nhm(uint8_t model)
+{
+ switch (model) {
+ /* Westmere */
+ case 0x25:
+ case 0x2C:
+ case 0x2F:
+ /* Nehalem */
+ case 0x1E:
+ case 0x1F:
+ case 0x1A:
+ case 0x2E:
+ return 1;
+ }
+
+ return 0;
+}
+
+static uint32_t
+check_model_gdm_dnv(uint8_t model)
+{
+ switch (model) {
+ /* Goldmont */
+ case 0x5C:
+ /* Denverton */
+ case 0x5F:
+ return 1;
+ }
+
+ return 0;
+}
+
+uint64_t
+get_tsc_freq_arch(void)
+{
+ uint64_t tsc_hz = 0;
+ uint32_t a, b, c, d, maxleaf;
+ uint8_t mult, model;
+ int32_t ret;
+
+ /*
+ * Time Stamp Counter and Nominal Core Crystal Clock
+ * Information Leaf
+ */
+ maxleaf = __get_cpuid_max(0, NULL);
+
+ if (maxleaf >= 0x15) {
+ __cpuid(0x15, a, b, c, d);
+
+ /* EBX : TSC/Crystal ratio, ECX : Crystal Hz */
+ if (b && c)
+ return c * (b / a);
+ }
+
+ __cpuid(0x1, a, b, c, d);
+ model = rte_cpu_get_model(a);
+
+ if (check_model_wsm_nhm(model))
+ mult = 133;
+ else if ((c & bit_AVX) || check_model_gdm_dnv(model))
+ mult = 100;
+ else
+ return 0;
+
+ ret = rdmsr(0xCE, &tsc_hz);
+ if (ret < 0)
+ return 0;
+
+ return ((tsc_hz >> 8) & 0xff) * mult * 1E6;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/rte_hypervisor.c b/src/spdk/dpdk/lib/librte_eal/x86/rte_hypervisor.c
new file mode 100644
index 000000000..c38cfc09d
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/rte_hypervisor.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 Mellanox Technologies, Ltd
+ */
+
+#include "rte_hypervisor.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include "rte_cpuflags.h"
+#include "rte_cpuid.h"
+
+/* See http://lwn.net/Articles/301888/ */
+#define HYPERVISOR_INFO_LEAF 0x40000000
+
+enum rte_hypervisor
+rte_hypervisor_get(void)
+{
+ cpuid_registers_t regs;
+ int reg;
+ char name[13];
+
+ if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_HYPERVISOR))
+ return RTE_HYPERVISOR_NONE;
+
+ __cpuid(HYPERVISOR_INFO_LEAF,
+ regs[RTE_REG_EAX], regs[RTE_REG_EBX],
+ regs[RTE_REG_ECX], regs[RTE_REG_EDX]);
+ for (reg = 1; reg < 4; reg++)
+ memcpy(name + (reg - 1) * 4, &regs[reg], 4);
+ name[12] = '\0';
+
+ if (strcmp("KVMKVMKVM", name) == 0)
+ return RTE_HYPERVISOR_KVM;
+ if (strcmp("Microsoft Hv", name) == 0)
+ return RTE_HYPERVISOR_HYPERV;
+ if (strcmp("VMwareVMware", name) == 0)
+ return RTE_HYPERVISOR_VMWARE;
+ return RTE_HYPERVISOR_UNKNOWN;
+}
diff --git a/src/spdk/dpdk/lib/librte_eal/x86/rte_spinlock.c b/src/spdk/dpdk/lib/librte_eal/x86/rte_spinlock.c
new file mode 100644
index 000000000..34890ea87
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_eal/x86/rte_spinlock.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdint.h>
+
+#include "rte_cpuflags.h"
+
+uint8_t rte_rtm_supported; /* cache the flag to avoid the overhead
+ of the rte_cpu_get_flag_enabled function */
+
+RTE_INIT(rte_rtm_init)
+{
+ rte_rtm_supported = rte_cpu_get_flag_enabled(RTE_CPUFLAG_RTM);
+}